{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.995334370139969, "eval_steps": 500, "global_step": 19260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4140000000000001e-06, "logits/chosen": -2.4202768802642822, "logits/rejected": -3.126551389694214, "logps/chosen": -260.56768798828125, "logps/rejected": -361.36322021484375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.8280000000000003e-06, "logits/chosen": -2.8711280822753906, "logits/rejected": -2.7205255031585693, "logps/chosen": -259.7323913574219, "logps/rejected": -394.8915710449219, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.242e-06, "logits/chosen": -2.0405263900756836, "logits/rejected": -3.4492268562316895, "logps/chosen": -122.54353332519531, "logps/rejected": -291.80157470703125, "loss": 0.5677, "rewards/accuracies": 1.0, "rewards/chosen": 0.035251617431640625, "rewards/margins": 0.2739463746547699, "rewards/rejected": -0.23869477212429047, "step": 3 }, { "epoch": 0.0, "learning_rate": 5.6560000000000006e-06, "logits/chosen": -2.6915621757507324, "logits/rejected": -3.4167795181274414, "logps/chosen": -84.04771423339844, "logps/rejected": -238.65550231933594, "loss": 0.697, "rewards/accuracies": 0.5, "rewards/chosen": -0.0302263256162405, "rewards/margins": -0.007414435967803001, "rewards/rejected": -0.0228118896484375, "step": 4 }, { "epoch": 0.0, "learning_rate": 7.07e-06, "logits/chosen": -2.238853931427002, "logits/rejected": -3.143308401107788, "logps/chosen": -119.9222183227539, "logps/rejected": -292.68621826171875, "loss": 0.657, "rewards/accuracies": 0.5, "rewards/chosen": 0.08489532768726349, "rewards/margins": 0.07516937702894211, "rewards/rejected": 0.00972595065832138, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.484e-06, "logits/chosen": -1.3456599712371826, "logits/rejected": -3.3603355884552, "logps/chosen": -27.96832275390625, "logps/rejected": -306.5863342285156, "loss": 0.7124, "rewards/accuracies": 0.5, "rewards/chosen": 0.008449267596006393, "rewards/margins": -0.030539996922016144, "rewards/rejected": 0.03898926079273224, "step": 6 }, { "epoch": 0.0, "learning_rate": 9.897999999999999e-06, "logits/chosen": -3.068819284439087, "logits/rejected": -3.2806434631347656, "logps/chosen": -453.6174621582031, "logps/rejected": -434.95880126953125, "loss": 0.8332, "rewards/accuracies": 0.0, "rewards/chosen": -0.16950340569019318, "rewards/margins": -0.2521854341030121, "rewards/rejected": 0.0826820358633995, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.1312000000000001e-05, "logits/chosen": -1.8165857791900635, "logits/rejected": -3.183947801589966, "logps/chosen": -72.44416809082031, "logps/rejected": -286.23101806640625, "loss": 0.5938, "rewards/accuracies": 1.0, "rewards/chosen": 0.10988616943359375, "rewards/margins": 0.2110435515642166, "rewards/rejected": -0.10115738213062286, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.2726e-05, "logits/chosen": -2.0957655906677246, "logits/rejected": -2.968160390853882, "logps/chosen": -447.4916076660156, "logps/rejected": -318.071533203125, "loss": 0.5199, "rewards/accuracies": 1.0, "rewards/chosen": 0.16742859780788422, "rewards/margins": 0.4336853325366974, "rewards/rejected": -0.266256719827652, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.414e-05, "logits/chosen": -2.533325672149658, "logits/rejected": -2.9467380046844482, "logps/chosen": -547.57958984375, "logps/rejected": -378.0293884277344, "loss": 0.5823, "rewards/accuracies": 1.0, "rewards/chosen": -0.09080658107995987, "rewards/margins": 0.24430465698242188, "rewards/rejected": -0.33511123061180115, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.4139266559468852e-05, "logits/chosen": -2.966395616531372, "logits/rejected": -3.264249563217163, "logps/chosen": -131.68482971191406, "logps/rejected": -132.3790740966797, "loss": 0.6202, "rewards/accuracies": 1.0, "rewards/chosen": -0.113805390894413, "rewards/margins": 0.152140811085701, "rewards/rejected": -0.2659462094306946, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.4138533118937704e-05, "logits/chosen": -2.3734185695648193, "logits/rejected": -3.13988995552063, "logps/chosen": -220.59910583496094, "logps/rejected": -283.2532958984375, "loss": 0.5707, "rewards/accuracies": 1.0, "rewards/chosen": -0.03280029445886612, "rewards/margins": 0.26441726088523865, "rewards/rejected": -0.29721754789352417, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.4137799678406556e-05, "logits/chosen": -2.756643533706665, "logits/rejected": -2.5610287189483643, "logps/chosen": -182.99818420410156, "logps/rejected": -156.63462829589844, "loss": 0.5594, "rewards/accuracies": 1.0, "rewards/chosen": -0.043285369873046875, "rewards/margins": 0.2959403991699219, "rewards/rejected": -0.33922576904296875, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.4137066237875408e-05, "logits/chosen": -2.4434421062469482, "logits/rejected": -3.361372470855713, "logps/chosen": -479.01458740234375, "logps/rejected": -498.15301513671875, "loss": 0.268, "rewards/accuracies": 1.0, "rewards/chosen": -0.23105773329734802, "rewards/margins": 1.2136032581329346, "rewards/rejected": -1.444661021232605, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.4136332797344261e-05, "logits/chosen": -3.121711492538452, "logits/rejected": -3.1127209663391113, "logps/chosen": -361.2501525878906, "logps/rejected": -183.83685302734375, "loss": 0.5223, "rewards/accuracies": 0.5, "rewards/chosen": -0.11655426025390625, "rewards/margins": 0.5393242239952087, "rewards/rejected": -0.655878484249115, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.4135599356813113e-05, "logits/chosen": -3.143578290939331, "logits/rejected": -3.1020920276641846, "logps/chosen": -133.34927368164062, "logps/rejected": -335.4996032714844, "loss": 0.3047, "rewards/accuracies": 1.0, "rewards/chosen": 0.06296158581972122, "rewards/margins": 1.1112480163574219, "rewards/rejected": -1.0482864379882812, "step": 16 }, { "epoch": 0.0, "learning_rate": 1.4134865916281965e-05, "logits/chosen": -3.029557466506958, "logits/rejected": -2.4934542179107666, "logps/chosen": -197.2841796875, "logps/rejected": -244.9111328125, "loss": 0.5087, "rewards/accuracies": 1.0, "rewards/chosen": -0.2200489044189453, "rewards/margins": 0.46376609802246094, "rewards/rejected": -0.6838150024414062, "step": 17 }, { "epoch": 0.0, "learning_rate": 1.4134132475750817e-05, "logits/chosen": -2.0106823444366455, "logits/rejected": -2.613617420196533, "logps/chosen": -143.53033447265625, "logps/rejected": -228.4197235107422, "loss": 0.4073, "rewards/accuracies": 1.0, "rewards/chosen": -0.24226073920726776, "rewards/margins": 0.7691009640693665, "rewards/rejected": -1.0113617181777954, "step": 18 }, { "epoch": 0.0, "learning_rate": 1.4133399035219669e-05, "logits/chosen": -2.9165804386138916, "logits/rejected": -2.8562114238739014, "logps/chosen": -186.90171813964844, "logps/rejected": -240.08616638183594, "loss": 0.3011, "rewards/accuracies": 1.0, "rewards/chosen": -0.04158630222082138, "rewards/margins": 1.0724579095840454, "rewards/rejected": -1.114044189453125, "step": 19 }, { "epoch": 0.0, "learning_rate": 1.413266559468852e-05, "logits/chosen": -3.302778482437134, "logits/rejected": -3.140728712081909, "logps/chosen": -61.504085540771484, "logps/rejected": -68.91488647460938, "loss": 0.6314, "rewards/accuracies": 0.5, "rewards/chosen": -0.011324595659971237, "rewards/margins": 0.13862237334251404, "rewards/rejected": -0.14994697272777557, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.4131932154157372e-05, "logits/chosen": -2.936633825302124, "logits/rejected": -3.169842004776001, "logps/chosen": -26.17203140258789, "logps/rejected": -160.30838012695312, "loss": 0.3308, "rewards/accuracies": 1.0, "rewards/chosen": -0.03120613284409046, "rewards/margins": 0.9863144159317017, "rewards/rejected": -1.017520546913147, "step": 21 }, { "epoch": 0.0, "learning_rate": 1.4131198713626224e-05, "logits/chosen": -2.2868919372558594, "logits/rejected": -2.9662461280822754, "logps/chosen": -247.630859375, "logps/rejected": -356.844482421875, "loss": 0.2399, "rewards/accuracies": 1.0, "rewards/chosen": -0.06923599541187286, "rewards/margins": 1.4882760047912598, "rewards/rejected": -1.5575119256973267, "step": 22 }, { "epoch": 0.0, "learning_rate": 1.4130465273095076e-05, "logits/chosen": -2.577047109603882, "logits/rejected": -3.195272922515869, "logps/chosen": -273.51263427734375, "logps/rejected": -328.28363037109375, "loss": 0.1857, "rewards/accuracies": 1.0, "rewards/chosen": 0.1862677037715912, "rewards/margins": 1.6283491849899292, "rewards/rejected": -1.4420814514160156, "step": 23 }, { "epoch": 0.0, "learning_rate": 1.412973183256393e-05, "logits/chosen": -3.3827428817749023, "logits/rejected": -3.128340482711792, "logps/chosen": -140.3740234375, "logps/rejected": -106.04458618164062, "loss": 0.8788, "rewards/accuracies": 0.5, "rewards/chosen": -0.6234138011932373, "rewards/margins": -0.22179672122001648, "rewards/rejected": -0.40161705017089844, "step": 24 }, { "epoch": 0.0, "learning_rate": 1.4128998392032782e-05, "logits/chosen": -1.7919650077819824, "logits/rejected": -2.9883224964141846, "logps/chosen": -122.33035278320312, "logps/rejected": -369.18243408203125, "loss": 0.3091, "rewards/accuracies": 1.0, "rewards/chosen": -0.007749367505311966, "rewards/margins": 1.855327844619751, "rewards/rejected": -1.863077163696289, "step": 25 }, { "epoch": 0.0, "learning_rate": 1.4128264951501634e-05, "logits/chosen": -3.317362070083618, "logits/rejected": -3.2223849296569824, "logps/chosen": -381.4422302246094, "logps/rejected": -314.4141845703125, "loss": 0.7788, "rewards/accuracies": 0.5, "rewards/chosen": -1.0278594493865967, "rewards/margins": 0.7416278123855591, "rewards/rejected": -1.7694873809814453, "step": 26 }, { "epoch": 0.0, "learning_rate": 1.4127531510970485e-05, "logits/chosen": -2.8917009830474854, "logits/rejected": -2.168525218963623, "logps/chosen": -459.10986328125, "logps/rejected": -260.7486877441406, "loss": 0.593, "rewards/accuracies": 0.5, "rewards/chosen": -0.6055648922920227, "rewards/margins": 0.6729962825775146, "rewards/rejected": -1.2785612344741821, "step": 27 }, { "epoch": 0.0, "learning_rate": 1.4126798070439339e-05, "logits/chosen": -2.8958985805511475, "logits/rejected": -1.6051552295684814, "logps/chosen": -164.15692138671875, "logps/rejected": -95.88513946533203, "loss": 1.5395, "rewards/accuracies": 0.5, "rewards/chosen": -1.3574696779251099, "rewards/margins": -1.0119473934173584, "rewards/rejected": -0.34552231431007385, "step": 28 }, { "epoch": 0.0, "learning_rate": 1.4126064629908191e-05, "logits/chosen": -2.0149965286254883, "logits/rejected": -2.954669713973999, "logps/chosen": -154.47116088867188, "logps/rejected": -246.57644653320312, "loss": 0.2645, "rewards/accuracies": 1.0, "rewards/chosen": 0.02750253677368164, "rewards/margins": 2.1425621509552, "rewards/rejected": -2.1150596141815186, "step": 29 }, { "epoch": 0.0, "learning_rate": 1.4125331189377043e-05, "logits/chosen": -2.7098991870880127, "logits/rejected": -2.8438878059387207, "logps/chosen": -214.23843383789062, "logps/rejected": -130.81744384765625, "loss": 0.6889, "rewards/accuracies": 0.5, "rewards/chosen": -0.5537437200546265, "rewards/margins": 0.008688002824783325, "rewards/rejected": -0.5624317526817322, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.4124597748845895e-05, "logits/chosen": -3.1580960750579834, "logits/rejected": -2.9024298191070557, "logps/chosen": -380.2968444824219, "logps/rejected": -490.69964599609375, "loss": 0.0874, "rewards/accuracies": 1.0, "rewards/chosen": -0.33735430240631104, "rewards/margins": 3.047351837158203, "rewards/rejected": -3.3847060203552246, "step": 31 }, { "epoch": 0.0, "learning_rate": 1.4123864308314747e-05, "logits/chosen": -2.968543529510498, "logits/rejected": -3.219364643096924, "logps/chosen": -181.55224609375, "logps/rejected": -256.7804260253906, "loss": 0.2053, "rewards/accuracies": 1.0, "rewards/chosen": -0.29361647367477417, "rewards/margins": 1.5720009803771973, "rewards/rejected": -1.8656173944473267, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.41231308677836e-05, "logits/chosen": -2.778377056121826, "logits/rejected": -2.8896071910858154, "logps/chosen": -372.53546142578125, "logps/rejected": -346.61895751953125, "loss": 0.116, "rewards/accuracies": 1.0, "rewards/chosen": 0.24395142495632172, "rewards/margins": 2.210235595703125, "rewards/rejected": -1.966284155845642, "step": 33 }, { "epoch": 0.01, "learning_rate": 1.4122397427252452e-05, "logits/chosen": -1.3267899751663208, "logits/rejected": -2.7902166843414307, "logps/chosen": -101.08920288085938, "logps/rejected": -244.2568359375, "loss": 0.2138, "rewards/accuracies": 1.0, "rewards/chosen": -0.0019500777125358582, "rewards/margins": 1.444850206375122, "rewards/rejected": -1.4468002319335938, "step": 34 }, { "epoch": 0.01, "learning_rate": 1.4121663986721304e-05, "logits/chosen": -2.8224871158599854, "logits/rejected": -1.6505788564682007, "logps/chosen": -890.9483032226562, "logps/rejected": -469.29608154296875, "loss": 0.0524, "rewards/accuracies": 1.0, "rewards/chosen": -0.6125839352607727, "rewards/margins": 2.9355406761169434, "rewards/rejected": -3.5481247901916504, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.4120930546190156e-05, "logits/chosen": -2.998866081237793, "logits/rejected": -2.6009199619293213, "logps/chosen": -347.08172607421875, "logps/rejected": -447.68017578125, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": 0.4468090236186981, "rewards/margins": 3.9539382457733154, "rewards/rejected": -3.507129192352295, "step": 36 }, { "epoch": 0.01, "learning_rate": 1.4120197105659008e-05, "logits/chosen": -2.599369764328003, "logits/rejected": -3.2651326656341553, "logps/chosen": -27.250289916992188, "logps/rejected": -147.46192932128906, "loss": 0.3564, "rewards/accuracies": 1.0, "rewards/chosen": -0.14668217301368713, "rewards/margins": 1.2027482986450195, "rewards/rejected": -1.3494304418563843, "step": 37 }, { "epoch": 0.01, "learning_rate": 1.411946366512786e-05, "logits/chosen": -2.984835386276245, "logits/rejected": -2.9738616943359375, "logps/chosen": -190.6011962890625, "logps/rejected": -405.1341552734375, "loss": 0.0801, "rewards/accuracies": 1.0, "rewards/chosen": -0.12021370232105255, "rewards/margins": 3.2977046966552734, "rewards/rejected": -3.4179184436798096, "step": 38 }, { "epoch": 0.01, "learning_rate": 1.4118730224596711e-05, "logits/chosen": -2.729628562927246, "logits/rejected": -2.944746971130371, "logps/chosen": -32.56704330444336, "logps/rejected": -203.5230712890625, "loss": 0.2299, "rewards/accuracies": 1.0, "rewards/chosen": 0.00658369017764926, "rewards/margins": 1.3536243438720703, "rewards/rejected": -1.3470406532287598, "step": 39 }, { "epoch": 0.01, "learning_rate": 1.4117996784065563e-05, "logits/chosen": -2.5957000255584717, "logits/rejected": -2.9750587940216064, "logps/chosen": -279.3152160644531, "logps/rejected": -402.0721130371094, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -0.07177047431468964, "rewards/margins": 3.3545312881469727, "rewards/rejected": -3.4263014793395996, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.4117263343534415e-05, "logits/chosen": -2.3292081356048584, "logits/rejected": -3.0516459941864014, "logps/chosen": -118.13911437988281, "logps/rejected": -256.521484375, "loss": 0.0955, "rewards/accuracies": 1.0, "rewards/chosen": 0.05770416557788849, "rewards/margins": 3.34348464012146, "rewards/rejected": -3.285780429840088, "step": 41 }, { "epoch": 0.01, "learning_rate": 1.4116529903003269e-05, "logits/chosen": -2.2826948165893555, "logits/rejected": -3.0719592571258545, "logps/chosen": -131.83828735351562, "logps/rejected": -502.65899658203125, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -0.2944015562534332, "rewards/margins": 5.425523281097412, "rewards/rejected": -5.7199249267578125, "step": 42 }, { "epoch": 0.01, "learning_rate": 1.411579646247212e-05, "logits/chosen": -2.5314977169036865, "logits/rejected": -2.399129867553711, "logps/chosen": -699.333740234375, "logps/rejected": -401.123046875, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -0.4309120178222656, "rewards/margins": 3.6440651416778564, "rewards/rejected": -4.074977397918701, "step": 43 }, { "epoch": 0.01, "learning_rate": 1.4115063021940972e-05, "logits/chosen": -2.6532936096191406, "logits/rejected": -3.0105814933776855, "logps/chosen": -196.9453582763672, "logps/rejected": -408.12310791015625, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.14475403726100922, "rewards/margins": 3.0799012184143066, "rewards/rejected": -3.2246551513671875, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.4114329581409824e-05, "logits/chosen": -2.6611838340759277, "logits/rejected": -3.1411354541778564, "logps/chosen": -123.79290008544922, "logps/rejected": -207.4145050048828, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.42753010988235474, "rewards/margins": 3.1907477378845215, "rewards/rejected": -3.6182780265808105, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.4113596140878676e-05, "logits/chosen": -1.9545180797576904, "logits/rejected": -2.9276978969573975, "logps/chosen": -219.49160766601562, "logps/rejected": -582.915283203125, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.9123855829238892, "rewards/margins": 5.33445930480957, "rewards/rejected": -6.24684476852417, "step": 46 }, { "epoch": 0.01, "learning_rate": 1.4112862700347528e-05, "logits/chosen": -3.055722236633301, "logits/rejected": -2.9924709796905518, "logps/chosen": -240.751220703125, "logps/rejected": -199.92752075195312, "loss": 0.3804, "rewards/accuracies": 1.0, "rewards/chosen": -0.9245437979698181, "rewards/margins": 0.9480330944061279, "rewards/rejected": -1.8725769519805908, "step": 47 }, { "epoch": 0.01, "learning_rate": 1.411212925981638e-05, "logits/chosen": -2.7822134494781494, "logits/rejected": -3.003478765487671, "logps/chosen": -136.05612182617188, "logps/rejected": -244.82127380371094, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 0.23696213960647583, "rewards/margins": 3.1409072875976562, "rewards/rejected": -2.903945207595825, "step": 48 }, { "epoch": 0.01, "learning_rate": 1.4111395819285232e-05, "logits/chosen": -2.3850457668304443, "logits/rejected": -2.8804078102111816, "logps/chosen": -560.7880859375, "logps/rejected": -630.734375, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.9388450384140015, "rewards/margins": 4.782906532287598, "rewards/rejected": -5.7217512130737305, "step": 49 }, { "epoch": 0.01, "learning_rate": 1.4110662378754084e-05, "logits/chosen": -2.8799331188201904, "logits/rejected": -2.0609500408172607, "logps/chosen": -181.17518615722656, "logps/rejected": -110.10717010498047, "loss": 1.7526, "rewards/accuracies": 0.5, "rewards/chosen": -1.5476007461547852, "rewards/margins": 0.4945293664932251, "rewards/rejected": -2.0421299934387207, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.4109928938222937e-05, "logits/chosen": -2.320794105529785, "logits/rejected": -3.1591756343841553, "logps/chosen": -300.90960693359375, "logps/rejected": -418.32403564453125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.585889458656311, "rewards/margins": 4.501832008361816, "rewards/rejected": -5.087721347808838, "step": 51 }, { "epoch": 0.01, "learning_rate": 1.410919549769179e-05, "logits/chosen": -2.6024179458618164, "logits/rejected": -3.0449678897857666, "logps/chosen": -306.19970703125, "logps/rejected": -294.9111022949219, "loss": 1.1886, "rewards/accuracies": 0.5, "rewards/chosen": -1.8001114130020142, "rewards/margins": 2.7324607372283936, "rewards/rejected": -4.532571792602539, "step": 52 }, { "epoch": 0.01, "learning_rate": 1.4108462057160641e-05, "logits/chosen": -2.7474617958068848, "logits/rejected": -3.3014471530914307, "logps/chosen": -168.3236846923828, "logps/rejected": -314.4865417480469, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -1.1680004596710205, "rewards/margins": 3.760110378265381, "rewards/rejected": -4.928110599517822, "step": 53 }, { "epoch": 0.01, "learning_rate": 1.4107728616629493e-05, "logits/chosen": -1.9675617218017578, "logits/rejected": -2.81427264213562, "logps/chosen": -219.14422607421875, "logps/rejected": -354.107421875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.43256688117980957, "rewards/margins": 6.432154655456543, "rewards/rejected": -6.864721775054932, "step": 54 }, { "epoch": 0.01, "learning_rate": 1.4106995176098345e-05, "logits/chosen": -1.4750783443450928, "logits/rejected": -2.7454674243927, "logps/chosen": -358.23236083984375, "logps/rejected": -598.4013671875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.035138726234436, "rewards/margins": 7.453986167907715, "rewards/rejected": -8.48912525177002, "step": 55 }, { "epoch": 0.01, "learning_rate": 1.4106261735567197e-05, "logits/chosen": -2.9609220027923584, "logits/rejected": -2.5556066036224365, "logps/chosen": -545.6329956054688, "logps/rejected": -328.7760925292969, "loss": 0.2841, "rewards/accuracies": 1.0, "rewards/chosen": -1.0657745599746704, "rewards/margins": 3.930992364883423, "rewards/rejected": -4.996767044067383, "step": 56 }, { "epoch": 0.01, "learning_rate": 1.4105528295036049e-05, "logits/chosen": -1.2026716470718384, "logits/rejected": -2.8355350494384766, "logps/chosen": -32.76978302001953, "logps/rejected": -223.06533813476562, "loss": 0.0684, "rewards/accuracies": 1.0, "rewards/chosen": -0.2653753161430359, "rewards/margins": 5.165182113647461, "rewards/rejected": -5.4305572509765625, "step": 57 }, { "epoch": 0.01, "learning_rate": 1.41047948545049e-05, "logits/chosen": -2.445309638977051, "logits/rejected": -2.6787617206573486, "logps/chosen": -550.3043212890625, "logps/rejected": -614.17529296875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.4687083959579468, "rewards/margins": 7.313422203063965, "rewards/rejected": -8.782130241394043, "step": 58 }, { "epoch": 0.01, "learning_rate": 1.4104061413973754e-05, "logits/chosen": -1.8509650230407715, "logits/rejected": -2.8477137088775635, "logps/chosen": -208.73211669921875, "logps/rejected": -323.7951965332031, "loss": 0.2192, "rewards/accuracies": 1.0, "rewards/chosen": -0.409042090177536, "rewards/margins": 3.7633588314056396, "rewards/rejected": -4.172400951385498, "step": 59 }, { "epoch": 0.01, "learning_rate": 1.4103327973442606e-05, "logits/chosen": -2.505031108856201, "logits/rejected": -2.6996185779571533, "logps/chosen": -230.56333923339844, "logps/rejected": -302.277587890625, "loss": 0.1183, "rewards/accuracies": 1.0, "rewards/chosen": -1.3922386169433594, "rewards/margins": 2.0882530212402344, "rewards/rejected": -3.4804916381835938, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.4102594532911458e-05, "logits/chosen": -2.622485637664795, "logits/rejected": -3.2762577533721924, "logps/chosen": -131.33973693847656, "logps/rejected": -404.7507019042969, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.4150056838989258, "rewards/margins": 6.370766639709473, "rewards/rejected": -6.785772323608398, "step": 61 }, { "epoch": 0.01, "learning_rate": 1.4101861092380311e-05, "logits/chosen": -1.6175916194915771, "logits/rejected": -2.5980286598205566, "logps/chosen": -160.11376953125, "logps/rejected": -195.63262939453125, "loss": 2.2194, "rewards/accuracies": 0.5, "rewards/chosen": -2.1569879055023193, "rewards/margins": 1.6641275882720947, "rewards/rejected": -3.821115493774414, "step": 62 }, { "epoch": 0.01, "learning_rate": 1.4101127651849163e-05, "logits/chosen": -2.8297348022460938, "logits/rejected": -2.856510877609253, "logps/chosen": -90.19642639160156, "logps/rejected": -284.11639404296875, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -0.5168472528457642, "rewards/margins": 4.477017879486084, "rewards/rejected": -4.993865013122559, "step": 63 }, { "epoch": 0.01, "learning_rate": 1.4100394211318015e-05, "logits/chosen": -2.9810194969177246, "logits/rejected": -2.8967714309692383, "logps/chosen": -196.25204467773438, "logps/rejected": -252.80722045898438, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": -1.0261684656143188, "rewards/margins": 4.35045051574707, "rewards/rejected": -5.3766188621521, "step": 64 }, { "epoch": 0.01, "learning_rate": 1.4099660770786867e-05, "logits/chosen": -1.5990504026412964, "logits/rejected": -2.907993793487549, "logps/chosen": -140.81544494628906, "logps/rejected": -558.9159545898438, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3803569972515106, "rewards/margins": 8.099209785461426, "rewards/rejected": -8.479567527770996, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.4098927330255719e-05, "logits/chosen": -3.0204856395721436, "logits/rejected": -2.4211068153381348, "logps/chosen": -269.89044189453125, "logps/rejected": -237.43154907226562, "loss": 3.2492, "rewards/accuracies": 0.5, "rewards/chosen": -4.467381477355957, "rewards/margins": -1.0057878494262695, "rewards/rejected": -3.4615936279296875, "step": 66 }, { "epoch": 0.01, "learning_rate": 1.409819388972457e-05, "logits/chosen": -3.217348337173462, "logits/rejected": -3.3953685760498047, "logps/chosen": -39.697086334228516, "logps/rejected": -136.46435546875, "loss": 0.1677, "rewards/accuracies": 1.0, "rewards/chosen": -0.11833896487951279, "rewards/margins": 2.5045955181121826, "rewards/rejected": -2.622934579849243, "step": 67 }, { "epoch": 0.01, "learning_rate": 1.4097460449193424e-05, "logits/chosen": -1.8180181980133057, "logits/rejected": -3.0541343688964844, "logps/chosen": -93.00628662109375, "logps/rejected": -429.23394775390625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7367753982543945, "rewards/margins": 9.170452117919922, "rewards/rejected": -9.9072265625, "step": 68 }, { "epoch": 0.01, "learning_rate": 1.4096727008662276e-05, "logits/chosen": -2.593038558959961, "logits/rejected": -2.6929855346679688, "logps/chosen": -195.23004150390625, "logps/rejected": -298.7722473144531, "loss": 0.2764, "rewards/accuracies": 1.0, "rewards/chosen": -0.8530391454696655, "rewards/margins": 3.5395944118499756, "rewards/rejected": -4.392633438110352, "step": 69 }, { "epoch": 0.01, "learning_rate": 1.4095993568131128e-05, "logits/chosen": -3.009730815887451, "logits/rejected": -1.8470121622085571, "logps/chosen": -478.59197998046875, "logps/rejected": -109.81805419921875, "loss": 3.3686, "rewards/accuracies": 0.0, "rewards/chosen": -3.8656387329101562, "rewards/margins": -3.282195568084717, "rewards/rejected": -0.5834430456161499, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.409526012759998e-05, "logits/chosen": -2.8218958377838135, "logits/rejected": -2.9878311157226562, "logps/chosen": -151.1978302001953, "logps/rejected": -391.93756103515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6019611358642578, "rewards/margins": 7.154431343078613, "rewards/rejected": -8.756392478942871, "step": 71 }, { "epoch": 0.01, "learning_rate": 1.4094526687068832e-05, "logits/chosen": -2.8202035427093506, "logits/rejected": -3.041990280151367, "logps/chosen": -24.37290382385254, "logps/rejected": -185.93161010742188, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 0.04012651368975639, "rewards/margins": 4.4250688552856445, "rewards/rejected": -4.384942054748535, "step": 72 }, { "epoch": 0.01, "learning_rate": 1.4093793246537684e-05, "logits/chosen": -2.7656209468841553, "logits/rejected": -3.03047776222229, "logps/chosen": -174.87388610839844, "logps/rejected": -436.8455505371094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9565904140472412, "rewards/margins": 7.208645343780518, "rewards/rejected": -8.16523551940918, "step": 73 }, { "epoch": 0.01, "learning_rate": 1.4093059806006536e-05, "logits/chosen": -3.028514862060547, "logits/rejected": -1.5148447751998901, "logps/chosen": -664.4035034179688, "logps/rejected": -244.63558959960938, "loss": 4.6969, "rewards/accuracies": 0.5, "rewards/chosen": -6.251135349273682, "rewards/margins": -3.2918028831481934, "rewards/rejected": -2.9593324661254883, "step": 74 }, { "epoch": 0.01, "learning_rate": 1.4092326365475387e-05, "logits/chosen": -3.099137783050537, "logits/rejected": -3.144040107727051, "logps/chosen": -261.35223388671875, "logps/rejected": -347.80487060546875, "loss": 0.2171, "rewards/accuracies": 1.0, "rewards/chosen": -2.020773410797119, "rewards/margins": 3.571377754211426, "rewards/rejected": -5.592151165008545, "step": 75 }, { "epoch": 0.01, "learning_rate": 1.409159292494424e-05, "logits/chosen": -2.7334706783294678, "logits/rejected": -1.8484560251235962, "logps/chosen": -569.9850463867188, "logps/rejected": -386.86907958984375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.616192579269409, "rewards/margins": 6.653825283050537, "rewards/rejected": -9.270017623901367, "step": 76 }, { "epoch": 0.01, "learning_rate": 1.4090859484413093e-05, "logits/chosen": -3.0540425777435303, "logits/rejected": -2.97210955619812, "logps/chosen": -197.98077392578125, "logps/rejected": -155.15423583984375, "loss": 0.4672, "rewards/accuracies": 1.0, "rewards/chosen": -1.017446517944336, "rewards/margins": 0.531777560710907, "rewards/rejected": -1.5492241382598877, "step": 77 }, { "epoch": 0.01, "learning_rate": 1.4090126043881945e-05, "logits/chosen": -0.8959410190582275, "logits/rejected": -2.6812076568603516, "logps/chosen": -12.990964889526367, "logps/rejected": -475.5232849121094, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.15956316888332367, "rewards/margins": 13.661026954650879, "rewards/rejected": -13.501463890075684, "step": 78 }, { "epoch": 0.01, "learning_rate": 1.4089392603350797e-05, "logits/chosen": -2.1871495246887207, "logits/rejected": -2.746021270751953, "logps/chosen": -62.959014892578125, "logps/rejected": -158.9863739013672, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": -0.567073404788971, "rewards/margins": 3.1819586753845215, "rewards/rejected": -3.7490322589874268, "step": 79 }, { "epoch": 0.01, "learning_rate": 1.4088659162819649e-05, "logits/chosen": -2.6375534534454346, "logits/rejected": -2.7307848930358887, "logps/chosen": -270.4930114746094, "logps/rejected": -372.5838317871094, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": -1.7161247730255127, "rewards/margins": 4.023059844970703, "rewards/rejected": -5.739184379577637, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.40879257222885e-05, "logits/chosen": -1.7470145225524902, "logits/rejected": -2.7702338695526123, "logps/chosen": -62.55423355102539, "logps/rejected": -269.4376220703125, "loss": 0.188, "rewards/accuracies": 1.0, "rewards/chosen": -0.45104044675827026, "rewards/margins": 5.718761444091797, "rewards/rejected": -6.169802188873291, "step": 81 }, { "epoch": 0.01, "learning_rate": 1.4087192281757352e-05, "logits/chosen": -3.062570810317993, "logits/rejected": -2.720118522644043, "logps/chosen": -817.0414428710938, "logps/rejected": -543.6349487304688, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": -1.8446091413497925, "rewards/margins": 5.187206268310547, "rewards/rejected": -7.031815528869629, "step": 82 }, { "epoch": 0.01, "learning_rate": 1.4086458841226204e-05, "logits/chosen": -2.8377223014831543, "logits/rejected": -3.072373390197754, "logps/chosen": -306.1324462890625, "logps/rejected": -368.23370361328125, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -2.0255494117736816, "rewards/margins": 6.4500017166137695, "rewards/rejected": -8.475550651550293, "step": 83 }, { "epoch": 0.01, "learning_rate": 1.4085725400695056e-05, "logits/chosen": -2.224365472793579, "logits/rejected": -2.982485771179199, "logps/chosen": -181.11338806152344, "logps/rejected": -490.67236328125, "loss": 2.4705, "rewards/accuracies": 0.5, "rewards/chosen": -3.0073137283325195, "rewards/margins": 3.3408286571502686, "rewards/rejected": -6.348142147064209, "step": 84 }, { "epoch": 0.01, "learning_rate": 1.4084991960163908e-05, "logits/chosen": -2.511768102645874, "logits/rejected": -2.9686481952667236, "logps/chosen": -233.01132202148438, "logps/rejected": -341.1041259765625, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -1.253481388092041, "rewards/margins": 5.830150604248047, "rewards/rejected": -7.083631992340088, "step": 85 }, { "epoch": 0.01, "learning_rate": 1.4084258519632762e-05, "logits/chosen": -2.5626261234283447, "logits/rejected": -3.2329444885253906, "logps/chosen": -75.59342956542969, "logps/rejected": -255.39071655273438, "loss": 0.2198, "rewards/accuracies": 1.0, "rewards/chosen": -0.9348710179328918, "rewards/margins": 6.5154218673706055, "rewards/rejected": -7.450293064117432, "step": 86 }, { "epoch": 0.01, "learning_rate": 1.4083525079101613e-05, "logits/chosen": -2.7153337001800537, "logits/rejected": -1.5484904050827026, "logps/chosen": -644.5648803710938, "logps/rejected": -195.96646118164062, "loss": 6.2618, "rewards/accuracies": 0.5, "rewards/chosen": -7.686749458312988, "rewards/margins": -4.428825378417969, "rewards/rejected": -3.2579240798950195, "step": 87 }, { "epoch": 0.01, "learning_rate": 1.4082791638570465e-05, "logits/chosen": -2.989985227584839, "logits/rejected": -2.000473976135254, "logps/chosen": -553.5145263671875, "logps/rejected": -389.85198974609375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.2258713245391846, "rewards/margins": 7.348170280456543, "rewards/rejected": -8.574041366577148, "step": 88 }, { "epoch": 0.01, "learning_rate": 1.4082058198039317e-05, "logits/chosen": -2.7222344875335693, "logits/rejected": -3.07248854637146, "logps/chosen": -208.29104614257812, "logps/rejected": -385.8736572265625, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.573817491531372, "rewards/margins": 7.496880054473877, "rewards/rejected": -9.070697784423828, "step": 89 }, { "epoch": 0.01, "learning_rate": 1.4081324757508169e-05, "logits/chosen": -2.9212591648101807, "logits/rejected": -3.327638626098633, "logps/chosen": -18.52159881591797, "logps/rejected": -209.32440185546875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.14013256132602692, "rewards/margins": 5.719881057739258, "rewards/rejected": -5.860013961791992, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.4080591316977021e-05, "logits/chosen": -2.5953521728515625, "logits/rejected": -1.9019293785095215, "logps/chosen": -277.9371337890625, "logps/rejected": -409.8018493652344, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.0505270957946777, "rewards/margins": 8.051078796386719, "rewards/rejected": -10.101605415344238, "step": 91 }, { "epoch": 0.01, "learning_rate": 1.4079857876445873e-05, "logits/chosen": -2.811116933822632, "logits/rejected": -1.9557486772537231, "logps/chosen": -325.7215881347656, "logps/rejected": -257.9326171875, "loss": 0.259, "rewards/accuracies": 1.0, "rewards/chosen": -2.62593936920166, "rewards/margins": 1.7965567111968994, "rewards/rejected": -4.4224958419799805, "step": 92 }, { "epoch": 0.01, "learning_rate": 1.4079124435914725e-05, "logits/chosen": -2.644092082977295, "logits/rejected": -2.2853140830993652, "logps/chosen": -117.55805206298828, "logps/rejected": -248.83621215820312, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -1.5235624313354492, "rewards/margins": 4.555681228637695, "rewards/rejected": -6.0792436599731445, "step": 93 }, { "epoch": 0.01, "learning_rate": 1.4078390995383578e-05, "logits/chosen": -2.706035852432251, "logits/rejected": -2.337390899658203, "logps/chosen": -248.60650634765625, "logps/rejected": -302.57379150390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.50856614112854, "rewards/margins": 7.313673973083496, "rewards/rejected": -8.822239875793457, "step": 94 }, { "epoch": 0.01, "learning_rate": 1.407765755485243e-05, "logits/chosen": -2.423990488052368, "logits/rejected": -2.8913729190826416, "logps/chosen": -202.10958862304688, "logps/rejected": -358.60345458984375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.7065911889076233, "rewards/margins": 6.621737480163574, "rewards/rejected": -7.328329086303711, "step": 95 }, { "epoch": 0.01, "learning_rate": 1.4076924114321284e-05, "logits/chosen": -2.6394293308258057, "logits/rejected": -2.9760360717773438, "logps/chosen": -133.45297241210938, "logps/rejected": -162.77191162109375, "loss": 1.0882, "rewards/accuracies": 0.5, "rewards/chosen": -1.5576903820037842, "rewards/margins": 2.194801092147827, "rewards/rejected": -3.7524914741516113, "step": 96 }, { "epoch": 0.02, "learning_rate": 1.4076190673790136e-05, "logits/chosen": -2.8776588439941406, "logits/rejected": -1.9003697633743286, "logps/chosen": -372.47930908203125, "logps/rejected": -154.8984375, "loss": 9.2394, "rewards/accuracies": 0.0, "rewards/chosen": -9.82741641998291, "rewards/margins": -9.239204406738281, "rewards/rejected": -0.5882118344306946, "step": 97 }, { "epoch": 0.02, "learning_rate": 1.4075457233258987e-05, "logits/chosen": -3.148327350616455, "logits/rejected": -3.2848455905914307, "logps/chosen": -142.601318359375, "logps/rejected": -124.483642578125, "loss": 2.2259, "rewards/accuracies": 0.5, "rewards/chosen": -2.2202982902526855, "rewards/margins": 1.7602510452270508, "rewards/rejected": -3.9805493354797363, "step": 98 }, { "epoch": 0.02, "learning_rate": 1.407472379272784e-05, "logits/chosen": -3.02988862991333, "logits/rejected": -2.9936115741729736, "logps/chosen": -573.9237060546875, "logps/rejected": -529.4547729492188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.540155053138733, "rewards/margins": 9.371994972229004, "rewards/rejected": -10.912150382995605, "step": 99 }, { "epoch": 0.02, "learning_rate": 1.4073990352196691e-05, "logits/chosen": -3.2112154960632324, "logits/rejected": -3.127882957458496, "logps/chosen": -258.3874816894531, "logps/rejected": -262.8049621582031, "loss": 0.1228, "rewards/accuracies": 1.0, "rewards/chosen": -1.0279884338378906, "rewards/margins": 2.0816664695739746, "rewards/rejected": -3.1096549034118652, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.4073256911665543e-05, "logits/chosen": -2.8855087757110596, "logits/rejected": -2.6994435787200928, "logps/chosen": -137.1128387451172, "logps/rejected": -174.25665283203125, "loss": 2.907, "rewards/accuracies": 0.5, "rewards/chosen": -3.777805805206299, "rewards/margins": 1.9745888710021973, "rewards/rejected": -5.752394199371338, "step": 101 }, { "epoch": 0.02, "learning_rate": 1.4072523471134395e-05, "logits/chosen": -1.1936187744140625, "logits/rejected": -2.73915433883667, "logps/chosen": -43.142059326171875, "logps/rejected": -269.9305419921875, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -0.2865304946899414, "rewards/margins": 5.182249069213867, "rewards/rejected": -5.468779563903809, "step": 102 }, { "epoch": 0.02, "learning_rate": 1.4071790030603247e-05, "logits/chosen": -2.612938642501831, "logits/rejected": -2.2905681133270264, "logps/chosen": -248.273681640625, "logps/rejected": -240.90823364257812, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -1.8896363973617554, "rewards/margins": 3.1885738372802734, "rewards/rejected": -5.078210353851318, "step": 103 }, { "epoch": 0.02, "learning_rate": 1.40710565900721e-05, "logits/chosen": -1.4308547973632812, "logits/rejected": -2.655660390853882, "logps/chosen": -118.47073364257812, "logps/rejected": -373.6507263183594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.48640233278274536, "rewards/margins": 8.42124080657959, "rewards/rejected": -8.90764331817627, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.4070323149540952e-05, "logits/chosen": -2.037747859954834, "logits/rejected": -2.9258549213409424, "logps/chosen": -74.89553833007812, "logps/rejected": -249.07127380371094, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.766288161277771, "rewards/margins": 5.024596214294434, "rewards/rejected": -5.790884494781494, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.4069589709009804e-05, "logits/chosen": -3.0467324256896973, "logits/rejected": -3.049813985824585, "logps/chosen": -183.87232971191406, "logps/rejected": -239.73509216308594, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.3050895631313324, "rewards/margins": 6.3434529304504395, "rewards/rejected": -6.648542404174805, "step": 106 }, { "epoch": 0.02, "learning_rate": 1.4068856268478656e-05, "logits/chosen": -2.3465147018432617, "logits/rejected": -2.804060697555542, "logps/chosen": -343.57135009765625, "logps/rejected": -576.6207275390625, "loss": 1.827, "rewards/accuracies": 0.5, "rewards/chosen": -3.6464529037475586, "rewards/margins": 0.12361156940460205, "rewards/rejected": -3.77006459236145, "step": 107 }, { "epoch": 0.02, "learning_rate": 1.4068122827947508e-05, "logits/chosen": -2.4034242630004883, "logits/rejected": -3.132439613342285, "logps/chosen": -96.64022827148438, "logps/rejected": -310.78857421875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.2982571125030518, "rewards/margins": 6.6716413497924805, "rewards/rejected": -7.969898223876953, "step": 108 }, { "epoch": 0.02, "learning_rate": 1.406738938741636e-05, "logits/chosen": -2.7480053901672363, "logits/rejected": -1.3815901279449463, "logps/chosen": -444.853271484375, "logps/rejected": -146.7725372314453, "loss": 4.2715, "rewards/accuracies": 0.5, "rewards/chosen": -5.063936710357666, "rewards/margins": -0.9074156284332275, "rewards/rejected": -4.156520843505859, "step": 109 }, { "epoch": 0.02, "learning_rate": 1.4066655946885212e-05, "logits/chosen": -2.9923951625823975, "logits/rejected": -2.1286721229553223, "logps/chosen": -325.11370849609375, "logps/rejected": -129.2069549560547, "loss": 2.6227, "rewards/accuracies": 0.5, "rewards/chosen": -2.729405403137207, "rewards/margins": -0.5575985908508301, "rewards/rejected": -2.171806812286377, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.4065922506354064e-05, "logits/chosen": -1.9610718488693237, "logits/rejected": -3.106618881225586, "logps/chosen": -156.64944458007812, "logps/rejected": -405.6996765136719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4212760925292969, "rewards/margins": 8.434539794921875, "rewards/rejected": -8.855816841125488, "step": 111 }, { "epoch": 0.02, "learning_rate": 1.4065189065822915e-05, "logits/chosen": -2.826230525970459, "logits/rejected": -2.756176233291626, "logps/chosen": -178.4102325439453, "logps/rejected": -165.47659301757812, "loss": 2.4541, "rewards/accuracies": 0.5, "rewards/chosen": -2.561177968978882, "rewards/margins": -1.411751627922058, "rewards/rejected": -1.1494263410568237, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.4064455625291769e-05, "logits/chosen": -2.8706600666046143, "logits/rejected": -1.943257212638855, "logps/chosen": -413.5459899902344, "logps/rejected": -267.8484191894531, "loss": 2.7017, "rewards/accuracies": 0.5, "rewards/chosen": -3.193995714187622, "rewards/margins": 0.8744444847106934, "rewards/rejected": -4.0684404373168945, "step": 113 }, { "epoch": 0.02, "learning_rate": 1.4063722184760621e-05, "logits/chosen": -2.891338586807251, "logits/rejected": -2.3528926372528076, "logps/chosen": -531.6866455078125, "logps/rejected": -458.5242614746094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.365936279296875, "rewards/margins": 8.883954048156738, "rewards/rejected": -10.249890327453613, "step": 114 }, { "epoch": 0.02, "learning_rate": 1.4062988744229473e-05, "logits/chosen": -1.4573860168457031, "logits/rejected": -2.798884630203247, "logps/chosen": -42.12174606323242, "logps/rejected": -312.0538024902344, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.05296477675437927, "rewards/margins": 8.315526962280273, "rewards/rejected": -8.368491172790527, "step": 115 }, { "epoch": 0.02, "learning_rate": 1.4062255303698325e-05, "logits/chosen": -2.7352709770202637, "logits/rejected": -2.9397740364074707, "logps/chosen": -298.05010986328125, "logps/rejected": -374.38543701171875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.9482697248458862, "rewards/margins": 6.085353851318359, "rewards/rejected": -7.033623695373535, "step": 116 }, { "epoch": 0.02, "learning_rate": 1.4061521863167177e-05, "logits/chosen": -2.379978656768799, "logits/rejected": -2.234849452972412, "logps/chosen": -559.9256591796875, "logps/rejected": -533.5009765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.3653228282928467, "rewards/margins": 6.849520683288574, "rewards/rejected": -9.21484375, "step": 117 }, { "epoch": 0.02, "learning_rate": 1.4060788422636028e-05, "logits/chosen": -2.6121339797973633, "logits/rejected": -3.262575626373291, "logps/chosen": -47.444366455078125, "logps/rejected": -166.77243041992188, "loss": 0.2099, "rewards/accuracies": 1.0, "rewards/chosen": -0.6785165071487427, "rewards/margins": 4.3305864334106445, "rewards/rejected": -5.009102821350098, "step": 118 }, { "epoch": 0.02, "learning_rate": 1.406005498210488e-05, "logits/chosen": -2.9990217685699463, "logits/rejected": -2.665900945663452, "logps/chosen": -244.85992431640625, "logps/rejected": -289.5667724609375, "loss": 5.6496, "rewards/accuracies": 0.5, "rewards/chosen": -6.276189804077148, "rewards/margins": -3.75709867477417, "rewards/rejected": -2.5190908908843994, "step": 119 }, { "epoch": 0.02, "learning_rate": 1.4059321541573732e-05, "logits/chosen": -1.9140093326568604, "logits/rejected": -3.0359041690826416, "logps/chosen": -166.79962158203125, "logps/rejected": -404.4422912597656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.059725962579250336, "rewards/margins": 8.013431549072266, "rewards/rejected": -8.073156356811523, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.4058588101042584e-05, "logits/chosen": -2.442105531692505, "logits/rejected": -3.2534661293029785, "logps/chosen": -52.98470687866211, "logps/rejected": -123.00999450683594, "loss": 0.2176, "rewards/accuracies": 1.0, "rewards/chosen": -0.3291938900947571, "rewards/margins": 1.8115787506103516, "rewards/rejected": -2.140772581100464, "step": 121 }, { "epoch": 0.02, "learning_rate": 1.4057854660511438e-05, "logits/chosen": -1.6526856422424316, "logits/rejected": -3.0311577320098877, "logps/chosen": -67.76577758789062, "logps/rejected": -313.8304748535156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.3949180543422699, "rewards/margins": 7.000816345214844, "rewards/rejected": -7.395734786987305, "step": 122 }, { "epoch": 0.02, "learning_rate": 1.405712121998029e-05, "logits/chosen": -2.7323882579803467, "logits/rejected": -2.914923906326294, "logps/chosen": -296.2641296386719, "logps/rejected": -436.0594177246094, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.44600069522857666, "rewards/margins": 6.405708312988281, "rewards/rejected": -6.851709365844727, "step": 123 }, { "epoch": 0.02, "learning_rate": 1.4056387779449141e-05, "logits/chosen": -2.9800710678100586, "logits/rejected": -3.2600553035736084, "logps/chosen": -807.1800537109375, "logps/rejected": -670.4857177734375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.148036241531372, "rewards/margins": 6.424774169921875, "rewards/rejected": -8.572810173034668, "step": 124 }, { "epoch": 0.02, "learning_rate": 1.4055654338917993e-05, "logits/chosen": -2.951738119125366, "logits/rejected": -2.928091049194336, "logps/chosen": -462.62908935546875, "logps/rejected": -284.0361328125, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -1.4591400623321533, "rewards/margins": 3.42445707321167, "rewards/rejected": -4.883596897125244, "step": 125 }, { "epoch": 0.02, "learning_rate": 1.4054920898386845e-05, "logits/chosen": -1.9357908964157104, "logits/rejected": -3.2472918033599854, "logps/chosen": -133.62445068359375, "logps/rejected": -402.4200439453125, "loss": 1.2776, "rewards/accuracies": 0.5, "rewards/chosen": -2.5739705562591553, "rewards/margins": 2.8085975646972656, "rewards/rejected": -5.382568359375, "step": 126 }, { "epoch": 0.02, "learning_rate": 1.4054187457855697e-05, "logits/chosen": -1.352041244506836, "logits/rejected": -2.3438780307769775, "logps/chosen": -47.41897201538086, "logps/rejected": -226.47592163085938, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -0.0940118357539177, "rewards/margins": 3.060260772705078, "rewards/rejected": -3.1542725563049316, "step": 127 }, { "epoch": 0.02, "learning_rate": 1.405345401732455e-05, "logits/chosen": -1.2290518283843994, "logits/rejected": -2.622897148132324, "logps/chosen": -75.5901870727539, "logps/rejected": -412.4236755371094, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 0.0034929662942886353, "rewards/margins": 5.025552749633789, "rewards/rejected": -5.022059917449951, "step": 128 }, { "epoch": 0.02, "learning_rate": 1.4052720576793402e-05, "logits/chosen": -2.5669708251953125, "logits/rejected": -3.051974058151245, "logps/chosen": -24.31934356689453, "logps/rejected": -173.94747924804688, "loss": 0.0977, "rewards/accuracies": 1.0, "rewards/chosen": -0.21173784136772156, "rewards/margins": 3.5031790733337402, "rewards/rejected": -3.7149171829223633, "step": 129 }, { "epoch": 0.02, "learning_rate": 1.4051987136262254e-05, "logits/chosen": -2.7419581413269043, "logits/rejected": -1.8567566871643066, "logps/chosen": -302.43798828125, "logps/rejected": -309.5819091796875, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -0.9847992062568665, "rewards/margins": 5.7281107902526855, "rewards/rejected": -6.712910175323486, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.4051253695731108e-05, "logits/chosen": -0.6596185564994812, "logits/rejected": -2.7690045833587646, "logps/chosen": -66.03289031982422, "logps/rejected": -301.6844482421875, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -0.7958170175552368, "rewards/margins": 4.543050765991211, "rewards/rejected": -5.338868141174316, "step": 131 }, { "epoch": 0.02, "learning_rate": 1.405052025519996e-05, "logits/chosen": -2.5537092685699463, "logits/rejected": -2.8506507873535156, "logps/chosen": -83.65706634521484, "logps/rejected": -128.05015563964844, "loss": 2.2325, "rewards/accuracies": 0.5, "rewards/chosen": -2.133936643600464, "rewards/margins": 0.9239468574523926, "rewards/rejected": -3.0578835010528564, "step": 132 }, { "epoch": 0.02, "learning_rate": 1.4049786814668812e-05, "logits/chosen": -3.2083847522735596, "logits/rejected": -2.993487596511841, "logps/chosen": -509.5464782714844, "logps/rejected": -392.9215087890625, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -2.4677934646606445, "rewards/margins": 3.2326202392578125, "rewards/rejected": -5.700413703918457, "step": 133 }, { "epoch": 0.02, "learning_rate": 1.4049053374137664e-05, "logits/chosen": -2.975555419921875, "logits/rejected": -2.631242513656616, "logps/chosen": -151.8843994140625, "logps/rejected": -100.668212890625, "loss": 4.112, "rewards/accuracies": 0.0, "rewards/chosen": -4.69008731842041, "rewards/margins": -4.093465805053711, "rewards/rejected": -0.5966214537620544, "step": 134 }, { "epoch": 0.02, "learning_rate": 1.4048319933606515e-05, "logits/chosen": -2.6429362297058105, "logits/rejected": -2.9044272899627686, "logps/chosen": -203.18658447265625, "logps/rejected": -283.4639587402344, "loss": 1.9964, "rewards/accuracies": 0.5, "rewards/chosen": -2.397343397140503, "rewards/margins": 0.170676589012146, "rewards/rejected": -2.5680198669433594, "step": 135 }, { "epoch": 0.02, "learning_rate": 1.4047586493075367e-05, "logits/chosen": -3.087475061416626, "logits/rejected": -2.9166338443756104, "logps/chosen": -325.31396484375, "logps/rejected": -186.274658203125, "loss": 6.3196, "rewards/accuracies": 0.0, "rewards/chosen": -7.076940536499023, "rewards/margins": -6.317698955535889, "rewards/rejected": -0.7592416405677795, "step": 136 }, { "epoch": 0.02, "learning_rate": 1.404685305254422e-05, "logits/chosen": -2.786686897277832, "logits/rejected": -2.9908299446105957, "logps/chosen": -45.22657775878906, "logps/rejected": -167.27682495117188, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.18756437301635742, "rewards/margins": 4.119537830352783, "rewards/rejected": -4.307102203369141, "step": 137 }, { "epoch": 0.02, "learning_rate": 1.4046119612013071e-05, "logits/chosen": -1.5089609622955322, "logits/rejected": -2.7998995780944824, "logps/chosen": -246.18975830078125, "logps/rejected": -482.97369384765625, "loss": 0.6128, "rewards/accuracies": 0.5, "rewards/chosen": -1.8226937055587769, "rewards/margins": 4.57216739654541, "rewards/rejected": -6.394861221313477, "step": 138 }, { "epoch": 0.02, "learning_rate": 1.4045386171481923e-05, "logits/chosen": -2.402240753173828, "logits/rejected": -2.9281318187713623, "logps/chosen": -667.38134765625, "logps/rejected": -654.2086181640625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.100324273109436, "rewards/margins": 6.508388519287109, "rewards/rejected": -7.608713150024414, "step": 139 }, { "epoch": 0.02, "learning_rate": 1.4044652730950777e-05, "logits/chosen": -2.5996451377868652, "logits/rejected": -2.872141122817993, "logps/chosen": -136.02090454101562, "logps/rejected": -171.0716552734375, "loss": 1.155, "rewards/accuracies": 0.5, "rewards/chosen": -1.3558441400527954, "rewards/margins": 2.7775707244873047, "rewards/rejected": -4.1334147453308105, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.4043919290419628e-05, "logits/chosen": -2.022608995437622, "logits/rejected": -2.865109920501709, "logps/chosen": -226.61318969726562, "logps/rejected": -346.3448486328125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8287773132324219, "rewards/margins": 6.22662353515625, "rewards/rejected": -7.055400848388672, "step": 141 }, { "epoch": 0.02, "learning_rate": 1.404318584988848e-05, "logits/chosen": -3.018886089324951, "logits/rejected": -2.9977457523345947, "logps/chosen": -331.5029296875, "logps/rejected": -251.478515625, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -0.7245743274688721, "rewards/margins": 3.0904366970062256, "rewards/rejected": -3.8150110244750977, "step": 142 }, { "epoch": 0.02, "learning_rate": 1.4042452409357332e-05, "logits/chosen": -1.30589759349823, "logits/rejected": -2.897169828414917, "logps/chosen": -189.08770751953125, "logps/rejected": -496.49224853515625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8998379111289978, "rewards/margins": 8.02004623413086, "rewards/rejected": -8.91988468170166, "step": 143 }, { "epoch": 0.02, "learning_rate": 1.4041718968826184e-05, "logits/chosen": -2.875225782394409, "logits/rejected": -2.039557695388794, "logps/chosen": -106.57221221923828, "logps/rejected": -164.0631103515625, "loss": 0.5338, "rewards/accuracies": 0.5, "rewards/chosen": -1.4710561037063599, "rewards/margins": 2.410243511199951, "rewards/rejected": -3.8812997341156006, "step": 144 }, { "epoch": 0.02, "learning_rate": 1.4040985528295036e-05, "logits/chosen": -2.994858980178833, "logits/rejected": -3.164658784866333, "logps/chosen": -176.02247619628906, "logps/rejected": -239.53567504882812, "loss": 0.1489, "rewards/accuracies": 1.0, "rewards/chosen": -1.1596201658248901, "rewards/margins": 2.602823257446289, "rewards/rejected": -3.7624433040618896, "step": 145 }, { "epoch": 0.02, "learning_rate": 1.4040252087763888e-05, "logits/chosen": -2.6293702125549316, "logits/rejected": -2.8725709915161133, "logps/chosen": -465.7054748535156, "logps/rejected": -309.61260986328125, "loss": 3.7882, "rewards/accuracies": 0.5, "rewards/chosen": -4.636641025543213, "rewards/margins": 0.6337246894836426, "rewards/rejected": -5.2703657150268555, "step": 146 }, { "epoch": 0.02, "learning_rate": 1.403951864723274e-05, "logits/chosen": -0.7212904095649719, "logits/rejected": -2.8510398864746094, "logps/chosen": -51.58115005493164, "logps/rejected": -454.3642578125, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": -0.4947052001953125, "rewards/margins": 6.067193031311035, "rewards/rejected": -6.561898231506348, "step": 147 }, { "epoch": 0.02, "learning_rate": 1.4038785206701592e-05, "logits/chosen": -2.8864495754241943, "logits/rejected": -2.999298334121704, "logps/chosen": -715.2362670898438, "logps/rejected": -679.9295043945312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9642700552940369, "rewards/margins": 8.447247505187988, "rewards/rejected": -9.411518096923828, "step": 148 }, { "epoch": 0.02, "learning_rate": 1.4038051766170445e-05, "logits/chosen": -2.9549381732940674, "logits/rejected": -3.1335530281066895, "logps/chosen": -18.24860191345215, "logps/rejected": -109.90200805664062, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": 0.014121484011411667, "rewards/margins": 3.318458080291748, "rewards/rejected": -3.3043365478515625, "step": 149 }, { "epoch": 0.02, "learning_rate": 1.4037318325639297e-05, "logits/chosen": -1.2642600536346436, "logits/rejected": -3.052471876144409, "logps/chosen": -54.64570999145508, "logps/rejected": -426.58917236328125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5906059145927429, "rewards/margins": 8.07583999633789, "rewards/rejected": -8.666446685791016, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.4036584885108149e-05, "logits/chosen": -2.309497117996216, "logits/rejected": -3.0908682346343994, "logps/chosen": -24.336490631103516, "logps/rejected": -168.9246063232422, "loss": 0.0625, "rewards/accuracies": 1.0, "rewards/chosen": 0.3870885968208313, "rewards/margins": 3.342562437057495, "rewards/rejected": -2.9554738998413086, "step": 151 }, { "epoch": 0.02, "learning_rate": 1.4035851444577e-05, "logits/chosen": -1.7026439905166626, "logits/rejected": -2.811178207397461, "logps/chosen": -137.76690673828125, "logps/rejected": -291.13897705078125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.4663711488246918, "rewards/margins": 5.073132514953613, "rewards/rejected": -5.539503574371338, "step": 152 }, { "epoch": 0.02, "learning_rate": 1.4035118004045853e-05, "logits/chosen": -2.166564702987671, "logits/rejected": -2.8809943199157715, "logps/chosen": -168.63070678710938, "logps/rejected": -384.974853515625, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.989078164100647, "rewards/margins": 7.034571647644043, "rewards/rejected": -8.023649215698242, "step": 153 }, { "epoch": 0.02, "learning_rate": 1.4034384563514704e-05, "logits/chosen": -3.0582938194274902, "logits/rejected": -2.514402389526367, "logps/chosen": -371.5284729003906, "logps/rejected": -330.0867919921875, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -2.1748108863830566, "rewards/margins": 4.377372741699219, "rewards/rejected": -6.552183628082275, "step": 154 }, { "epoch": 0.02, "learning_rate": 1.4033651122983556e-05, "logits/chosen": -2.29469633102417, "logits/rejected": -2.910482883453369, "logps/chosen": -275.9214172363281, "logps/rejected": -270.4170837402344, "loss": 5.9641, "rewards/accuracies": 0.5, "rewards/chosen": -5.951199531555176, "rewards/margins": -2.0870234966278076, "rewards/rejected": -3.8641762733459473, "step": 155 }, { "epoch": 0.02, "learning_rate": 1.4032917682452408e-05, "logits/chosen": -2.924412727355957, "logits/rejected": -3.1416471004486084, "logps/chosen": -487.7003173828125, "logps/rejected": -611.8228759765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9965379238128662, "rewards/margins": 7.972347736358643, "rewards/rejected": -9.96888542175293, "step": 156 }, { "epoch": 0.02, "learning_rate": 1.4032184241921262e-05, "logits/chosen": -2.281754493713379, "logits/rejected": -2.8770415782928467, "logps/chosen": -165.88255310058594, "logps/rejected": -170.8311767578125, "loss": 3.4009, "rewards/accuracies": 0.5, "rewards/chosen": -3.867661952972412, "rewards/margins": 0.07129979133605957, "rewards/rejected": -3.9389617443084717, "step": 157 }, { "epoch": 0.02, "learning_rate": 1.4031450801390114e-05, "logits/chosen": -3.0567214488983154, "logits/rejected": -2.796515703201294, "logps/chosen": -212.04458618164062, "logps/rejected": -74.03748321533203, "loss": 5.0288, "rewards/accuracies": 0.5, "rewards/chosen": -4.578733921051025, "rewards/margins": -2.94968581199646, "rewards/rejected": -1.6290478706359863, "step": 158 }, { "epoch": 0.02, "learning_rate": 1.4030717360858966e-05, "logits/chosen": -2.766436815261841, "logits/rejected": -2.952218770980835, "logps/chosen": -139.02610778808594, "logps/rejected": -105.6429443359375, "loss": 2.1537, "rewards/accuracies": 0.5, "rewards/chosen": -2.0377235412597656, "rewards/margins": -0.6108757257461548, "rewards/rejected": -1.4268479347229004, "step": 159 }, { "epoch": 0.02, "learning_rate": 1.4029983920327817e-05, "logits/chosen": -1.3386187553405762, "logits/rejected": -3.1346518993377686, "logps/chosen": -209.28555297851562, "logps/rejected": -480.1346435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.19554367661476135, "rewards/margins": 11.952312469482422, "rewards/rejected": -11.756769180297852, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.402925047979667e-05, "logits/chosen": -2.9172701835632324, "logits/rejected": -1.8863176107406616, "logps/chosen": -356.021240234375, "logps/rejected": -307.37896728515625, "loss": 2.6097, "rewards/accuracies": 0.5, "rewards/chosen": -3.134721517562866, "rewards/margins": 1.0307724475860596, "rewards/rejected": -4.165493965148926, "step": 161 }, { "epoch": 0.03, "learning_rate": 1.4028517039265523e-05, "logits/chosen": -3.2558367252349854, "logits/rejected": -3.3846962451934814, "logps/chosen": -32.59844207763672, "logps/rejected": -64.80052947998047, "loss": 0.3154, "rewards/accuracies": 1.0, "rewards/chosen": 0.07949458062648773, "rewards/margins": 1.2479130029678345, "rewards/rejected": -1.168418526649475, "step": 162 }, { "epoch": 0.03, "learning_rate": 1.4027783598734375e-05, "logits/chosen": -2.9455056190490723, "logits/rejected": -2.640122175216675, "logps/chosen": -198.4468231201172, "logps/rejected": -161.84371948242188, "loss": 3.3331, "rewards/accuracies": 0.0, "rewards/chosen": -3.9197335243225098, "rewards/margins": -3.294912815093994, "rewards/rejected": -0.6248207092285156, "step": 163 }, { "epoch": 0.03, "learning_rate": 1.4027050158203227e-05, "logits/chosen": -3.2489192485809326, "logits/rejected": -2.5723774433135986, "logps/chosen": -645.4959106445312, "logps/rejected": -308.4246826171875, "loss": 0.8624, "rewards/accuracies": 0.5, "rewards/chosen": -1.5014221668243408, "rewards/margins": 1.470977783203125, "rewards/rejected": -2.972399950027466, "step": 164 }, { "epoch": 0.03, "learning_rate": 1.4026316717672079e-05, "logits/chosen": -2.7662055492401123, "logits/rejected": -3.2221639156341553, "logps/chosen": -176.7675323486328, "logps/rejected": -329.1629943847656, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.24641019105911255, "rewards/margins": 4.756302833557129, "rewards/rejected": -5.002713203430176, "step": 165 }, { "epoch": 0.03, "learning_rate": 1.4025583277140932e-05, "logits/chosen": -3.258957624435425, "logits/rejected": -2.2052667140960693, "logps/chosen": -515.95947265625, "logps/rejected": -241.6313934326172, "loss": 4.592, "rewards/accuracies": 0.5, "rewards/chosen": -5.929142951965332, "rewards/margins": -2.709671974182129, "rewards/rejected": -3.219470977783203, "step": 166 }, { "epoch": 0.03, "learning_rate": 1.4024849836609784e-05, "logits/chosen": -2.4335196018218994, "logits/rejected": -3.043889045715332, "logps/chosen": -110.653076171875, "logps/rejected": -301.668701171875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.45313262939453125, "rewards/margins": 5.373100280761719, "rewards/rejected": -4.9199676513671875, "step": 167 }, { "epoch": 0.03, "learning_rate": 1.4024116396078636e-05, "logits/chosen": -3.2198078632354736, "logits/rejected": -3.125908613204956, "logps/chosen": -154.2895050048828, "logps/rejected": -129.55987548828125, "loss": 1.8915, "rewards/accuracies": 0.5, "rewards/chosen": -2.5390892028808594, "rewards/margins": -0.3363828659057617, "rewards/rejected": -2.2027065753936768, "step": 168 }, { "epoch": 0.03, "learning_rate": 1.4023382955547488e-05, "logits/chosen": -2.9452576637268066, "logits/rejected": -2.4228785037994385, "logps/chosen": -485.2295837402344, "logps/rejected": -361.5985412597656, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -0.7806699872016907, "rewards/margins": 4.303671360015869, "rewards/rejected": -5.084341526031494, "step": 169 }, { "epoch": 0.03, "learning_rate": 1.402264951501634e-05, "logits/chosen": -2.9367477893829346, "logits/rejected": -2.4608850479125977, "logps/chosen": -69.421630859375, "logps/rejected": -46.425594329833984, "loss": 1.6044, "rewards/accuracies": 0.5, "rewards/chosen": -0.9808119535446167, "rewards/margins": 0.10795879364013672, "rewards/rejected": -1.0887707471847534, "step": 170 }, { "epoch": 0.03, "learning_rate": 1.4021916074485192e-05, "logits/chosen": -2.470182418823242, "logits/rejected": -3.0301692485809326, "logps/chosen": -94.4593505859375, "logps/rejected": -185.52877807617188, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -0.24326668679714203, "rewards/margins": 3.6235971450805664, "rewards/rejected": -3.866863965988159, "step": 171 }, { "epoch": 0.03, "learning_rate": 1.4021182633954043e-05, "logits/chosen": -3.0517959594726562, "logits/rejected": -1.7564762830734253, "logps/chosen": -441.0694274902344, "logps/rejected": -202.08885192871094, "loss": 6.2879, "rewards/accuracies": 0.0, "rewards/chosen": -5.822798252105713, "rewards/margins": -6.266595363616943, "rewards/rejected": 0.4437970519065857, "step": 172 }, { "epoch": 0.03, "learning_rate": 1.4020449193422895e-05, "logits/chosen": -2.186342239379883, "logits/rejected": -2.980644941329956, "logps/chosen": -89.68164825439453, "logps/rejected": -426.5390625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.31329527497291565, "rewards/margins": 8.024435997009277, "rewards/rejected": -8.33773136138916, "step": 173 }, { "epoch": 0.03, "learning_rate": 1.4019715752891747e-05, "logits/chosen": -2.4582693576812744, "logits/rejected": -2.666163682937622, "logps/chosen": -233.96304321289062, "logps/rejected": -381.7255859375, "loss": 2.527, "rewards/accuracies": 0.5, "rewards/chosen": -2.74106764793396, "rewards/margins": 3.7768054008483887, "rewards/rejected": -6.5178728103637695, "step": 174 }, { "epoch": 0.03, "learning_rate": 1.40189823123606e-05, "logits/chosen": -3.0581724643707275, "logits/rejected": -2.773857593536377, "logps/chosen": -104.623291015625, "logps/rejected": -106.34844970703125, "loss": 1.3995, "rewards/accuracies": 0.5, "rewards/chosen": -1.383456826210022, "rewards/margins": 0.3954504728317261, "rewards/rejected": -1.778907299041748, "step": 175 }, { "epoch": 0.03, "learning_rate": 1.4018248871829453e-05, "logits/chosen": -2.360483169555664, "logits/rejected": -2.9586284160614014, "logps/chosen": -16.147541046142578, "logps/rejected": -136.52880859375, "loss": 0.0691, "rewards/accuracies": 1.0, "rewards/chosen": 0.6231199502944946, "rewards/margins": 2.9645183086395264, "rewards/rejected": -2.341398239135742, "step": 176 }, { "epoch": 0.03, "learning_rate": 1.4017515431298304e-05, "logits/chosen": -2.78810977935791, "logits/rejected": -2.5771408081054688, "logps/chosen": -559.9594116210938, "logps/rejected": -409.59027099609375, "loss": 0.9019, "rewards/accuracies": 0.5, "rewards/chosen": -1.1936745643615723, "rewards/margins": 1.252575397491455, "rewards/rejected": -2.4462499618530273, "step": 177 }, { "epoch": 0.03, "learning_rate": 1.4016781990767156e-05, "logits/chosen": -2.420776128768921, "logits/rejected": -3.213085412979126, "logps/chosen": -263.00848388671875, "logps/rejected": -308.2733459472656, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": 0.4499450922012329, "rewards/margins": 4.335144996643066, "rewards/rejected": -3.885200023651123, "step": 178 }, { "epoch": 0.03, "learning_rate": 1.4016048550236008e-05, "logits/chosen": -2.252261161804199, "logits/rejected": -3.033332347869873, "logps/chosen": -60.64086151123047, "logps/rejected": -259.25531005859375, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": 0.5736101865768433, "rewards/margins": 6.173299789428711, "rewards/rejected": -5.599689483642578, "step": 179 }, { "epoch": 0.03, "learning_rate": 1.401531510970486e-05, "logits/chosen": -2.621873378753662, "logits/rejected": -2.999246835708618, "logps/chosen": -20.214799880981445, "logps/rejected": -132.81753540039062, "loss": 0.1498, "rewards/accuracies": 1.0, "rewards/chosen": 0.18175077438354492, "rewards/margins": 1.9827884435653687, "rewards/rejected": -1.8010375499725342, "step": 180 }, { "epoch": 0.03, "learning_rate": 1.4014581669173712e-05, "logits/chosen": -2.981433868408203, "logits/rejected": -2.6836729049682617, "logps/chosen": -313.1529846191406, "logps/rejected": -209.10853576660156, "loss": 1.6657, "rewards/accuracies": 0.5, "rewards/chosen": -1.5567810535430908, "rewards/margins": -0.4783722162246704, "rewards/rejected": -1.0784088373184204, "step": 181 }, { "epoch": 0.03, "learning_rate": 1.4013848228642564e-05, "logits/chosen": -2.7217626571655273, "logits/rejected": -3.1177475452423096, "logps/chosen": -389.81524658203125, "logps/rejected": -502.818359375, "loss": 2.0215, "rewards/accuracies": 0.5, "rewards/chosen": -2.3588149547576904, "rewards/margins": -1.384434461593628, "rewards/rejected": -0.9743804931640625, "step": 182 }, { "epoch": 0.03, "learning_rate": 1.4013114788111416e-05, "logits/chosen": -2.3580970764160156, "logits/rejected": -2.989363431930542, "logps/chosen": -44.48986053466797, "logps/rejected": -237.87643432617188, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": 0.2550448477268219, "rewards/margins": 3.4398975372314453, "rewards/rejected": -3.1848526000976562, "step": 183 }, { "epoch": 0.03, "learning_rate": 1.401238134758027e-05, "logits/chosen": -2.6938347816467285, "logits/rejected": -3.2226288318634033, "logps/chosen": -267.8742370605469, "logps/rejected": -334.1830749511719, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -0.5043007135391235, "rewards/margins": 3.5806069374084473, "rewards/rejected": -4.084907531738281, "step": 184 }, { "epoch": 0.03, "learning_rate": 1.4011647907049121e-05, "logits/chosen": -2.752960205078125, "logits/rejected": -1.980102777481079, "logps/chosen": -676.5855712890625, "logps/rejected": -368.43389892578125, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.34827765822410583, "rewards/margins": 4.337497711181641, "rewards/rejected": -4.685774803161621, "step": 185 }, { "epoch": 0.03, "learning_rate": 1.4010914466517973e-05, "logits/chosen": -2.6495749950408936, "logits/rejected": -3.1008641719818115, "logps/chosen": -222.82022094726562, "logps/rejected": -313.95623779296875, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -1.010088324546814, "rewards/margins": 3.3271327018737793, "rewards/rejected": -4.337221145629883, "step": 186 }, { "epoch": 0.03, "learning_rate": 1.4010181025986825e-05, "logits/chosen": -1.5732256174087524, "logits/rejected": -2.9653141498565674, "logps/chosen": -61.436012268066406, "logps/rejected": -219.15145874023438, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -0.1113157719373703, "rewards/margins": 3.7373766899108887, "rewards/rejected": -3.8486924171447754, "step": 187 }, { "epoch": 0.03, "learning_rate": 1.4009447585455677e-05, "logits/chosen": -2.5830814838409424, "logits/rejected": -2.9924404621124268, "logps/chosen": -173.12435913085938, "logps/rejected": -176.20803833007812, "loss": 1.7468, "rewards/accuracies": 0.5, "rewards/chosen": -2.1008567810058594, "rewards/margins": -0.40064239501953125, "rewards/rejected": -1.7002143859863281, "step": 188 }, { "epoch": 0.03, "learning_rate": 1.4008714144924529e-05, "logits/chosen": -1.695604681968689, "logits/rejected": -2.784745693206787, "logps/chosen": -130.01947021484375, "logps/rejected": -305.35595703125, "loss": 0.4822, "rewards/accuracies": 0.5, "rewards/chosen": -0.5053685903549194, "rewards/margins": 3.4083871841430664, "rewards/rejected": -3.9137558937072754, "step": 189 }, { "epoch": 0.03, "learning_rate": 1.400798070439338e-05, "logits/chosen": -2.672028064727783, "logits/rejected": -3.3623335361480713, "logps/chosen": -309.25933837890625, "logps/rejected": -421.78387451171875, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.13108177483081818, "rewards/margins": 4.654809951782227, "rewards/rejected": -4.523728370666504, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.4007247263862232e-05, "logits/chosen": -2.37326717376709, "logits/rejected": -2.9246158599853516, "logps/chosen": -526.8088989257812, "logps/rejected": -461.46112060546875, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": 0.2484390288591385, "rewards/margins": 3.475825548171997, "rewards/rejected": -3.227386474609375, "step": 191 }, { "epoch": 0.03, "learning_rate": 1.4006513823331084e-05, "logits/chosen": -1.0575746297836304, "logits/rejected": -2.685439109802246, "logps/chosen": -17.340839385986328, "logps/rejected": -383.7186279296875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.024475909769535065, "rewards/margins": 4.621527671813965, "rewards/rejected": -4.646003723144531, "step": 192 }, { "epoch": 0.03, "learning_rate": 1.4005780382799938e-05, "logits/chosen": -2.531087636947632, "logits/rejected": -3.2132153511047363, "logps/chosen": -33.64259719848633, "logps/rejected": -257.5414733886719, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 0.20988836884498596, "rewards/margins": 4.79128885269165, "rewards/rejected": -4.581400394439697, "step": 193 }, { "epoch": 0.03, "learning_rate": 1.400504694226879e-05, "logits/chosen": -2.8685154914855957, "logits/rejected": -1.3490946292877197, "logps/chosen": -155.78343200683594, "logps/rejected": -101.32732391357422, "loss": 2.6867, "rewards/accuracies": 0.5, "rewards/chosen": -2.7401745319366455, "rewards/margins": -0.711931586265564, "rewards/rejected": -2.028243064880371, "step": 194 }, { "epoch": 0.03, "learning_rate": 1.4004313501737642e-05, "logits/chosen": -2.875171661376953, "logits/rejected": -1.3497910499572754, "logps/chosen": -531.8514404296875, "logps/rejected": -414.8194274902344, "loss": 4.3855, "rewards/accuracies": 0.5, "rewards/chosen": -5.381968021392822, "rewards/margins": -1.4193804264068604, "rewards/rejected": -3.9625871181488037, "step": 195 }, { "epoch": 0.03, "learning_rate": 1.4003580061206495e-05, "logits/chosen": -2.512120246887207, "logits/rejected": -3.0070035457611084, "logps/chosen": -226.131103515625, "logps/rejected": -282.70550537109375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.5476722717285156, "rewards/margins": 5.298675537109375, "rewards/rejected": -5.846347808837891, "step": 196 }, { "epoch": 0.03, "learning_rate": 1.4002846620675347e-05, "logits/chosen": -1.5433167219161987, "logits/rejected": -2.8402724266052246, "logps/chosen": -116.49999237060547, "logps/rejected": -343.837890625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.1901814490556717, "rewards/margins": 5.225398063659668, "rewards/rejected": -5.035216331481934, "step": 197 }, { "epoch": 0.03, "learning_rate": 1.4002113180144199e-05, "logits/chosen": -3.18461537361145, "logits/rejected": -3.400237560272217, "logps/chosen": -110.51262664794922, "logps/rejected": -235.12875366210938, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": -0.13583707809448242, "rewards/margins": 4.222536563873291, "rewards/rejected": -4.358373641967773, "step": 198 }, { "epoch": 0.03, "learning_rate": 1.4001379739613051e-05, "logits/chosen": -2.011627197265625, "logits/rejected": -2.999981164932251, "logps/chosen": -36.15868377685547, "logps/rejected": -139.6876220703125, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -0.35740146040916443, "rewards/margins": 3.3277029991149902, "rewards/rejected": -3.6851043701171875, "step": 199 }, { "epoch": 0.03, "learning_rate": 1.4000646299081903e-05, "logits/chosen": -2.77205491065979, "logits/rejected": -1.8283618688583374, "logps/chosen": -116.6405029296875, "logps/rejected": -137.00277709960938, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": -0.2612747251987457, "rewards/margins": 2.7358205318450928, "rewards/rejected": -2.9970953464508057, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.3999912858550755e-05, "logits/chosen": -2.594717025756836, "logits/rejected": -2.8854799270629883, "logps/chosen": -102.17144012451172, "logps/rejected": -311.78033447265625, "loss": 1.1485, "rewards/accuracies": 0.5, "rewards/chosen": -0.7208024263381958, "rewards/margins": 0.8595740795135498, "rewards/rejected": -1.5803765058517456, "step": 201 }, { "epoch": 0.03, "learning_rate": 1.3999179418019608e-05, "logits/chosen": -1.9328033924102783, "logits/rejected": -2.955244779586792, "logps/chosen": -54.533931732177734, "logps/rejected": -269.7506103515625, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -0.0701960027217865, "rewards/margins": 4.074545383453369, "rewards/rejected": -4.144741535186768, "step": 202 }, { "epoch": 0.03, "learning_rate": 1.399844597748846e-05, "logits/chosen": -2.53410005569458, "logits/rejected": -3.0759475231170654, "logps/chosen": -132.3956298828125, "logps/rejected": -241.48818969726562, "loss": 0.0702, "rewards/accuracies": 1.0, "rewards/chosen": -0.35886192321777344, "rewards/margins": 3.2528491020202637, "rewards/rejected": -3.611711025238037, "step": 203 }, { "epoch": 0.03, "learning_rate": 1.3997712536957312e-05, "logits/chosen": -2.716634511947632, "logits/rejected": -3.299154758453369, "logps/chosen": -131.50794982910156, "logps/rejected": -244.27293395996094, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": -0.4414242208003998, "rewards/margins": 3.7082955837249756, "rewards/rejected": -4.149720191955566, "step": 204 }, { "epoch": 0.03, "learning_rate": 1.3996979096426164e-05, "logits/chosen": -2.648254871368408, "logits/rejected": -2.9003798961639404, "logps/chosen": -10.891579627990723, "logps/rejected": -141.72593688964844, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 0.872689962387085, "rewards/margins": 5.183638572692871, "rewards/rejected": -4.310948371887207, "step": 205 }, { "epoch": 0.03, "learning_rate": 1.3996245655895016e-05, "logits/chosen": -2.754286050796509, "logits/rejected": -2.943732738494873, "logps/chosen": -51.96488952636719, "logps/rejected": -271.35736083984375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.12009581923484802, "rewards/margins": 5.691976070404053, "rewards/rejected": -5.571880340576172, "step": 206 }, { "epoch": 0.03, "learning_rate": 1.3995512215363868e-05, "logits/chosen": -2.804008722305298, "logits/rejected": -2.9000227451324463, "logps/chosen": -651.2376098632812, "logps/rejected": -595.7244873046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.16806335747241974, "rewards/margins": 8.659149169921875, "rewards/rejected": -8.49108600616455, "step": 207 }, { "epoch": 0.03, "learning_rate": 1.399477877483272e-05, "logits/chosen": -2.7897112369537354, "logits/rejected": -2.776685953140259, "logps/chosen": -173.26925659179688, "logps/rejected": -233.0579071044922, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -0.3883964419364929, "rewards/margins": 4.054144382476807, "rewards/rejected": -4.442541122436523, "step": 208 }, { "epoch": 0.03, "learning_rate": 1.3994045334301571e-05, "logits/chosen": -2.3699331283569336, "logits/rejected": -2.477566957473755, "logps/chosen": -234.68844604492188, "logps/rejected": -341.6419982910156, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.6891021728515625, "rewards/margins": 6.073931694030762, "rewards/rejected": -6.763033390045166, "step": 209 }, { "epoch": 0.03, "learning_rate": 1.3993311893770423e-05, "logits/chosen": -2.4613120555877686, "logits/rejected": -2.9397993087768555, "logps/chosen": -266.5517578125, "logps/rejected": -343.9771423339844, "loss": 2.0536, "rewards/accuracies": 0.5, "rewards/chosen": -2.2804572582244873, "rewards/margins": 2.0546913146972656, "rewards/rejected": -4.335148811340332, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.3992578453239277e-05, "logits/chosen": -1.4784244298934937, "logits/rejected": -2.7119572162628174, "logps/chosen": -72.05863952636719, "logps/rejected": -329.05072021484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.04064007103443146, "rewards/margins": 8.296626091003418, "rewards/rejected": -8.255986213684082, "step": 211 }, { "epoch": 0.03, "learning_rate": 1.3991845012708129e-05, "logits/chosen": -2.4906318187713623, "logits/rejected": -2.959563732147217, "logps/chosen": -61.70597839355469, "logps/rejected": -252.44223022460938, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.9041376113891602, "rewards/margins": 7.155435562133789, "rewards/rejected": -6.251297950744629, "step": 212 }, { "epoch": 0.03, "learning_rate": 1.399111157217698e-05, "logits/chosen": -2.48078989982605, "logits/rejected": -3.209472179412842, "logps/chosen": -32.28090286254883, "logps/rejected": -267.20953369140625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.48920193314552307, "rewards/margins": 4.973154544830322, "rewards/rejected": -4.483952522277832, "step": 213 }, { "epoch": 0.03, "learning_rate": 1.3990378131645832e-05, "logits/chosen": -2.758432626724243, "logits/rejected": -0.44189536571502686, "logps/chosen": -543.770751953125, "logps/rejected": -7.292972564697266, "loss": 7.7526, "rewards/accuracies": 0.0, "rewards/chosen": -6.877898693084717, "rewards/margins": -7.736393451690674, "rewards/rejected": 0.8584948778152466, "step": 214 }, { "epoch": 0.03, "learning_rate": 1.3989644691114684e-05, "logits/chosen": -2.672595977783203, "logits/rejected": -2.850132465362549, "logps/chosen": -60.81037902832031, "logps/rejected": -132.025390625, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": 0.7347140908241272, "rewards/margins": 3.4332289695739746, "rewards/rejected": -2.698514938354492, "step": 215 }, { "epoch": 0.03, "learning_rate": 1.3988911250583536e-05, "logits/chosen": -3.1627893447875977, "logits/rejected": -2.9065122604370117, "logps/chosen": -528.2924194335938, "logps/rejected": -423.0738830566406, "loss": 1.9847, "rewards/accuracies": 0.5, "rewards/chosen": -2.4836435317993164, "rewards/margins": -0.6745811700820923, "rewards/rejected": -1.8090622425079346, "step": 216 }, { "epoch": 0.03, "learning_rate": 1.3988177810052388e-05, "logits/chosen": -2.4890635013580322, "logits/rejected": -2.734408378601074, "logps/chosen": -180.44009399414062, "logps/rejected": -201.62814331054688, "loss": 3.7131, "rewards/accuracies": 0.5, "rewards/chosen": -3.721134901046753, "rewards/margins": 0.25934863090515137, "rewards/rejected": -3.9804835319519043, "step": 217 }, { "epoch": 0.03, "learning_rate": 1.398744436952124e-05, "logits/chosen": -3.1387438774108887, "logits/rejected": -2.6427700519561768, "logps/chosen": -210.23582458496094, "logps/rejected": -56.122596740722656, "loss": 4.3772, "rewards/accuracies": 0.0, "rewards/chosen": -4.182665824890137, "rewards/margins": -4.364424705505371, "rewards/rejected": 0.18175873160362244, "step": 218 }, { "epoch": 0.03, "learning_rate": 1.3986710928990092e-05, "logits/chosen": -2.4068191051483154, "logits/rejected": -2.9689176082611084, "logps/chosen": -424.5736999511719, "logps/rejected": -510.7087707519531, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.9510124325752258, "rewards/margins": 6.56685733795166, "rewards/rejected": -7.517869472503662, "step": 219 }, { "epoch": 0.03, "learning_rate": 1.3985977488458945e-05, "logits/chosen": -2.916299819946289, "logits/rejected": -2.70816707611084, "logps/chosen": -92.209228515625, "logps/rejected": -15.54775333404541, "loss": 1.3256, "rewards/accuracies": 0.5, "rewards/chosen": 0.035837531089782715, "rewards/margins": -0.8830551505088806, "rewards/rejected": 0.9188927412033081, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.3985244047927797e-05, "logits/chosen": -2.194429874420166, "logits/rejected": -2.782792329788208, "logps/chosen": -195.36541748046875, "logps/rejected": -197.57188415527344, "loss": 3.727, "rewards/accuracies": 0.5, "rewards/chosen": -3.4538216590881348, "rewards/margins": -1.3388917446136475, "rewards/rejected": -2.1149299144744873, "step": 221 }, { "epoch": 0.03, "learning_rate": 1.398451060739665e-05, "logits/chosen": -2.968801975250244, "logits/rejected": -2.998476266860962, "logps/chosen": -124.653076171875, "logps/rejected": -167.4777374267578, "loss": 0.1949, "rewards/accuracies": 1.0, "rewards/chosen": -0.3519600033760071, "rewards/margins": 1.549687147140503, "rewards/rejected": -1.9016472101211548, "step": 222 }, { "epoch": 0.03, "learning_rate": 1.3983777166865501e-05, "logits/chosen": -2.80216121673584, "logits/rejected": -3.091679334640503, "logps/chosen": -82.06759643554688, "logps/rejected": -255.58189392089844, "loss": 0.1406, "rewards/accuracies": 1.0, "rewards/chosen": 0.19088135659694672, "rewards/margins": 3.702784776687622, "rewards/rejected": -3.5119035243988037, "step": 223 }, { "epoch": 0.03, "learning_rate": 1.3983043726334353e-05, "logits/chosen": -2.0884101390838623, "logits/rejected": -1.4268525838851929, "logps/chosen": -362.0898742675781, "logps/rejected": -276.34063720703125, "loss": 0.0658, "rewards/accuracies": 1.0, "rewards/chosen": 0.4966115951538086, "rewards/margins": 4.166255950927734, "rewards/rejected": -3.669644355773926, "step": 224 }, { "epoch": 0.03, "learning_rate": 1.3982310285803205e-05, "logits/chosen": -2.8098831176757812, "logits/rejected": -1.9291096925735474, "logps/chosen": -226.30088806152344, "logps/rejected": -132.0711669921875, "loss": 2.0646, "rewards/accuracies": 0.5, "rewards/chosen": -2.65522837638855, "rewards/margins": -0.9782644510269165, "rewards/rejected": -1.6769639253616333, "step": 225 }, { "epoch": 0.04, "learning_rate": 1.3981576845272057e-05, "logits/chosen": -2.8602306842803955, "logits/rejected": -2.9689128398895264, "logps/chosen": -102.79602813720703, "logps/rejected": -166.56594848632812, "loss": 0.1171, "rewards/accuracies": 1.0, "rewards/chosen": -0.3023456633090973, "rewards/margins": 2.3253021240234375, "rewards/rejected": -2.627647876739502, "step": 226 }, { "epoch": 0.04, "learning_rate": 1.3980843404740909e-05, "logits/chosen": -2.7383577823638916, "logits/rejected": -2.982428789138794, "logps/chosen": -484.5577392578125, "logps/rejected": -481.6104736328125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.7109077572822571, "rewards/margins": 5.302299499511719, "rewards/rejected": -6.01320743560791, "step": 227 }, { "epoch": 0.04, "learning_rate": 1.3980109964209762e-05, "logits/chosen": -2.5533223152160645, "logits/rejected": -3.07688045501709, "logps/chosen": -182.6111602783203, "logps/rejected": -239.54937744140625, "loss": 0.107, "rewards/accuracies": 1.0, "rewards/chosen": -0.22020837664604187, "rewards/margins": 3.3915648460388184, "rewards/rejected": -3.6117730140686035, "step": 228 }, { "epoch": 0.04, "learning_rate": 1.3979376523678614e-05, "logits/chosen": -2.523798704147339, "logits/rejected": -3.3202733993530273, "logps/chosen": -80.97786712646484, "logps/rejected": -274.00201416015625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.21287879347801208, "rewards/margins": 6.520842552185059, "rewards/rejected": -6.3079633712768555, "step": 229 }, { "epoch": 0.04, "learning_rate": 1.3978643083147468e-05, "logits/chosen": -2.875487804412842, "logits/rejected": -1.676425576210022, "logps/chosen": -239.4599609375, "logps/rejected": -149.17987060546875, "loss": 3.4343, "rewards/accuracies": 0.5, "rewards/chosen": -3.2694733142852783, "rewards/margins": -1.8929105997085571, "rewards/rejected": -1.376562476158142, "step": 230 }, { "epoch": 0.04, "learning_rate": 1.397790964261632e-05, "logits/chosen": -2.575434684753418, "logits/rejected": -2.9902172088623047, "logps/chosen": -143.10733032226562, "logps/rejected": -112.06012725830078, "loss": 3.3427, "rewards/accuracies": 0.0, "rewards/chosen": -3.3093485832214355, "rewards/margins": -3.2788097858428955, "rewards/rejected": -0.030538946390151978, "step": 231 }, { "epoch": 0.04, "learning_rate": 1.3977176202085171e-05, "logits/chosen": -2.1401877403259277, "logits/rejected": -2.3629868030548096, "logps/chosen": -300.4337158203125, "logps/rejected": -483.166259765625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.025202989578247, "rewards/margins": 5.7222747802734375, "rewards/rejected": -6.7474775314331055, "step": 232 }, { "epoch": 0.04, "learning_rate": 1.3976442761554023e-05, "logits/chosen": -2.831821918487549, "logits/rejected": -2.0485053062438965, "logps/chosen": -151.90716552734375, "logps/rejected": -186.53211975097656, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/chosen": -0.30743086338043213, "rewards/margins": 3.721210479736328, "rewards/rejected": -4.028641700744629, "step": 233 }, { "epoch": 0.04, "learning_rate": 1.3975709321022875e-05, "logits/chosen": -2.8478124141693115, "logits/rejected": -1.9858901500701904, "logps/chosen": -215.32388305664062, "logps/rejected": -133.57334899902344, "loss": 2.444, "rewards/accuracies": 0.5, "rewards/chosen": -2.5872535705566406, "rewards/margins": 0.4375627040863037, "rewards/rejected": -3.0248162746429443, "step": 234 }, { "epoch": 0.04, "learning_rate": 1.3974975880491727e-05, "logits/chosen": -3.0070221424102783, "logits/rejected": -2.6931850910186768, "logps/chosen": -882.427001953125, "logps/rejected": -468.3951110839844, "loss": 2.3666, "rewards/accuracies": 0.5, "rewards/chosen": -2.6745729446411133, "rewards/margins": 1.6200404167175293, "rewards/rejected": -4.294613361358643, "step": 235 }, { "epoch": 0.04, "learning_rate": 1.3974242439960579e-05, "logits/chosen": -3.0407955646514893, "logits/rejected": -2.047717809677124, "logps/chosen": -491.1300048828125, "logps/rejected": -331.09521484375, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -0.45499420166015625, "rewards/margins": 3.080474853515625, "rewards/rejected": -3.5354690551757812, "step": 236 }, { "epoch": 0.04, "learning_rate": 1.397350899942943e-05, "logits/chosen": -3.252899646759033, "logits/rejected": -2.4658029079437256, "logps/chosen": -438.24237060546875, "logps/rejected": -103.72933959960938, "loss": 2.827, "rewards/accuracies": 0.0, "rewards/chosen": -2.372117042541504, "rewards/margins": -2.7648861408233643, "rewards/rejected": 0.3927692472934723, "step": 237 }, { "epoch": 0.04, "learning_rate": 1.3972775558898284e-05, "logits/chosen": -2.519871234893799, "logits/rejected": -2.9586679935455322, "logps/chosen": -411.8274841308594, "logps/rejected": -407.7987060546875, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.4264678955078125, "rewards/margins": 3.887768030166626, "rewards/rejected": -4.314235687255859, "step": 238 }, { "epoch": 0.04, "learning_rate": 1.3972042118367136e-05, "logits/chosen": -2.2041091918945312, "logits/rejected": -2.9371092319488525, "logps/chosen": -86.25765991210938, "logps/rejected": -180.57945251464844, "loss": 1.6584, "rewards/accuracies": 0.5, "rewards/chosen": -1.0676497220993042, "rewards/margins": 2.1026949882507324, "rewards/rejected": -3.170344829559326, "step": 239 }, { "epoch": 0.04, "learning_rate": 1.3971308677835988e-05, "logits/chosen": -2.976508855819702, "logits/rejected": -2.6100780963897705, "logps/chosen": -262.93804931640625, "logps/rejected": -190.8821258544922, "loss": 0.6537, "rewards/accuracies": 0.5, "rewards/chosen": 0.756771445274353, "rewards/margins": 2.7503738403320312, "rewards/rejected": -1.9936022758483887, "step": 240 }, { "epoch": 0.04, "learning_rate": 1.397057523730484e-05, "logits/chosen": -2.4352166652679443, "logits/rejected": -2.9361844062805176, "logps/chosen": -89.21510314941406, "logps/rejected": -213.4376220703125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 1.1986665725708008, "rewards/margins": 4.661130905151367, "rewards/rejected": -3.4624640941619873, "step": 241 }, { "epoch": 0.04, "learning_rate": 1.3969841796773692e-05, "logits/chosen": -2.4714951515197754, "logits/rejected": -3.2496628761291504, "logps/chosen": -803.8233032226562, "logps/rejected": -767.661865234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.20408782362937927, "rewards/margins": 8.411643981933594, "rewards/rejected": -8.615732192993164, "step": 242 }, { "epoch": 0.04, "learning_rate": 1.3969108356242544e-05, "logits/chosen": -1.1778755187988281, "logits/rejected": -2.981464385986328, "logps/chosen": -13.263879776000977, "logps/rejected": -236.55397033691406, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": 0.5241872668266296, "rewards/margins": 4.0946173667907715, "rewards/rejected": -3.570430040359497, "step": 243 }, { "epoch": 0.04, "learning_rate": 1.3968374915711396e-05, "logits/chosen": -1.994807243347168, "logits/rejected": -2.9723169803619385, "logps/chosen": -56.705345153808594, "logps/rejected": -225.79896545410156, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": 0.464634507894516, "rewards/margins": 5.034939289093018, "rewards/rejected": -4.570304870605469, "step": 244 }, { "epoch": 0.04, "learning_rate": 1.3967641475180247e-05, "logits/chosen": -3.195833444595337, "logits/rejected": -2.3156163692474365, "logps/chosen": -473.47637939453125, "logps/rejected": -273.79345703125, "loss": 4.0651, "rewards/accuracies": 0.5, "rewards/chosen": -4.564801216125488, "rewards/margins": -2.18404221534729, "rewards/rejected": -2.380758762359619, "step": 245 }, { "epoch": 0.04, "learning_rate": 1.3966908034649101e-05, "logits/chosen": -2.467707872390747, "logits/rejected": -2.901888132095337, "logps/chosen": -3.7224225997924805, "logps/rejected": -91.42822265625, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": 0.9773526191711426, "rewards/margins": 3.3132100105285645, "rewards/rejected": -2.335857391357422, "step": 246 }, { "epoch": 0.04, "learning_rate": 1.3966174594117953e-05, "logits/chosen": -2.865260362625122, "logits/rejected": -3.0203161239624023, "logps/chosen": -133.73193359375, "logps/rejected": -233.52195739746094, "loss": 2.2256, "rewards/accuracies": 0.5, "rewards/chosen": -2.653221607208252, "rewards/margins": -1.128064513206482, "rewards/rejected": -1.5251572132110596, "step": 247 }, { "epoch": 0.04, "learning_rate": 1.3965441153586805e-05, "logits/chosen": -3.156003952026367, "logits/rejected": -2.4098188877105713, "logps/chosen": -812.48974609375, "logps/rejected": -442.1146240234375, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -0.47387391328811646, "rewards/margins": 3.9572739601135254, "rewards/rejected": -4.431147575378418, "step": 248 }, { "epoch": 0.04, "learning_rate": 1.3964707713055657e-05, "logits/chosen": -2.926936388015747, "logits/rejected": -2.391514778137207, "logps/chosen": -121.32210540771484, "logps/rejected": -68.26001739501953, "loss": 0.518, "rewards/accuracies": 0.5, "rewards/chosen": 0.2868309020996094, "rewards/margins": 1.990034818649292, "rewards/rejected": -1.7032039165496826, "step": 249 }, { "epoch": 0.04, "learning_rate": 1.3963974272524509e-05, "logits/chosen": -1.1364414691925049, "logits/rejected": -2.6888904571533203, "logps/chosen": -130.55604553222656, "logps/rejected": -278.01483154296875, "loss": 2.1997, "rewards/accuracies": 0.5, "rewards/chosen": -2.4078831672668457, "rewards/margins": 1.6601381301879883, "rewards/rejected": -4.068021297454834, "step": 250 }, { "epoch": 0.04, "learning_rate": 1.396324083199336e-05, "logits/chosen": -2.2426233291625977, "logits/rejected": -3.0064384937286377, "logps/chosen": -189.06642150878906, "logps/rejected": -473.736572265625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.066095732152462, "rewards/margins": 10.831977844238281, "rewards/rejected": -10.898073196411133, "step": 251 }, { "epoch": 0.04, "learning_rate": 1.3962507391462212e-05, "logits/chosen": -1.7733064889907837, "logits/rejected": -2.9559249877929688, "logps/chosen": -713.5653076171875, "logps/rejected": -641.1895141601562, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7332763671875, "rewards/margins": 7.070425033569336, "rewards/rejected": -7.803701877593994, "step": 252 }, { "epoch": 0.04, "learning_rate": 1.3961773950931064e-05, "logits/chosen": -2.5987932682037354, "logits/rejected": -2.852625846862793, "logps/chosen": -177.18580627441406, "logps/rejected": -388.4474182128906, "loss": 0.1215, "rewards/accuracies": 1.0, "rewards/chosen": -0.512484073638916, "rewards/margins": 9.179387092590332, "rewards/rejected": -9.69187068939209, "step": 253 }, { "epoch": 0.04, "learning_rate": 1.3961040510399916e-05, "logits/chosen": -2.0357847213745117, "logits/rejected": -2.9368066787719727, "logps/chosen": -87.50627136230469, "logps/rejected": -281.1829833984375, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6149610877037048, "rewards/margins": 3.960925340652466, "rewards/rejected": -4.5758867263793945, "step": 254 }, { "epoch": 0.04, "learning_rate": 1.396030706986877e-05, "logits/chosen": -2.834095001220703, "logits/rejected": -3.235002040863037, "logps/chosen": -65.0529556274414, "logps/rejected": -201.80078125, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": -0.023458480834960938, "rewards/margins": 2.876775026321411, "rewards/rejected": -2.900233507156372, "step": 255 }, { "epoch": 0.04, "learning_rate": 1.3959573629337621e-05, "logits/chosen": -2.5768542289733887, "logits/rejected": -3.052607297897339, "logps/chosen": -227.2740020751953, "logps/rejected": -326.59136962890625, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -0.17493173480033875, "rewards/margins": 4.36568021774292, "rewards/rejected": -4.54061222076416, "step": 256 }, { "epoch": 0.04, "learning_rate": 1.3958840188806473e-05, "logits/chosen": -2.7251622676849365, "logits/rejected": -2.7081363201141357, "logps/chosen": -181.31845092773438, "logps/rejected": -347.0906677246094, "loss": 0.5016, "rewards/accuracies": 0.5, "rewards/chosen": -0.7762565612792969, "rewards/margins": 8.967679023742676, "rewards/rejected": -9.743935585021973, "step": 257 }, { "epoch": 0.04, "learning_rate": 1.3958106748275325e-05, "logits/chosen": -2.354517698287964, "logits/rejected": -2.8261096477508545, "logps/chosen": -218.68142700195312, "logps/rejected": -332.9437561035156, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 0.249043270945549, "rewards/margins": 3.256369113922119, "rewards/rejected": -3.0073256492614746, "step": 258 }, { "epoch": 0.04, "learning_rate": 1.3957373307744177e-05, "logits/chosen": -2.779975414276123, "logits/rejected": -3.24334454536438, "logps/chosen": -172.26425170898438, "logps/rejected": -291.00701904296875, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -0.3276382386684418, "rewards/margins": 3.947110176086426, "rewards/rejected": -4.2747483253479, "step": 259 }, { "epoch": 0.04, "learning_rate": 1.3956639867213029e-05, "logits/chosen": -2.9817419052124023, "logits/rejected": -2.988861560821533, "logps/chosen": -148.39193725585938, "logps/rejected": -321.1582946777344, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -0.13809891045093536, "rewards/margins": 3.7422938346862793, "rewards/rejected": -3.880392551422119, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.3955906426681881e-05, "logits/chosen": -1.8921922445297241, "logits/rejected": -1.993295669555664, "logps/chosen": -333.33416748046875, "logps/rejected": -288.3796691894531, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -0.6356170773506165, "rewards/margins": 6.3933634757995605, "rewards/rejected": -7.028980731964111, "step": 261 }, { "epoch": 0.04, "learning_rate": 1.3955172986150734e-05, "logits/chosen": -2.319666624069214, "logits/rejected": -2.949737548828125, "logps/chosen": -362.0819091796875, "logps/rejected": -539.3499755859375, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.34875258803367615, "rewards/margins": 4.967220306396484, "rewards/rejected": -5.315972805023193, "step": 262 }, { "epoch": 0.04, "learning_rate": 1.3954439545619586e-05, "logits/chosen": -1.8772445917129517, "logits/rejected": -2.6442863941192627, "logps/chosen": -446.5323791503906, "logps/rejected": -539.889404296875, "loss": 3.7773, "rewards/accuracies": 0.5, "rewards/chosen": -4.037822246551514, "rewards/margins": 0.11236715316772461, "rewards/rejected": -4.150189399719238, "step": 263 }, { "epoch": 0.04, "learning_rate": 1.395370610508844e-05, "logits/chosen": -2.776529312133789, "logits/rejected": -1.5078333616256714, "logps/chosen": -333.7565612792969, "logps/rejected": -255.32620239257812, "loss": 3.2014, "rewards/accuracies": 0.5, "rewards/chosen": -3.360495090484619, "rewards/margins": 0.7357692718505859, "rewards/rejected": -4.096264362335205, "step": 264 }, { "epoch": 0.04, "learning_rate": 1.3952972664557292e-05, "logits/chosen": -1.7566472291946411, "logits/rejected": -2.8995583057403564, "logps/chosen": -249.28793334960938, "logps/rejected": -538.498779296875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.30523186922073364, "rewards/margins": 6.067130088806152, "rewards/rejected": -6.37236213684082, "step": 265 }, { "epoch": 0.04, "learning_rate": 1.3952239224026144e-05, "logits/chosen": -3.2689061164855957, "logits/rejected": -2.4075939655303955, "logps/chosen": -380.8009338378906, "logps/rejected": -254.84519958496094, "loss": 0.1591, "rewards/accuracies": 1.0, "rewards/chosen": -1.4287776947021484, "rewards/margins": 2.122317314147949, "rewards/rejected": -3.5510950088500977, "step": 266 }, { "epoch": 0.04, "learning_rate": 1.3951505783494996e-05, "logits/chosen": -2.00604510307312, "logits/rejected": -2.9953773021698, "logps/chosen": -103.81523895263672, "logps/rejected": -263.35137939453125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 0.22962987422943115, "rewards/margins": 5.138448715209961, "rewards/rejected": -4.90881872177124, "step": 267 }, { "epoch": 0.04, "learning_rate": 1.3950772342963847e-05, "logits/chosen": -2.7557218074798584, "logits/rejected": -3.182690143585205, "logps/chosen": -308.67584228515625, "logps/rejected": -327.95343017578125, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.6128600835800171, "rewards/margins": 4.018969535827637, "rewards/rejected": -4.631829738616943, "step": 268 }, { "epoch": 0.04, "learning_rate": 1.39500389024327e-05, "logits/chosen": -2.055312156677246, "logits/rejected": -2.955883741378784, "logps/chosen": -220.2537841796875, "logps/rejected": -292.96343994140625, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -0.4760269224643707, "rewards/margins": 3.2749223709106445, "rewards/rejected": -3.7509491443634033, "step": 269 }, { "epoch": 0.04, "learning_rate": 1.3949305461901551e-05, "logits/chosen": -2.8597609996795654, "logits/rejected": -1.5525504350662231, "logps/chosen": -226.28750610351562, "logps/rejected": -113.93035125732422, "loss": 3.7274, "rewards/accuracies": 0.5, "rewards/chosen": -3.6783645153045654, "rewards/margins": -2.0265109539031982, "rewards/rejected": -1.6518535614013672, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.3948572021370403e-05, "logits/chosen": -3.0751359462738037, "logits/rejected": -3.087829828262329, "logps/chosen": -142.0892791748047, "logps/rejected": -51.236167907714844, "loss": 2.7343, "rewards/accuracies": 0.5, "rewards/chosen": -1.6161322593688965, "rewards/margins": -1.8139724731445312, "rewards/rejected": 0.1978401243686676, "step": 271 }, { "epoch": 0.04, "learning_rate": 1.3947838580839255e-05, "logits/chosen": -2.2617292404174805, "logits/rejected": -2.5647051334381104, "logps/chosen": -217.7241973876953, "logps/rejected": -306.6126403808594, "loss": 0.295, "rewards/accuracies": 1.0, "rewards/chosen": -0.6289230585098267, "rewards/margins": 3.7993361949920654, "rewards/rejected": -4.428259372711182, "step": 272 }, { "epoch": 0.04, "learning_rate": 1.3947105140308109e-05, "logits/chosen": -2.797147035598755, "logits/rejected": -1.5323922634124756, "logps/chosen": -415.34979248046875, "logps/rejected": -258.1165466308594, "loss": 0.0507, "rewards/accuracies": 1.0, "rewards/chosen": -0.098155677318573, "rewards/margins": 2.9568521976470947, "rewards/rejected": -3.0550079345703125, "step": 273 }, { "epoch": 0.04, "learning_rate": 1.394637169977696e-05, "logits/chosen": -2.6802196502685547, "logits/rejected": -3.1924233436584473, "logps/chosen": -177.78189086914062, "logps/rejected": -405.0536193847656, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.5822327136993408, "rewards/margins": 7.620902061462402, "rewards/rejected": -7.038669109344482, "step": 274 }, { "epoch": 0.04, "learning_rate": 1.3945638259245812e-05, "logits/chosen": -2.8072149753570557, "logits/rejected": -2.4984936714172363, "logps/chosen": -529.4964599609375, "logps/rejected": -437.04901123046875, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -1.193550944328308, "rewards/margins": 4.6997551918029785, "rewards/rejected": -5.893305778503418, "step": 275 }, { "epoch": 0.04, "learning_rate": 1.3944904818714664e-05, "logits/chosen": -1.5160119533538818, "logits/rejected": -2.977653741836548, "logps/chosen": -131.07574462890625, "logps/rejected": -325.1929626464844, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": -0.17802467942237854, "rewards/margins": 3.8961856365203857, "rewards/rejected": -4.074210166931152, "step": 276 }, { "epoch": 0.04, "learning_rate": 1.3944171378183516e-05, "logits/chosen": -2.9175186157226562, "logits/rejected": -2.836869955062866, "logps/chosen": -92.02204895019531, "logps/rejected": -72.35475158691406, "loss": 1.2374, "rewards/accuracies": 0.5, "rewards/chosen": -0.23274746537208557, "rewards/margins": 0.20649445056915283, "rewards/rejected": -0.4392419457435608, "step": 277 }, { "epoch": 0.04, "learning_rate": 1.3943437937652368e-05, "logits/chosen": -2.7567501068115234, "logits/rejected": -3.1804656982421875, "logps/chosen": -234.2810821533203, "logps/rejected": -322.1249694824219, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.11983165889978409, "rewards/margins": 4.465878963470459, "rewards/rejected": -4.5857110023498535, "step": 278 }, { "epoch": 0.04, "learning_rate": 1.394270449712122e-05, "logits/chosen": -2.476978302001953, "logits/rejected": -3.083524465560913, "logps/chosen": -169.0852508544922, "logps/rejected": -326.77081298828125, "loss": 0.1068, "rewards/accuracies": 1.0, "rewards/chosen": 0.4763715863227844, "rewards/margins": 4.606768608093262, "rewards/rejected": -4.130396842956543, "step": 279 }, { "epoch": 0.04, "learning_rate": 1.3941971056590072e-05, "logits/chosen": -2.2134783267974854, "logits/rejected": -2.8065738677978516, "logps/chosen": -160.11244201660156, "logps/rejected": -308.46746826171875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.5936569571495056, "rewards/margins": 7.087551116943359, "rewards/rejected": -7.681207656860352, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.3941237616058924e-05, "logits/chosen": -2.2852160930633545, "logits/rejected": -2.9036808013916016, "logps/chosen": -214.75564575195312, "logps/rejected": -279.3262634277344, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -0.0207214392721653, "rewards/margins": 3.22029447555542, "rewards/rejected": -3.241015672683716, "step": 281 }, { "epoch": 0.04, "learning_rate": 1.3940504175527777e-05, "logits/chosen": -2.5142171382904053, "logits/rejected": -2.909477710723877, "logps/chosen": -75.58283996582031, "logps/rejected": -408.0465087890625, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.6271282434463501, "rewards/margins": 5.357914447784424, "rewards/rejected": -4.730786323547363, "step": 282 }, { "epoch": 0.04, "learning_rate": 1.3939770734996629e-05, "logits/chosen": -2.277092218399048, "logits/rejected": -3.156695604324341, "logps/chosen": -137.95941162109375, "logps/rejected": -282.587158203125, "loss": 0.0657, "rewards/accuracies": 1.0, "rewards/chosen": -0.4925430417060852, "rewards/margins": 4.265351295471191, "rewards/rejected": -4.757894039154053, "step": 283 }, { "epoch": 0.04, "learning_rate": 1.3939037294465481e-05, "logits/chosen": -2.71185564994812, "logits/rejected": -2.9037559032440186, "logps/chosen": -148.6932830810547, "logps/rejected": -202.97906494140625, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 0.5129855871200562, "rewards/margins": 4.298712730407715, "rewards/rejected": -3.785727024078369, "step": 284 }, { "epoch": 0.04, "learning_rate": 1.3938303853934333e-05, "logits/chosen": -2.321836471557617, "logits/rejected": -3.0209245681762695, "logps/chosen": -248.88784790039062, "logps/rejected": -369.9033203125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.26368945837020874, "rewards/margins": 7.85190486907959, "rewards/rejected": -8.115594863891602, "step": 285 }, { "epoch": 0.04, "learning_rate": 1.3937570413403185e-05, "logits/chosen": -2.9047515392303467, "logits/rejected": -2.548764944076538, "logps/chosen": -299.0241394042969, "logps/rejected": -270.5968017578125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.4245586693286896, "rewards/margins": 6.113763332366943, "rewards/rejected": -6.5383219718933105, "step": 286 }, { "epoch": 0.04, "learning_rate": 1.3936836972872036e-05, "logits/chosen": -2.687074661254883, "logits/rejected": -3.387108564376831, "logps/chosen": -5.8227386474609375, "logps/rejected": -230.26226806640625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": 1.024827241897583, "rewards/margins": 6.855764389038086, "rewards/rejected": -5.830937385559082, "step": 287 }, { "epoch": 0.04, "learning_rate": 1.3936103532340888e-05, "logits/chosen": -2.0566139221191406, "logits/rejected": -2.813296318054199, "logps/chosen": -268.1513977050781, "logps/rejected": -326.39251708984375, "loss": 3.3594, "rewards/accuracies": 0.5, "rewards/chosen": -3.076927900314331, "rewards/margins": 0.8020591735839844, "rewards/rejected": -3.8789870738983154, "step": 288 }, { "epoch": 0.04, "learning_rate": 1.393537009180974e-05, "logits/chosen": -1.8429728746414185, "logits/rejected": -2.8902041912078857, "logps/chosen": -36.97590637207031, "logps/rejected": -166.11126708984375, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 1.113722801208496, "rewards/margins": 5.084023475646973, "rewards/rejected": -3.9703001976013184, "step": 289 }, { "epoch": 0.05, "learning_rate": 1.3934636651278592e-05, "logits/chosen": -2.5782034397125244, "logits/rejected": -3.0454013347625732, "logps/chosen": -159.59957885742188, "logps/rejected": -318.9581298828125, "loss": 2.7555, "rewards/accuracies": 0.5, "rewards/chosen": -1.9171210527420044, "rewards/margins": 3.0130186080932617, "rewards/rejected": -4.930140018463135, "step": 290 }, { "epoch": 0.05, "learning_rate": 1.3933903210747446e-05, "logits/chosen": -3.0880625247955322, "logits/rejected": -3.140936851501465, "logps/chosen": -275.37646484375, "logps/rejected": -263.1954650878906, "loss": 4.6253, "rewards/accuracies": 0.5, "rewards/chosen": -4.042414665222168, "rewards/margins": -3.784609079360962, "rewards/rejected": -0.2578052580356598, "step": 291 }, { "epoch": 0.05, "learning_rate": 1.3933169770216298e-05, "logits/chosen": -2.6558425426483154, "logits/rejected": -3.0031275749206543, "logps/chosen": -115.26768493652344, "logps/rejected": -222.58792114257812, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": -0.3662467896938324, "rewards/margins": 3.068967342376709, "rewards/rejected": -3.435214042663574, "step": 292 }, { "epoch": 0.05, "learning_rate": 1.393243632968515e-05, "logits/chosen": -2.4341602325439453, "logits/rejected": -2.9997053146362305, "logps/chosen": -37.647727966308594, "logps/rejected": -156.640380859375, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": 0.5062353610992432, "rewards/margins": 3.0373592376708984, "rewards/rejected": -2.5311238765716553, "step": 293 }, { "epoch": 0.05, "learning_rate": 1.3931702889154001e-05, "logits/chosen": -3.1819002628326416, "logits/rejected": -2.158078193664551, "logps/chosen": -424.31927490234375, "logps/rejected": -113.76271057128906, "loss": 6.2542, "rewards/accuracies": 0.5, "rewards/chosen": -5.2428364753723145, "rewards/margins": -4.715061187744141, "rewards/rejected": -0.5277751684188843, "step": 294 }, { "epoch": 0.05, "learning_rate": 1.3930969448622853e-05, "logits/chosen": -3.06001877784729, "logits/rejected": -2.8266735076904297, "logps/chosen": -107.53952026367188, "logps/rejected": -152.70066833496094, "loss": 0.1093, "rewards/accuracies": 1.0, "rewards/chosen": -0.7472606897354126, "rewards/margins": 3.2785134315490723, "rewards/rejected": -4.0257744789123535, "step": 295 }, { "epoch": 0.05, "learning_rate": 1.3930236008091707e-05, "logits/chosen": -2.9255306720733643, "logits/rejected": -2.588822841644287, "logps/chosen": -172.93447875976562, "logps/rejected": -190.17996215820312, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.20773468911647797, "rewards/margins": 5.514286994934082, "rewards/rejected": -5.306551933288574, "step": 296 }, { "epoch": 0.05, "learning_rate": 1.3929502567560559e-05, "logits/chosen": -3.0490541458129883, "logits/rejected": -3.112224817276001, "logps/chosen": -312.6667175292969, "logps/rejected": -304.1916198730469, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -0.6516085267066956, "rewards/margins": 3.4510886669158936, "rewards/rejected": -4.102697372436523, "step": 297 }, { "epoch": 0.05, "learning_rate": 1.392876912702941e-05, "logits/chosen": -2.285433292388916, "logits/rejected": -2.8885657787323, "logps/chosen": -207.17747497558594, "logps/rejected": -382.9029541015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.43635788559913635, "rewards/margins": 9.486183166503906, "rewards/rejected": -9.049825668334961, "step": 298 }, { "epoch": 0.05, "learning_rate": 1.3928035686498262e-05, "logits/chosen": -0.33261942863464355, "logits/rejected": -2.6866540908813477, "logps/chosen": -17.841306686401367, "logps/rejected": -763.2633666992188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.5990104675292969, "rewards/margins": 9.912117958068848, "rewards/rejected": -9.31310749053955, "step": 299 }, { "epoch": 0.05, "learning_rate": 1.3927302245967116e-05, "logits/chosen": -2.781924247741699, "logits/rejected": -2.540998935699463, "logps/chosen": -193.09912109375, "logps/rejected": -131.6890411376953, "loss": 2.6003, "rewards/accuracies": 0.5, "rewards/chosen": -1.935017704963684, "rewards/margins": -0.5247430801391602, "rewards/rejected": -1.410274624824524, "step": 300 }, { "epoch": 0.05, "learning_rate": 1.3926568805435968e-05, "logits/chosen": -2.8394923210144043, "logits/rejected": -3.1215426921844482, "logps/chosen": -52.195892333984375, "logps/rejected": -168.32298278808594, "loss": 0.2878, "rewards/accuracies": 1.0, "rewards/chosen": 0.24270300567150116, "rewards/margins": 1.2482095956802368, "rewards/rejected": -1.0055066347122192, "step": 301 }, { "epoch": 0.05, "learning_rate": 1.392583536490482e-05, "logits/chosen": -2.0941696166992188, "logits/rejected": -2.960080146789551, "logps/chosen": -129.8282928466797, "logps/rejected": -278.10052490234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.19553136825561523, "rewards/margins": 7.341855049133301, "rewards/rejected": -7.1463236808776855, "step": 302 }, { "epoch": 0.05, "learning_rate": 1.3925101924373672e-05, "logits/chosen": -1.3332796096801758, "logits/rejected": -2.8377511501312256, "logps/chosen": -122.747802734375, "logps/rejected": -365.8153381347656, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.6994819641113281, "rewards/margins": 6.355632781982422, "rewards/rejected": -5.656150817871094, "step": 303 }, { "epoch": 0.05, "learning_rate": 1.3924368483842524e-05, "logits/chosen": -2.6030218601226807, "logits/rejected": -1.6228910684585571, "logps/chosen": -182.0048828125, "logps/rejected": -177.4789581298828, "loss": 4.4275, "rewards/accuracies": 0.5, "rewards/chosen": -3.4349420070648193, "rewards/margins": -0.9460539817810059, "rewards/rejected": -2.4888882637023926, "step": 304 }, { "epoch": 0.05, "learning_rate": 1.3923635043311375e-05, "logits/chosen": -2.366795539855957, "logits/rejected": -2.8366575241088867, "logps/chosen": -322.2510681152344, "logps/rejected": -501.7949523925781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.41961920261383057, "rewards/margins": 10.191840171813965, "rewards/rejected": -10.611459732055664, "step": 305 }, { "epoch": 0.05, "learning_rate": 1.3922901602780227e-05, "logits/chosen": -2.8661162853240967, "logits/rejected": -2.1992440223693848, "logps/chosen": -203.2389373779297, "logps/rejected": -210.4805908203125, "loss": 3.7449, "rewards/accuracies": 0.5, "rewards/chosen": -2.980651617050171, "rewards/margins": -1.6768057346343994, "rewards/rejected": -1.3038456439971924, "step": 306 }, { "epoch": 0.05, "learning_rate": 1.3922168162249079e-05, "logits/chosen": -2.954777479171753, "logits/rejected": -3.3031702041625977, "logps/chosen": -142.68362426757812, "logps/rejected": -281.9704895019531, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.2065403163433075, "rewards/margins": 7.9069671630859375, "rewards/rejected": -7.7004265785217285, "step": 307 }, { "epoch": 0.05, "learning_rate": 1.3921434721717931e-05, "logits/chosen": -2.0574188232421875, "logits/rejected": -2.9996588230133057, "logps/chosen": -217.62551879882812, "logps/rejected": -272.00897216796875, "loss": 3.9737, "rewards/accuracies": 0.5, "rewards/chosen": -3.586179494857788, "rewards/margins": -1.2322602272033691, "rewards/rejected": -2.353919267654419, "step": 308 }, { "epoch": 0.05, "learning_rate": 1.3920701281186785e-05, "logits/chosen": -2.8231656551361084, "logits/rejected": -2.1238296031951904, "logps/chosen": -529.377197265625, "logps/rejected": -449.90594482421875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.1654701232910156, "rewards/margins": 5.859148025512695, "rewards/rejected": -7.024618148803711, "step": 309 }, { "epoch": 0.05, "learning_rate": 1.3919967840655636e-05, "logits/chosen": -3.07685923576355, "logits/rejected": -2.521005392074585, "logps/chosen": -772.7178344726562, "logps/rejected": -413.69158935546875, "loss": 2.5978, "rewards/accuracies": 0.5, "rewards/chosen": -2.9421615600585938, "rewards/margins": 1.190352439880371, "rewards/rejected": -4.132513999938965, "step": 310 }, { "epoch": 0.05, "learning_rate": 1.3919234400124488e-05, "logits/chosen": -1.9509044885635376, "logits/rejected": -2.689737558364868, "logps/chosen": -69.39073944091797, "logps/rejected": -151.05918884277344, "loss": 1.3582, "rewards/accuracies": 0.5, "rewards/chosen": -0.5273385047912598, "rewards/margins": 2.77752423286438, "rewards/rejected": -3.3048627376556396, "step": 311 }, { "epoch": 0.05, "learning_rate": 1.391850095959334e-05, "logits/chosen": -1.9655139446258545, "logits/rejected": -3.142629384994507, "logps/chosen": -80.87063598632812, "logps/rejected": -362.8827819824219, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.8511381149291992, "rewards/margins": 6.345690727233887, "rewards/rejected": -5.4945526123046875, "step": 312 }, { "epoch": 0.05, "learning_rate": 1.3917767519062192e-05, "logits/chosen": -2.237454652786255, "logits/rejected": -2.109445571899414, "logps/chosen": -878.1951293945312, "logps/rejected": -263.3418884277344, "loss": 0.458, "rewards/accuracies": 0.5, "rewards/chosen": -1.7184863090515137, "rewards/margins": 1.0494600534439087, "rewards/rejected": -2.767946243286133, "step": 313 }, { "epoch": 0.05, "learning_rate": 1.3917034078531044e-05, "logits/chosen": -3.1115736961364746, "logits/rejected": -2.463711738586426, "logps/chosen": -277.76971435546875, "logps/rejected": -232.90524291992188, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -0.6304641962051392, "rewards/margins": 4.554163932800293, "rewards/rejected": -5.184628486633301, "step": 314 }, { "epoch": 0.05, "learning_rate": 1.3916300637999896e-05, "logits/chosen": -1.9904905557632446, "logits/rejected": -3.1952602863311768, "logps/chosen": -97.48612976074219, "logps/rejected": -563.7520141601562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.219390869140625, "rewards/margins": 10.756692886352539, "rewards/rejected": -10.537302017211914, "step": 315 }, { "epoch": 0.05, "learning_rate": 1.3915567197468748e-05, "logits/chosen": -3.102969169616699, "logits/rejected": -3.038426399230957, "logps/chosen": -324.2418518066406, "logps/rejected": -387.3912658691406, "loss": 3.6684, "rewards/accuracies": 0.5, "rewards/chosen": -3.239211082458496, "rewards/margins": -1.162696123123169, "rewards/rejected": -2.0765151977539062, "step": 316 }, { "epoch": 0.05, "learning_rate": 1.39148337569376e-05, "logits/chosen": -2.0437917709350586, "logits/rejected": -2.7522921562194824, "logps/chosen": -124.5902328491211, "logps/rejected": -246.6603240966797, "loss": 0.1257, "rewards/accuracies": 1.0, "rewards/chosen": -0.0013900771737098694, "rewards/margins": 3.3707847595214844, "rewards/rejected": -3.3721747398376465, "step": 317 }, { "epoch": 0.05, "learning_rate": 1.3914100316406453e-05, "logits/chosen": -2.310972213745117, "logits/rejected": -3.073568105697632, "logps/chosen": -112.41171264648438, "logps/rejected": -231.35220336914062, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.461120069026947, "rewards/margins": 6.157299995422363, "rewards/rejected": -5.696179389953613, "step": 318 }, { "epoch": 0.05, "learning_rate": 1.3913366875875305e-05, "logits/chosen": -2.385560989379883, "logits/rejected": -3.1771364212036133, "logps/chosen": -399.19866943359375, "logps/rejected": -627.1541748046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7472991943359375, "rewards/margins": 7.6948747634887695, "rewards/rejected": -8.442173957824707, "step": 319 }, { "epoch": 0.05, "learning_rate": 1.3912633435344157e-05, "logits/chosen": -1.1705121994018555, "logits/rejected": -2.862576961517334, "logps/chosen": -27.451786041259766, "logps/rejected": -420.7578125, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 0.846758246421814, "rewards/margins": 13.471020698547363, "rewards/rejected": -12.624262809753418, "step": 320 }, { "epoch": 0.05, "learning_rate": 1.3911899994813009e-05, "logits/chosen": -2.0829405784606934, "logits/rejected": -2.97725510597229, "logps/chosen": -27.817150115966797, "logps/rejected": -243.11627197265625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 1.2137941122055054, "rewards/margins": 6.087145805358887, "rewards/rejected": -4.873351097106934, "step": 321 }, { "epoch": 0.05, "learning_rate": 1.391116655428186e-05, "logits/chosen": -2.9542529582977295, "logits/rejected": -2.529770851135254, "logps/chosen": -453.90057373046875, "logps/rejected": -332.4295349121094, "loss": 1.1817, "rewards/accuracies": 0.5, "rewards/chosen": -1.75341796875, "rewards/margins": 0.2802920341491699, "rewards/rejected": -2.03371000289917, "step": 322 }, { "epoch": 0.05, "learning_rate": 1.3910433113750713e-05, "logits/chosen": -1.7992568016052246, "logits/rejected": -2.8292410373687744, "logps/chosen": -111.12773895263672, "logps/rejected": -406.03125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 0.235333651304245, "rewards/margins": 5.001413345336914, "rewards/rejected": -4.766079902648926, "step": 323 }, { "epoch": 0.05, "learning_rate": 1.3909699673219564e-05, "logits/chosen": -2.171631097793579, "logits/rejected": -3.083379030227661, "logps/chosen": -67.41607666015625, "logps/rejected": -315.07855224609375, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.4393196105957031, "rewards/margins": 7.007916450500488, "rewards/rejected": -7.447236061096191, "step": 324 }, { "epoch": 0.05, "learning_rate": 1.3908966232688416e-05, "logits/chosen": -2.1292197704315186, "logits/rejected": -2.806459903717041, "logps/chosen": -2.5344207286834717, "logps/rejected": -195.07958984375, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": 1.2204115390777588, "rewards/margins": 5.425337791442871, "rewards/rejected": -4.204926013946533, "step": 325 }, { "epoch": 0.05, "learning_rate": 1.3908232792157268e-05, "logits/chosen": -3.029651641845703, "logits/rejected": -2.3400979042053223, "logps/chosen": -295.6902160644531, "logps/rejected": -71.82342529296875, "loss": 9.9733, "rewards/accuracies": 0.0, "rewards/chosen": -9.761344909667969, "rewards/margins": -9.82044792175293, "rewards/rejected": 0.059103868901729584, "step": 326 }, { "epoch": 0.05, "learning_rate": 1.3907499351626122e-05, "logits/chosen": -1.841147780418396, "logits/rejected": -2.919201135635376, "logps/chosen": -186.53549194335938, "logps/rejected": -454.4222412109375, "loss": 1.5291, "rewards/accuracies": 0.5, "rewards/chosen": -0.8700008392333984, "rewards/margins": 9.454195022583008, "rewards/rejected": -10.32419490814209, "step": 327 }, { "epoch": 0.05, "learning_rate": 1.3906765911094974e-05, "logits/chosen": -1.9771291017532349, "logits/rejected": -3.0636608600616455, "logps/chosen": -281.84100341796875, "logps/rejected": -421.60797119140625, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.6669197082519531, "rewards/margins": 5.463076114654541, "rewards/rejected": -6.129995822906494, "step": 328 }, { "epoch": 0.05, "learning_rate": 1.3906032470563826e-05, "logits/chosen": -2.8054678440093994, "logits/rejected": -2.8572144508361816, "logps/chosen": -189.41909790039062, "logps/rejected": -303.16522216796875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.791882336139679, "rewards/margins": 4.208065986633301, "rewards/rejected": -4.999948501586914, "step": 329 }, { "epoch": 0.05, "learning_rate": 1.3905299030032679e-05, "logits/chosen": -2.9665632247924805, "logits/rejected": -2.010711431503296, "logps/chosen": -486.6039733886719, "logps/rejected": -169.0181121826172, "loss": 3.2296, "rewards/accuracies": 0.5, "rewards/chosen": -3.6318116188049316, "rewards/margins": -1.2899105548858643, "rewards/rejected": -2.3419010639190674, "step": 330 }, { "epoch": 0.05, "learning_rate": 1.3904565589501531e-05, "logits/chosen": -3.002145290374756, "logits/rejected": -2.485414743423462, "logps/chosen": -276.7193298339844, "logps/rejected": -197.67086791992188, "loss": 0.1722, "rewards/accuracies": 1.0, "rewards/chosen": -0.6425662040710449, "rewards/margins": 1.8546878099441528, "rewards/rejected": -2.497253894805908, "step": 331 }, { "epoch": 0.05, "learning_rate": 1.3903832148970383e-05, "logits/chosen": -3.0064306259155273, "logits/rejected": -2.299574613571167, "logps/chosen": -628.1903076171875, "logps/rejected": -469.5073547363281, "loss": 2.5634, "rewards/accuracies": 0.5, "rewards/chosen": -2.952230930328369, "rewards/margins": -0.09292745590209961, "rewards/rejected": -2.8593034744262695, "step": 332 }, { "epoch": 0.05, "learning_rate": 1.3903098708439235e-05, "logits/chosen": -3.236258029937744, "logits/rejected": -2.546889066696167, "logps/chosen": -558.408203125, "logps/rejected": -189.8551788330078, "loss": 3.4738, "rewards/accuracies": 0.5, "rewards/chosen": -3.2532546520233154, "rewards/margins": 0.954411506652832, "rewards/rejected": -4.207666397094727, "step": 333 }, { "epoch": 0.05, "learning_rate": 1.3902365267908087e-05, "logits/chosen": -2.849761724472046, "logits/rejected": -2.6260323524475098, "logps/chosen": -164.5991668701172, "logps/rejected": -208.09542846679688, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.37448734045028687, "rewards/margins": 3.9002716541290283, "rewards/rejected": -4.274759292602539, "step": 334 }, { "epoch": 0.05, "learning_rate": 1.3901631827376939e-05, "logits/chosen": -2.672257661819458, "logits/rejected": -2.2534658908843994, "logps/chosen": -146.84286499023438, "logps/rejected": -151.68385314941406, "loss": 2.431, "rewards/accuracies": 0.5, "rewards/chosen": -2.139735460281372, "rewards/margins": 1.5691261291503906, "rewards/rejected": -3.7088615894317627, "step": 335 }, { "epoch": 0.05, "learning_rate": 1.3900898386845792e-05, "logits/chosen": -2.804002046585083, "logits/rejected": -3.3135416507720947, "logps/chosen": -91.8569564819336, "logps/rejected": -260.8874816894531, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.2526888847351074, "rewards/margins": 5.243745803833008, "rewards/rejected": -4.9910569190979, "step": 336 }, { "epoch": 0.05, "learning_rate": 1.3900164946314644e-05, "logits/chosen": -2.6411774158477783, "logits/rejected": -2.203629970550537, "logps/chosen": -427.2987060546875, "logps/rejected": -546.6029663085938, "loss": 3.4702, "rewards/accuracies": 0.5, "rewards/chosen": -3.359105348587036, "rewards/margins": 0.06146669387817383, "rewards/rejected": -3.42057204246521, "step": 337 }, { "epoch": 0.05, "learning_rate": 1.3899431505783496e-05, "logits/chosen": -1.1540741920471191, "logits/rejected": -2.343513011932373, "logps/chosen": -322.65771484375, "logps/rejected": -692.9654541015625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.2926986813545227, "rewards/margins": 8.282876968383789, "rewards/rejected": -8.57557487487793, "step": 338 }, { "epoch": 0.05, "learning_rate": 1.3898698065252348e-05, "logits/chosen": -2.443464994430542, "logits/rejected": -2.94093656539917, "logps/chosen": -84.64054107666016, "logps/rejected": -337.38714599609375, "loss": 0.1373, "rewards/accuracies": 1.0, "rewards/chosen": 0.7027137875556946, "rewards/margins": 3.983853340148926, "rewards/rejected": -3.281139373779297, "step": 339 }, { "epoch": 0.05, "learning_rate": 1.38979646247212e-05, "logits/chosen": -2.2805466651916504, "logits/rejected": -3.162217378616333, "logps/chosen": -21.233640670776367, "logps/rejected": -206.86517333984375, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.6801583170890808, "rewards/margins": 4.650113105773926, "rewards/rejected": -3.9699549674987793, "step": 340 }, { "epoch": 0.05, "learning_rate": 1.3897231184190051e-05, "logits/chosen": -2.941690444946289, "logits/rejected": -2.6677863597869873, "logps/chosen": -220.4258270263672, "logps/rejected": -265.9454040527344, "loss": 1.0248, "rewards/accuracies": 0.5, "rewards/chosen": -0.22988474369049072, "rewards/margins": 1.077271819114685, "rewards/rejected": -1.3071564435958862, "step": 341 }, { "epoch": 0.05, "learning_rate": 1.3896497743658903e-05, "logits/chosen": -1.1463364362716675, "logits/rejected": -2.6779391765594482, "logps/chosen": -39.331687927246094, "logps/rejected": -271.46929931640625, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": 0.8020272254943848, "rewards/margins": 4.706301689147949, "rewards/rejected": -3.9042739868164062, "step": 342 }, { "epoch": 0.05, "learning_rate": 1.3895764303127755e-05, "logits/chosen": -2.300013303756714, "logits/rejected": -2.5207152366638184, "logps/chosen": -254.82379150390625, "logps/rejected": -347.638916015625, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -0.22707712650299072, "rewards/margins": 4.126070022583008, "rewards/rejected": -4.353147029876709, "step": 343 }, { "epoch": 0.05, "learning_rate": 1.3895030862596609e-05, "logits/chosen": -2.3330883979797363, "logits/rejected": -3.063227653503418, "logps/chosen": -5.920400619506836, "logps/rejected": -161.20712280273438, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 1.135776400566101, "rewards/margins": 5.345501899719238, "rewards/rejected": -4.209725379943848, "step": 344 }, { "epoch": 0.05, "learning_rate": 1.389429742206546e-05, "logits/chosen": -2.7426979541778564, "logits/rejected": -3.0584475994110107, "logps/chosen": -25.455265045166016, "logps/rejected": -188.3311767578125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.8358726501464844, "rewards/margins": 4.479780197143555, "rewards/rejected": -3.643907070159912, "step": 345 }, { "epoch": 0.05, "learning_rate": 1.3893563981534313e-05, "logits/chosen": -1.8593720197677612, "logits/rejected": -2.638382911682129, "logps/chosen": -141.6522674560547, "logps/rejected": -403.1040954589844, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.004250586032867432, "rewards/margins": 8.80588150024414, "rewards/rejected": -8.801630973815918, "step": 346 }, { "epoch": 0.05, "learning_rate": 1.3892830541003164e-05, "logits/chosen": -1.920513391494751, "logits/rejected": -3.156593084335327, "logps/chosen": -141.21067810058594, "logps/rejected": -299.82305908203125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.0478748083114624, "rewards/margins": 5.1205668449401855, "rewards/rejected": -5.072691917419434, "step": 347 }, { "epoch": 0.05, "learning_rate": 1.3892097100472016e-05, "logits/chosen": -2.708235025405884, "logits/rejected": -3.139195680618286, "logps/chosen": -502.928466796875, "logps/rejected": -522.630126953125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.30686837434768677, "rewards/margins": 6.587331771850586, "rewards/rejected": -6.894200325012207, "step": 348 }, { "epoch": 0.05, "learning_rate": 1.3891363659940868e-05, "logits/chosen": -2.5236048698425293, "logits/rejected": -3.0248279571533203, "logps/chosen": -407.3932189941406, "logps/rejected": -412.42303466796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.291813611984253, "rewards/margins": 6.122706890106201, "rewards/rejected": -7.414520263671875, "step": 349 }, { "epoch": 0.05, "learning_rate": 1.389063021940972e-05, "logits/chosen": -1.3902031183242798, "logits/rejected": -2.8123903274536133, "logps/chosen": -95.53833770751953, "logps/rejected": -288.1845703125, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.529007613658905, "rewards/margins": 4.819619178771973, "rewards/rejected": -4.290611743927002, "step": 350 }, { "epoch": 0.05, "learning_rate": 1.3889896778878572e-05, "logits/chosen": -2.8384389877319336, "logits/rejected": -2.7707982063293457, "logps/chosen": -166.71746826171875, "logps/rejected": -380.03619384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.1734798401594162, "rewards/margins": 12.184722900390625, "rewards/rejected": -12.358203887939453, "step": 351 }, { "epoch": 0.05, "learning_rate": 1.3889163338347424e-05, "logits/chosen": -3.2399046421051025, "logits/rejected": -1.5556267499923706, "logps/chosen": -420.88494873046875, "logps/rejected": -86.23277282714844, "loss": 7.4546, "rewards/accuracies": 0.0, "rewards/chosen": -6.048117637634277, "rewards/margins": -7.453390121459961, "rewards/rejected": 1.4052717685699463, "step": 352 }, { "epoch": 0.05, "learning_rate": 1.3888429897816277e-05, "logits/chosen": -1.7509404420852661, "logits/rejected": -3.036130666732788, "logps/chosen": -130.12197875976562, "logps/rejected": -364.07415771484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.6542198061943054, "rewards/margins": 6.636277198791504, "rewards/rejected": -5.982057571411133, "step": 353 }, { "epoch": 0.06, "learning_rate": 1.388769645728513e-05, "logits/chosen": -3.158616065979004, "logits/rejected": -2.9407007694244385, "logps/chosen": -672.1802978515625, "logps/rejected": -503.5794677734375, "loss": 1.1114, "rewards/accuracies": 0.5, "rewards/chosen": -0.6604488492012024, "rewards/margins": 0.7064653635025024, "rewards/rejected": -1.36691415309906, "step": 354 }, { "epoch": 0.06, "learning_rate": 1.3886963016753981e-05, "logits/chosen": -2.1233723163604736, "logits/rejected": -2.614712715148926, "logps/chosen": -270.59716796875, "logps/rejected": -351.47442626953125, "loss": 0.2823, "rewards/accuracies": 1.0, "rewards/chosen": 0.09888839721679688, "rewards/margins": 5.143876552581787, "rewards/rejected": -5.04498815536499, "step": 355 }, { "epoch": 0.06, "learning_rate": 1.3886229576222833e-05, "logits/chosen": -2.870330572128296, "logits/rejected": -1.9609330892562866, "logps/chosen": -449.34796142578125, "logps/rejected": -216.5862274169922, "loss": 4.9975, "rewards/accuracies": 0.5, "rewards/chosen": -6.579970836639404, "rewards/margins": -2.332995891571045, "rewards/rejected": -4.246974945068359, "step": 356 }, { "epoch": 0.06, "learning_rate": 1.3885496135691685e-05, "logits/chosen": -2.4848577976226807, "logits/rejected": -2.728445291519165, "logps/chosen": -74.45081329345703, "logps/rejected": -231.9608612060547, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 0.7488502860069275, "rewards/margins": 6.843048095703125, "rewards/rejected": -6.094197750091553, "step": 357 }, { "epoch": 0.06, "learning_rate": 1.3884762695160537e-05, "logits/chosen": -2.1778454780578613, "logits/rejected": -2.978867769241333, "logps/chosen": -270.8974304199219, "logps/rejected": -449.66729736328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.608594536781311, "rewards/margins": 9.141765594482422, "rewards/rejected": -9.750360488891602, "step": 358 }, { "epoch": 0.06, "learning_rate": 1.3884029254629389e-05, "logits/chosen": -2.0721817016601562, "logits/rejected": -2.8078932762145996, "logps/chosen": -554.5044555664062, "logps/rejected": -369.26593017578125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4742237329483032, "rewards/margins": 6.229221343994141, "rewards/rejected": -6.7034454345703125, "step": 359 }, { "epoch": 0.06, "learning_rate": 1.388329581409824e-05, "logits/chosen": -2.4828038215637207, "logits/rejected": -3.001204490661621, "logps/chosen": -227.7986602783203, "logps/rejected": -473.7403564453125, "loss": 1.8666, "rewards/accuracies": 0.5, "rewards/chosen": -2.6417436599731445, "rewards/margins": 3.5059990882873535, "rewards/rejected": -6.147743225097656, "step": 360 }, { "epoch": 0.06, "learning_rate": 1.3882562373567092e-05, "logits/chosen": -1.845572590827942, "logits/rejected": -3.109607458114624, "logps/chosen": -28.45865821838379, "logps/rejected": -405.22674560546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.182520866394043, "rewards/margins": 7.723621368408203, "rewards/rejected": -6.54110050201416, "step": 361 }, { "epoch": 0.06, "learning_rate": 1.3881828933035946e-05, "logits/chosen": -2.822345495223999, "logits/rejected": -3.4153308868408203, "logps/chosen": -77.31564331054688, "logps/rejected": -245.31954956054688, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": 0.6380329132080078, "rewards/margins": 5.376171588897705, "rewards/rejected": -4.738138675689697, "step": 362 }, { "epoch": 0.06, "learning_rate": 1.3881095492504798e-05, "logits/chosen": -1.1275030374526978, "logits/rejected": -3.0266127586364746, "logps/chosen": -97.45182800292969, "logps/rejected": -283.49346923828125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.20164471864700317, "rewards/margins": 5.789755821228027, "rewards/rejected": -5.991400241851807, "step": 363 }, { "epoch": 0.06, "learning_rate": 1.3880362051973651e-05, "logits/chosen": -3.0359280109405518, "logits/rejected": -3.1708550453186035, "logps/chosen": -63.81685256958008, "logps/rejected": -176.4009552001953, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": 0.9528567790985107, "rewards/margins": 3.7790136337280273, "rewards/rejected": -2.8261566162109375, "step": 364 }, { "epoch": 0.06, "learning_rate": 1.3879628611442503e-05, "logits/chosen": -2.5566086769104004, "logits/rejected": -3.0901131629943848, "logps/chosen": -99.41554260253906, "logps/rejected": -267.09234619140625, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": 0.6124401092529297, "rewards/margins": 5.819063186645508, "rewards/rejected": -5.206623077392578, "step": 365 }, { "epoch": 0.06, "learning_rate": 1.3878895170911355e-05, "logits/chosen": -2.0143320560455322, "logits/rejected": -2.8281826972961426, "logps/chosen": -212.15579223632812, "logps/rejected": -274.090576171875, "loss": 2.9225, "rewards/accuracies": 0.5, "rewards/chosen": -2.4247617721557617, "rewards/margins": -1.0067967176437378, "rewards/rejected": -1.4179649353027344, "step": 366 }, { "epoch": 0.06, "learning_rate": 1.3878161730380207e-05, "logits/chosen": -2.6608707904815674, "logits/rejected": -3.0271172523498535, "logps/chosen": -33.00279235839844, "logps/rejected": -126.66792297363281, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": 0.27960097789764404, "rewards/margins": 5.401357173919678, "rewards/rejected": -5.121756553649902, "step": 367 }, { "epoch": 0.06, "learning_rate": 1.3877428289849059e-05, "logits/chosen": -1.1735265254974365, "logits/rejected": -2.9795656204223633, "logps/chosen": -126.44290161132812, "logps/rejected": -367.0316162109375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.7680187225341797, "rewards/margins": 8.437301635742188, "rewards/rejected": -7.669282913208008, "step": 368 }, { "epoch": 0.06, "learning_rate": 1.387669484931791e-05, "logits/chosen": -3.032600164413452, "logits/rejected": -3.353985548019409, "logps/chosen": -36.46079635620117, "logps/rejected": -279.0821533203125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.7120937705039978, "rewards/margins": 6.442378997802734, "rewards/rejected": -5.730285167694092, "step": 369 }, { "epoch": 0.06, "learning_rate": 1.3875961408786763e-05, "logits/chosen": -2.9051942825317383, "logits/rejected": -3.0486931800842285, "logps/chosen": -88.93927764892578, "logps/rejected": -305.61334228515625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 0.884380578994751, "rewards/margins": 6.3606109619140625, "rewards/rejected": -5.476230144500732, "step": 370 }, { "epoch": 0.06, "learning_rate": 1.3875227968255616e-05, "logits/chosen": -2.6725032329559326, "logits/rejected": -2.9332211017608643, "logps/chosen": -94.83768463134766, "logps/rejected": -164.384521484375, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": 1.065687656402588, "rewards/margins": 4.085762977600098, "rewards/rejected": -3.0200753211975098, "step": 371 }, { "epoch": 0.06, "learning_rate": 1.3874494527724468e-05, "logits/chosen": -2.577195882797241, "logits/rejected": -2.876668930053711, "logps/chosen": -247.49566650390625, "logps/rejected": -328.1097106933594, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 0.7255325317382812, "rewards/margins": 4.4713640213012695, "rewards/rejected": -3.7458314895629883, "step": 372 }, { "epoch": 0.06, "learning_rate": 1.387376108719332e-05, "logits/chosen": -2.688365936279297, "logits/rejected": -2.513784885406494, "logps/chosen": -370.800537109375, "logps/rejected": -296.9779357910156, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.9909881353378296, "rewards/margins": 6.370901584625244, "rewards/rejected": -7.361889839172363, "step": 373 }, { "epoch": 0.06, "learning_rate": 1.3873027646662172e-05, "logits/chosen": -2.104586601257324, "logits/rejected": -2.8954031467437744, "logps/chosen": -2.5872690677642822, "logps/rejected": -181.0822296142578, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 1.2959840297698975, "rewards/margins": 5.6884307861328125, "rewards/rejected": -4.392446517944336, "step": 374 }, { "epoch": 0.06, "learning_rate": 1.3872294206131024e-05, "logits/chosen": -2.7136871814727783, "logits/rejected": -2.2852418422698975, "logps/chosen": -534.4885864257812, "logps/rejected": -431.9966125488281, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": 0.29700881242752075, "rewards/margins": 3.34358549118042, "rewards/rejected": -3.046576738357544, "step": 375 }, { "epoch": 0.06, "learning_rate": 1.3871560765599876e-05, "logits/chosen": -2.7427423000335693, "logits/rejected": -2.85212779045105, "logps/chosen": -171.33863830566406, "logps/rejected": -245.91893005371094, "loss": 0.0971, "rewards/accuracies": 1.0, "rewards/chosen": 0.5082771182060242, "rewards/margins": 4.881030559539795, "rewards/rejected": -4.372753620147705, "step": 376 }, { "epoch": 0.06, "learning_rate": 1.3870827325068728e-05, "logits/chosen": -2.9470582008361816, "logits/rejected": -3.2214624881744385, "logps/chosen": -34.359527587890625, "logps/rejected": -208.26417541503906, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": 0.9258931875228882, "rewards/margins": 7.318386554718018, "rewards/rejected": -6.39249324798584, "step": 377 }, { "epoch": 0.06, "learning_rate": 1.387009388453758e-05, "logits/chosen": -2.069885730743408, "logits/rejected": -2.786018133163452, "logps/chosen": -14.902754783630371, "logps/rejected": -126.95975494384766, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 1.042151927947998, "rewards/margins": 4.303502559661865, "rewards/rejected": -3.261350631713867, "step": 378 }, { "epoch": 0.06, "learning_rate": 1.3869360444006431e-05, "logits/chosen": -2.140514850616455, "logits/rejected": -2.6075775623321533, "logps/chosen": -216.15988159179688, "logps/rejected": -250.87579345703125, "loss": 0.7222, "rewards/accuracies": 0.5, "rewards/chosen": -1.0023552179336548, "rewards/margins": 4.087184906005859, "rewards/rejected": -5.089540481567383, "step": 379 }, { "epoch": 0.06, "learning_rate": 1.3868627003475285e-05, "logits/chosen": -3.035088539123535, "logits/rejected": -1.832505464553833, "logps/chosen": -422.495849609375, "logps/rejected": -242.55307006835938, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -1.2108455896377563, "rewards/margins": 3.8141629695892334, "rewards/rejected": -5.025008201599121, "step": 380 }, { "epoch": 0.06, "learning_rate": 1.3867893562944137e-05, "logits/chosen": -2.499361276626587, "logits/rejected": -2.8893070220947266, "logps/chosen": -137.50070190429688, "logps/rejected": -217.55520629882812, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.394422709941864, "rewards/margins": 6.213526248931885, "rewards/rejected": -5.819103717803955, "step": 381 }, { "epoch": 0.06, "learning_rate": 1.3867160122412989e-05, "logits/chosen": -2.6912503242492676, "logits/rejected": -1.9160646200180054, "logps/chosen": -360.1126403808594, "logps/rejected": -422.4327697753906, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -0.3254266679286957, "rewards/margins": 9.618600845336914, "rewards/rejected": -9.9440279006958, "step": 382 }, { "epoch": 0.06, "learning_rate": 1.386642668188184e-05, "logits/chosen": -1.8021044731140137, "logits/rejected": -2.9126269817352295, "logps/chosen": -220.92352294921875, "logps/rejected": -301.71319580078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.679085612297058, "rewards/margins": 6.528969764709473, "rewards/rejected": -8.20805549621582, "step": 383 }, { "epoch": 0.06, "learning_rate": 1.3865693241350692e-05, "logits/chosen": -1.5986840724945068, "logits/rejected": -2.995952844619751, "logps/chosen": -76.58384704589844, "logps/rejected": -549.8770751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.19832736253738403, "rewards/margins": 14.60151481628418, "rewards/rejected": -14.799840927124023, "step": 384 }, { "epoch": 0.06, "learning_rate": 1.3864959800819544e-05, "logits/chosen": -2.663919448852539, "logits/rejected": -2.6091928482055664, "logps/chosen": -264.93072509765625, "logps/rejected": -275.96685791015625, "loss": 6.1333, "rewards/accuracies": 0.5, "rewards/chosen": -6.549927711486816, "rewards/margins": -3.4509451389312744, "rewards/rejected": -3.098982334136963, "step": 385 }, { "epoch": 0.06, "learning_rate": 1.3864226360288396e-05, "logits/chosen": -3.261597156524658, "logits/rejected": -2.9005393981933594, "logps/chosen": -259.10272216796875, "logps/rejected": -114.01620483398438, "loss": 2.4489, "rewards/accuracies": 0.0, "rewards/chosen": -4.27190637588501, "rewards/margins": -2.209521770477295, "rewards/rejected": -2.0623843669891357, "step": 386 }, { "epoch": 0.06, "learning_rate": 1.3863492919757248e-05, "logits/chosen": -2.7410364151000977, "logits/rejected": -3.081723690032959, "logps/chosen": -37.12324523925781, "logps/rejected": -154.28414916992188, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": 1.2676033973693848, "rewards/margins": 3.920917272567749, "rewards/rejected": -2.6533138751983643, "step": 387 }, { "epoch": 0.06, "learning_rate": 1.38627594792261e-05, "logits/chosen": -2.206240653991699, "logits/rejected": -2.6778292655944824, "logps/chosen": -447.66168212890625, "logps/rejected": -392.49615478515625, "loss": 5.4831, "rewards/accuracies": 0.5, "rewards/chosen": -7.538547039031982, "rewards/margins": 1.4411330223083496, "rewards/rejected": -8.979680061340332, "step": 388 }, { "epoch": 0.06, "learning_rate": 1.3862026038694953e-05, "logits/chosen": -2.8341081142425537, "logits/rejected": -1.2993662357330322, "logps/chosen": -441.4864501953125, "logps/rejected": -138.62742614746094, "loss": 8.9597, "rewards/accuracies": 0.0, "rewards/chosen": -7.880126953125, "rewards/margins": -8.959402084350586, "rewards/rejected": 1.0792757272720337, "step": 389 }, { "epoch": 0.06, "learning_rate": 1.3861292598163805e-05, "logits/chosen": -2.8979952335357666, "logits/rejected": -2.7514867782592773, "logps/chosen": -193.271484375, "logps/rejected": -75.84745788574219, "loss": 4.3791, "rewards/accuracies": 0.5, "rewards/chosen": -3.796473264694214, "rewards/margins": -1.4532887935638428, "rewards/rejected": -2.343184471130371, "step": 390 }, { "epoch": 0.06, "learning_rate": 1.3860559157632657e-05, "logits/chosen": -3.132884979248047, "logits/rejected": -2.803273916244507, "logps/chosen": -110.82927703857422, "logps/rejected": -108.31501770019531, "loss": 0.3447, "rewards/accuracies": 1.0, "rewards/chosen": 1.367098093032837, "rewards/margins": 2.9310197830200195, "rewards/rejected": -1.563921570777893, "step": 391 }, { "epoch": 0.06, "learning_rate": 1.3859825717101509e-05, "logits/chosen": -2.9186954498291016, "logits/rejected": -2.110495090484619, "logps/chosen": -774.166748046875, "logps/rejected": -572.8814697265625, "loss": 2.3743, "rewards/accuracies": 0.5, "rewards/chosen": -2.272592067718506, "rewards/margins": 1.5401062965393066, "rewards/rejected": -3.8126983642578125, "step": 392 }, { "epoch": 0.06, "learning_rate": 1.3859092276570361e-05, "logits/chosen": -0.985338032245636, "logits/rejected": -2.313802719116211, "logps/chosen": -179.44834899902344, "logps/rejected": -442.1324462890625, "loss": 2.9238, "rewards/accuracies": 0.5, "rewards/chosen": -1.960842490196228, "rewards/margins": 2.258758068084717, "rewards/rejected": -4.219600200653076, "step": 393 }, { "epoch": 0.06, "learning_rate": 1.3858358836039213e-05, "logits/chosen": -2.450324058532715, "logits/rejected": -2.723217248916626, "logps/chosen": -59.387184143066406, "logps/rejected": -237.05624389648438, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": 0.43549346923828125, "rewards/margins": 5.77569580078125, "rewards/rejected": -5.340202331542969, "step": 394 }, { "epoch": 0.06, "learning_rate": 1.3857625395508065e-05, "logits/chosen": -3.1532368659973145, "logits/rejected": -3.3550384044647217, "logps/chosen": -26.211811065673828, "logps/rejected": -109.4498291015625, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": 1.4343430995941162, "rewards/margins": 4.377831935882568, "rewards/rejected": -2.943488836288452, "step": 395 }, { "epoch": 0.06, "learning_rate": 1.3856891954976917e-05, "logits/chosen": -2.933751344680786, "logits/rejected": -2.111509323120117, "logps/chosen": -284.78790283203125, "logps/rejected": -141.3123321533203, "loss": 3.2785, "rewards/accuracies": 0.5, "rewards/chosen": -4.121160984039307, "rewards/margins": -2.172062873840332, "rewards/rejected": -1.9490981101989746, "step": 396 }, { "epoch": 0.06, "learning_rate": 1.385615851444577e-05, "logits/chosen": -2.6406021118164062, "logits/rejected": -3.169369697570801, "logps/chosen": -23.457141876220703, "logps/rejected": -211.40611267089844, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 0.2688769996166229, "rewards/margins": 5.811870574951172, "rewards/rejected": -5.542993545532227, "step": 397 }, { "epoch": 0.06, "learning_rate": 1.3855425073914624e-05, "logits/chosen": -2.8467955589294434, "logits/rejected": -2.804081439971924, "logps/chosen": -45.45710754394531, "logps/rejected": -63.194480895996094, "loss": 0.2569, "rewards/accuracies": 1.0, "rewards/chosen": 0.20381566882133484, "rewards/margins": 1.4223599433898926, "rewards/rejected": -1.2185442447662354, "step": 398 }, { "epoch": 0.06, "learning_rate": 1.3854691633383476e-05, "logits/chosen": -2.683859348297119, "logits/rejected": -3.286489963531494, "logps/chosen": -154.95013427734375, "logps/rejected": -230.84371948242188, "loss": 0.7259, "rewards/accuracies": 0.5, "rewards/chosen": 0.03633728623390198, "rewards/margins": 3.6673977375030518, "rewards/rejected": -3.6310606002807617, "step": 399 }, { "epoch": 0.06, "learning_rate": 1.3853958192852328e-05, "logits/chosen": -2.264390707015991, "logits/rejected": -2.772141695022583, "logps/chosen": -80.13089752197266, "logps/rejected": -211.2816162109375, "loss": 0.1751, "rewards/accuracies": 1.0, "rewards/chosen": 0.6450614333152771, "rewards/margins": 2.9098918437957764, "rewards/rejected": -2.2648305892944336, "step": 400 }, { "epoch": 0.06, "learning_rate": 1.385322475232118e-05, "logits/chosen": -3.004436492919922, "logits/rejected": -2.5756165981292725, "logps/chosen": -371.62335205078125, "logps/rejected": -299.0638427734375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.10076543688774109, "rewards/margins": 6.006044387817383, "rewards/rejected": -6.106810092926025, "step": 401 }, { "epoch": 0.06, "learning_rate": 1.3852491311790031e-05, "logits/chosen": -3.2100391387939453, "logits/rejected": -2.2497215270996094, "logps/chosen": -250.87677001953125, "logps/rejected": -7.871741771697998, "loss": 3.8457, "rewards/accuracies": 0.5, "rewards/chosen": -1.991642713546753, "rewards/margins": -3.2591588497161865, "rewards/rejected": 1.2675158977508545, "step": 402 }, { "epoch": 0.06, "learning_rate": 1.3851757871258883e-05, "logits/chosen": -2.8358917236328125, "logits/rejected": -2.781944990158081, "logps/chosen": -173.54542541503906, "logps/rejected": -261.0726318359375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.6474418640136719, "rewards/margins": 5.572409152984619, "rewards/rejected": -4.924967288970947, "step": 403 }, { "epoch": 0.06, "learning_rate": 1.3851024430727735e-05, "logits/chosen": -3.0503156185150146, "logits/rejected": -2.826631784439087, "logps/chosen": -688.9356079101562, "logps/rejected": -526.836669921875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.7244579792022705, "rewards/margins": 6.955191612243652, "rewards/rejected": -7.679649353027344, "step": 404 }, { "epoch": 0.06, "learning_rate": 1.3850290990196587e-05, "logits/chosen": -2.4262001514434814, "logits/rejected": -3.1377265453338623, "logps/chosen": -286.2833557128906, "logps/rejected": -376.126220703125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0907859802246094, "rewards/margins": 5.585309982299805, "rewards/rejected": -6.676095962524414, "step": 405 }, { "epoch": 0.06, "learning_rate": 1.3849557549665439e-05, "logits/chosen": -2.2084970474243164, "logits/rejected": -2.8812015056610107, "logps/chosen": -79.20325469970703, "logps/rejected": -310.9911193847656, "loss": 0.4554, "rewards/accuracies": 0.5, "rewards/chosen": -1.0411607027053833, "rewards/margins": 1.8152152299880981, "rewards/rejected": -2.8563759326934814, "step": 406 }, { "epoch": 0.06, "learning_rate": 1.3848824109134292e-05, "logits/chosen": -2.6141414642333984, "logits/rejected": -2.9203665256500244, "logps/chosen": -18.887853622436523, "logps/rejected": -229.13900756835938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9843272566795349, "rewards/margins": 10.318187713623047, "rewards/rejected": -9.333861351013184, "step": 407 }, { "epoch": 0.06, "learning_rate": 1.3848090668603144e-05, "logits/chosen": -1.2306170463562012, "logits/rejected": -3.0270724296569824, "logps/chosen": -164.1097412109375, "logps/rejected": -484.5460205078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 1.3441472053527832, "rewards/margins": 8.37464427947998, "rewards/rejected": -7.0304975509643555, "step": 408 }, { "epoch": 0.06, "learning_rate": 1.3847357228071996e-05, "logits/chosen": -2.797639846801758, "logits/rejected": -2.9763376712799072, "logps/chosen": -2.7271077632904053, "logps/rejected": -171.50936889648438, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 1.3077375888824463, "rewards/margins": 4.773643493652344, "rewards/rejected": -3.4659056663513184, "step": 409 }, { "epoch": 0.06, "learning_rate": 1.3846623787540848e-05, "logits/chosen": -2.6129343509674072, "logits/rejected": -2.672030448913574, "logps/chosen": -73.04120635986328, "logps/rejected": -189.98748779296875, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 0.2961137890815735, "rewards/margins": 5.745598316192627, "rewards/rejected": -5.449484348297119, "step": 410 }, { "epoch": 0.06, "learning_rate": 1.38458903470097e-05, "logits/chosen": -2.7645270824432373, "logits/rejected": -2.195033311843872, "logps/chosen": -349.0298156738281, "logps/rejected": -332.874267578125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.6485780477523804, "rewards/margins": 6.321483612060547, "rewards/rejected": -5.672905445098877, "step": 411 }, { "epoch": 0.06, "learning_rate": 1.3845156906478552e-05, "logits/chosen": -2.4218430519104004, "logits/rejected": -3.052065372467041, "logps/chosen": -250.97628784179688, "logps/rejected": -537.2102661132812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.07177353650331497, "rewards/margins": 10.8446683883667, "rewards/rejected": -10.916441917419434, "step": 412 }, { "epoch": 0.06, "learning_rate": 1.3844423465947404e-05, "logits/chosen": -1.9247829914093018, "logits/rejected": -2.776275873184204, "logps/chosen": -48.03286361694336, "logps/rejected": -376.567626953125, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.4016408622264862, "rewards/margins": 7.974271774291992, "rewards/rejected": -7.572630882263184, "step": 413 }, { "epoch": 0.06, "learning_rate": 1.3843690025416256e-05, "logits/chosen": -3.001281976699829, "logits/rejected": -2.602546453475952, "logps/chosen": -462.0146484375, "logps/rejected": -394.3196716308594, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8937163949012756, "rewards/margins": 6.405792236328125, "rewards/rejected": -7.299508571624756, "step": 414 }, { "epoch": 0.06, "learning_rate": 1.3842956584885107e-05, "logits/chosen": -2.4039742946624756, "logits/rejected": -2.9539618492126465, "logps/chosen": -391.5706481933594, "logps/rejected": -340.4508056640625, "loss": 0.2451, "rewards/accuracies": 1.0, "rewards/chosen": -0.9943618774414062, "rewards/margins": 3.60280704498291, "rewards/rejected": -4.597168922424316, "step": 415 }, { "epoch": 0.06, "learning_rate": 1.3842223144353961e-05, "logits/chosen": -2.4979088306427, "logits/rejected": -2.911222457885742, "logps/chosen": -5.349496364593506, "logps/rejected": -101.56166076660156, "loss": 0.2211, "rewards/accuracies": 1.0, "rewards/chosen": 0.913013219833374, "rewards/margins": 2.565734624862671, "rewards/rejected": -1.6527214050292969, "step": 416 }, { "epoch": 0.06, "learning_rate": 1.3841489703822813e-05, "logits/chosen": -1.4211851358413696, "logits/rejected": -2.7424070835113525, "logps/chosen": -200.58770751953125, "logps/rejected": -289.5647277832031, "loss": 2.8288, "rewards/accuracies": 0.5, "rewards/chosen": -2.2606325149536133, "rewards/margins": 1.464494228363037, "rewards/rejected": -3.7251265048980713, "step": 417 }, { "epoch": 0.07, "learning_rate": 1.3840756263291665e-05, "logits/chosen": -3.0329644680023193, "logits/rejected": -3.145205020904541, "logps/chosen": -502.86285400390625, "logps/rejected": -328.6346740722656, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.1515274047851562, "rewards/margins": 5.018664836883545, "rewards/rejected": -6.170192241668701, "step": 418 }, { "epoch": 0.07, "learning_rate": 1.3840022822760517e-05, "logits/chosen": -2.5426738262176514, "logits/rejected": -2.8878278732299805, "logps/chosen": -211.1091766357422, "logps/rejected": -247.77862548828125, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": -2.5094711780548096, "rewards/margins": 3.6669363975524902, "rewards/rejected": -6.176407337188721, "step": 419 }, { "epoch": 0.07, "learning_rate": 1.3839289382229368e-05, "logits/chosen": -2.925767421722412, "logits/rejected": -2.8088109493255615, "logps/chosen": -195.52972412109375, "logps/rejected": -128.84951782226562, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 0.5109202265739441, "rewards/margins": 4.921554088592529, "rewards/rejected": -4.4106340408325195, "step": 420 }, { "epoch": 0.07, "learning_rate": 1.383855594169822e-05, "logits/chosen": -2.874502420425415, "logits/rejected": -2.3035640716552734, "logps/chosen": -114.91714477539062, "logps/rejected": -3.3608076572418213, "loss": 3.6781, "rewards/accuracies": 0.0, "rewards/chosen": -2.3856353759765625, "rewards/margins": -3.615399122238159, "rewards/rejected": 1.2297637462615967, "step": 421 }, { "epoch": 0.07, "learning_rate": 1.3837822501167072e-05, "logits/chosen": -1.6399132013320923, "logits/rejected": -2.6482043266296387, "logps/chosen": -168.5205535888672, "logps/rejected": -113.46807098388672, "loss": 5.0513, "rewards/accuracies": 0.5, "rewards/chosen": -3.8072280883789062, "rewards/margins": -2.013697385787964, "rewards/rejected": -1.7935309410095215, "step": 422 }, { "epoch": 0.07, "learning_rate": 1.3837089060635924e-05, "logits/chosen": -3.081648826599121, "logits/rejected": -2.6687700748443604, "logps/chosen": -214.35043334960938, "logps/rejected": -225.24398803710938, "loss": 2.1747, "rewards/accuracies": 0.5, "rewards/chosen": -1.9791009426116943, "rewards/margins": 1.3609683513641357, "rewards/rejected": -3.340069055557251, "step": 423 }, { "epoch": 0.07, "learning_rate": 1.3836355620104776e-05, "logits/chosen": -2.7861924171447754, "logits/rejected": -3.000333547592163, "logps/chosen": -700.4558715820312, "logps/rejected": -659.4454345703125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.19371797144412994, "rewards/margins": 5.528068542480469, "rewards/rejected": -5.7217864990234375, "step": 424 }, { "epoch": 0.07, "learning_rate": 1.383562217957363e-05, "logits/chosen": -2.3520987033843994, "logits/rejected": -2.6929829120635986, "logps/chosen": -260.5356750488281, "logps/rejected": -354.1016540527344, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.5064640045166016, "rewards/margins": 8.120437622070312, "rewards/rejected": -7.613973617553711, "step": 425 }, { "epoch": 0.07, "learning_rate": 1.3834888739042481e-05, "logits/chosen": -2.4441888332366943, "logits/rejected": -2.8434693813323975, "logps/chosen": -178.21652221679688, "logps/rejected": -364.5037536621094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.949389636516571, "rewards/margins": 7.797028541564941, "rewards/rejected": -8.746417999267578, "step": 426 }, { "epoch": 0.07, "learning_rate": 1.3834155298511333e-05, "logits/chosen": -2.818491220474243, "logits/rejected": -3.1520020961761475, "logps/chosen": -113.9337158203125, "logps/rejected": -284.9523010253906, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": -0.45869940519332886, "rewards/margins": 5.40257453918457, "rewards/rejected": -5.861273765563965, "step": 427 }, { "epoch": 0.07, "learning_rate": 1.3833421857980185e-05, "logits/chosen": -1.5097370147705078, "logits/rejected": -2.8968851566314697, "logps/chosen": -104.18762969970703, "logps/rejected": -489.9107360839844, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.1583574414253235, "rewards/margins": 5.983917236328125, "rewards/rejected": -6.142274856567383, "step": 428 }, { "epoch": 0.07, "learning_rate": 1.3832688417449037e-05, "logits/chosen": -2.971980333328247, "logits/rejected": -3.157341241836548, "logps/chosen": -189.31005859375, "logps/rejected": -342.67401123046875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.131969928741455, "rewards/margins": 5.53607702255249, "rewards/rejected": -6.668046951293945, "step": 429 }, { "epoch": 0.07, "learning_rate": 1.3831954976917889e-05, "logits/chosen": -2.575507164001465, "logits/rejected": -1.5878643989562988, "logps/chosen": -247.48904418945312, "logps/rejected": -124.46561431884766, "loss": 1.2415, "rewards/accuracies": 0.5, "rewards/chosen": -1.1409943103790283, "rewards/margins": -0.16914260387420654, "rewards/rejected": -0.9718517065048218, "step": 430 }, { "epoch": 0.07, "learning_rate": 1.3831221536386743e-05, "logits/chosen": -2.3068301677703857, "logits/rejected": -2.669844627380371, "logps/chosen": -131.69342041015625, "logps/rejected": -151.8177490234375, "loss": 1.5327, "rewards/accuracies": 0.5, "rewards/chosen": -0.6967406868934631, "rewards/margins": 3.674670457839966, "rewards/rejected": -4.371410846710205, "step": 431 }, { "epoch": 0.07, "learning_rate": 1.3830488095855594e-05, "logits/chosen": -2.0151476860046387, "logits/rejected": -2.9757800102233887, "logps/chosen": -175.00411987304688, "logps/rejected": -581.3938598632812, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/chosen": 0.4673389494419098, "rewards/margins": 9.29311466217041, "rewards/rejected": -8.825775146484375, "step": 432 }, { "epoch": 0.07, "learning_rate": 1.3829754655324448e-05, "logits/chosen": -2.8644721508026123, "logits/rejected": -2.672043800354004, "logps/chosen": -140.9895477294922, "logps/rejected": -260.245849609375, "loss": 0.0845, "rewards/accuracies": 1.0, "rewards/chosen": -0.38978198170661926, "rewards/margins": 5.384082317352295, "rewards/rejected": -5.773864269256592, "step": 433 }, { "epoch": 0.07, "learning_rate": 1.38290212147933e-05, "logits/chosen": -1.845261812210083, "logits/rejected": -2.9324557781219482, "logps/chosen": -23.26296615600586, "logps/rejected": -204.97683715820312, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": 0.7435266971588135, "rewards/margins": 4.471434593200684, "rewards/rejected": -3.72790789604187, "step": 434 }, { "epoch": 0.07, "learning_rate": 1.3828287774262152e-05, "logits/chosen": -2.916003704071045, "logits/rejected": -1.7922028303146362, "logps/chosen": -437.7844543457031, "logps/rejected": -434.8477478027344, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.715613603591919, "rewards/margins": 6.4975104331970215, "rewards/rejected": -7.2131242752075195, "step": 435 }, { "epoch": 0.07, "learning_rate": 1.3827554333731004e-05, "logits/chosen": -1.3617160320281982, "logits/rejected": -2.194561243057251, "logps/chosen": -140.6145477294922, "logps/rejected": -308.3543395996094, "loss": 0.2207, "rewards/accuracies": 1.0, "rewards/chosen": 0.08339765667915344, "rewards/margins": 2.6729483604431152, "rewards/rejected": -2.589550733566284, "step": 436 }, { "epoch": 0.07, "learning_rate": 1.3826820893199856e-05, "logits/chosen": -2.6509692668914795, "logits/rejected": -3.06313419342041, "logps/chosen": -93.75223541259766, "logps/rejected": -300.99139404296875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.2642067074775696, "rewards/margins": 7.204368591308594, "rewards/rejected": -7.468575477600098, "step": 437 }, { "epoch": 0.07, "learning_rate": 1.3826087452668707e-05, "logits/chosen": -2.0163676738739014, "logits/rejected": -2.949927568435669, "logps/chosen": -154.61253356933594, "logps/rejected": -597.5394287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5228679180145264, "rewards/margins": 13.670263290405273, "rewards/rejected": -13.147396087646484, "step": 438 }, { "epoch": 0.07, "learning_rate": 1.382535401213756e-05, "logits/chosen": -2.8274717330932617, "logits/rejected": -3.3551995754241943, "logps/chosen": -255.6253662109375, "logps/rejected": -401.9012756347656, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -0.20332235097885132, "rewards/margins": 4.793431282043457, "rewards/rejected": -4.996753215789795, "step": 439 }, { "epoch": 0.07, "learning_rate": 1.3824620571606411e-05, "logits/chosen": -1.7857507467269897, "logits/rejected": -2.837900161743164, "logps/chosen": -6.2370524406433105, "logps/rejected": -140.2274627685547, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 1.3994684219360352, "rewards/margins": 3.9061946868896484, "rewards/rejected": -2.5067262649536133, "step": 440 }, { "epoch": 0.07, "learning_rate": 1.3823887131075263e-05, "logits/chosen": -2.308344841003418, "logits/rejected": -2.7293078899383545, "logps/chosen": -129.53787231445312, "logps/rejected": -179.74212646484375, "loss": 2.2078, "rewards/accuracies": 0.5, "rewards/chosen": -1.6292701959609985, "rewards/margins": 2.1119980812072754, "rewards/rejected": -3.7412686347961426, "step": 441 }, { "epoch": 0.07, "learning_rate": 1.3823153690544117e-05, "logits/chosen": -2.954688787460327, "logits/rejected": -3.097334623336792, "logps/chosen": -15.795409202575684, "logps/rejected": -237.7635498046875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.8291710019111633, "rewards/margins": 8.616056442260742, "rewards/rejected": -7.7868852615356445, "step": 442 }, { "epoch": 0.07, "learning_rate": 1.3822420250012968e-05, "logits/chosen": -2.810943841934204, "logits/rejected": -2.26840877532959, "logps/chosen": -274.45184326171875, "logps/rejected": -227.20152282714844, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -0.823371171951294, "rewards/margins": 4.869680881500244, "rewards/rejected": -5.693052291870117, "step": 443 }, { "epoch": 0.07, "learning_rate": 1.382168680948182e-05, "logits/chosen": -2.7743561267852783, "logits/rejected": -2.424081325531006, "logps/chosen": -346.81817626953125, "logps/rejected": -223.75241088867188, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": 0.9403852224349976, "rewards/margins": 4.021551609039307, "rewards/rejected": -3.0811662673950195, "step": 444 }, { "epoch": 0.07, "learning_rate": 1.3820953368950672e-05, "logits/chosen": -2.843700647354126, "logits/rejected": -2.597930669784546, "logps/chosen": -636.705810546875, "logps/rejected": -379.565185546875, "loss": 0.0743, "rewards/accuracies": 1.0, "rewards/chosen": -1.1856857538223267, "rewards/margins": 3.5560312271118164, "rewards/rejected": -4.741717338562012, "step": 445 }, { "epoch": 0.07, "learning_rate": 1.3820219928419524e-05, "logits/chosen": -3.0643362998962402, "logits/rejected": -3.249660015106201, "logps/chosen": -632.1461791992188, "logps/rejected": -712.6393432617188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7122222781181335, "rewards/margins": 8.169766426086426, "rewards/rejected": -8.881988525390625, "step": 446 }, { "epoch": 0.07, "learning_rate": 1.3819486487888376e-05, "logits/chosen": -1.8797703981399536, "logits/rejected": -3.085239887237549, "logps/chosen": -71.85527038574219, "logps/rejected": -304.36468505859375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.679106593132019, "rewards/margins": 6.18596887588501, "rewards/rejected": -5.506862163543701, "step": 447 }, { "epoch": 0.07, "learning_rate": 1.3818753047357228e-05, "logits/chosen": -2.728872776031494, "logits/rejected": -1.5742741823196411, "logps/chosen": -488.1467590332031, "logps/rejected": -330.96002197265625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.1688629388809204, "rewards/margins": 4.6964616775512695, "rewards/rejected": -5.8653244972229, "step": 448 }, { "epoch": 0.07, "learning_rate": 1.381801960682608e-05, "logits/chosen": -2.8424479961395264, "logits/rejected": -3.063464641571045, "logps/chosen": -86.9183349609375, "logps/rejected": -189.091552734375, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.9520528316497803, "rewards/margins": 5.087833404541016, "rewards/rejected": -6.039886474609375, "step": 449 }, { "epoch": 0.07, "learning_rate": 1.3817286166294932e-05, "logits/chosen": -2.496497631072998, "logits/rejected": -2.8661465644836426, "logps/chosen": -559.7799072265625, "logps/rejected": -574.8720703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.49267733097076416, "rewards/margins": 7.684211730957031, "rewards/rejected": -8.176889419555664, "step": 450 }, { "epoch": 0.07, "learning_rate": 1.3816552725763785e-05, "logits/chosen": -2.6435604095458984, "logits/rejected": -2.9762744903564453, "logps/chosen": -107.80967712402344, "logps/rejected": -250.2644500732422, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.46655958890914917, "rewards/margins": 6.685419082641602, "rewards/rejected": -7.151978492736816, "step": 451 }, { "epoch": 0.07, "learning_rate": 1.3815819285232637e-05, "logits/chosen": -1.8342597484588623, "logits/rejected": -2.825599431991577, "logps/chosen": -105.72477722167969, "logps/rejected": -362.189697265625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.3269721269607544, "rewards/margins": 6.540003299713135, "rewards/rejected": -7.866975784301758, "step": 452 }, { "epoch": 0.07, "learning_rate": 1.3815085844701489e-05, "logits/chosen": -2.5060055255889893, "logits/rejected": -2.7937355041503906, "logps/chosen": -95.74170684814453, "logps/rejected": -76.8724365234375, "loss": 3.227, "rewards/accuracies": 0.5, "rewards/chosen": -2.588571310043335, "rewards/margins": -2.128871440887451, "rewards/rejected": -0.45969974994659424, "step": 453 }, { "epoch": 0.07, "learning_rate": 1.381435240417034e-05, "logits/chosen": -2.4587209224700928, "logits/rejected": -2.9603097438812256, "logps/chosen": -83.48016357421875, "logps/rejected": -242.55322265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.15196456015110016, "rewards/margins": 7.941720485687256, "rewards/rejected": -7.789755821228027, "step": 454 }, { "epoch": 0.07, "learning_rate": 1.3813618963639193e-05, "logits/chosen": -1.833828091621399, "logits/rejected": -3.1066300868988037, "logps/chosen": -42.79707336425781, "logps/rejected": -209.97653198242188, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": -0.18427926301956177, "rewards/margins": 4.059757709503174, "rewards/rejected": -4.244036674499512, "step": 455 }, { "epoch": 0.07, "learning_rate": 1.3812885523108045e-05, "logits/chosen": -3.040508270263672, "logits/rejected": -2.9335198402404785, "logps/chosen": -293.42059326171875, "logps/rejected": -305.8091735839844, "loss": 2.9427, "rewards/accuracies": 0.5, "rewards/chosen": -3.565139055252075, "rewards/margins": 2.4963433742523193, "rewards/rejected": -6.0614824295043945, "step": 456 }, { "epoch": 0.07, "learning_rate": 1.3812152082576896e-05, "logits/chosen": -2.7806620597839355, "logits/rejected": -3.0864779949188232, "logps/chosen": -96.64185333251953, "logps/rejected": -261.24334716796875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 0.6058178544044495, "rewards/margins": 4.5560455322265625, "rewards/rejected": -3.950227737426758, "step": 457 }, { "epoch": 0.07, "learning_rate": 1.3811418642045748e-05, "logits/chosen": -2.0309009552001953, "logits/rejected": -2.88224196434021, "logps/chosen": -179.84317016601562, "logps/rejected": -469.2563171386719, "loss": 2.5587, "rewards/accuracies": 0.5, "rewards/chosen": -4.299777030944824, "rewards/margins": 0.872183084487915, "rewards/rejected": -5.171960353851318, "step": 458 }, { "epoch": 0.07, "learning_rate": 1.38106852015146e-05, "logits/chosen": -1.7689423561096191, "logits/rejected": -3.1236941814422607, "logps/chosen": -119.71440887451172, "logps/rejected": -339.07147216796875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4241012632846832, "rewards/margins": 8.006776809692383, "rewards/rejected": -8.430877685546875, "step": 459 }, { "epoch": 0.07, "learning_rate": 1.3809951760983454e-05, "logits/chosen": -2.6708154678344727, "logits/rejected": -2.96905779838562, "logps/chosen": -16.9676513671875, "logps/rejected": -217.60153198242188, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": 0.8873641490936279, "rewards/margins": 5.989622116088867, "rewards/rejected": -5.102258682250977, "step": 460 }, { "epoch": 0.07, "learning_rate": 1.3809218320452306e-05, "logits/chosen": -2.7579081058502197, "logits/rejected": -3.0035667419433594, "logps/chosen": -6.0598344802856445, "logps/rejected": -139.95181274414062, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 1.0270698070526123, "rewards/margins": 4.432622909545898, "rewards/rejected": -3.405552864074707, "step": 461 }, { "epoch": 0.07, "learning_rate": 1.3808484879921158e-05, "logits/chosen": -1.8217525482177734, "logits/rejected": -2.9326364994049072, "logps/chosen": -190.41757202148438, "logps/rejected": -419.4647216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.20992697775363922, "rewards/margins": 11.07978630065918, "rewards/rejected": -11.289712905883789, "step": 462 }, { "epoch": 0.07, "learning_rate": 1.380775143939001e-05, "logits/chosen": -2.6105353832244873, "logits/rejected": -2.4445865154266357, "logps/chosen": -456.4530334472656, "logps/rejected": -470.3014831542969, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.7198890447616577, "rewards/margins": 6.870527744293213, "rewards/rejected": -7.59041690826416, "step": 463 }, { "epoch": 0.07, "learning_rate": 1.3807017998858861e-05, "logits/chosen": -2.7290542125701904, "logits/rejected": -2.2848188877105713, "logps/chosen": -216.2278289794922, "logps/rejected": -374.1112365722656, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.22487393021583557, "rewards/margins": 10.969966888427734, "rewards/rejected": -10.745092391967773, "step": 464 }, { "epoch": 0.07, "learning_rate": 1.3806284558327715e-05, "logits/chosen": -2.5916929244995117, "logits/rejected": -3.280407667160034, "logps/chosen": -57.27231979370117, "logps/rejected": -352.10784912109375, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 0.3174324333667755, "rewards/margins": 11.063461303710938, "rewards/rejected": -10.7460298538208, "step": 465 }, { "epoch": 0.07, "learning_rate": 1.3805551117796567e-05, "logits/chosen": -2.033172369003296, "logits/rejected": -2.9316048622131348, "logps/chosen": -94.80809783935547, "logps/rejected": -380.98785400390625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.4978131651878357, "rewards/margins": 9.939741134643555, "rewards/rejected": -9.44192886352539, "step": 466 }, { "epoch": 0.07, "learning_rate": 1.3804817677265419e-05, "logits/chosen": -2.0698435306549072, "logits/rejected": -2.8196282386779785, "logps/chosen": -104.48622131347656, "logps/rejected": -229.6299591064453, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.9511898756027222, "rewards/margins": 5.157276630401611, "rewards/rejected": -6.108466148376465, "step": 467 }, { "epoch": 0.07, "learning_rate": 1.380408423673427e-05, "logits/chosen": -2.7001731395721436, "logits/rejected": -2.199636459350586, "logps/chosen": -452.1656494140625, "logps/rejected": -58.62699508666992, "loss": 8.4761, "rewards/accuracies": 0.0, "rewards/chosen": -8.30152416229248, "rewards/margins": -8.473708152770996, "rewards/rejected": 0.17218372225761414, "step": 468 }, { "epoch": 0.07, "learning_rate": 1.3803350796203124e-05, "logits/chosen": -2.8359873294830322, "logits/rejected": -2.0670225620269775, "logps/chosen": -388.7858581542969, "logps/rejected": -237.6861572265625, "loss": 2.2639, "rewards/accuracies": 0.5, "rewards/chosen": -3.3017053604125977, "rewards/margins": 1.5102508068084717, "rewards/rejected": -4.811956405639648, "step": 469 }, { "epoch": 0.07, "learning_rate": 1.3802617355671976e-05, "logits/chosen": -2.9058542251586914, "logits/rejected": -2.767233371734619, "logps/chosen": -499.75244140625, "logps/rejected": -485.12353515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.4500808715820312, "rewards/margins": 10.224955558776855, "rewards/rejected": -13.675036430358887, "step": 470 }, { "epoch": 0.07, "learning_rate": 1.3801883915140828e-05, "logits/chosen": -3.2384467124938965, "logits/rejected": -3.462350368499756, "logps/chosen": -22.004623413085938, "logps/rejected": -183.48048400878906, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.4472549557685852, "rewards/margins": 7.208812713623047, "rewards/rejected": -6.761557579040527, "step": 471 }, { "epoch": 0.07, "learning_rate": 1.380115047460968e-05, "logits/chosen": -3.015810012817383, "logits/rejected": -1.7835510969161987, "logps/chosen": -457.2923278808594, "logps/rejected": -209.28382873535156, "loss": 5.6029, "rewards/accuracies": 0.5, "rewards/chosen": -6.3792595863342285, "rewards/margins": -1.9981701374053955, "rewards/rejected": -4.381089210510254, "step": 472 }, { "epoch": 0.07, "learning_rate": 1.3800417034078532e-05, "logits/chosen": -2.170177698135376, "logits/rejected": -2.697603225708008, "logps/chosen": -153.71502685546875, "logps/rejected": -226.74783325195312, "loss": 1.8639, "rewards/accuracies": 0.5, "rewards/chosen": -2.268731117248535, "rewards/margins": 5.071704864501953, "rewards/rejected": -7.34043550491333, "step": 473 }, { "epoch": 0.07, "learning_rate": 1.3799683593547383e-05, "logits/chosen": -2.6472015380859375, "logits/rejected": -2.0756685733795166, "logps/chosen": -280.3670349121094, "logps/rejected": -118.40380859375, "loss": 4.0874, "rewards/accuracies": 0.0, "rewards/chosen": -3.8074119091033936, "rewards/margins": -3.9201316833496094, "rewards/rejected": 0.1127198189496994, "step": 474 }, { "epoch": 0.07, "learning_rate": 1.3798950153016235e-05, "logits/chosen": -0.8893142342567444, "logits/rejected": -2.6826794147491455, "logps/chosen": -85.15756225585938, "logps/rejected": -583.487060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.044865988194942474, "rewards/margins": 15.963178634643555, "rewards/rejected": -15.918312072753906, "step": 475 }, { "epoch": 0.07, "learning_rate": 1.3798216712485087e-05, "logits/chosen": -2.8750369548797607, "logits/rejected": -1.148283839225769, "logps/chosen": -365.0557861328125, "logps/rejected": -54.6091423034668, "loss": 6.8328, "rewards/accuracies": 0.0, "rewards/chosen": -6.668584823608398, "rewards/margins": -6.831753253936768, "rewards/rejected": 0.16316860914230347, "step": 476 }, { "epoch": 0.07, "learning_rate": 1.3797483271953939e-05, "logits/chosen": -1.9518024921417236, "logits/rejected": -2.9493463039398193, "logps/chosen": -117.6416015625, "logps/rejected": -365.4299621582031, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -0.4841041564941406, "rewards/margins": 9.865270614624023, "rewards/rejected": -10.349374771118164, "step": 477 }, { "epoch": 0.07, "learning_rate": 1.3796749831422793e-05, "logits/chosen": -2.7055435180664062, "logits/rejected": -2.8914923667907715, "logps/chosen": -297.9332275390625, "logps/rejected": -268.18695068359375, "loss": 3.33, "rewards/accuracies": 0.5, "rewards/chosen": -4.180826187133789, "rewards/margins": -0.3031303882598877, "rewards/rejected": -3.8776955604553223, "step": 478 }, { "epoch": 0.07, "learning_rate": 1.3796016390891645e-05, "logits/chosen": -3.3109171390533447, "logits/rejected": -2.7756011486053467, "logps/chosen": -276.9942626953125, "logps/rejected": -105.59263610839844, "loss": 5.0974, "rewards/accuracies": 0.5, "rewards/chosen": -5.052073001861572, "rewards/margins": -3.7369251251220703, "rewards/rejected": -1.3151477575302124, "step": 479 }, { "epoch": 0.07, "learning_rate": 1.3795282950360496e-05, "logits/chosen": -1.1046172380447388, "logits/rejected": -2.167839288711548, "logps/chosen": -185.2142333984375, "logps/rejected": -256.365478515625, "loss": 3.015, "rewards/accuracies": 0.5, "rewards/chosen": -3.719700574874878, "rewards/margins": 0.9693517684936523, "rewards/rejected": -4.689052581787109, "step": 480 }, { "epoch": 0.07, "learning_rate": 1.3794549509829348e-05, "logits/chosen": -2.747288465499878, "logits/rejected": -2.6073150634765625, "logps/chosen": -463.52032470703125, "logps/rejected": -432.1882629394531, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.19335594773292542, "rewards/margins": 7.42035436630249, "rewards/rejected": -7.613710403442383, "step": 481 }, { "epoch": 0.07, "learning_rate": 1.37938160692982e-05, "logits/chosen": -2.948714017868042, "logits/rejected": -2.6681034564971924, "logps/chosen": -233.7258758544922, "logps/rejected": -199.00421142578125, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -1.5327198505401611, "rewards/margins": 4.812731742858887, "rewards/rejected": -6.345451354980469, "step": 482 }, { "epoch": 0.08, "learning_rate": 1.3793082628767052e-05, "logits/chosen": -2.798464775085449, "logits/rejected": -3.151254415512085, "logps/chosen": -212.90850830078125, "logps/rejected": -307.95977783203125, "loss": 0.2769, "rewards/accuracies": 1.0, "rewards/chosen": -1.5763015747070312, "rewards/margins": 2.73484468460083, "rewards/rejected": -4.3111467361450195, "step": 483 }, { "epoch": 0.08, "learning_rate": 1.3792349188235904e-05, "logits/chosen": -1.5388634204864502, "logits/rejected": -2.9529788494110107, "logps/chosen": -108.27706909179688, "logps/rejected": -328.4915771484375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.7832863330841064, "rewards/margins": 5.8925933837890625, "rewards/rejected": -6.675879955291748, "step": 484 }, { "epoch": 0.08, "learning_rate": 1.3791615747704756e-05, "logits/chosen": -2.5110878944396973, "logits/rejected": -3.290034770965576, "logps/chosen": -101.32872009277344, "logps/rejected": -166.73846435546875, "loss": 1.9352, "rewards/accuracies": 0.5, "rewards/chosen": -3.0729150772094727, "rewards/margins": 1.1512824296951294, "rewards/rejected": -4.2241973876953125, "step": 485 }, { "epoch": 0.08, "learning_rate": 1.3790882307173608e-05, "logits/chosen": -2.7045743465423584, "logits/rejected": -2.667771100997925, "logps/chosen": -807.4609375, "logps/rejected": -401.37310791015625, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -1.359405517578125, "rewards/margins": 3.510634660720825, "rewards/rejected": -4.870039939880371, "step": 486 }, { "epoch": 0.08, "learning_rate": 1.3790148866642461e-05, "logits/chosen": -0.7927528023719788, "logits/rejected": -1.474202036857605, "logps/chosen": -128.2970733642578, "logps/rejected": -338.0848083496094, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.5432395935058594, "rewards/margins": 4.1498589515686035, "rewards/rejected": -4.693098545074463, "step": 487 }, { "epoch": 0.08, "learning_rate": 1.3789415426111313e-05, "logits/chosen": -2.668905735015869, "logits/rejected": -3.239593029022217, "logps/chosen": -60.942779541015625, "logps/rejected": -198.965576171875, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.679380476474762, "rewards/margins": 5.471615791320801, "rewards/rejected": -6.150996208190918, "step": 488 }, { "epoch": 0.08, "learning_rate": 1.3788681985580165e-05, "logits/chosen": -3.128880023956299, "logits/rejected": -3.3990821838378906, "logps/chosen": -30.155996322631836, "logps/rejected": -249.7283477783203, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 0.18635672330856323, "rewards/margins": 4.6451497077941895, "rewards/rejected": -4.458792686462402, "step": 489 }, { "epoch": 0.08, "learning_rate": 1.3787948545049017e-05, "logits/chosen": -2.4522781372070312, "logits/rejected": -2.727736711502075, "logps/chosen": -58.09573745727539, "logps/rejected": -260.7276306152344, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.020030975341796875, "rewards/margins": 9.154203414916992, "rewards/rejected": -9.174234390258789, "step": 490 }, { "epoch": 0.08, "learning_rate": 1.3787215104517869e-05, "logits/chosen": -1.4396222829818726, "logits/rejected": -1.907140851020813, "logps/chosen": -180.9053497314453, "logps/rejected": -166.6807403564453, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -0.6849214434623718, "rewards/margins": 4.700538635253906, "rewards/rejected": -5.385459899902344, "step": 491 }, { "epoch": 0.08, "learning_rate": 1.378648166398672e-05, "logits/chosen": -0.888194739818573, "logits/rejected": -3.1429853439331055, "logps/chosen": -77.82119750976562, "logps/rejected": -489.0047912597656, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -0.3638116717338562, "rewards/margins": 3.145413875579834, "rewards/rejected": -3.509225606918335, "step": 492 }, { "epoch": 0.08, "learning_rate": 1.3785748223455573e-05, "logits/chosen": -2.28987979888916, "logits/rejected": -3.2075963020324707, "logps/chosen": -258.4901123046875, "logps/rejected": -464.20458984375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.011845350265503, "rewards/margins": 6.362173557281494, "rewards/rejected": -7.374019145965576, "step": 493 }, { "epoch": 0.08, "learning_rate": 1.3785014782924424e-05, "logits/chosen": -2.4092233180999756, "logits/rejected": -2.712958812713623, "logps/chosen": -265.5015869140625, "logps/rejected": -222.15122985839844, "loss": 0.3162, "rewards/accuracies": 1.0, "rewards/chosen": -0.8156513571739197, "rewards/margins": 1.7133296728134155, "rewards/rejected": -2.5289812088012695, "step": 494 }, { "epoch": 0.08, "learning_rate": 1.3784281342393276e-05, "logits/chosen": -1.7947547435760498, "logits/rejected": -2.960402011871338, "logps/chosen": -313.012939453125, "logps/rejected": -553.384033203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.9598983526229858, "rewards/margins": 7.745495796203613, "rewards/rejected": -8.705394744873047, "step": 495 }, { "epoch": 0.08, "learning_rate": 1.378354790186213e-05, "logits/chosen": -2.3156332969665527, "logits/rejected": -2.8768486976623535, "logps/chosen": -179.32821655273438, "logps/rejected": -391.6994323730469, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.5918567776679993, "rewards/margins": 7.501132011413574, "rewards/rejected": -8.092988967895508, "step": 496 }, { "epoch": 0.08, "learning_rate": 1.3782814461330982e-05, "logits/chosen": -2.2868878841400146, "logits/rejected": -3.150175094604492, "logps/chosen": -205.91839599609375, "logps/rejected": -241.93136596679688, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.7772944569587708, "rewards/margins": 4.242217063903809, "rewards/rejected": -5.0195112228393555, "step": 497 }, { "epoch": 0.08, "learning_rate": 1.3782081020799834e-05, "logits/chosen": -3.0902674198150635, "logits/rejected": -2.67084002494812, "logps/chosen": -312.2012634277344, "logps/rejected": -267.8807678222656, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -1.0958648920059204, "rewards/margins": 4.058733940124512, "rewards/rejected": -5.154599189758301, "step": 498 }, { "epoch": 0.08, "learning_rate": 1.3781347580268687e-05, "logits/chosen": -2.6963469982147217, "logits/rejected": -2.468642234802246, "logps/chosen": -270.44061279296875, "logps/rejected": -365.87933349609375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.2630608081817627, "rewards/margins": 6.690708160400391, "rewards/rejected": -7.953768730163574, "step": 499 }, { "epoch": 0.08, "learning_rate": 1.3780614139737539e-05, "logits/chosen": -2.1511785984039307, "logits/rejected": -2.991509437561035, "logps/chosen": -255.8072052001953, "logps/rejected": -404.500732421875, "loss": 3.9056, "rewards/accuracies": 0.5, "rewards/chosen": -4.995153427124023, "rewards/margins": 1.1423420906066895, "rewards/rejected": -6.137495517730713, "step": 500 }, { "epoch": 0.08, "learning_rate": 1.3779880699206391e-05, "logits/chosen": -2.9539248943328857, "logits/rejected": -2.832488536834717, "logps/chosen": -171.2398681640625, "logps/rejected": -156.98744201660156, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -0.734270453453064, "rewards/margins": 3.706010341644287, "rewards/rejected": -4.440280914306641, "step": 501 }, { "epoch": 0.08, "learning_rate": 1.3779147258675243e-05, "logits/chosen": -1.4632459878921509, "logits/rejected": -2.6214921474456787, "logps/chosen": -65.5797119140625, "logps/rejected": -436.76251220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.2226090431213379, "rewards/margins": 11.585225105285645, "rewards/rejected": -11.80783462524414, "step": 502 }, { "epoch": 0.08, "learning_rate": 1.3778413818144095e-05, "logits/chosen": -2.463958740234375, "logits/rejected": -3.19826602935791, "logps/chosen": -295.3800048828125, "logps/rejected": -391.4101257324219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3049757778644562, "rewards/margins": 8.643224716186523, "rewards/rejected": -8.948200225830078, "step": 503 }, { "epoch": 0.08, "learning_rate": 1.3777680377612947e-05, "logits/chosen": -2.665102958679199, "logits/rejected": -3.022160053253174, "logps/chosen": -12.011028289794922, "logps/rejected": -183.3484649658203, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.5590900778770447, "rewards/margins": 7.307170867919922, "rewards/rejected": -6.748080253601074, "step": 504 }, { "epoch": 0.08, "learning_rate": 1.37769469370818e-05, "logits/chosen": -2.9998204708099365, "logits/rejected": -2.709643602371216, "logps/chosen": -189.430419921875, "logps/rejected": -143.52816772460938, "loss": 1.7543, "rewards/accuracies": 0.5, "rewards/chosen": -2.526254177093506, "rewards/margins": 0.9426206350326538, "rewards/rejected": -3.468874931335449, "step": 505 }, { "epoch": 0.08, "learning_rate": 1.3776213496550652e-05, "logits/chosen": -2.791323661804199, "logits/rejected": -2.9571871757507324, "logps/chosen": -268.540283203125, "logps/rejected": -206.218505859375, "loss": 2.07, "rewards/accuracies": 0.5, "rewards/chosen": -3.3931221961975098, "rewards/margins": 1.7105004787445068, "rewards/rejected": -5.1036224365234375, "step": 506 }, { "epoch": 0.08, "learning_rate": 1.3775480056019504e-05, "logits/chosen": -2.5650088787078857, "logits/rejected": -2.7959959506988525, "logps/chosen": -120.1650390625, "logps/rejected": -309.631591796875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.1210942268371582, "rewards/margins": 5.320064544677734, "rewards/rejected": -6.441159248352051, "step": 507 }, { "epoch": 0.08, "learning_rate": 1.3774746615488356e-05, "logits/chosen": -2.685631275177002, "logits/rejected": -2.56620717048645, "logps/chosen": -317.106201171875, "logps/rejected": -378.5807189941406, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3117859363555908, "rewards/margins": 6.960864067077637, "rewards/rejected": -8.272650718688965, "step": 508 }, { "epoch": 0.08, "learning_rate": 1.3774013174957208e-05, "logits/chosen": -1.2468440532684326, "logits/rejected": -2.7116587162017822, "logps/chosen": -65.42813110351562, "logps/rejected": -301.347412109375, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -0.5321650505065918, "rewards/margins": 6.374589920043945, "rewards/rejected": -6.906754970550537, "step": 509 }, { "epoch": 0.08, "learning_rate": 1.377327973442606e-05, "logits/chosen": -2.0574710369110107, "logits/rejected": -2.9110805988311768, "logps/chosen": -228.96990966796875, "logps/rejected": -511.5446472167969, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.0508718490600586, "rewards/margins": 8.125992774963379, "rewards/rejected": -10.176864624023438, "step": 510 }, { "epoch": 0.08, "learning_rate": 1.3772546293894911e-05, "logits/chosen": -2.603274345397949, "logits/rejected": -2.8550076484680176, "logps/chosen": -157.3768310546875, "logps/rejected": -145.08261108398438, "loss": 4.187, "rewards/accuracies": 0.5, "rewards/chosen": -4.841710567474365, "rewards/margins": -0.17384767532348633, "rewards/rejected": -4.667862892150879, "step": 511 }, { "epoch": 0.08, "learning_rate": 1.3771812853363763e-05, "logits/chosen": -1.7774766683578491, "logits/rejected": -2.925244092941284, "logps/chosen": -32.01725387573242, "logps/rejected": -283.6837463378906, "loss": 0.0911, "rewards/accuracies": 1.0, "rewards/chosen": -0.26886337995529175, "rewards/margins": 3.667732000350952, "rewards/rejected": -3.9365954399108887, "step": 512 }, { "epoch": 0.08, "learning_rate": 1.3771079412832615e-05, "logits/chosen": -2.474759817123413, "logits/rejected": -3.14821457862854, "logps/chosen": -68.6778335571289, "logps/rejected": -267.5906982421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5090017318725586, "rewards/margins": 7.066897869110107, "rewards/rejected": -7.575899600982666, "step": 513 }, { "epoch": 0.08, "learning_rate": 1.3770345972301469e-05, "logits/chosen": -2.5332627296447754, "logits/rejected": -2.852928400039673, "logps/chosen": -288.92437744140625, "logps/rejected": -322.13446044921875, "loss": 4.07, "rewards/accuracies": 0.5, "rewards/chosen": -5.008718967437744, "rewards/margins": 1.4237327575683594, "rewards/rejected": -6.4324517250061035, "step": 514 }, { "epoch": 0.08, "learning_rate": 1.376961253177032e-05, "logits/chosen": -2.4136507511138916, "logits/rejected": -2.8183484077453613, "logps/chosen": -53.2242317199707, "logps/rejected": -277.2352294921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.6123206615447998, "rewards/margins": 7.250086307525635, "rewards/rejected": -7.862407207489014, "step": 515 }, { "epoch": 0.08, "learning_rate": 1.3768879091239173e-05, "logits/chosen": -2.7261531352996826, "logits/rejected": -2.901026487350464, "logps/chosen": -125.65986633300781, "logps/rejected": -208.35079956054688, "loss": 0.086, "rewards/accuracies": 1.0, "rewards/chosen": -1.694576621055603, "rewards/margins": 3.4824445247650146, "rewards/rejected": -5.177021026611328, "step": 516 }, { "epoch": 0.08, "learning_rate": 1.3768145650708024e-05, "logits/chosen": -1.6170551776885986, "logits/rejected": -2.7425827980041504, "logps/chosen": -64.846923828125, "logps/rejected": -343.1903076171875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.36183130741119385, "rewards/margins": 10.044475555419922, "rewards/rejected": -10.406307220458984, "step": 517 }, { "epoch": 0.08, "learning_rate": 1.3767412210176876e-05, "logits/chosen": -1.1001793146133423, "logits/rejected": -2.7843778133392334, "logps/chosen": -48.03807067871094, "logps/rejected": -322.245849609375, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.16044387221336365, "rewards/margins": 4.331521511077881, "rewards/rejected": -4.491965293884277, "step": 518 }, { "epoch": 0.08, "learning_rate": 1.3766678769645728e-05, "logits/chosen": -2.8947458267211914, "logits/rejected": -3.2188379764556885, "logps/chosen": -128.44715881347656, "logps/rejected": -334.59722900390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.4470687806606293, "rewards/margins": 7.66855525970459, "rewards/rejected": -8.115623474121094, "step": 519 }, { "epoch": 0.08, "learning_rate": 1.376594532911458e-05, "logits/chosen": -2.660729169845581, "logits/rejected": -1.4583815336227417, "logps/chosen": -252.3843994140625, "logps/rejected": -154.10507202148438, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -0.7111774682998657, "rewards/margins": 3.9888250827789307, "rewards/rejected": -4.700002670288086, "step": 520 }, { "epoch": 0.08, "learning_rate": 1.3765211888583432e-05, "logits/chosen": -3.2707197666168213, "logits/rejected": -2.9280288219451904, "logps/chosen": -1066.81689453125, "logps/rejected": -670.2335815429688, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.2868409156799316, "rewards/margins": 5.793495178222656, "rewards/rejected": -8.08033561706543, "step": 521 }, { "epoch": 0.08, "learning_rate": 1.3764478448052284e-05, "logits/chosen": -2.6200578212738037, "logits/rejected": -2.76450777053833, "logps/chosen": -106.46920013427734, "logps/rejected": -239.44143676757812, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.9515216946601868, "rewards/margins": 6.987005710601807, "rewards/rejected": -7.9385271072387695, "step": 522 }, { "epoch": 0.08, "learning_rate": 1.3763745007521137e-05, "logits/chosen": -0.988864004611969, "logits/rejected": -2.835986375808716, "logps/chosen": -47.20869445800781, "logps/rejected": -354.56329345703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.38204246759414673, "rewards/margins": 8.295255661010742, "rewards/rejected": -8.677297592163086, "step": 523 }, { "epoch": 0.08, "learning_rate": 1.376301156698999e-05, "logits/chosen": -3.2457635402679443, "logits/rejected": -3.3410136699676514, "logps/chosen": -76.78939819335938, "logps/rejected": -132.43072509765625, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": -0.6939104199409485, "rewards/margins": 4.459999084472656, "rewards/rejected": -5.153909683227539, "step": 524 }, { "epoch": 0.08, "learning_rate": 1.3762278126458841e-05, "logits/chosen": -1.889552116394043, "logits/rejected": -2.855762243270874, "logps/chosen": -190.54483032226562, "logps/rejected": -364.060302734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7583587765693665, "rewards/margins": 7.37811279296875, "rewards/rejected": -8.13647174835205, "step": 525 }, { "epoch": 0.08, "learning_rate": 1.3761544685927693e-05, "logits/chosen": -2.739187479019165, "logits/rejected": -2.806466579437256, "logps/chosen": -96.0267333984375, "logps/rejected": -74.24774932861328, "loss": 1.5014, "rewards/accuracies": 0.5, "rewards/chosen": -1.693697214126587, "rewards/margins": 0.9283576011657715, "rewards/rejected": -2.6220548152923584, "step": 526 }, { "epoch": 0.08, "learning_rate": 1.3760811245396545e-05, "logits/chosen": -2.9071855545043945, "logits/rejected": -2.172650098800659, "logps/chosen": -347.9208984375, "logps/rejected": -446.5374755859375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -2.4632370471954346, "rewards/margins": 7.363922595977783, "rewards/rejected": -9.827159881591797, "step": 527 }, { "epoch": 0.08, "learning_rate": 1.3760077804865397e-05, "logits/chosen": -2.7409093379974365, "logits/rejected": -1.2980101108551025, "logps/chosen": -218.14337158203125, "logps/rejected": -133.0749053955078, "loss": 1.3676, "rewards/accuracies": 0.5, "rewards/chosen": -2.3928425312042236, "rewards/margins": 1.437127947807312, "rewards/rejected": -3.829970359802246, "step": 528 }, { "epoch": 0.08, "learning_rate": 1.3759344364334249e-05, "logits/chosen": -2.811953544616699, "logits/rejected": -2.5305614471435547, "logps/chosen": -292.3861389160156, "logps/rejected": -391.0501403808594, "loss": 3.7007, "rewards/accuracies": 0.5, "rewards/chosen": -4.522006511688232, "rewards/margins": 1.1293766498565674, "rewards/rejected": -5.651383399963379, "step": 529 }, { "epoch": 0.08, "learning_rate": 1.37586109238031e-05, "logits/chosen": -2.9828884601593018, "logits/rejected": -3.1178417205810547, "logps/chosen": -123.74836730957031, "logps/rejected": -220.0404052734375, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": -0.8746957778930664, "rewards/margins": 3.3444647789001465, "rewards/rejected": -4.219160556793213, "step": 530 }, { "epoch": 0.08, "learning_rate": 1.3757877483271954e-05, "logits/chosen": -2.9580891132354736, "logits/rejected": -1.9801372289657593, "logps/chosen": -157.84422302246094, "logps/rejected": -51.03733444213867, "loss": 3.9523, "rewards/accuracies": 0.0, "rewards/chosen": -4.23349142074585, "rewards/margins": -3.926726818084717, "rewards/rejected": -0.30676451325416565, "step": 531 }, { "epoch": 0.08, "learning_rate": 1.3757144042740806e-05, "logits/chosen": -2.0038535594940186, "logits/rejected": -2.651658058166504, "logps/chosen": -157.96771240234375, "logps/rejected": -186.90708923339844, "loss": 2.8744, "rewards/accuracies": 0.5, "rewards/chosen": -3.126065492630005, "rewards/margins": 0.46770358085632324, "rewards/rejected": -3.593769073486328, "step": 532 }, { "epoch": 0.08, "learning_rate": 1.375641060220966e-05, "logits/chosen": -1.301399827003479, "logits/rejected": -2.9059741497039795, "logps/chosen": -271.213623046875, "logps/rejected": -495.30511474609375, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -1.1785888671875, "rewards/margins": 6.2012939453125, "rewards/rejected": -7.3798828125, "step": 533 }, { "epoch": 0.08, "learning_rate": 1.3755677161678511e-05, "logits/chosen": -2.9060542583465576, "logits/rejected": -2.7552266120910645, "logps/chosen": -254.0919189453125, "logps/rejected": -289.3302307128906, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.7787179946899414, "rewards/margins": 6.265214920043945, "rewards/rejected": -9.04393196105957, "step": 534 }, { "epoch": 0.08, "learning_rate": 1.3754943721147363e-05, "logits/chosen": -3.132075309753418, "logits/rejected": -3.126991033554077, "logps/chosen": -162.16709899902344, "logps/rejected": -311.72552490234375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.44903719425201416, "rewards/margins": 5.5623979568481445, "rewards/rejected": -6.011434555053711, "step": 535 }, { "epoch": 0.08, "learning_rate": 1.3754210280616215e-05, "logits/chosen": -1.2339000701904297, "logits/rejected": -2.9072322845458984, "logps/chosen": -67.13813018798828, "logps/rejected": -331.66314697265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.44532546401023865, "rewards/margins": 7.267124176025391, "rewards/rejected": -7.71245002746582, "step": 536 }, { "epoch": 0.08, "learning_rate": 1.3753476840085067e-05, "logits/chosen": -3.0774388313293457, "logits/rejected": -3.270756959915161, "logps/chosen": -513.7559204101562, "logps/rejected": -495.802001953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9859634637832642, "rewards/margins": 7.672001838684082, "rewards/rejected": -8.657965660095215, "step": 537 }, { "epoch": 0.08, "learning_rate": 1.3752743399553919e-05, "logits/chosen": -2.705143928527832, "logits/rejected": -3.259552478790283, "logps/chosen": -13.959578514099121, "logps/rejected": -189.55844116210938, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.16604018211364746, "rewards/margins": 5.577644348144531, "rewards/rejected": -5.411604404449463, "step": 538 }, { "epoch": 0.08, "learning_rate": 1.375200995902277e-05, "logits/chosen": -2.0703279972076416, "logits/rejected": -2.6093688011169434, "logps/chosen": -154.12191772460938, "logps/rejected": -185.40611267089844, "loss": 1.1109, "rewards/accuracies": 0.5, "rewards/chosen": -2.2676234245300293, "rewards/margins": 1.5432450771331787, "rewards/rejected": -3.810868501663208, "step": 539 }, { "epoch": 0.08, "learning_rate": 1.3751276518491624e-05, "logits/chosen": -2.867568016052246, "logits/rejected": -2.4874300956726074, "logps/chosen": -286.9031982421875, "logps/rejected": -268.7620849609375, "loss": 3.3913, "rewards/accuracies": 0.5, "rewards/chosen": -5.891912937164307, "rewards/margins": -1.538201928138733, "rewards/rejected": -4.353711128234863, "step": 540 }, { "epoch": 0.08, "learning_rate": 1.3750543077960476e-05, "logits/chosen": -2.9930427074432373, "logits/rejected": -3.1862595081329346, "logps/chosen": -191.40174865722656, "logps/rejected": -331.50177001953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.4856940507888794, "rewards/margins": 9.434723854064941, "rewards/rejected": -8.949029922485352, "step": 541 }, { "epoch": 0.08, "learning_rate": 1.3749809637429328e-05, "logits/chosen": -2.677154064178467, "logits/rejected": -1.7727547883987427, "logps/chosen": -279.4721374511719, "logps/rejected": -346.7710876464844, "loss": 1.5232, "rewards/accuracies": 0.5, "rewards/chosen": -2.5426089763641357, "rewards/margins": 3.723599910736084, "rewards/rejected": -6.266209125518799, "step": 542 }, { "epoch": 0.08, "learning_rate": 1.374907619689818e-05, "logits/chosen": -2.0318405628204346, "logits/rejected": -3.1149373054504395, "logps/chosen": -181.98548889160156, "logps/rejected": -463.2093505859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6769559383392334, "rewards/margins": 7.51805305480957, "rewards/rejected": -9.195009231567383, "step": 543 }, { "epoch": 0.08, "learning_rate": 1.3748342756367032e-05, "logits/chosen": -2.813075542449951, "logits/rejected": -2.9537360668182373, "logps/chosen": -52.31135177612305, "logps/rejected": -130.23509216308594, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.8889598846435547, "rewards/margins": 4.7575531005859375, "rewards/rejected": -5.646512985229492, "step": 544 }, { "epoch": 0.08, "learning_rate": 1.3747609315835884e-05, "logits/chosen": -2.898942470550537, "logits/rejected": -2.9599549770355225, "logps/chosen": -264.903076171875, "logps/rejected": -335.0221862792969, "loss": 2.7154, "rewards/accuracies": 0.5, "rewards/chosen": -3.1144356727600098, "rewards/margins": 0.5996322631835938, "rewards/rejected": -3.7140679359436035, "step": 545 }, { "epoch": 0.08, "learning_rate": 1.3746875875304736e-05, "logits/chosen": -2.725381851196289, "logits/rejected": -2.8936469554901123, "logps/chosen": -150.14361572265625, "logps/rejected": -256.19720458984375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8132728338241577, "rewards/margins": 6.650794982910156, "rewards/rejected": -7.4640679359436035, "step": 546 }, { "epoch": 0.09, "learning_rate": 1.3746142434773588e-05, "logits/chosen": -1.9561805725097656, "logits/rejected": -2.964357376098633, "logps/chosen": -150.13961791992188, "logps/rejected": -369.48004150390625, "loss": 0.2184, "rewards/accuracies": 1.0, "rewards/chosen": -0.5156633853912354, "rewards/margins": 2.1533491611480713, "rewards/rejected": -2.6690125465393066, "step": 547 }, { "epoch": 0.09, "learning_rate": 1.374540899424244e-05, "logits/chosen": -1.4261316061019897, "logits/rejected": -2.790560722351074, "logps/chosen": -112.79332733154297, "logps/rejected": -277.8419189453125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.4337103068828583, "rewards/margins": 6.051733016967773, "rewards/rejected": -6.485443115234375, "step": 548 }, { "epoch": 0.09, "learning_rate": 1.3744675553711293e-05, "logits/chosen": -2.6076180934906006, "logits/rejected": -2.2702395915985107, "logps/chosen": -215.36050415039062, "logps/rejected": -124.67851257324219, "loss": 3.4143, "rewards/accuracies": 0.0, "rewards/chosen": -4.706826210021973, "rewards/margins": -3.374079942703247, "rewards/rejected": -1.332746148109436, "step": 549 }, { "epoch": 0.09, "learning_rate": 1.3743942113180145e-05, "logits/chosen": -2.8336455821990967, "logits/rejected": -1.9580916166305542, "logps/chosen": -151.08148193359375, "logps/rejected": -184.09249877929688, "loss": 3.3897, "rewards/accuracies": 0.5, "rewards/chosen": -3.611246347427368, "rewards/margins": 0.8081498146057129, "rewards/rejected": -4.41939640045166, "step": 550 }, { "epoch": 0.09, "learning_rate": 1.3743208672648997e-05, "logits/chosen": -3.0908944606781006, "logits/rejected": -2.5240745544433594, "logps/chosen": -188.6431884765625, "logps/rejected": -175.15512084960938, "loss": 2.855, "rewards/accuracies": 0.5, "rewards/chosen": -2.5285987854003906, "rewards/margins": 1.7728137969970703, "rewards/rejected": -4.301412582397461, "step": 551 }, { "epoch": 0.09, "learning_rate": 1.3742475232117849e-05, "logits/chosen": -2.562469720840454, "logits/rejected": -2.916132688522339, "logps/chosen": -38.192420959472656, "logps/rejected": -182.264404296875, "loss": 0.0534, "rewards/accuracies": 1.0, "rewards/chosen": -0.4293188154697418, "rewards/margins": 3.9588794708251953, "rewards/rejected": -4.388198375701904, "step": 552 }, { "epoch": 0.09, "learning_rate": 1.37417417915867e-05, "logits/chosen": -3.263132333755493, "logits/rejected": -2.8846826553344727, "logps/chosen": -258.2142028808594, "logps/rejected": -54.493507385253906, "loss": 1.7134, "rewards/accuracies": 0.5, "rewards/chosen": -1.9764528274536133, "rewards/margins": 0.4439460039138794, "rewards/rejected": -2.4203989505767822, "step": 553 }, { "epoch": 0.09, "learning_rate": 1.3741008351055552e-05, "logits/chosen": -2.857004404067993, "logits/rejected": -2.5138940811157227, "logps/chosen": -462.8991394042969, "logps/rejected": -395.4851379394531, "loss": 3.0257, "rewards/accuracies": 0.0, "rewards/chosen": -3.7716691493988037, "rewards/margins": -2.831066608428955, "rewards/rejected": -0.9406025409698486, "step": 554 }, { "epoch": 0.09, "learning_rate": 1.3740274910524404e-05, "logits/chosen": -2.6763293743133545, "logits/rejected": -2.704620838165283, "logps/chosen": -293.51226806640625, "logps/rejected": -220.02513122558594, "loss": 0.0507, "rewards/accuracies": 1.0, "rewards/chosen": 0.3985244631767273, "rewards/margins": 3.365926504135132, "rewards/rejected": -2.9674019813537598, "step": 555 }, { "epoch": 0.09, "learning_rate": 1.3739541469993256e-05, "logits/chosen": -2.7699906826019287, "logits/rejected": -2.858267307281494, "logps/chosen": -205.14093017578125, "logps/rejected": -200.4163818359375, "loss": 2.1348, "rewards/accuracies": 0.5, "rewards/chosen": -3.0920326709747314, "rewards/margins": 0.552462100982666, "rewards/rejected": -3.6444947719573975, "step": 556 }, { "epoch": 0.09, "learning_rate": 1.3738808029462108e-05, "logits/chosen": -3.0862817764282227, "logits/rejected": -2.637246608734131, "logps/chosen": -86.2972640991211, "logps/rejected": -148.0502166748047, "loss": 0.8082, "rewards/accuracies": 0.5, "rewards/chosen": -0.6361535787582397, "rewards/margins": 3.2687606811523438, "rewards/rejected": -3.904914140701294, "step": 557 }, { "epoch": 0.09, "learning_rate": 1.3738074588930962e-05, "logits/chosen": -2.466259479522705, "logits/rejected": -3.1213390827178955, "logps/chosen": -5.88721227645874, "logps/rejected": -105.86509704589844, "loss": 0.1016, "rewards/accuracies": 1.0, "rewards/chosen": 1.0184993743896484, "rewards/margins": 3.174645185470581, "rewards/rejected": -2.1561458110809326, "step": 558 }, { "epoch": 0.09, "learning_rate": 1.3737341148399813e-05, "logits/chosen": -1.8968435525894165, "logits/rejected": -3.1897823810577393, "logps/chosen": -125.61280822753906, "logps/rejected": -254.91476440429688, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 0.44202232360839844, "rewards/margins": 3.9749767780303955, "rewards/rejected": -3.532954454421997, "step": 559 }, { "epoch": 0.09, "learning_rate": 1.3736607707868665e-05, "logits/chosen": -2.9418528079986572, "logits/rejected": -2.9380710124969482, "logps/chosen": -162.0081787109375, "logps/rejected": -199.6961669921875, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": 0.07378238439559937, "rewards/margins": 3.420506477355957, "rewards/rejected": -3.346724033355713, "step": 560 }, { "epoch": 0.09, "learning_rate": 1.3735874267337517e-05, "logits/chosen": -1.8562666177749634, "logits/rejected": -2.6127634048461914, "logps/chosen": -328.9013977050781, "logps/rejected": -339.58709716796875, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -0.6439237594604492, "rewards/margins": 3.7165474891662598, "rewards/rejected": -4.360471248626709, "step": 561 }, { "epoch": 0.09, "learning_rate": 1.3735140826806369e-05, "logits/chosen": -2.817534923553467, "logits/rejected": -2.830402374267578, "logps/chosen": -132.911376953125, "logps/rejected": -248.71514892578125, "loss": 0.6153, "rewards/accuracies": 0.5, "rewards/chosen": -1.2370314598083496, "rewards/margins": 1.8858307600021362, "rewards/rejected": -3.1228623390197754, "step": 562 }, { "epoch": 0.09, "learning_rate": 1.3734407386275221e-05, "logits/chosen": -2.786017656326294, "logits/rejected": -2.5795836448669434, "logps/chosen": -473.07958984375, "logps/rejected": -603.3557739257812, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": -0.48507386445999146, "rewards/margins": 5.066089153289795, "rewards/rejected": -5.5511627197265625, "step": 563 }, { "epoch": 0.09, "learning_rate": 1.3733673945744073e-05, "logits/chosen": -2.6476404666900635, "logits/rejected": -3.0728447437286377, "logps/chosen": -489.59234619140625, "logps/rejected": -713.0593872070312, "loss": 2.1052, "rewards/accuracies": 0.5, "rewards/chosen": -2.4677369594573975, "rewards/margins": 0.4728055000305176, "rewards/rejected": -2.940542459487915, "step": 564 }, { "epoch": 0.09, "learning_rate": 1.3732940505212926e-05, "logits/chosen": -2.9185328483581543, "logits/rejected": -3.2535805702209473, "logps/chosen": -52.11281204223633, "logps/rejected": -187.64352416992188, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.3140847682952881, "rewards/margins": 4.464764595031738, "rewards/rejected": -4.7788496017456055, "step": 565 }, { "epoch": 0.09, "learning_rate": 1.3732207064681778e-05, "logits/chosen": -3.089416742324829, "logits/rejected": -2.7466065883636475, "logps/chosen": -171.00672912597656, "logps/rejected": -153.41732788085938, "loss": 2.405, "rewards/accuracies": 0.5, "rewards/chosen": -2.2559385299682617, "rewards/margins": -0.9837300777435303, "rewards/rejected": -1.2722084522247314, "step": 566 }, { "epoch": 0.09, "learning_rate": 1.3731473624150632e-05, "logits/chosen": -1.4864691495895386, "logits/rejected": -2.9549930095672607, "logps/chosen": -85.06967163085938, "logps/rejected": -295.00396728515625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.9019964933395386, "rewards/margins": 6.170848846435547, "rewards/rejected": -5.268852233886719, "step": 567 }, { "epoch": 0.09, "learning_rate": 1.3730740183619484e-05, "logits/chosen": -2.479660749435425, "logits/rejected": -3.4089274406433105, "logps/chosen": -64.53307342529297, "logps/rejected": -288.0858154296875, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": 0.7540709972381592, "rewards/margins": 3.4671883583068848, "rewards/rejected": -2.7131171226501465, "step": 568 }, { "epoch": 0.09, "learning_rate": 1.3730006743088336e-05, "logits/chosen": -3.1404573917388916, "logits/rejected": -2.9064202308654785, "logps/chosen": -660.5827026367188, "logps/rejected": -390.3087463378906, "loss": 1.121, "rewards/accuracies": 0.5, "rewards/chosen": 0.4402923882007599, "rewards/margins": 2.182835578918457, "rewards/rejected": -1.7425432205200195, "step": 569 }, { "epoch": 0.09, "learning_rate": 1.3729273302557188e-05, "logits/chosen": -3.0491347312927246, "logits/rejected": -2.766310453414917, "logps/chosen": -160.15863037109375, "logps/rejected": -260.40985107421875, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": 0.3365156054496765, "rewards/margins": 4.022368431091309, "rewards/rejected": -3.6858530044555664, "step": 570 }, { "epoch": 0.09, "learning_rate": 1.372853986202604e-05, "logits/chosen": -2.3652491569519043, "logits/rejected": -3.053382158279419, "logps/chosen": -334.6768798828125, "logps/rejected": -400.90484619140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.20917853713035583, "rewards/margins": 9.300199508666992, "rewards/rejected": -9.509378433227539, "step": 571 }, { "epoch": 0.09, "learning_rate": 1.3727806421494891e-05, "logits/chosen": -2.8226466178894043, "logits/rejected": -1.9440358877182007, "logps/chosen": -750.5669555664062, "logps/rejected": -496.77630615234375, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": -0.47331544756889343, "rewards/margins": 2.820727586746216, "rewards/rejected": -3.2940430641174316, "step": 572 }, { "epoch": 0.09, "learning_rate": 1.3727072980963743e-05, "logits/chosen": -2.9014885425567627, "logits/rejected": -2.293256998062134, "logps/chosen": -186.40138244628906, "logps/rejected": -111.64563751220703, "loss": 2.5191, "rewards/accuracies": 0.5, "rewards/chosen": -1.9812577962875366, "rewards/margins": -0.48481154441833496, "rewards/rejected": -1.496446132659912, "step": 573 }, { "epoch": 0.09, "learning_rate": 1.3726339540432595e-05, "logits/chosen": -0.9227712750434875, "logits/rejected": -2.3863024711608887, "logps/chosen": -116.52653503417969, "logps/rejected": -194.92819213867188, "loss": 0.3875, "rewards/accuracies": 0.5, "rewards/chosen": -0.15863953530788422, "rewards/margins": 1.37412691116333, "rewards/rejected": -1.5327664613723755, "step": 574 }, { "epoch": 0.09, "learning_rate": 1.3725606099901447e-05, "logits/chosen": -1.4638367891311646, "logits/rejected": -3.0020813941955566, "logps/chosen": -185.98324584960938, "logps/rejected": -318.3727722167969, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -0.26772117614746094, "rewards/margins": 4.347906589508057, "rewards/rejected": -4.615628242492676, "step": 575 }, { "epoch": 0.09, "learning_rate": 1.37248726593703e-05, "logits/chosen": -3.0836021900177, "logits/rejected": -2.653388023376465, "logps/chosen": -117.71089935302734, "logps/rejected": -211.56964111328125, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": 0.7422052025794983, "rewards/margins": 3.793517589569092, "rewards/rejected": -3.051312208175659, "step": 576 }, { "epoch": 0.09, "learning_rate": 1.3724139218839152e-05, "logits/chosen": -2.913891553878784, "logits/rejected": -2.7453653812408447, "logps/chosen": -271.27545166015625, "logps/rejected": -233.27438354492188, "loss": 2.4946, "rewards/accuracies": 0.5, "rewards/chosen": -2.2460687160491943, "rewards/margins": -1.4196374416351318, "rewards/rejected": -0.8264312744140625, "step": 577 }, { "epoch": 0.09, "learning_rate": 1.3723405778308004e-05, "logits/chosen": -2.6838340759277344, "logits/rejected": -2.9025826454162598, "logps/chosen": -430.0129089355469, "logps/rejected": -410.2777099609375, "loss": 0.1078, "rewards/accuracies": 1.0, "rewards/chosen": -1.0987954139709473, "rewards/margins": 3.344717264175415, "rewards/rejected": -4.443512916564941, "step": 578 }, { "epoch": 0.09, "learning_rate": 1.3722672337776856e-05, "logits/chosen": -2.466113567352295, "logits/rejected": -2.7479496002197266, "logps/chosen": -170.7948455810547, "logps/rejected": -263.49591064453125, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/chosen": -0.47938501834869385, "rewards/margins": 5.792444229125977, "rewards/rejected": -6.271829605102539, "step": 579 }, { "epoch": 0.09, "learning_rate": 1.3721938897245708e-05, "logits/chosen": -2.2631208896636963, "logits/rejected": -3.1985654830932617, "logps/chosen": -678.01220703125, "logps/rejected": -810.594482421875, "loss": 4.2325, "rewards/accuracies": 0.5, "rewards/chosen": -3.471428155899048, "rewards/margins": 0.12918710708618164, "rewards/rejected": -3.6006152629852295, "step": 580 }, { "epoch": 0.09, "learning_rate": 1.372120545671456e-05, "logits/chosen": -2.6321728229522705, "logits/rejected": -3.037348985671997, "logps/chosen": -97.35582733154297, "logps/rejected": -234.43118286132812, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -0.009795010089874268, "rewards/margins": 3.4030933380126953, "rewards/rejected": -3.412888526916504, "step": 581 }, { "epoch": 0.09, "learning_rate": 1.3720472016183412e-05, "logits/chosen": -2.1541028022766113, "logits/rejected": -2.820178747177124, "logps/chosen": -81.820556640625, "logps/rejected": -183.79415893554688, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.4047561287879944, "rewards/margins": 5.166922092437744, "rewards/rejected": -4.7621660232543945, "step": 582 }, { "epoch": 0.09, "learning_rate": 1.3719738575652264e-05, "logits/chosen": -2.8784613609313965, "logits/rejected": -2.7112884521484375, "logps/chosen": -171.9295654296875, "logps/rejected": -167.52056884765625, "loss": 1.0638, "rewards/accuracies": 0.5, "rewards/chosen": -1.5322532653808594, "rewards/margins": 1.215092420578003, "rewards/rejected": -2.7473456859588623, "step": 583 }, { "epoch": 0.09, "learning_rate": 1.3719005135121115e-05, "logits/chosen": -1.2746191024780273, "logits/rejected": -2.822589874267578, "logps/chosen": -136.5149383544922, "logps/rejected": -574.506103515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.7449591159820557, "rewards/margins": 8.494230270385742, "rewards/rejected": -7.749270439147949, "step": 584 }, { "epoch": 0.09, "learning_rate": 1.3718271694589969e-05, "logits/chosen": -1.7755056619644165, "logits/rejected": -2.870242118835449, "logps/chosen": -55.214805603027344, "logps/rejected": -386.25830078125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.5023225545883179, "rewards/margins": 5.979683876037598, "rewards/rejected": -6.482006072998047, "step": 585 }, { "epoch": 0.09, "learning_rate": 1.3717538254058821e-05, "logits/chosen": -2.7758171558380127, "logits/rejected": -2.9539101123809814, "logps/chosen": -254.76010131835938, "logps/rejected": -124.27458953857422, "loss": 3.9411, "rewards/accuracies": 0.5, "rewards/chosen": -3.9180872440338135, "rewards/margins": -0.7836918830871582, "rewards/rejected": -3.1343953609466553, "step": 586 }, { "epoch": 0.09, "learning_rate": 1.3716804813527673e-05, "logits/chosen": -2.9907119274139404, "logits/rejected": -2.0278890132904053, "logps/chosen": -302.6346435546875, "logps/rejected": -204.42359924316406, "loss": 4.0021, "rewards/accuracies": 0.0, "rewards/chosen": -2.7658088207244873, "rewards/margins": -3.9835636615753174, "rewards/rejected": 1.21775484085083, "step": 587 }, { "epoch": 0.09, "learning_rate": 1.3716071372996525e-05, "logits/chosen": -2.8540596961975098, "logits/rejected": -2.42466402053833, "logps/chosen": -206.01620483398438, "logps/rejected": -180.03195190429688, "loss": 2.3187, "rewards/accuracies": 0.5, "rewards/chosen": -1.6896675825119019, "rewards/margins": 1.2133796215057373, "rewards/rejected": -2.9030473232269287, "step": 588 }, { "epoch": 0.09, "learning_rate": 1.3715337932465377e-05, "logits/chosen": -2.3535211086273193, "logits/rejected": -3.2190604209899902, "logps/chosen": -37.11387634277344, "logps/rejected": -235.1450958251953, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.0526061058044434, "rewards/margins": 7.671614646911621, "rewards/rejected": -6.619009017944336, "step": 589 }, { "epoch": 0.09, "learning_rate": 1.3714604491934228e-05, "logits/chosen": -2.122737169265747, "logits/rejected": -3.2588796615600586, "logps/chosen": -70.63728332519531, "logps/rejected": -304.62408447265625, "loss": 0.2934, "rewards/accuracies": 1.0, "rewards/chosen": 0.005080759525299072, "rewards/margins": 4.742553234100342, "rewards/rejected": -4.7374725341796875, "step": 590 }, { "epoch": 0.09, "learning_rate": 1.371387105140308e-05, "logits/chosen": -2.556079626083374, "logits/rejected": -2.801084041595459, "logps/chosen": -55.22430419921875, "logps/rejected": -241.95571899414062, "loss": 1.0605, "rewards/accuracies": 0.5, "rewards/chosen": -2.2884626388549805, "rewards/margins": 1.3615764379501343, "rewards/rejected": -3.650038957595825, "step": 591 }, { "epoch": 0.09, "learning_rate": 1.3713137610871932e-05, "logits/chosen": -2.4413001537323, "logits/rejected": -2.9936206340789795, "logps/chosen": -61.89069747924805, "logps/rejected": -291.5626525878906, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": 0.0010515209287405014, "rewards/margins": 5.731744766235352, "rewards/rejected": -5.730693340301514, "step": 592 }, { "epoch": 0.09, "learning_rate": 1.3712404170340784e-05, "logits/chosen": -2.8445656299591064, "logits/rejected": -2.830274820327759, "logps/chosen": -648.9110717773438, "logps/rejected": -605.0131225585938, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -1.0936142206192017, "rewards/margins": 4.918421745300293, "rewards/rejected": -6.012036323547363, "step": 593 }, { "epoch": 0.09, "learning_rate": 1.3711670729809638e-05, "logits/chosen": -1.7238595485687256, "logits/rejected": -3.0332436561584473, "logps/chosen": -94.00859069824219, "logps/rejected": -273.9420471191406, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": 0.4033970236778259, "rewards/margins": 4.380510330200195, "rewards/rejected": -3.9771132469177246, "step": 594 }, { "epoch": 0.09, "learning_rate": 1.371093728927849e-05, "logits/chosen": -3.0097391605377197, "logits/rejected": -3.102383852005005, "logps/chosen": -98.38153076171875, "logps/rejected": -157.04190063476562, "loss": 0.7589, "rewards/accuracies": 0.5, "rewards/chosen": 0.9440721273422241, "rewards/margins": 2.2047934532165527, "rewards/rejected": -1.260721206665039, "step": 595 }, { "epoch": 0.09, "learning_rate": 1.3710203848747341e-05, "logits/chosen": -2.875032901763916, "logits/rejected": -2.8959691524505615, "logps/chosen": -515.9842529296875, "logps/rejected": -560.072021484375, "loss": 0.0668, "rewards/accuracies": 1.0, "rewards/chosen": 0.1322934925556183, "rewards/margins": 6.6497297286987305, "rewards/rejected": -6.5174360275268555, "step": 596 }, { "epoch": 0.09, "learning_rate": 1.3709470408216193e-05, "logits/chosen": -2.709763765335083, "logits/rejected": -2.7829182147979736, "logps/chosen": -400.322998046875, "logps/rejected": -498.65447998046875, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -0.7589157223701477, "rewards/margins": 4.9717559814453125, "rewards/rejected": -5.7306718826293945, "step": 597 }, { "epoch": 0.09, "learning_rate": 1.3708736967685045e-05, "logits/chosen": -2.615828037261963, "logits/rejected": -2.964052677154541, "logps/chosen": -188.655029296875, "logps/rejected": -162.69937133789062, "loss": 0.1218, "rewards/accuracies": 1.0, "rewards/chosen": -0.5195838809013367, "rewards/margins": 2.204674243927002, "rewards/rejected": -2.7242579460144043, "step": 598 }, { "epoch": 0.09, "learning_rate": 1.3708003527153899e-05, "logits/chosen": -2.3824288845062256, "logits/rejected": -2.9128730297088623, "logps/chosen": -34.355010986328125, "logps/rejected": -227.63589477539062, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.8899731636047363, "rewards/margins": 7.728318214416504, "rewards/rejected": -6.838344573974609, "step": 599 }, { "epoch": 0.09, "learning_rate": 1.370727008662275e-05, "logits/chosen": -1.7740683555603027, "logits/rejected": -2.9529058933258057, "logps/chosen": -187.0409698486328, "logps/rejected": -285.7607421875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.009911775588989258, "rewards/margins": 5.704312324523926, "rewards/rejected": -5.694400310516357, "step": 600 }, { "epoch": 0.09, "learning_rate": 1.3706536646091603e-05, "logits/chosen": -2.528183698654175, "logits/rejected": -2.724172830581665, "logps/chosen": -308.1166076660156, "logps/rejected": -328.9288635253906, "loss": 0.182, "rewards/accuracies": 1.0, "rewards/chosen": -0.45946645736694336, "rewards/margins": 3.2263669967651367, "rewards/rejected": -3.68583345413208, "step": 601 }, { "epoch": 0.09, "learning_rate": 1.3705803205560454e-05, "logits/chosen": -2.8446366786956787, "logits/rejected": -3.1367902755737305, "logps/chosen": -54.38520812988281, "logps/rejected": -164.75938415527344, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 1.2063953876495361, "rewards/margins": 5.777455806732178, "rewards/rejected": -4.5710601806640625, "step": 602 }, { "epoch": 0.09, "learning_rate": 1.3705069765029308e-05, "logits/chosen": -1.42880380153656, "logits/rejected": -2.698859930038452, "logps/chosen": -78.56346893310547, "logps/rejected": -235.83938598632812, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": 0.4614608585834503, "rewards/margins": 4.625795364379883, "rewards/rejected": -4.164334297180176, "step": 603 }, { "epoch": 0.09, "learning_rate": 1.370433632449816e-05, "logits/chosen": -3.0426852703094482, "logits/rejected": -1.9709409475326538, "logps/chosen": -292.35711669921875, "logps/rejected": -105.42938232421875, "loss": 1.3811, "rewards/accuracies": 0.5, "rewards/chosen": -0.682963490486145, "rewards/margins": 1.4315111637115479, "rewards/rejected": -2.1144745349884033, "step": 604 }, { "epoch": 0.09, "learning_rate": 1.3703602883967012e-05, "logits/chosen": -0.8913238048553467, "logits/rejected": -2.1259353160858154, "logps/chosen": -120.45512390136719, "logps/rejected": -247.88323974609375, "loss": 2.4744, "rewards/accuracies": 0.5, "rewards/chosen": -1.9970009326934814, "rewards/margins": 1.3870065212249756, "rewards/rejected": -3.384007453918457, "step": 605 }, { "epoch": 0.09, "learning_rate": 1.3702869443435864e-05, "logits/chosen": -1.692537546157837, "logits/rejected": -2.972001075744629, "logps/chosen": -55.324806213378906, "logps/rejected": -281.258056640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7956339120864868, "rewards/margins": 8.537172317504883, "rewards/rejected": -9.332806587219238, "step": 606 }, { "epoch": 0.09, "learning_rate": 1.3702136002904715e-05, "logits/chosen": -2.5974466800689697, "logits/rejected": -3.1245105266571045, "logps/chosen": -163.40646362304688, "logps/rejected": -252.50856018066406, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": 0.14152374863624573, "rewards/margins": 6.610318660736084, "rewards/rejected": -6.468794822692871, "step": 607 }, { "epoch": 0.09, "learning_rate": 1.3701402562373567e-05, "logits/chosen": -2.8967273235321045, "logits/rejected": -2.4686009883880615, "logps/chosen": -97.75332641601562, "logps/rejected": -185.31723022460938, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.5265337228775024, "rewards/margins": 6.456401824951172, "rewards/rejected": -5.929868698120117, "step": 608 }, { "epoch": 0.09, "learning_rate": 1.370066912184242e-05, "logits/chosen": -3.0478475093841553, "logits/rejected": -2.3468756675720215, "logps/chosen": -559.1307373046875, "logps/rejected": -507.20953369140625, "loss": 3.6989, "rewards/accuracies": 0.5, "rewards/chosen": -4.600563049316406, "rewards/margins": -1.6498854160308838, "rewards/rejected": -2.9506776332855225, "step": 609 }, { "epoch": 0.09, "learning_rate": 1.3699935681311271e-05, "logits/chosen": -2.892075777053833, "logits/rejected": -2.96052885055542, "logps/chosen": -302.3741455078125, "logps/rejected": -354.5412292480469, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 0.4406730830669403, "rewards/margins": 4.530429363250732, "rewards/rejected": -4.089756011962891, "step": 610 }, { "epoch": 0.1, "learning_rate": 1.3699202240780123e-05, "logits/chosen": -1.9738622903823853, "logits/rejected": -2.6198625564575195, "logps/chosen": -263.7083435058594, "logps/rejected": -286.0153503417969, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/chosen": 1.46616792678833, "rewards/margins": 5.060275077819824, "rewards/rejected": -3.594107151031494, "step": 611 }, { "epoch": 0.1, "learning_rate": 1.3698468800248977e-05, "logits/chosen": -2.9752256870269775, "logits/rejected": -2.8133485317230225, "logps/chosen": -600.6847534179688, "logps/rejected": -891.5919799804688, "loss": 2.5782, "rewards/accuracies": 0.5, "rewards/chosen": -3.9269516468048096, "rewards/margins": -1.0469290018081665, "rewards/rejected": -2.8800225257873535, "step": 612 }, { "epoch": 0.1, "learning_rate": 1.3697735359717828e-05, "logits/chosen": -2.8121862411499023, "logits/rejected": -2.088634729385376, "logps/chosen": -305.420166015625, "logps/rejected": -287.76617431640625, "loss": 3.6302, "rewards/accuracies": 0.5, "rewards/chosen": -2.844207525253296, "rewards/margins": -1.0126218795776367, "rewards/rejected": -1.8315857648849487, "step": 613 }, { "epoch": 0.1, "learning_rate": 1.369700191918668e-05, "logits/chosen": -2.652677059173584, "logits/rejected": -3.0970332622528076, "logps/chosen": -423.6785888671875, "logps/rejected": -410.3865966796875, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9091949462890625, "rewards/margins": 4.373629570007324, "rewards/rejected": -5.2828240394592285, "step": 614 }, { "epoch": 0.1, "learning_rate": 1.3696268478655532e-05, "logits/chosen": -2.115039825439453, "logits/rejected": -2.886775255203247, "logps/chosen": -114.14554595947266, "logps/rejected": -281.5378112792969, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": 0.8851845264434814, "rewards/margins": 7.648981094360352, "rewards/rejected": -6.763796329498291, "step": 615 }, { "epoch": 0.1, "learning_rate": 1.3695535038124384e-05, "logits/chosen": -1.9244436025619507, "logits/rejected": -3.0832479000091553, "logps/chosen": -73.79573059082031, "logps/rejected": -275.4056701660156, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.5013467669487, "rewards/margins": 4.94421911239624, "rewards/rejected": -5.445565700531006, "step": 616 }, { "epoch": 0.1, "learning_rate": 1.3694801597593236e-05, "logits/chosen": -1.9911340475082397, "logits/rejected": -3.1939797401428223, "logps/chosen": -31.464019775390625, "logps/rejected": -271.6624450683594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.19900557398796082, "rewards/margins": 6.848776817321777, "rewards/rejected": -6.649771213531494, "step": 617 }, { "epoch": 0.1, "learning_rate": 1.3694068157062088e-05, "logits/chosen": -2.8796017169952393, "logits/rejected": -1.729843020439148, "logps/chosen": -945.5035400390625, "logps/rejected": -416.34783935546875, "loss": 0.2677, "rewards/accuracies": 1.0, "rewards/chosen": -0.711596667766571, "rewards/margins": 1.6021881103515625, "rewards/rejected": -2.3137848377227783, "step": 618 }, { "epoch": 0.1, "learning_rate": 1.369333471653094e-05, "logits/chosen": -2.7600221633911133, "logits/rejected": -2.97956919670105, "logps/chosen": -20.000240325927734, "logps/rejected": -200.08331298828125, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": 0.23590035736560822, "rewards/margins": 6.093886375427246, "rewards/rejected": -5.857985973358154, "step": 619 }, { "epoch": 0.1, "learning_rate": 1.3692601275999793e-05, "logits/chosen": -2.843630313873291, "logits/rejected": -2.7312090396881104, "logps/chosen": -87.71214294433594, "logps/rejected": -69.9585189819336, "loss": 1.4928, "rewards/accuracies": 0.5, "rewards/chosen": -0.2833913564682007, "rewards/margins": 0.6410607099533081, "rewards/rejected": -0.9244521856307983, "step": 620 }, { "epoch": 0.1, "learning_rate": 1.3691867835468645e-05, "logits/chosen": -2.7546749114990234, "logits/rejected": -2.3183743953704834, "logps/chosen": -449.21685791015625, "logps/rejected": -443.48748779296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.39501363039016724, "rewards/margins": 10.479449272155762, "rewards/rejected": -10.874463081359863, "step": 621 }, { "epoch": 0.1, "learning_rate": 1.3691134394937497e-05, "logits/chosen": -3.100754976272583, "logits/rejected": -3.1968047618865967, "logps/chosen": -312.26702880859375, "logps/rejected": -358.8587646484375, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -1.1753628253936768, "rewards/margins": 4.295326232910156, "rewards/rejected": -5.470688819885254, "step": 622 }, { "epoch": 0.1, "learning_rate": 1.3690400954406349e-05, "logits/chosen": -1.0564813613891602, "logits/rejected": -2.319953203201294, "logps/chosen": -101.05674743652344, "logps/rejected": -660.7303466796875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.3379928469657898, "rewards/margins": 11.226295471191406, "rewards/rejected": -11.564289093017578, "step": 623 }, { "epoch": 0.1, "learning_rate": 1.36896675138752e-05, "logits/chosen": -2.0550060272216797, "logits/rejected": -2.8565454483032227, "logps/chosen": -233.91571044921875, "logps/rejected": -302.38482666015625, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -1.4555038213729858, "rewards/margins": 3.508674144744873, "rewards/rejected": -4.964178085327148, "step": 624 }, { "epoch": 0.1, "learning_rate": 1.3688934073344053e-05, "logits/chosen": -1.548863410949707, "logits/rejected": -2.389130115509033, "logps/chosen": -121.91365051269531, "logps/rejected": -259.2571105957031, "loss": 0.2252, "rewards/accuracies": 1.0, "rewards/chosen": -0.9274879693984985, "rewards/margins": 4.662508964538574, "rewards/rejected": -5.589996814727783, "step": 625 }, { "epoch": 0.1, "learning_rate": 1.3688200632812905e-05, "logits/chosen": -2.8469033241271973, "logits/rejected": -2.0782978534698486, "logps/chosen": -351.11822509765625, "logps/rejected": -180.7418212890625, "loss": 6.9308, "rewards/accuracies": 0.0, "rewards/chosen": -8.40031623840332, "rewards/margins": -6.92971134185791, "rewards/rejected": -1.4706047773361206, "step": 626 }, { "epoch": 0.1, "learning_rate": 1.3687467192281756e-05, "logits/chosen": -1.6379718780517578, "logits/rejected": -2.991201400756836, "logps/chosen": -128.08456420898438, "logps/rejected": -332.9424743652344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.21817970275878906, "rewards/margins": 8.088774681091309, "rewards/rejected": -7.870595455169678, "step": 627 }, { "epoch": 0.1, "learning_rate": 1.3686733751750608e-05, "logits/chosen": -1.956179141998291, "logits/rejected": -2.9989261627197266, "logps/chosen": -133.1600799560547, "logps/rejected": -309.50189208984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.244032621383667, "rewards/margins": 6.710467338562012, "rewards/rejected": -7.9544997215271, "step": 628 }, { "epoch": 0.1, "learning_rate": 1.3686000311219462e-05, "logits/chosen": -2.5983834266662598, "logits/rejected": -2.605321168899536, "logps/chosen": -736.0672607421875, "logps/rejected": -530.704345703125, "loss": 4.2626, "rewards/accuracies": 0.5, "rewards/chosen": -5.679666042327881, "rewards/margins": -0.41365814208984375, "rewards/rejected": -5.266007900238037, "step": 629 }, { "epoch": 0.1, "learning_rate": 1.3685266870688314e-05, "logits/chosen": -1.875541090965271, "logits/rejected": -2.734127998352051, "logps/chosen": -92.11888122558594, "logps/rejected": -261.1474609375, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 0.8541618585586548, "rewards/margins": 7.510354042053223, "rewards/rejected": -6.656192302703857, "step": 630 }, { "epoch": 0.1, "learning_rate": 1.3684533430157166e-05, "logits/chosen": -2.590430498123169, "logits/rejected": -2.714899778366089, "logps/chosen": -121.65431213378906, "logps/rejected": -136.6382598876953, "loss": 1.2033, "rewards/accuracies": 0.5, "rewards/chosen": -0.4688533544540405, "rewards/margins": 1.473330020904541, "rewards/rejected": -1.9421833753585815, "step": 631 }, { "epoch": 0.1, "learning_rate": 1.3683799989626018e-05, "logits/chosen": -2.939495325088501, "logits/rejected": -2.5463781356811523, "logps/chosen": -574.2069091796875, "logps/rejected": -405.437255859375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.24723052978515625, "rewards/margins": 5.769598484039307, "rewards/rejected": -6.016829013824463, "step": 632 }, { "epoch": 0.1, "learning_rate": 1.3683066549094871e-05, "logits/chosen": -2.857901096343994, "logits/rejected": -3.0612523555755615, "logps/chosen": -129.51339721679688, "logps/rejected": -211.5516357421875, "loss": 0.365, "rewards/accuracies": 0.5, "rewards/chosen": -0.2780546247959137, "rewards/margins": 4.297946453094482, "rewards/rejected": -4.576000690460205, "step": 633 }, { "epoch": 0.1, "learning_rate": 1.3682333108563723e-05, "logits/chosen": -2.7781503200531006, "logits/rejected": -2.1052868366241455, "logps/chosen": -305.3497619628906, "logps/rejected": -292.92449951171875, "loss": 2.6062, "rewards/accuracies": 0.5, "rewards/chosen": -3.5673959255218506, "rewards/margins": -0.5373759269714355, "rewards/rejected": -3.030019998550415, "step": 634 }, { "epoch": 0.1, "learning_rate": 1.3681599668032575e-05, "logits/chosen": -2.6100714206695557, "logits/rejected": -3.0111026763916016, "logps/chosen": -54.45119094848633, "logps/rejected": -159.27365112304688, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": -0.43233931064605713, "rewards/margins": 3.315185785293579, "rewards/rejected": -3.747525215148926, "step": 635 }, { "epoch": 0.1, "learning_rate": 1.3680866227501427e-05, "logits/chosen": -1.7224787473678589, "logits/rejected": -3.191648483276367, "logps/chosen": -73.54289245605469, "logps/rejected": -341.9045715332031, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6488620042800903, "rewards/margins": 5.775112152099609, "rewards/rejected": -6.42397403717041, "step": 636 }, { "epoch": 0.1, "learning_rate": 1.3680132786970279e-05, "logits/chosen": -2.8153936862945557, "logits/rejected": -2.9204695224761963, "logps/chosen": -77.92816162109375, "logps/rejected": -163.7822723388672, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": 0.3337547183036804, "rewards/margins": 3.680424451828003, "rewards/rejected": -3.3466696739196777, "step": 637 }, { "epoch": 0.1, "learning_rate": 1.3679399346439132e-05, "logits/chosen": -0.9979802370071411, "logits/rejected": -3.0528056621551514, "logps/chosen": -15.883861541748047, "logps/rejected": -288.8904724121094, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.10675764083862305, "rewards/margins": 5.571750640869141, "rewards/rejected": -5.464993476867676, "step": 638 }, { "epoch": 0.1, "learning_rate": 1.3678665905907984e-05, "logits/chosen": -2.8125417232513428, "logits/rejected": -2.3840041160583496, "logps/chosen": -354.33349609375, "logps/rejected": -449.7990417480469, "loss": 1.7704, "rewards/accuracies": 0.5, "rewards/chosen": -2.8987205028533936, "rewards/margins": 1.6820794343948364, "rewards/rejected": -4.5808000564575195, "step": 639 }, { "epoch": 0.1, "learning_rate": 1.3677932465376836e-05, "logits/chosen": -2.981696605682373, "logits/rejected": -3.3044991493225098, "logps/chosen": -474.09613037109375, "logps/rejected": -474.52984619140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8610816597938538, "rewards/margins": 6.867344856262207, "rewards/rejected": -7.728425979614258, "step": 640 }, { "epoch": 0.1, "learning_rate": 1.3677199024845688e-05, "logits/chosen": -1.7264176607131958, "logits/rejected": -3.139308214187622, "logps/chosen": -94.9530258178711, "logps/rejected": -398.3861083984375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.483992099761963, "rewards/margins": 7.867987155914307, "rewards/rejected": -9.35197925567627, "step": 641 }, { "epoch": 0.1, "learning_rate": 1.367646558431454e-05, "logits/chosen": -1.8439844846725464, "logits/rejected": -3.0064032077789307, "logps/chosen": -102.20686340332031, "logps/rejected": -409.60955810546875, "loss": 1.2625, "rewards/accuracies": 0.5, "rewards/chosen": -2.257889747619629, "rewards/margins": 1.475956916809082, "rewards/rejected": -3.733846664428711, "step": 642 }, { "epoch": 0.1, "learning_rate": 1.3675732143783392e-05, "logits/chosen": -1.4971271753311157, "logits/rejected": -2.7715749740600586, "logps/chosen": -181.79232788085938, "logps/rejected": -315.08612060546875, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -0.9140530824661255, "rewards/margins": 4.070987701416016, "rewards/rejected": -4.985040664672852, "step": 643 }, { "epoch": 0.1, "learning_rate": 1.3674998703252243e-05, "logits/chosen": -2.2256245613098145, "logits/rejected": -2.7901809215545654, "logps/chosen": -259.5778503417969, "logps/rejected": -286.0323791503906, "loss": 1.399, "rewards/accuracies": 0.5, "rewards/chosen": -2.1979498863220215, "rewards/margins": 0.15364766120910645, "rewards/rejected": -2.351597547531128, "step": 644 }, { "epoch": 0.1, "learning_rate": 1.3674265262721095e-05, "logits/chosen": -2.0386834144592285, "logits/rejected": -2.861351728439331, "logps/chosen": -134.54139709472656, "logps/rejected": -426.75115966796875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.1093471050262451, "rewards/margins": 11.400341987609863, "rewards/rejected": -12.509689331054688, "step": 645 }, { "epoch": 0.1, "learning_rate": 1.3673531822189947e-05, "logits/chosen": -2.6755595207214355, "logits/rejected": -2.7362587451934814, "logps/chosen": -183.22061157226562, "logps/rejected": -327.1305236816406, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.6319133639335632, "rewards/margins": 5.612093925476074, "rewards/rejected": -6.244007110595703, "step": 646 }, { "epoch": 0.1, "learning_rate": 1.36727983816588e-05, "logits/chosen": -2.7621963024139404, "logits/rejected": -1.2601232528686523, "logps/chosen": -222.67263793945312, "logps/rejected": -120.18916320800781, "loss": 1.9961, "rewards/accuracies": 0.5, "rewards/chosen": -3.6328530311584473, "rewards/margins": 0.2387988567352295, "rewards/rejected": -3.8716518878936768, "step": 647 }, { "epoch": 0.1, "learning_rate": 1.3672064941127653e-05, "logits/chosen": -1.655540943145752, "logits/rejected": -2.978963851928711, "logps/chosen": -75.42535400390625, "logps/rejected": -533.4757080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7769591808319092, "rewards/margins": 15.148334503173828, "rewards/rejected": -16.92529296875, "step": 648 }, { "epoch": 0.1, "learning_rate": 1.3671331500596505e-05, "logits/chosen": -2.8353829383850098, "logits/rejected": -1.4452228546142578, "logps/chosen": -153.31858825683594, "logps/rejected": -33.4690055847168, "loss": 3.7248, "rewards/accuracies": 0.0, "rewards/chosen": -5.107756614685059, "rewards/margins": -3.7004170417785645, "rewards/rejected": -1.4073392152786255, "step": 649 }, { "epoch": 0.1, "learning_rate": 1.3670598060065356e-05, "logits/chosen": -2.869631052017212, "logits/rejected": -2.995365619659424, "logps/chosen": -43.75103759765625, "logps/rejected": -120.81719207763672, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -2.0081546306610107, "rewards/margins": 4.438131332397461, "rewards/rejected": -6.446286201477051, "step": 650 }, { "epoch": 0.1, "learning_rate": 1.3669864619534208e-05, "logits/chosen": -2.7826802730560303, "logits/rejected": -2.8155224323272705, "logps/chosen": -233.7005157470703, "logps/rejected": -433.1365051269531, "loss": 0.0968, "rewards/accuracies": 1.0, "rewards/chosen": -1.4218754768371582, "rewards/margins": 9.116750717163086, "rewards/rejected": -10.538626670837402, "step": 651 }, { "epoch": 0.1, "learning_rate": 1.366913117900306e-05, "logits/chosen": -2.381727695465088, "logits/rejected": -3.0359814167022705, "logps/chosen": -25.57170867919922, "logps/rejected": -231.86453247070312, "loss": 0.039, "rewards/accuracies": 1.0, "rewards/chosen": -0.45214951038360596, "rewards/margins": 4.740869522094727, "rewards/rejected": -5.193018913269043, "step": 652 }, { "epoch": 0.1, "learning_rate": 1.3668397738471912e-05, "logits/chosen": -2.884507656097412, "logits/rejected": -2.865844964981079, "logps/chosen": -112.38280487060547, "logps/rejected": -175.98838806152344, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.2310771942138672, "rewards/margins": 4.627479076385498, "rewards/rejected": -5.858556270599365, "step": 653 }, { "epoch": 0.1, "learning_rate": 1.3667664297940764e-05, "logits/chosen": -3.1458606719970703, "logits/rejected": -2.765615463256836, "logps/chosen": -908.1103515625, "logps/rejected": -621.7084350585938, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.329522728919983, "rewards/margins": 6.244767665863037, "rewards/rejected": -7.5742902755737305, "step": 654 }, { "epoch": 0.1, "learning_rate": 1.3666930857409616e-05, "logits/chosen": -2.017091751098633, "logits/rejected": -2.985673666000366, "logps/chosen": -120.1280517578125, "logps/rejected": -310.0565185546875, "loss": 1.8651, "rewards/accuracies": 0.5, "rewards/chosen": -3.149144172668457, "rewards/margins": 1.2586067914962769, "rewards/rejected": -4.407751083374023, "step": 655 }, { "epoch": 0.1, "learning_rate": 1.366619741687847e-05, "logits/chosen": -2.810375213623047, "logits/rejected": -2.9578235149383545, "logps/chosen": -213.006103515625, "logps/rejected": -249.3568115234375, "loss": 1.5342, "rewards/accuracies": 0.5, "rewards/chosen": -2.7664215564727783, "rewards/margins": 0.5141816139221191, "rewards/rejected": -3.2806031703948975, "step": 656 }, { "epoch": 0.1, "learning_rate": 1.3665463976347321e-05, "logits/chosen": -1.2072014808654785, "logits/rejected": -2.6644415855407715, "logps/chosen": -154.13844299316406, "logps/rejected": -247.0118865966797, "loss": 2.2573, "rewards/accuracies": 0.5, "rewards/chosen": -3.780555009841919, "rewards/margins": 1.186887502670288, "rewards/rejected": -4.967442512512207, "step": 657 }, { "epoch": 0.1, "learning_rate": 1.3664730535816173e-05, "logits/chosen": -2.1386642456054688, "logits/rejected": -3.2987003326416016, "logps/chosen": -361.952392578125, "logps/rejected": -543.4794921875, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -1.8556091785430908, "rewards/margins": 3.37752366065979, "rewards/rejected": -5.233132839202881, "step": 658 }, { "epoch": 0.1, "learning_rate": 1.3663997095285025e-05, "logits/chosen": -2.9591548442840576, "logits/rejected": -1.7989215850830078, "logps/chosen": -280.8568115234375, "logps/rejected": -194.25067138671875, "loss": 3.0217, "rewards/accuracies": 0.5, "rewards/chosen": -4.2888946533203125, "rewards/margins": -0.5782437324523926, "rewards/rejected": -3.710651159286499, "step": 659 }, { "epoch": 0.1, "learning_rate": 1.3663263654753877e-05, "logits/chosen": -2.7467682361602783, "logits/rejected": -3.0791893005371094, "logps/chosen": -144.0556640625, "logps/rejected": -298.23052978515625, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -2.554255962371826, "rewards/margins": 6.195420265197754, "rewards/rejected": -8.749676704406738, "step": 660 }, { "epoch": 0.1, "learning_rate": 1.3662530214222729e-05, "logits/chosen": -2.401329278945923, "logits/rejected": -2.9754507541656494, "logps/chosen": -67.36973571777344, "logps/rejected": -372.89923095703125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.9994306564331055, "rewards/margins": 6.6106858253479, "rewards/rejected": -7.610116481781006, "step": 661 }, { "epoch": 0.1, "learning_rate": 1.366179677369158e-05, "logits/chosen": -2.628793716430664, "logits/rejected": -2.788822650909424, "logps/chosen": -39.30991744995117, "logps/rejected": -222.31658935546875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.636168897151947, "rewards/margins": 8.887855529785156, "rewards/rejected": -9.52402400970459, "step": 662 }, { "epoch": 0.1, "learning_rate": 1.3661063333160433e-05, "logits/chosen": -2.924062728881836, "logits/rejected": -3.036604166030884, "logps/chosen": -325.67193603515625, "logps/rejected": -98.56485748291016, "loss": 5.3038, "rewards/accuracies": 0.5, "rewards/chosen": -6.728936195373535, "rewards/margins": -3.0667660236358643, "rewards/rejected": -3.66217041015625, "step": 663 }, { "epoch": 0.1, "learning_rate": 1.3660329892629284e-05, "logits/chosen": -1.8418415784835815, "logits/rejected": -2.947263717651367, "logps/chosen": -89.03883361816406, "logps/rejected": -250.89859008789062, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.6795099377632141, "rewards/margins": 6.122790336608887, "rewards/rejected": -6.802300453186035, "step": 664 }, { "epoch": 0.1, "learning_rate": 1.3659596452098138e-05, "logits/chosen": -2.961008310317993, "logits/rejected": -2.913546562194824, "logps/chosen": -113.24789428710938, "logps/rejected": -164.51541137695312, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -1.1304872035980225, "rewards/margins": 3.893383026123047, "rewards/rejected": -5.023870468139648, "step": 665 }, { "epoch": 0.1, "learning_rate": 1.365886301156699e-05, "logits/chosen": -2.9352304935455322, "logits/rejected": -2.4729599952697754, "logps/chosen": -92.45730590820312, "logps/rejected": -215.98358154296875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.1862287521362305, "rewards/margins": 5.87013053894043, "rewards/rejected": -7.05635929107666, "step": 666 }, { "epoch": 0.1, "learning_rate": 1.3658129571035843e-05, "logits/chosen": -1.4956713914871216, "logits/rejected": -3.217801332473755, "logps/chosen": -49.719627380371094, "logps/rejected": -414.5462951660156, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -2.03564453125, "rewards/margins": 3.4606804847717285, "rewards/rejected": -5.4963250160217285, "step": 667 }, { "epoch": 0.1, "learning_rate": 1.3657396130504695e-05, "logits/chosen": -1.2334650754928589, "logits/rejected": -2.7360546588897705, "logps/chosen": -28.630748748779297, "logps/rejected": -208.91946411132812, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.8668693900108337, "rewards/margins": 5.592412948608398, "rewards/rejected": -6.459281921386719, "step": 668 }, { "epoch": 0.1, "learning_rate": 1.3656662689973547e-05, "logits/chosen": -2.7512130737304688, "logits/rejected": -2.8132057189941406, "logps/chosen": -353.517333984375, "logps/rejected": -392.65911865234375, "loss": 2.5292, "rewards/accuracies": 0.5, "rewards/chosen": -4.630516052246094, "rewards/margins": -0.5183058977127075, "rewards/rejected": -4.112210273742676, "step": 669 }, { "epoch": 0.1, "learning_rate": 1.3655929249442399e-05, "logits/chosen": -3.1020922660827637, "logits/rejected": -1.9026994705200195, "logps/chosen": -366.6625061035156, "logps/rejected": -164.98135375976562, "loss": 3.2194, "rewards/accuracies": 0.5, "rewards/chosen": -4.999411106109619, "rewards/margins": -2.1398119926452637, "rewards/rejected": -2.8595991134643555, "step": 670 }, { "epoch": 0.1, "learning_rate": 1.3655195808911251e-05, "logits/chosen": -3.2170772552490234, "logits/rejected": -3.2880899906158447, "logps/chosen": -581.4876098632812, "logps/rejected": -604.194580078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.767431914806366, "rewards/margins": 7.861416816711426, "rewards/rejected": -8.628849029541016, "step": 671 }, { "epoch": 0.1, "learning_rate": 1.3654462368380103e-05, "logits/chosen": -2.358207941055298, "logits/rejected": -2.989988088607788, "logps/chosen": -65.81734466552734, "logps/rejected": -87.99812316894531, "loss": 0.183, "rewards/accuracies": 1.0, "rewards/chosen": -1.2500159740447998, "rewards/margins": 1.8570973873138428, "rewards/rejected": -3.1071133613586426, "step": 672 }, { "epoch": 0.1, "learning_rate": 1.3653728927848955e-05, "logits/chosen": -2.793990135192871, "logits/rejected": -1.4041155576705933, "logps/chosen": -538.6963500976562, "logps/rejected": -213.70877075195312, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.16194716095924377, "rewards/margins": 5.114079475402832, "rewards/rejected": -5.276026248931885, "step": 673 }, { "epoch": 0.1, "learning_rate": 1.3652995487317808e-05, "logits/chosen": -2.5997183322906494, "logits/rejected": -2.955970287322998, "logps/chosen": -503.800048828125, "logps/rejected": -536.2932739257812, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -0.4483299255371094, "rewards/margins": 8.874433517456055, "rewards/rejected": -9.322763442993164, "step": 674 }, { "epoch": 0.1, "learning_rate": 1.365226204678666e-05, "logits/chosen": -2.076707363128662, "logits/rejected": -2.791947364807129, "logps/chosen": -27.725933074951172, "logps/rejected": -128.30670166015625, "loss": 0.0987, "rewards/accuracies": 1.0, "rewards/chosen": -0.4274658262729645, "rewards/margins": 3.438866138458252, "rewards/rejected": -3.8663320541381836, "step": 675 }, { "epoch": 0.11, "learning_rate": 1.3651528606255512e-05, "logits/chosen": -2.287398099899292, "logits/rejected": -3.237534523010254, "logps/chosen": -318.96405029296875, "logps/rejected": -475.5153503417969, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.9483871459960938, "rewards/margins": 4.837384223937988, "rewards/rejected": -6.785771369934082, "step": 676 }, { "epoch": 0.11, "learning_rate": 1.3650795165724364e-05, "logits/chosen": -2.3733620643615723, "logits/rejected": -3.367159843444824, "logps/chosen": -28.923742294311523, "logps/rejected": -295.1087341308594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1337342262268066, "rewards/margins": 7.176126003265381, "rewards/rejected": -8.309860229492188, "step": 677 }, { "epoch": 0.11, "learning_rate": 1.3650061725193216e-05, "logits/chosen": -3.042250394821167, "logits/rejected": -2.6974267959594727, "logps/chosen": -376.24346923828125, "logps/rejected": -305.95465087890625, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": -0.6047783493995667, "rewards/margins": 6.294946670532227, "rewards/rejected": -6.899724960327148, "step": 678 }, { "epoch": 0.11, "learning_rate": 1.3649328284662068e-05, "logits/chosen": -3.1458516120910645, "logits/rejected": -2.89607310295105, "logps/chosen": -233.1353759765625, "logps/rejected": -229.4925537109375, "loss": 1.2738, "rewards/accuracies": 0.5, "rewards/chosen": -3.4016294479370117, "rewards/margins": 1.6989901065826416, "rewards/rejected": -5.100619316101074, "step": 679 }, { "epoch": 0.11, "learning_rate": 1.364859484413092e-05, "logits/chosen": -2.8024744987487793, "logits/rejected": -2.9438364505767822, "logps/chosen": -126.40992736816406, "logps/rejected": -329.13092041015625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.0022235848009586334, "rewards/margins": 6.240880966186523, "rewards/rejected": -6.238657474517822, "step": 680 }, { "epoch": 0.11, "learning_rate": 1.3647861403599771e-05, "logits/chosen": -2.7599081993103027, "logits/rejected": -3.1831929683685303, "logps/chosen": -175.29910278320312, "logps/rejected": -315.67169189453125, "loss": 1.0329, "rewards/accuracies": 0.5, "rewards/chosen": -4.839094161987305, "rewards/margins": 1.0174403190612793, "rewards/rejected": -5.856534957885742, "step": 681 }, { "epoch": 0.11, "learning_rate": 1.3647127963068623e-05, "logits/chosen": -1.9359588623046875, "logits/rejected": -2.978222131729126, "logps/chosen": -239.9398193359375, "logps/rejected": -422.7806396484375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.3682031631469727, "rewards/margins": 6.22427225112915, "rewards/rejected": -9.592475891113281, "step": 682 }, { "epoch": 0.11, "learning_rate": 1.3646394522537477e-05, "logits/chosen": -3.094038486480713, "logits/rejected": -3.1435868740081787, "logps/chosen": -281.35284423828125, "logps/rejected": -358.0357360839844, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -4.029512405395508, "rewards/margins": 6.443049907684326, "rewards/rejected": -10.472562789916992, "step": 683 }, { "epoch": 0.11, "learning_rate": 1.3645661082006329e-05, "logits/chosen": -3.1252360343933105, "logits/rejected": -2.7724761962890625, "logps/chosen": -117.23624420166016, "logps/rejected": -133.04766845703125, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -1.260384440422058, "rewards/margins": 5.475051403045654, "rewards/rejected": -6.735435485839844, "step": 684 }, { "epoch": 0.11, "learning_rate": 1.364492764147518e-05, "logits/chosen": -2.5046842098236084, "logits/rejected": -2.8873038291931152, "logps/chosen": -88.01786804199219, "logps/rejected": -262.5646057128906, "loss": 0.5046, "rewards/accuracies": 0.5, "rewards/chosen": -2.1347312927246094, "rewards/margins": 1.061959981918335, "rewards/rejected": -3.1966910362243652, "step": 685 }, { "epoch": 0.11, "learning_rate": 1.3644194200944032e-05, "logits/chosen": -1.7088574171066284, "logits/rejected": -3.1593856811523438, "logps/chosen": -154.5338897705078, "logps/rejected": -446.50335693359375, "loss": 3.8517, "rewards/accuracies": 0.5, "rewards/chosen": -5.000039577484131, "rewards/margins": -1.3990232944488525, "rewards/rejected": -3.6010162830352783, "step": 686 }, { "epoch": 0.11, "learning_rate": 1.3643460760412884e-05, "logits/chosen": -1.8218973875045776, "logits/rejected": -3.1176867485046387, "logps/chosen": -176.25717163085938, "logps/rejected": -388.0234375, "loss": 0.1274, "rewards/accuracies": 1.0, "rewards/chosen": -2.7333080768585205, "rewards/margins": 4.5926594734191895, "rewards/rejected": -7.325967788696289, "step": 687 }, { "epoch": 0.11, "learning_rate": 1.3642727319881736e-05, "logits/chosen": -1.284706950187683, "logits/rejected": -2.81339693069458, "logps/chosen": -52.789634704589844, "logps/rejected": -262.4026184082031, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.2051033973693848, "rewards/margins": 5.490788459777832, "rewards/rejected": -6.695891857147217, "step": 688 }, { "epoch": 0.11, "learning_rate": 1.3641993879350588e-05, "logits/chosen": -3.0429365634918213, "logits/rejected": -2.7564139366149902, "logps/chosen": -269.9660949707031, "logps/rejected": -252.55955505371094, "loss": 4.3209, "rewards/accuracies": 0.5, "rewards/chosen": -5.484525203704834, "rewards/margins": -1.9013679027557373, "rewards/rejected": -3.5831573009490967, "step": 689 }, { "epoch": 0.11, "learning_rate": 1.364126043881944e-05, "logits/chosen": -2.7679643630981445, "logits/rejected": -2.8207321166992188, "logps/chosen": -158.7521514892578, "logps/rejected": -208.10934448242188, "loss": 3.6418, "rewards/accuracies": 0.5, "rewards/chosen": -4.465574741363525, "rewards/margins": -0.23104071617126465, "rewards/rejected": -4.234533786773682, "step": 690 }, { "epoch": 0.11, "learning_rate": 1.3640526998288292e-05, "logits/chosen": -2.397383451461792, "logits/rejected": -3.0731914043426514, "logps/chosen": -299.8909606933594, "logps/rejected": -404.56964111328125, "loss": 0.1439, "rewards/accuracies": 1.0, "rewards/chosen": -2.4264748096466064, "rewards/margins": 2.9277443885803223, "rewards/rejected": -5.354219436645508, "step": 691 }, { "epoch": 0.11, "learning_rate": 1.3639793557757145e-05, "logits/chosen": -2.7243130207061768, "logits/rejected": -2.092169761657715, "logps/chosen": -241.78160095214844, "logps/rejected": -219.12721252441406, "loss": 0.1067, "rewards/accuracies": 1.0, "rewards/chosen": -3.3362457752227783, "rewards/margins": 3.848860263824463, "rewards/rejected": -7.18510627746582, "step": 692 }, { "epoch": 0.11, "learning_rate": 1.3639060117225997e-05, "logits/chosen": -1.4390063285827637, "logits/rejected": -2.982717752456665, "logps/chosen": -100.54303741455078, "logps/rejected": -332.34234619140625, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -1.161048412322998, "rewards/margins": 4.269564628601074, "rewards/rejected": -5.430613040924072, "step": 693 }, { "epoch": 0.11, "learning_rate": 1.363832667669485e-05, "logits/chosen": -1.1569764614105225, "logits/rejected": -2.184091567993164, "logps/chosen": -538.791259765625, "logps/rejected": -372.52874755859375, "loss": 6.9928, "rewards/accuracies": 0.5, "rewards/chosen": -9.713567733764648, "rewards/margins": -4.139875411987305, "rewards/rejected": -5.573692321777344, "step": 694 }, { "epoch": 0.11, "learning_rate": 1.3637593236163701e-05, "logits/chosen": -2.5087783336639404, "logits/rejected": -2.0665457248687744, "logps/chosen": -176.87457275390625, "logps/rejected": -210.63369750976562, "loss": 2.9913, "rewards/accuracies": 0.5, "rewards/chosen": -4.348857402801514, "rewards/margins": 0.7524566650390625, "rewards/rejected": -5.101313591003418, "step": 695 }, { "epoch": 0.11, "learning_rate": 1.3636859795632553e-05, "logits/chosen": -2.2123923301696777, "logits/rejected": -3.011200428009033, "logps/chosen": -505.5413818359375, "logps/rejected": -808.2041625976562, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -3.566795587539673, "rewards/margins": 4.596214294433594, "rewards/rejected": -8.163009643554688, "step": 696 }, { "epoch": 0.11, "learning_rate": 1.3636126355101405e-05, "logits/chosen": -2.59971284866333, "logits/rejected": -3.1661951541900635, "logps/chosen": -162.70562744140625, "logps/rejected": -215.54977416992188, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8956332206726074, "rewards/margins": 5.831429481506348, "rewards/rejected": -6.727063179016113, "step": 697 }, { "epoch": 0.11, "learning_rate": 1.3635392914570257e-05, "logits/chosen": -1.6991469860076904, "logits/rejected": -3.0147151947021484, "logps/chosen": -112.39661407470703, "logps/rejected": -308.22296142578125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.2651809453964233, "rewards/margins": 6.342191696166992, "rewards/rejected": -7.607373237609863, "step": 698 }, { "epoch": 0.11, "learning_rate": 1.363465947403911e-05, "logits/chosen": -2.7985639572143555, "logits/rejected": -2.96142315864563, "logps/chosen": -322.6134948730469, "logps/rejected": -564.1353149414062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.9991421699523926, "rewards/margins": 7.920187950134277, "rewards/rejected": -10.919330596923828, "step": 699 }, { "epoch": 0.11, "learning_rate": 1.3633926033507962e-05, "logits/chosen": -2.7474915981292725, "logits/rejected": -2.926936149597168, "logps/chosen": -23.17365264892578, "logps/rejected": -168.57574462890625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.18644046783447266, "rewards/margins": 6.288945198059082, "rewards/rejected": -6.475385665893555, "step": 700 }, { "epoch": 0.11, "learning_rate": 1.3633192592976816e-05, "logits/chosen": -1.6331990957260132, "logits/rejected": -2.679222822189331, "logps/chosen": -79.10453033447266, "logps/rejected": -358.3070068359375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.5034191608428955, "rewards/margins": 4.716940402984619, "rewards/rejected": -7.2203593254089355, "step": 701 }, { "epoch": 0.11, "learning_rate": 1.3632459152445668e-05, "logits/chosen": -1.7702171802520752, "logits/rejected": -2.88984751701355, "logps/chosen": -34.884490966796875, "logps/rejected": -173.84246826171875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.5401706099510193, "rewards/margins": 6.039175033569336, "rewards/rejected": -6.579345703125, "step": 702 }, { "epoch": 0.11, "learning_rate": 1.363172571191452e-05, "logits/chosen": -2.864983320236206, "logits/rejected": -2.5103931427001953, "logps/chosen": -393.1524658203125, "logps/rejected": -266.0262145996094, "loss": 3.8784, "rewards/accuracies": 0.5, "rewards/chosen": -4.619982719421387, "rewards/margins": -1.8099775314331055, "rewards/rejected": -2.8100054264068604, "step": 703 }, { "epoch": 0.11, "learning_rate": 1.3630992271383371e-05, "logits/chosen": -3.1018896102905273, "logits/rejected": -3.4273934364318848, "logps/chosen": -379.5636901855469, "logps/rejected": -428.3878173828125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.6217206716537476, "rewards/margins": 6.437588691711426, "rewards/rejected": -8.059309005737305, "step": 704 }, { "epoch": 0.11, "learning_rate": 1.3630258830852223e-05, "logits/chosen": -2.016390800476074, "logits/rejected": -2.5534512996673584, "logps/chosen": -102.08795166015625, "logps/rejected": -197.3935546875, "loss": 0.0935, "rewards/accuracies": 1.0, "rewards/chosen": -2.8340275287628174, "rewards/margins": 2.481259346008301, "rewards/rejected": -5.315287113189697, "step": 705 }, { "epoch": 0.11, "learning_rate": 1.3629525390321075e-05, "logits/chosen": -2.397277593612671, "logits/rejected": -3.0545918941497803, "logps/chosen": -64.04307556152344, "logps/rejected": -294.870361328125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.060693383216858, "rewards/margins": 6.399725914001465, "rewards/rejected": -7.460419654846191, "step": 706 }, { "epoch": 0.11, "learning_rate": 1.3628791949789927e-05, "logits/chosen": -2.8854894638061523, "logits/rejected": -2.4732534885406494, "logps/chosen": -182.88720703125, "logps/rejected": -89.68246459960938, "loss": 0.9483, "rewards/accuracies": 0.5, "rewards/chosen": -1.7591814994812012, "rewards/margins": 0.10632109642028809, "rewards/rejected": -1.8655025959014893, "step": 707 }, { "epoch": 0.11, "learning_rate": 1.3628058509258779e-05, "logits/chosen": -2.867138147354126, "logits/rejected": -2.3397328853607178, "logps/chosen": -234.05062866210938, "logps/rejected": -178.22872924804688, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": -1.3256065845489502, "rewards/margins": 3.758114814758301, "rewards/rejected": -5.083721160888672, "step": 708 }, { "epoch": 0.11, "learning_rate": 1.362732506872763e-05, "logits/chosen": -2.8860504627227783, "logits/rejected": -3.131068229675293, "logps/chosen": -149.2220458984375, "logps/rejected": -259.3841857910156, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.4396858215332031, "rewards/margins": 4.866417407989502, "rewards/rejected": -6.306102752685547, "step": 709 }, { "epoch": 0.11, "learning_rate": 1.3626591628196484e-05, "logits/chosen": -1.8381773233413696, "logits/rejected": -2.795938014984131, "logps/chosen": -139.19094848632812, "logps/rejected": -381.39630126953125, "loss": 0.6014, "rewards/accuracies": 0.5, "rewards/chosen": -2.4558639526367188, "rewards/margins": 4.288792610168457, "rewards/rejected": -6.744656562805176, "step": 710 }, { "epoch": 0.11, "learning_rate": 1.3625858187665336e-05, "logits/chosen": -1.5513402223587036, "logits/rejected": -3.226076126098633, "logps/chosen": -193.20289611816406, "logps/rejected": -529.0227661132812, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.9655883312225342, "rewards/margins": 5.68319845199585, "rewards/rejected": -7.648787021636963, "step": 711 }, { "epoch": 0.11, "learning_rate": 1.3625124747134188e-05, "logits/chosen": -3.0339274406433105, "logits/rejected": -3.389536142349243, "logps/chosen": -229.2584991455078, "logps/rejected": -293.93084716796875, "loss": 1.6647, "rewards/accuracies": 0.5, "rewards/chosen": -3.3932442665100098, "rewards/margins": 1.3968968391418457, "rewards/rejected": -4.7901411056518555, "step": 712 }, { "epoch": 0.11, "learning_rate": 1.362439130660304e-05, "logits/chosen": -2.4973337650299072, "logits/rejected": -2.7762889862060547, "logps/chosen": -286.1974792480469, "logps/rejected": -262.3814697265625, "loss": 0.7163, "rewards/accuracies": 0.5, "rewards/chosen": -2.8279542922973633, "rewards/margins": 2.40728759765625, "rewards/rejected": -5.235241889953613, "step": 713 }, { "epoch": 0.11, "learning_rate": 1.3623657866071892e-05, "logits/chosen": -2.888159990310669, "logits/rejected": -3.366420030593872, "logps/chosen": -184.21328735351562, "logps/rejected": -281.8966369628906, "loss": 2.5318, "rewards/accuracies": 0.5, "rewards/chosen": -4.04117488861084, "rewards/margins": 0.7066454887390137, "rewards/rejected": -4.7478203773498535, "step": 714 }, { "epoch": 0.11, "learning_rate": 1.3622924425540744e-05, "logits/chosen": -1.8357667922973633, "logits/rejected": -2.9009013175964355, "logps/chosen": -129.77716064453125, "logps/rejected": -259.6492919921875, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -0.34742605686187744, "rewards/margins": 5.479531288146973, "rewards/rejected": -5.826956748962402, "step": 715 }, { "epoch": 0.11, "learning_rate": 1.3622190985009596e-05, "logits/chosen": -1.9140231609344482, "logits/rejected": -3.0083115100860596, "logps/chosen": -364.68170166015625, "logps/rejected": -491.4626770019531, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.8980026245117188, "rewards/margins": 5.786041259765625, "rewards/rejected": -7.684043884277344, "step": 716 }, { "epoch": 0.11, "learning_rate": 1.3621457544478447e-05, "logits/chosen": -2.664917469024658, "logits/rejected": -3.106644868850708, "logps/chosen": -127.89892578125, "logps/rejected": -237.06500244140625, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -2.186405897140503, "rewards/margins": 3.5865654945373535, "rewards/rejected": -5.772971153259277, "step": 717 }, { "epoch": 0.11, "learning_rate": 1.3620724103947301e-05, "logits/chosen": -2.928126335144043, "logits/rejected": -2.7111666202545166, "logps/chosen": -171.1338653564453, "logps/rejected": -262.3914794921875, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.449068546295166, "rewards/margins": 3.8574366569519043, "rewards/rejected": -5.30650520324707, "step": 718 }, { "epoch": 0.11, "learning_rate": 1.3619990663416153e-05, "logits/chosen": -2.2713680267333984, "logits/rejected": -3.2081782817840576, "logps/chosen": -59.23828887939453, "logps/rejected": -233.6754913330078, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.8827037811279297, "rewards/margins": 4.657754898071289, "rewards/rejected": -6.540458679199219, "step": 719 }, { "epoch": 0.11, "learning_rate": 1.3619257222885005e-05, "logits/chosen": -2.9539217948913574, "logits/rejected": -2.9876341819763184, "logps/chosen": -326.17352294921875, "logps/rejected": -411.6580810546875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.405783176422119, "rewards/margins": 6.612292289733887, "rewards/rejected": -9.018075942993164, "step": 720 }, { "epoch": 0.11, "learning_rate": 1.3618523782353857e-05, "logits/chosen": -2.4769814014434814, "logits/rejected": -2.8086771965026855, "logps/chosen": -106.93013763427734, "logps/rejected": -179.25473022460938, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -2.100954055786133, "rewards/margins": 3.340475082397461, "rewards/rejected": -5.441429138183594, "step": 721 }, { "epoch": 0.11, "learning_rate": 1.3617790341822709e-05, "logits/chosen": -2.436595916748047, "logits/rejected": -2.7163820266723633, "logps/chosen": -256.135498046875, "logps/rejected": -263.56988525390625, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.4007843732833862, "rewards/margins": 4.404933929443359, "rewards/rejected": -5.805718421936035, "step": 722 }, { "epoch": 0.11, "learning_rate": 1.361705690129156e-05, "logits/chosen": -2.912804126739502, "logits/rejected": -3.1681594848632812, "logps/chosen": -101.42703247070312, "logps/rejected": -268.48468017578125, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -1.1904503107070923, "rewards/margins": 4.639101505279541, "rewards/rejected": -5.829551696777344, "step": 723 }, { "epoch": 0.11, "learning_rate": 1.3616323460760412e-05, "logits/chosen": -2.5367512702941895, "logits/rejected": -3.13932728767395, "logps/chosen": -114.44596862792969, "logps/rejected": -290.465576171875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.4783509969711304, "rewards/margins": 4.657859802246094, "rewards/rejected": -6.1362104415893555, "step": 724 }, { "epoch": 0.11, "learning_rate": 1.3615590020229264e-05, "logits/chosen": -2.891920566558838, "logits/rejected": -2.748331308364868, "logps/chosen": -179.25152587890625, "logps/rejected": -283.75921630859375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.4284286499023438, "rewards/margins": 5.809015274047852, "rewards/rejected": -7.237443923950195, "step": 725 }, { "epoch": 0.11, "learning_rate": 1.3614856579698116e-05, "logits/chosen": -2.952883243560791, "logits/rejected": -2.1987786293029785, "logps/chosen": -284.6329650878906, "logps/rejected": -94.67564392089844, "loss": 7.6716, "rewards/accuracies": 0.0, "rewards/chosen": -9.414897918701172, "rewards/margins": -7.666069984436035, "rewards/rejected": -1.7488281726837158, "step": 726 }, { "epoch": 0.11, "learning_rate": 1.361412313916697e-05, "logits/chosen": -3.282764434814453, "logits/rejected": -2.9905056953430176, "logps/chosen": -247.86558532714844, "logps/rejected": -113.59019470214844, "loss": 3.7982, "rewards/accuracies": 0.5, "rewards/chosen": -5.216423511505127, "rewards/margins": -3.070279836654663, "rewards/rejected": -2.146143913269043, "step": 727 }, { "epoch": 0.11, "learning_rate": 1.3613389698635822e-05, "logits/chosen": -2.3981378078460693, "logits/rejected": -3.1202433109283447, "logps/chosen": -155.48171997070312, "logps/rejected": -223.109130859375, "loss": 2.6363, "rewards/accuracies": 0.5, "rewards/chosen": -4.428008556365967, "rewards/margins": 1.886885404586792, "rewards/rejected": -6.31489372253418, "step": 728 }, { "epoch": 0.11, "learning_rate": 1.3612656258104673e-05, "logits/chosen": -1.5246102809906006, "logits/rejected": -2.739368200302124, "logps/chosen": -95.94257354736328, "logps/rejected": -426.62847900390625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.5797035694122314, "rewards/margins": 7.237590789794922, "rewards/rejected": -8.81729507446289, "step": 729 }, { "epoch": 0.11, "learning_rate": 1.3611922817573525e-05, "logits/chosen": -2.1892929077148438, "logits/rejected": -2.736025810241699, "logps/chosen": -100.34928131103516, "logps/rejected": -217.738037109375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.622321605682373, "rewards/margins": 6.869737148284912, "rewards/rejected": -7.492058753967285, "step": 730 }, { "epoch": 0.11, "learning_rate": 1.3611189377042377e-05, "logits/chosen": -1.6880558729171753, "logits/rejected": -2.48254656791687, "logps/chosen": -153.74917602539062, "logps/rejected": -277.067626953125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.541461229324341, "rewards/margins": 5.01742696762085, "rewards/rejected": -7.5588884353637695, "step": 731 }, { "epoch": 0.11, "learning_rate": 1.3610455936511229e-05, "logits/chosen": -0.9377488493919373, "logits/rejected": -2.080291271209717, "logps/chosen": -81.58633422851562, "logps/rejected": -315.067626953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9958311915397644, "rewards/margins": 8.377418518066406, "rewards/rejected": -9.373249053955078, "step": 732 }, { "epoch": 0.11, "learning_rate": 1.3609722495980083e-05, "logits/chosen": -3.264350175857544, "logits/rejected": -2.6125450134277344, "logps/chosen": -484.2620849609375, "logps/rejected": -162.86843872070312, "loss": 2.2336, "rewards/accuracies": 0.5, "rewards/chosen": -5.011462211608887, "rewards/margins": -0.6482608318328857, "rewards/rejected": -4.36320161819458, "step": 733 }, { "epoch": 0.11, "learning_rate": 1.3608989055448935e-05, "logits/chosen": -2.5734055042266846, "logits/rejected": -3.0241003036499023, "logps/chosen": -160.18289184570312, "logps/rejected": -287.07867431640625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.536293625831604, "rewards/margins": 5.817293167114258, "rewards/rejected": -6.353586196899414, "step": 734 }, { "epoch": 0.11, "learning_rate": 1.3608255614917786e-05, "logits/chosen": -3.2843713760375977, "logits/rejected": -2.9932210445404053, "logps/chosen": -55.318180084228516, "logps/rejected": -89.18537902832031, "loss": 0.1984, "rewards/accuracies": 1.0, "rewards/chosen": -1.451830267906189, "rewards/margins": 2.7695772647857666, "rewards/rejected": -4.221407890319824, "step": 735 }, { "epoch": 0.11, "learning_rate": 1.360752217438664e-05, "logits/chosen": -2.5666282176971436, "logits/rejected": -2.8626339435577393, "logps/chosen": -60.9538688659668, "logps/rejected": -271.50592041015625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4091479778289795, "rewards/margins": 6.440044403076172, "rewards/rejected": -6.849192142486572, "step": 736 }, { "epoch": 0.11, "learning_rate": 1.3606788733855492e-05, "logits/chosen": -2.851402521133423, "logits/rejected": -2.3302202224731445, "logps/chosen": -1100.1925048828125, "logps/rejected": -863.801513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7893402576446533, "rewards/margins": 11.9185209274292, "rewards/rejected": -14.707860946655273, "step": 737 }, { "epoch": 0.11, "learning_rate": 1.3606055293324344e-05, "logits/chosen": -2.3154735565185547, "logits/rejected": -2.807708263397217, "logps/chosen": -119.97454071044922, "logps/rejected": -266.1999206542969, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.987423896789551, "rewards/margins": 6.257079124450684, "rewards/rejected": -9.244503021240234, "step": 738 }, { "epoch": 0.11, "learning_rate": 1.3605321852793196e-05, "logits/chosen": -2.5675129890441895, "logits/rejected": -3.3713347911834717, "logps/chosen": -135.77371215820312, "logps/rejected": -415.23046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.6086688041687012, "rewards/margins": 7.995176315307617, "rewards/rejected": -9.603845596313477, "step": 739 }, { "epoch": 0.12, "learning_rate": 1.3604588412262047e-05, "logits/chosen": -2.881401300430298, "logits/rejected": -3.123211622238159, "logps/chosen": -607.0074462890625, "logps/rejected": -939.8839111328125, "loss": 4.1433, "rewards/accuracies": 0.5, "rewards/chosen": -6.1501617431640625, "rewards/margins": -0.14273643493652344, "rewards/rejected": -6.007425308227539, "step": 740 }, { "epoch": 0.12, "learning_rate": 1.36038549717309e-05, "logits/chosen": -2.5648353099823, "logits/rejected": -2.8726935386657715, "logps/chosen": -339.9189758300781, "logps/rejected": -375.601318359375, "loss": 3.81, "rewards/accuracies": 0.5, "rewards/chosen": -6.734042644500732, "rewards/margins": -2.222611427307129, "rewards/rejected": -4.5114312171936035, "step": 741 }, { "epoch": 0.12, "learning_rate": 1.3603121531199751e-05, "logits/chosen": -2.6048195362091064, "logits/rejected": -2.7799181938171387, "logps/chosen": -46.716617584228516, "logps/rejected": -142.3912353515625, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": -1.3142082691192627, "rewards/margins": 3.9189445972442627, "rewards/rejected": -5.233152866363525, "step": 742 }, { "epoch": 0.12, "learning_rate": 1.3602388090668603e-05, "logits/chosen": -1.5453903675079346, "logits/rejected": -2.9672515392303467, "logps/chosen": -73.45064544677734, "logps/rejected": -266.6800231933594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.26697108149528503, "rewards/margins": 8.411808967590332, "rewards/rejected": -8.678779602050781, "step": 743 }, { "epoch": 0.12, "learning_rate": 1.3601654650137455e-05, "logits/chosen": -3.2236545085906982, "logits/rejected": -3.0460703372955322, "logps/chosen": -398.1429748535156, "logps/rejected": -258.5112609863281, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.9233405590057373, "rewards/margins": 5.838204383850098, "rewards/rejected": -7.761545181274414, "step": 744 }, { "epoch": 0.12, "learning_rate": 1.3600921209606309e-05, "logits/chosen": -2.7860262393951416, "logits/rejected": -3.0241336822509766, "logps/chosen": -507.1386413574219, "logps/rejected": -426.0369567871094, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.085873603820801, "rewards/margins": 5.970573425292969, "rewards/rejected": -9.05644702911377, "step": 745 }, { "epoch": 0.12, "learning_rate": 1.360018776907516e-05, "logits/chosen": -3.140934705734253, "logits/rejected": -2.8332812786102295, "logps/chosen": -291.205322265625, "logps/rejected": -285.8712158203125, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -3.8478500843048096, "rewards/margins": 4.718601226806641, "rewards/rejected": -8.566451072692871, "step": 746 }, { "epoch": 0.12, "learning_rate": 1.3599454328544012e-05, "logits/chosen": -2.6139516830444336, "logits/rejected": -3.1621506214141846, "logps/chosen": -366.0840148925781, "logps/rejected": -635.813720703125, "loss": 2.2774, "rewards/accuracies": 0.5, "rewards/chosen": -4.470008373260498, "rewards/margins": 1.4741544723510742, "rewards/rejected": -5.944162845611572, "step": 747 }, { "epoch": 0.12, "learning_rate": 1.3598720888012864e-05, "logits/chosen": -1.7353250980377197, "logits/rejected": -2.933483123779297, "logps/chosen": -176.556640625, "logps/rejected": -364.90692138671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1652472019195557, "rewards/margins": 7.9492387771606445, "rewards/rejected": -9.114486694335938, "step": 748 }, { "epoch": 0.12, "learning_rate": 1.3597987447481716e-05, "logits/chosen": -2.7324059009552, "logits/rejected": -2.5860166549682617, "logps/chosen": -210.9865264892578, "logps/rejected": -96.76363372802734, "loss": 4.5538, "rewards/accuracies": 0.0, "rewards/chosen": -6.315835952758789, "rewards/margins": -4.541864395141602, "rewards/rejected": -1.7739713191986084, "step": 749 }, { "epoch": 0.12, "learning_rate": 1.3597254006950568e-05, "logits/chosen": -2.9090027809143066, "logits/rejected": -3.0392391681671143, "logps/chosen": -48.711265563964844, "logps/rejected": -182.33416748046875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.7689039707183838, "rewards/margins": 4.490535259246826, "rewards/rejected": -5.259439468383789, "step": 750 }, { "epoch": 0.12, "learning_rate": 1.359652056641942e-05, "logits/chosen": -2.7989554405212402, "logits/rejected": -2.3309078216552734, "logps/chosen": -443.0983581542969, "logps/rejected": -543.7980346679688, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -1.642234444618225, "rewards/margins": 4.339625358581543, "rewards/rejected": -5.9818596839904785, "step": 751 }, { "epoch": 0.12, "learning_rate": 1.3595787125888272e-05, "logits/chosen": -2.8908002376556396, "logits/rejected": -1.986182689666748, "logps/chosen": -314.5702819824219, "logps/rejected": -194.3587646484375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6314120292663574, "rewards/margins": 5.943114280700684, "rewards/rejected": -7.574525833129883, "step": 752 }, { "epoch": 0.12, "learning_rate": 1.3595053685357124e-05, "logits/chosen": -2.8638088703155518, "logits/rejected": -3.2366530895233154, "logps/chosen": -39.08602523803711, "logps/rejected": -250.4393768310547, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.5762209296226501, "rewards/margins": 5.6475982666015625, "rewards/rejected": -6.223818778991699, "step": 753 }, { "epoch": 0.12, "learning_rate": 1.3594320244825977e-05, "logits/chosen": -1.818722128868103, "logits/rejected": -2.7748727798461914, "logps/chosen": -80.92156982421875, "logps/rejected": -265.3782653808594, "loss": 0.5845, "rewards/accuracies": 0.5, "rewards/chosen": -0.9072338342666626, "rewards/margins": 2.00339674949646, "rewards/rejected": -2.910630464553833, "step": 754 }, { "epoch": 0.12, "learning_rate": 1.3593586804294829e-05, "logits/chosen": -2.9292311668395996, "logits/rejected": -2.6892690658569336, "logps/chosen": -118.15376281738281, "logps/rejected": -106.23812866210938, "loss": 1.0751, "rewards/accuracies": 0.5, "rewards/chosen": -1.8843082189559937, "rewards/margins": 1.7938411235809326, "rewards/rejected": -3.678149461746216, "step": 755 }, { "epoch": 0.12, "learning_rate": 1.3592853363763681e-05, "logits/chosen": -3.1951241493225098, "logits/rejected": -2.5712363719940186, "logps/chosen": -353.658447265625, "logps/rejected": -178.62518310546875, "loss": 2.1282, "rewards/accuracies": 0.5, "rewards/chosen": -3.228817939758301, "rewards/margins": 1.314631462097168, "rewards/rejected": -4.543449401855469, "step": 756 }, { "epoch": 0.12, "learning_rate": 1.3592119923232533e-05, "logits/chosen": -0.5542548298835754, "logits/rejected": -1.4363106489181519, "logps/chosen": -36.66352844238281, "logps/rejected": -445.26336669921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.015569686889648438, "rewards/margins": 7.429265975952148, "rewards/rejected": -7.4136962890625, "step": 757 }, { "epoch": 0.12, "learning_rate": 1.3591386482701385e-05, "logits/chosen": -2.786468267440796, "logits/rejected": -2.531534194946289, "logps/chosen": -378.32879638671875, "logps/rejected": -344.8641357421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.49298518896102905, "rewards/margins": 7.372466087341309, "rewards/rejected": -7.865451335906982, "step": 758 }, { "epoch": 0.12, "learning_rate": 1.3590653042170237e-05, "logits/chosen": -2.8407108783721924, "logits/rejected": -3.1377527713775635, "logps/chosen": -139.81640625, "logps/rejected": -147.6668701171875, "loss": 1.9182, "rewards/accuracies": 0.5, "rewards/chosen": -3.1196181774139404, "rewards/margins": 2.2631444931030273, "rewards/rejected": -5.382762908935547, "step": 759 }, { "epoch": 0.12, "learning_rate": 1.3589919601639088e-05, "logits/chosen": -1.5926175117492676, "logits/rejected": -2.72318172454834, "logps/chosen": -38.71539306640625, "logps/rejected": -379.3211669921875, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.5000672936439514, "rewards/margins": 8.692124366760254, "rewards/rejected": -9.192191123962402, "step": 760 }, { "epoch": 0.12, "learning_rate": 1.358918616110794e-05, "logits/chosen": -2.3741605281829834, "logits/rejected": -2.9491970539093018, "logps/chosen": -422.5985412597656, "logps/rejected": -530.27197265625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.855717420578003, "rewards/margins": 6.591667175292969, "rewards/rejected": -9.44738483428955, "step": 761 }, { "epoch": 0.12, "learning_rate": 1.3588452720576792e-05, "logits/chosen": -1.9100770950317383, "logits/rejected": -2.873159408569336, "logps/chosen": -113.55242919921875, "logps/rejected": -233.89193725585938, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.331382393836975, "rewards/margins": 5.380999565124512, "rewards/rejected": -6.7123823165893555, "step": 762 }, { "epoch": 0.12, "learning_rate": 1.3587719280045646e-05, "logits/chosen": -3.283770799636841, "logits/rejected": -2.8560562133789062, "logps/chosen": -267.2354736328125, "logps/rejected": -87.41069030761719, "loss": 4.741, "rewards/accuracies": 0.5, "rewards/chosen": -6.569374084472656, "rewards/margins": -3.1003761291503906, "rewards/rejected": -3.4689979553222656, "step": 763 }, { "epoch": 0.12, "learning_rate": 1.3586985839514498e-05, "logits/chosen": -3.2432656288146973, "logits/rejected": -2.426074504852295, "logps/chosen": -180.03436279296875, "logps/rejected": -50.73903274536133, "loss": 4.8251, "rewards/accuracies": 0.5, "rewards/chosen": -5.57615852355957, "rewards/margins": -3.313322067260742, "rewards/rejected": -2.262836456298828, "step": 764 }, { "epoch": 0.12, "learning_rate": 1.358625239898335e-05, "logits/chosen": -2.832900047302246, "logits/rejected": -2.406524419784546, "logps/chosen": -140.3224334716797, "logps/rejected": -189.8460235595703, "loss": 0.86, "rewards/accuracies": 0.5, "rewards/chosen": -2.2972004413604736, "rewards/margins": 1.0693516731262207, "rewards/rejected": -3.3665521144866943, "step": 765 }, { "epoch": 0.12, "learning_rate": 1.3585518958452201e-05, "logits/chosen": -2.7144248485565186, "logits/rejected": -2.8173844814300537, "logps/chosen": -64.61530303955078, "logps/rejected": -132.58880615234375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.6312929391860962, "rewards/margins": 5.9083757400512695, "rewards/rejected": -6.539669036865234, "step": 766 }, { "epoch": 0.12, "learning_rate": 1.3584785517921053e-05, "logits/chosen": -2.750810146331787, "logits/rejected": -2.5519566535949707, "logps/chosen": -167.24569702148438, "logps/rejected": -69.83645629882812, "loss": 4.0408, "rewards/accuracies": 0.0, "rewards/chosen": -5.1555376052856445, "rewards/margins": -4.022698879241943, "rewards/rejected": -1.1328387260437012, "step": 767 }, { "epoch": 0.12, "learning_rate": 1.3584052077389907e-05, "logits/chosen": -2.8035104274749756, "logits/rejected": -2.5561697483062744, "logps/chosen": -461.0323486328125, "logps/rejected": -306.06707763671875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.66788649559021, "rewards/margins": 5.21018123626709, "rewards/rejected": -7.878067970275879, "step": 768 }, { "epoch": 0.12, "learning_rate": 1.3583318636858759e-05, "logits/chosen": -2.6048150062561035, "logits/rejected": -2.701500654220581, "logps/chosen": -406.5057678222656, "logps/rejected": -442.6061096191406, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.5963919162750244, "rewards/margins": 4.863581657409668, "rewards/rejected": -6.459973335266113, "step": 769 }, { "epoch": 0.12, "learning_rate": 1.358258519632761e-05, "logits/chosen": -2.67665433883667, "logits/rejected": -2.9093329906463623, "logps/chosen": -37.492897033691406, "logps/rejected": -152.82342529296875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5768318176269531, "rewards/margins": 5.364742279052734, "rewards/rejected": -6.9415740966796875, "step": 770 }, { "epoch": 0.12, "learning_rate": 1.3581851755796462e-05, "logits/chosen": -2.786686420440674, "logits/rejected": -1.257886528968811, "logps/chosen": -412.28704833984375, "logps/rejected": -269.78570556640625, "loss": 2.5585, "rewards/accuracies": 0.5, "rewards/chosen": -3.593486785888672, "rewards/margins": -0.1500105857849121, "rewards/rejected": -3.4434762001037598, "step": 771 }, { "epoch": 0.12, "learning_rate": 1.3581118315265316e-05, "logits/chosen": -2.710693597793579, "logits/rejected": -3.114699363708496, "logps/chosen": -69.79817962646484, "logps/rejected": -262.88482666015625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.3773822784423828, "rewards/margins": 6.474061012268066, "rewards/rejected": -6.851442813873291, "step": 772 }, { "epoch": 0.12, "learning_rate": 1.3580384874734168e-05, "logits/chosen": -2.45440411567688, "logits/rejected": -2.8494269847869873, "logps/chosen": -122.66223907470703, "logps/rejected": -294.8485107421875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.7472683191299438, "rewards/margins": 6.3345842361450195, "rewards/rejected": -7.081852912902832, "step": 773 }, { "epoch": 0.12, "learning_rate": 1.357965143420302e-05, "logits/chosen": -2.2229790687561035, "logits/rejected": -2.36104416847229, "logps/chosen": -464.921875, "logps/rejected": -491.51251220703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.3503562808036804, "rewards/margins": 6.612386703491211, "rewards/rejected": -6.262030124664307, "step": 774 }, { "epoch": 0.12, "learning_rate": 1.3578917993671872e-05, "logits/chosen": -2.846299409866333, "logits/rejected": -3.274512767791748, "logps/chosen": -115.39163208007812, "logps/rejected": -280.1468200683594, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.8684770464897156, "rewards/margins": 4.5587873458862305, "rewards/rejected": -5.427264213562012, "step": 775 }, { "epoch": 0.12, "learning_rate": 1.3578184553140724e-05, "logits/chosen": -2.86492657661438, "logits/rejected": -2.7582802772521973, "logps/chosen": -105.97230529785156, "logps/rejected": -127.33918762207031, "loss": 1.5724, "rewards/accuracies": 0.5, "rewards/chosen": -2.375859498977661, "rewards/margins": 0.33017003536224365, "rewards/rejected": -2.7060294151306152, "step": 776 }, { "epoch": 0.12, "learning_rate": 1.3577451112609575e-05, "logits/chosen": -2.157536506652832, "logits/rejected": -2.960994005203247, "logps/chosen": -443.0267028808594, "logps/rejected": -579.038818359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.12548789381980896, "rewards/margins": 7.499969482421875, "rewards/rejected": -7.374481201171875, "step": 777 }, { "epoch": 0.12, "learning_rate": 1.3576717672078427e-05, "logits/chosen": -1.5887068510055542, "logits/rejected": -2.9120264053344727, "logps/chosen": -66.9248046875, "logps/rejected": -245.26434326171875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.6167687177658081, "rewards/margins": 4.935070037841797, "rewards/rejected": -5.5518388748168945, "step": 778 }, { "epoch": 0.12, "learning_rate": 1.357598423154728e-05, "logits/chosen": -1.6077420711517334, "logits/rejected": -2.7837419509887695, "logps/chosen": -172.21022033691406, "logps/rejected": -349.48663330078125, "loss": 1.6372, "rewards/accuracies": 0.5, "rewards/chosen": -3.191310405731201, "rewards/margins": 4.559959411621094, "rewards/rejected": -7.751269340515137, "step": 779 }, { "epoch": 0.12, "learning_rate": 1.3575250791016131e-05, "logits/chosen": -2.7372019290924072, "logits/rejected": -3.076547145843506, "logps/chosen": -374.4556884765625, "logps/rejected": -319.9430236816406, "loss": 0.0281, "rewards/accuracies": 1.0, "rewards/chosen": -1.9845657348632812, "rewards/margins": 3.9610166549682617, "rewards/rejected": -5.945582389831543, "step": 780 }, { "epoch": 0.12, "learning_rate": 1.3574517350484985e-05, "logits/chosen": -1.6912156343460083, "logits/rejected": -3.078136444091797, "logps/chosen": -105.78984069824219, "logps/rejected": -309.78704833984375, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -1.604548692703247, "rewards/margins": 5.600302219390869, "rewards/rejected": -7.204850673675537, "step": 781 }, { "epoch": 0.12, "learning_rate": 1.3573783909953837e-05, "logits/chosen": -2.7260630130767822, "logits/rejected": -3.3021442890167236, "logps/chosen": -129.7859344482422, "logps/rejected": -282.57623291015625, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -1.1663967370986938, "rewards/margins": 4.236551284790039, "rewards/rejected": -5.402948379516602, "step": 782 }, { "epoch": 0.12, "learning_rate": 1.3573050469422688e-05, "logits/chosen": -2.876115322113037, "logits/rejected": -2.303438901901245, "logps/chosen": -271.47479248046875, "logps/rejected": -379.4602355957031, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.4631919264793396, "rewards/margins": 5.795601844787598, "rewards/rejected": -6.258793830871582, "step": 783 }, { "epoch": 0.12, "learning_rate": 1.357231702889154e-05, "logits/chosen": -2.548891544342041, "logits/rejected": -2.8909544944763184, "logps/chosen": -130.23501586914062, "logps/rejected": -289.97296142578125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.1551133394241333, "rewards/margins": 4.918910980224609, "rewards/rejected": -6.074024677276611, "step": 784 }, { "epoch": 0.12, "learning_rate": 1.3571583588360392e-05, "logits/chosen": -2.7021141052246094, "logits/rejected": -2.2029507160186768, "logps/chosen": -444.5079650878906, "logps/rejected": -307.56805419921875, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4299136996269226, "rewards/margins": 4.6676025390625, "rewards/rejected": -5.0975165367126465, "step": 785 }, { "epoch": 0.12, "learning_rate": 1.3570850147829244e-05, "logits/chosen": -3.0406105518341064, "logits/rejected": -2.935425043106079, "logps/chosen": -146.92724609375, "logps/rejected": -252.9579620361328, "loss": 1.142, "rewards/accuracies": 0.5, "rewards/chosen": -1.697625756263733, "rewards/margins": 1.6642793416976929, "rewards/rejected": -3.3619048595428467, "step": 786 }, { "epoch": 0.12, "learning_rate": 1.3570116707298096e-05, "logits/chosen": -3.065579652786255, "logits/rejected": -2.1088812351226807, "logps/chosen": -430.90283203125, "logps/rejected": -335.96331787109375, "loss": 0.7106, "rewards/accuracies": 0.5, "rewards/chosen": -2.4802260398864746, "rewards/margins": 2.062791347503662, "rewards/rejected": -4.543017387390137, "step": 787 }, { "epoch": 0.12, "learning_rate": 1.3569383266766948e-05, "logits/chosen": -3.206007957458496, "logits/rejected": -2.845935821533203, "logps/chosen": -197.75994873046875, "logps/rejected": -159.37069702148438, "loss": 1.5519, "rewards/accuracies": 0.5, "rewards/chosen": -1.5464072227478027, "rewards/margins": 1.0942498445510864, "rewards/rejected": -2.6406571865081787, "step": 788 }, { "epoch": 0.12, "learning_rate": 1.35686498262358e-05, "logits/chosen": -2.5464227199554443, "logits/rejected": -2.9266059398651123, "logps/chosen": -117.96229553222656, "logps/rejected": -302.23651123046875, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.681662380695343, "rewards/margins": 4.113099575042725, "rewards/rejected": -4.794761657714844, "step": 789 }, { "epoch": 0.12, "learning_rate": 1.3567916385704653e-05, "logits/chosen": -2.747108221054077, "logits/rejected": -2.2532074451446533, "logps/chosen": -326.8138732910156, "logps/rejected": -311.5061950683594, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -2.2841224670410156, "rewards/margins": 4.358455657958984, "rewards/rejected": -6.642578125, "step": 790 }, { "epoch": 0.12, "learning_rate": 1.3567182945173505e-05, "logits/chosen": -2.9356536865234375, "logits/rejected": -2.415093183517456, "logps/chosen": -322.77239990234375, "logps/rejected": -268.9842529296875, "loss": 0.3009, "rewards/accuracies": 1.0, "rewards/chosen": -2.6606431007385254, "rewards/margins": 1.746213674545288, "rewards/rejected": -4.406856536865234, "step": 791 }, { "epoch": 0.12, "learning_rate": 1.3566449504642357e-05, "logits/chosen": -0.6637412309646606, "logits/rejected": -2.292762517929077, "logps/chosen": -78.44424438476562, "logps/rejected": -367.0404968261719, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8968021869659424, "rewards/margins": 5.072896957397461, "rewards/rejected": -5.969698905944824, "step": 792 }, { "epoch": 0.12, "learning_rate": 1.3565716064111209e-05, "logits/chosen": -2.5904529094696045, "logits/rejected": -1.1526726484298706, "logps/chosen": -811.4981079101562, "logps/rejected": -344.4857482910156, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -2.124260425567627, "rewards/margins": 2.8030166625976562, "rewards/rejected": -4.927277088165283, "step": 793 }, { "epoch": 0.12, "learning_rate": 1.356498262358006e-05, "logits/chosen": -2.997666835784912, "logits/rejected": -3.145268440246582, "logps/chosen": -172.16433715820312, "logps/rejected": -214.79660034179688, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.7906895279884338, "rewards/margins": 5.229890823364258, "rewards/rejected": -6.020580291748047, "step": 794 }, { "epoch": 0.12, "learning_rate": 1.3564249183048913e-05, "logits/chosen": -0.9559100270271301, "logits/rejected": -2.807800531387329, "logps/chosen": -116.63756561279297, "logps/rejected": -515.5341796875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.7552732229232788, "rewards/margins": 5.744661331176758, "rewards/rejected": -6.499934196472168, "step": 795 }, { "epoch": 0.12, "learning_rate": 1.3563515742517765e-05, "logits/chosen": -3.0807745456695557, "logits/rejected": -2.3192427158355713, "logps/chosen": -537.7113647460938, "logps/rejected": -319.76959228515625, "loss": 1.4511, "rewards/accuracies": 0.5, "rewards/chosen": -2.8440401554107666, "rewards/margins": -0.0333251953125, "rewards/rejected": -2.8107147216796875, "step": 796 }, { "epoch": 0.12, "learning_rate": 1.3562782301986616e-05, "logits/chosen": -2.185541868209839, "logits/rejected": -3.103073835372925, "logps/chosen": -126.33805847167969, "logps/rejected": -446.35589599609375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.9846735000610352, "rewards/margins": 8.906187057495117, "rewards/rejected": -10.890859603881836, "step": 797 }, { "epoch": 0.12, "learning_rate": 1.3562048861455468e-05, "logits/chosen": -1.4269230365753174, "logits/rejected": -2.945866584777832, "logps/chosen": -350.6015625, "logps/rejected": -394.15234375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.9478870630264282, "rewards/margins": 5.527378559112549, "rewards/rejected": -6.4752655029296875, "step": 798 }, { "epoch": 0.12, "learning_rate": 1.3561315420924322e-05, "logits/chosen": -3.1922192573547363, "logits/rejected": -2.8350236415863037, "logps/chosen": -476.1371154785156, "logps/rejected": -265.7356872558594, "loss": 4.4562, "rewards/accuracies": 0.5, "rewards/chosen": -6.493704319000244, "rewards/margins": -1.740250825881958, "rewards/rejected": -4.753453254699707, "step": 799 }, { "epoch": 0.12, "learning_rate": 1.3560581980393174e-05, "logits/chosen": -2.6316795349121094, "logits/rejected": -3.1395657062530518, "logps/chosen": -89.41610717773438, "logps/rejected": -162.1560516357422, "loss": 0.067, "rewards/accuracies": 1.0, "rewards/chosen": -1.1214714050292969, "rewards/margins": 3.4172348976135254, "rewards/rejected": -4.538706302642822, "step": 800 }, { "epoch": 0.12, "learning_rate": 1.3559848539862026e-05, "logits/chosen": -2.398408889770508, "logits/rejected": -3.020190954208374, "logps/chosen": -299.52923583984375, "logps/rejected": -414.73516845703125, "loss": 3.3578, "rewards/accuracies": 0.5, "rewards/chosen": -4.997778415679932, "rewards/margins": -0.40690159797668457, "rewards/rejected": -4.590876579284668, "step": 801 }, { "epoch": 0.12, "learning_rate": 1.355911509933088e-05, "logits/chosen": -2.8069710731506348, "logits/rejected": -2.431411027908325, "logps/chosen": -158.7462158203125, "logps/rejected": -243.95335388183594, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.1968884468078613, "rewards/margins": 4.928521156311035, "rewards/rejected": -7.125410079956055, "step": 802 }, { "epoch": 0.12, "learning_rate": 1.3558381658799731e-05, "logits/chosen": -3.0483040809631348, "logits/rejected": -2.9080722332000732, "logps/chosen": -200.91494750976562, "logps/rejected": -224.65139770507812, "loss": 2.4765, "rewards/accuracies": 0.5, "rewards/chosen": -3.3900954723358154, "rewards/margins": 1.5503978729248047, "rewards/rejected": -4.940493583679199, "step": 803 }, { "epoch": 0.13, "learning_rate": 1.3557648218268583e-05, "logits/chosen": -1.9923373460769653, "logits/rejected": -2.782942533493042, "logps/chosen": -197.59080505371094, "logps/rejected": -287.2910461425781, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -1.212672472000122, "rewards/margins": 4.7958831787109375, "rewards/rejected": -6.0085554122924805, "step": 804 }, { "epoch": 0.13, "learning_rate": 1.3556914777737435e-05, "logits/chosen": -2.813408136367798, "logits/rejected": -2.9762253761291504, "logps/chosen": -112.30716705322266, "logps/rejected": -285.49627685546875, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -1.3514554500579834, "rewards/margins": 6.298079967498779, "rewards/rejected": -7.649535179138184, "step": 805 }, { "epoch": 0.13, "learning_rate": 1.3556181337206287e-05, "logits/chosen": -1.6039992570877075, "logits/rejected": -2.853860855102539, "logps/chosen": -187.14016723632812, "logps/rejected": -420.2814025878906, "loss": 4.2706, "rewards/accuracies": 0.5, "rewards/chosen": -4.827657699584961, "rewards/margins": -1.1343457698822021, "rewards/rejected": -3.693311929702759, "step": 806 }, { "epoch": 0.13, "learning_rate": 1.355544789667514e-05, "logits/chosen": -3.1148414611816406, "logits/rejected": -3.030117988586426, "logps/chosen": -289.5146484375, "logps/rejected": -218.7887420654297, "loss": 2.4098, "rewards/accuracies": 0.5, "rewards/chosen": -3.570067882537842, "rewards/margins": -0.7061896324157715, "rewards/rejected": -2.8638782501220703, "step": 807 }, { "epoch": 0.13, "learning_rate": 1.3554714456143992e-05, "logits/chosen": -1.6641831398010254, "logits/rejected": -2.767193078994751, "logps/chosen": -79.43938446044922, "logps/rejected": -249.33953857421875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.1599746942520142, "rewards/margins": 4.952335357666016, "rewards/rejected": -6.112310409545898, "step": 808 }, { "epoch": 0.13, "learning_rate": 1.3553981015612844e-05, "logits/chosen": -3.3137950897216797, "logits/rejected": -2.907309055328369, "logps/chosen": -417.7865905761719, "logps/rejected": -258.2076721191406, "loss": 0.1603, "rewards/accuracies": 1.0, "rewards/chosen": -2.234220027923584, "rewards/margins": 2.703396797180176, "rewards/rejected": -4.93761682510376, "step": 809 }, { "epoch": 0.13, "learning_rate": 1.3553247575081696e-05, "logits/chosen": -3.143491506576538, "logits/rejected": -2.401245594024658, "logps/chosen": -632.9529418945312, "logps/rejected": -551.257568359375, "loss": 3.6577, "rewards/accuracies": 0.0, "rewards/chosen": -6.279109477996826, "rewards/margins": -3.503599166870117, "rewards/rejected": -2.775510311126709, "step": 810 }, { "epoch": 0.13, "learning_rate": 1.3552514134550548e-05, "logits/chosen": -2.686281442642212, "logits/rejected": -2.47442626953125, "logps/chosen": -254.25820922851562, "logps/rejected": -323.8255310058594, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4636554718017578, "rewards/margins": 7.1394362449646, "rewards/rejected": -8.6030912399292, "step": 811 }, { "epoch": 0.13, "learning_rate": 1.35517806940194e-05, "logits/chosen": -2.657209873199463, "logits/rejected": -2.838923215866089, "logps/chosen": -453.72576904296875, "logps/rejected": -541.239501953125, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": -1.7055145502090454, "rewards/margins": 4.000079154968262, "rewards/rejected": -5.705594062805176, "step": 812 }, { "epoch": 0.13, "learning_rate": 1.3551047253488252e-05, "logits/chosen": -2.394792079925537, "logits/rejected": -3.0328776836395264, "logps/chosen": -166.0496826171875, "logps/rejected": -268.63739013671875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.5916950702667236, "rewards/margins": 4.669662952423096, "rewards/rejected": -6.261358261108398, "step": 813 }, { "epoch": 0.13, "learning_rate": 1.3550313812957103e-05, "logits/chosen": -2.8685693740844727, "logits/rejected": -1.6136351823806763, "logps/chosen": -427.9119567871094, "logps/rejected": -323.04449462890625, "loss": 3.1682, "rewards/accuracies": 0.5, "rewards/chosen": -4.8115973472595215, "rewards/margins": 1.5046021938323975, "rewards/rejected": -6.316199779510498, "step": 814 }, { "epoch": 0.13, "learning_rate": 1.3549580372425955e-05, "logits/chosen": -2.835357666015625, "logits/rejected": -1.9540363550186157, "logps/chosen": -201.0183868408203, "logps/rejected": -124.39031219482422, "loss": 4.1748, "rewards/accuracies": 0.5, "rewards/chosen": -5.634367942810059, "rewards/margins": -2.1900715827941895, "rewards/rejected": -3.444296360015869, "step": 815 }, { "epoch": 0.13, "learning_rate": 1.3548846931894809e-05, "logits/chosen": -2.1508896350860596, "logits/rejected": -2.90878963470459, "logps/chosen": -140.03387451171875, "logps/rejected": -357.87225341796875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.3136110305786133, "rewards/margins": 6.0793046951293945, "rewards/rejected": -7.392915725708008, "step": 816 }, { "epoch": 0.13, "learning_rate": 1.354811349136366e-05, "logits/chosen": -2.7165212631225586, "logits/rejected": -3.150251626968384, "logps/chosen": -301.1086120605469, "logps/rejected": -507.47613525390625, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": -0.5495989322662354, "rewards/margins": 3.110272169113159, "rewards/rejected": -3.6598711013793945, "step": 817 }, { "epoch": 0.13, "learning_rate": 1.3547380050832513e-05, "logits/chosen": -1.5451322793960571, "logits/rejected": -2.4518182277679443, "logps/chosen": -188.88034057617188, "logps/rejected": -208.05825805664062, "loss": 3.2387, "rewards/accuracies": 0.5, "rewards/chosen": -4.3844757080078125, "rewards/margins": -2.830249309539795, "rewards/rejected": -1.554226279258728, "step": 818 }, { "epoch": 0.13, "learning_rate": 1.3546646610301364e-05, "logits/chosen": -2.815704822540283, "logits/rejected": -2.5849058628082275, "logps/chosen": -540.2808837890625, "logps/rejected": -347.1920166015625, "loss": 3.2083, "rewards/accuracies": 0.5, "rewards/chosen": -4.9802703857421875, "rewards/margins": -1.2574774026870728, "rewards/rejected": -3.7227931022644043, "step": 819 }, { "epoch": 0.13, "learning_rate": 1.3545913169770216e-05, "logits/chosen": -2.9242539405822754, "logits/rejected": -2.6954400539398193, "logps/chosen": -522.5778198242188, "logps/rejected": -531.24755859375, "loss": 4.6184, "rewards/accuracies": 0.5, "rewards/chosen": -4.610634803771973, "rewards/margins": -1.082747220993042, "rewards/rejected": -3.5278878211975098, "step": 820 }, { "epoch": 0.13, "learning_rate": 1.3545179729239068e-05, "logits/chosen": -0.3398738503456116, "logits/rejected": -2.3617794513702393, "logps/chosen": -59.015132904052734, "logps/rejected": -731.7119750976562, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": -1.1726698875427246, "rewards/margins": 4.931057929992676, "rewards/rejected": -6.1037278175354, "step": 821 }, { "epoch": 0.13, "learning_rate": 1.354444628870792e-05, "logits/chosen": -3.0015971660614014, "logits/rejected": -2.8080313205718994, "logps/chosen": -318.65716552734375, "logps/rejected": -208.83311462402344, "loss": 0.1896, "rewards/accuracies": 1.0, "rewards/chosen": -1.099036455154419, "rewards/margins": 1.616431474685669, "rewards/rejected": -2.715467929840088, "step": 822 }, { "epoch": 0.13, "learning_rate": 1.3543712848176772e-05, "logits/chosen": -2.394362211227417, "logits/rejected": -2.997305154800415, "logps/chosen": -137.50390625, "logps/rejected": -227.22239685058594, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": -1.2496211528778076, "rewards/margins": 4.261848449707031, "rewards/rejected": -5.51146936416626, "step": 823 }, { "epoch": 0.13, "learning_rate": 1.3542979407645624e-05, "logits/chosen": -2.666351556777954, "logits/rejected": -2.784837484359741, "logps/chosen": -35.435455322265625, "logps/rejected": -130.84446716308594, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.25386306643486023, "rewards/margins": 4.8009443283081055, "rewards/rejected": -5.054807662963867, "step": 824 }, { "epoch": 0.13, "learning_rate": 1.3542245967114477e-05, "logits/chosen": -3.289472818374634, "logits/rejected": -3.1698780059814453, "logps/chosen": -130.9342498779297, "logps/rejected": -121.36491394042969, "loss": 0.1184, "rewards/accuracies": 1.0, "rewards/chosen": -1.4107633829116821, "rewards/margins": 2.076843500137329, "rewards/rejected": -3.487607002258301, "step": 825 }, { "epoch": 0.13, "learning_rate": 1.354151252658333e-05, "logits/chosen": -2.553494691848755, "logits/rejected": -2.860469341278076, "logps/chosen": -220.2871551513672, "logps/rejected": -436.7577819824219, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.4354346990585327, "rewards/margins": 5.350765228271484, "rewards/rejected": -6.786200046539307, "step": 826 }, { "epoch": 0.13, "learning_rate": 1.3540779086052181e-05, "logits/chosen": -2.9489927291870117, "logits/rejected": -2.950425386428833, "logps/chosen": -194.15164184570312, "logps/rejected": -252.15167236328125, "loss": 1.8927, "rewards/accuracies": 0.5, "rewards/chosen": -3.598698377609253, "rewards/margins": -1.3575282096862793, "rewards/rejected": -2.2411701679229736, "step": 827 }, { "epoch": 0.13, "learning_rate": 1.3540045645521033e-05, "logits/chosen": -2.2455532550811768, "logits/rejected": -3.1279263496398926, "logps/chosen": -112.52117156982422, "logps/rejected": -261.7157897949219, "loss": 1.9338, "rewards/accuracies": 0.5, "rewards/chosen": -2.9901304244995117, "rewards/margins": 1.1233901977539062, "rewards/rejected": -4.113520622253418, "step": 828 }, { "epoch": 0.13, "learning_rate": 1.3539312204989885e-05, "logits/chosen": -2.9771649837493896, "logits/rejected": -2.912808895111084, "logps/chosen": -165.65875244140625, "logps/rejected": -210.12696838378906, "loss": 1.2801, "rewards/accuracies": 0.5, "rewards/chosen": -2.3084394931793213, "rewards/margins": 2.1511178016662598, "rewards/rejected": -4.45955753326416, "step": 829 }, { "epoch": 0.13, "learning_rate": 1.3538578764458737e-05, "logits/chosen": -2.849062919616699, "logits/rejected": -2.864706516265869, "logps/chosen": -140.95248413085938, "logps/rejected": -120.87266540527344, "loss": 0.8896, "rewards/accuracies": 0.5, "rewards/chosen": -2.5050573348999023, "rewards/margins": 0.5113170146942139, "rewards/rejected": -3.016374111175537, "step": 830 }, { "epoch": 0.13, "learning_rate": 1.3537845323927589e-05, "logits/chosen": -2.8750221729278564, "logits/rejected": -2.689725399017334, "logps/chosen": -205.0136260986328, "logps/rejected": -118.66260528564453, "loss": 1.9578, "rewards/accuracies": 0.5, "rewards/chosen": -2.5583267211914062, "rewards/margins": 1.2732192277908325, "rewards/rejected": -3.831545829772949, "step": 831 }, { "epoch": 0.13, "learning_rate": 1.353711188339644e-05, "logits/chosen": -2.245826005935669, "logits/rejected": -3.037386655807495, "logps/chosen": -74.71662902832031, "logps/rejected": -360.57696533203125, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -1.1751232147216797, "rewards/margins": 4.905618667602539, "rewards/rejected": -6.080741882324219, "step": 832 }, { "epoch": 0.13, "learning_rate": 1.3536378442865292e-05, "logits/chosen": -3.2812752723693848, "logits/rejected": -3.420881986618042, "logps/chosen": -34.20505905151367, "logps/rejected": -110.1092529296875, "loss": 0.0936, "rewards/accuracies": 1.0, "rewards/chosen": -0.25202932953834534, "rewards/margins": 3.5144104957580566, "rewards/rejected": -3.766439914703369, "step": 833 }, { "epoch": 0.13, "learning_rate": 1.3535645002334146e-05, "logits/chosen": -1.7298792600631714, "logits/rejected": -2.791686773300171, "logps/chosen": -220.26895141601562, "logps/rejected": -254.436767578125, "loss": 4.1151, "rewards/accuracies": 0.5, "rewards/chosen": -5.556825637817383, "rewards/margins": -2.818912982940674, "rewards/rejected": -2.73791241645813, "step": 834 }, { "epoch": 0.13, "learning_rate": 1.3534911561802998e-05, "logits/chosen": -2.085446834564209, "logits/rejected": -2.99869966506958, "logps/chosen": -86.71366882324219, "logps/rejected": -281.5967102050781, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.2930622100830078, "rewards/margins": 5.874914169311523, "rewards/rejected": -6.167976379394531, "step": 835 }, { "epoch": 0.13, "learning_rate": 1.3534178121271852e-05, "logits/chosen": -1.5543479919433594, "logits/rejected": -2.991774559020996, "logps/chosen": -57.373924255371094, "logps/rejected": -222.87965393066406, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.8458127975463867, "rewards/margins": 4.465694904327393, "rewards/rejected": -6.311507225036621, "step": 836 }, { "epoch": 0.13, "learning_rate": 1.3533444680740703e-05, "logits/chosen": -2.9097747802734375, "logits/rejected": -2.5871872901916504, "logps/chosen": -181.926025390625, "logps/rejected": -188.86126708984375, "loss": 0.7433, "rewards/accuracies": 0.5, "rewards/chosen": -1.5039161443710327, "rewards/margins": 2.087702512741089, "rewards/rejected": -3.591618776321411, "step": 837 }, { "epoch": 0.13, "learning_rate": 1.3532711240209555e-05, "logits/chosen": -2.9101369380950928, "logits/rejected": -2.73799991607666, "logps/chosen": -378.6028137207031, "logps/rejected": -290.62615966796875, "loss": 2.8864, "rewards/accuracies": 0.5, "rewards/chosen": -4.087643623352051, "rewards/margins": -0.7449846267700195, "rewards/rejected": -3.3426589965820312, "step": 838 }, { "epoch": 0.13, "learning_rate": 1.3531977799678407e-05, "logits/chosen": -2.1693968772888184, "logits/rejected": -2.9776740074157715, "logps/chosen": -375.0846862792969, "logps/rejected": -345.004638671875, "loss": 0.3025, "rewards/accuracies": 1.0, "rewards/chosen": -1.3661892414093018, "rewards/margins": 2.314512252807617, "rewards/rejected": -3.680701732635498, "step": 839 }, { "epoch": 0.13, "learning_rate": 1.3531244359147259e-05, "logits/chosen": -2.980525255203247, "logits/rejected": -3.258373260498047, "logps/chosen": -192.2569580078125, "logps/rejected": -238.48312377929688, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.8729231357574463, "rewards/margins": 5.6072492599487305, "rewards/rejected": -6.480172634124756, "step": 840 }, { "epoch": 0.13, "learning_rate": 1.3530510918616111e-05, "logits/chosen": -2.3141233921051025, "logits/rejected": -2.771965265274048, "logps/chosen": -75.22760772705078, "logps/rejected": -367.02008056640625, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -0.23129788041114807, "rewards/margins": 6.876445293426514, "rewards/rejected": -7.107743263244629, "step": 841 }, { "epoch": 0.13, "learning_rate": 1.3529777478084963e-05, "logits/chosen": -3.150195598602295, "logits/rejected": -2.914942979812622, "logps/chosen": -252.42037963867188, "logps/rejected": -184.29104614257812, "loss": 1.6659, "rewards/accuracies": 0.5, "rewards/chosen": -2.643338680267334, "rewards/margins": 1.5348247289657593, "rewards/rejected": -4.178163528442383, "step": 842 }, { "epoch": 0.13, "learning_rate": 1.3529044037553816e-05, "logits/chosen": -3.0785608291625977, "logits/rejected": -2.271434783935547, "logps/chosen": -932.0150146484375, "logps/rejected": -443.28997802734375, "loss": 1.6184, "rewards/accuracies": 0.5, "rewards/chosen": -1.856317162513733, "rewards/margins": 0.9501031637191772, "rewards/rejected": -2.80642032623291, "step": 843 }, { "epoch": 0.13, "learning_rate": 1.3528310597022668e-05, "logits/chosen": -2.7799808979034424, "logits/rejected": -2.8277761936187744, "logps/chosen": -385.7571105957031, "logps/rejected": -376.244384765625, "loss": 0.2234, "rewards/accuracies": 1.0, "rewards/chosen": -2.5154778957366943, "rewards/margins": 2.033174991607666, "rewards/rejected": -4.548652648925781, "step": 844 }, { "epoch": 0.13, "learning_rate": 1.352757715649152e-05, "logits/chosen": -2.9129679203033447, "logits/rejected": -2.6017067432403564, "logps/chosen": -257.6197204589844, "logps/rejected": -220.5987548828125, "loss": 2.3794, "rewards/accuracies": 0.5, "rewards/chosen": -3.3644487857818604, "rewards/margins": 1.1896686553955078, "rewards/rejected": -4.554117202758789, "step": 845 }, { "epoch": 0.13, "learning_rate": 1.3526843715960372e-05, "logits/chosen": -2.7274038791656494, "logits/rejected": -2.827594041824341, "logps/chosen": -572.1322631835938, "logps/rejected": -607.18701171875, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": -1.116251826286316, "rewards/margins": 2.8360402584075928, "rewards/rejected": -3.952291965484619, "step": 846 }, { "epoch": 0.13, "learning_rate": 1.3526110275429224e-05, "logits/chosen": -2.372877836227417, "logits/rejected": -2.797753095626831, "logps/chosen": -106.50176239013672, "logps/rejected": -264.60687255859375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0504562854766846, "rewards/margins": 5.791023254394531, "rewards/rejected": -6.841479301452637, "step": 847 }, { "epoch": 0.13, "learning_rate": 1.3525376834898076e-05, "logits/chosen": -2.95137619972229, "logits/rejected": -2.4985365867614746, "logps/chosen": -271.5990905761719, "logps/rejected": -362.73681640625, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.3790004849433899, "rewards/margins": 6.088918209075928, "rewards/rejected": -6.467918395996094, "step": 848 }, { "epoch": 0.13, "learning_rate": 1.3524643394366928e-05, "logits/chosen": -3.0959622859954834, "logits/rejected": -1.6766533851623535, "logps/chosen": -315.3536376953125, "logps/rejected": -65.98269653320312, "loss": 2.5532, "rewards/accuracies": 0.0, "rewards/chosen": -4.283519744873047, "rewards/margins": -2.464003562927246, "rewards/rejected": -1.8195161819458008, "step": 849 }, { "epoch": 0.13, "learning_rate": 1.352390995383578e-05, "logits/chosen": -2.967991828918457, "logits/rejected": -2.6690211296081543, "logps/chosen": -175.58224487304688, "logps/rejected": -303.00177001953125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.9932558536529541, "rewards/margins": 6.463761329650879, "rewards/rejected": -7.457016944885254, "step": 850 }, { "epoch": 0.13, "learning_rate": 1.3523176513304631e-05, "logits/chosen": -2.813929796218872, "logits/rejected": -2.6350109577178955, "logps/chosen": -108.67584228515625, "logps/rejected": -223.4940643310547, "loss": 1.2599, "rewards/accuracies": 0.5, "rewards/chosen": -2.2520973682403564, "rewards/margins": 2.6843621730804443, "rewards/rejected": -4.936459541320801, "step": 851 }, { "epoch": 0.13, "learning_rate": 1.3522443072773485e-05, "logits/chosen": -2.1947245597839355, "logits/rejected": -2.756484031677246, "logps/chosen": -181.76837158203125, "logps/rejected": -251.03280639648438, "loss": 1.9151, "rewards/accuracies": 0.5, "rewards/chosen": -2.4246878623962402, "rewards/margins": 0.8506230115890503, "rewards/rejected": -3.27531099319458, "step": 852 }, { "epoch": 0.13, "learning_rate": 1.3521709632242337e-05, "logits/chosen": -3.135336399078369, "logits/rejected": -2.6175267696380615, "logps/chosen": -671.060546875, "logps/rejected": -499.66015625, "loss": 0.1578, "rewards/accuracies": 1.0, "rewards/chosen": -1.185725450515747, "rewards/margins": 2.065354347229004, "rewards/rejected": -3.251079559326172, "step": 853 }, { "epoch": 0.13, "learning_rate": 1.3520976191711189e-05, "logits/chosen": -2.8680639266967773, "logits/rejected": -3.1282296180725098, "logps/chosen": -71.71318817138672, "logps/rejected": -209.69647216796875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.3897707164287567, "rewards/margins": 4.423015594482422, "rewards/rejected": -4.812786102294922, "step": 854 }, { "epoch": 0.13, "learning_rate": 1.352024275118004e-05, "logits/chosen": -2.3126349449157715, "logits/rejected": -3.144669771194458, "logps/chosen": -41.12873840332031, "logps/rejected": -286.0068359375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.0326108932495117, "rewards/margins": 4.949491500854492, "rewards/rejected": -5.982102394104004, "step": 855 }, { "epoch": 0.13, "learning_rate": 1.3519509310648892e-05, "logits/chosen": -1.5873610973358154, "logits/rejected": -3.0265746116638184, "logps/chosen": -260.26580810546875, "logps/rejected": -458.1972961425781, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1418288946151733, "rewards/margins": 6.835811614990234, "rewards/rejected": -7.977640151977539, "step": 856 }, { "epoch": 0.13, "learning_rate": 1.3518775870117744e-05, "logits/chosen": -3.308295965194702, "logits/rejected": -2.620816230773926, "logps/chosen": -651.5003662109375, "logps/rejected": -443.1865234375, "loss": 2.063, "rewards/accuracies": 0.5, "rewards/chosen": -2.117605686187744, "rewards/margins": 0.22955608367919922, "rewards/rejected": -2.3471617698669434, "step": 857 }, { "epoch": 0.13, "learning_rate": 1.3518042429586596e-05, "logits/chosen": -2.7315900325775146, "logits/rejected": -3.0826451778411865, "logps/chosen": -53.40824890136719, "logps/rejected": -349.406005859375, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.31092920899391174, "rewards/margins": 5.329587459564209, "rewards/rejected": -5.640516757965088, "step": 858 }, { "epoch": 0.13, "learning_rate": 1.3517308989055448e-05, "logits/chosen": -3.1855976581573486, "logits/rejected": -3.3736188411712646, "logps/chosen": -192.12074279785156, "logps/rejected": -347.7682800292969, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.07858896255493164, "rewards/margins": 5.179945945739746, "rewards/rejected": -5.2585344314575195, "step": 859 }, { "epoch": 0.13, "learning_rate": 1.35165755485243e-05, "logits/chosen": -3.0031278133392334, "logits/rejected": -3.2578670978546143, "logps/chosen": -273.5343933105469, "logps/rejected": -460.2108154296875, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -1.1151432991027832, "rewards/margins": 3.91936993598938, "rewards/rejected": -5.034512996673584, "step": 860 }, { "epoch": 0.13, "learning_rate": 1.3515842107993154e-05, "logits/chosen": -3.2926313877105713, "logits/rejected": -2.9045207500457764, "logps/chosen": -424.15203857421875, "logps/rejected": -293.4971618652344, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": -1.776193380355835, "rewards/margins": 2.7680296897888184, "rewards/rejected": -4.544222831726074, "step": 861 }, { "epoch": 0.13, "learning_rate": 1.3515108667462005e-05, "logits/chosen": -2.4941227436065674, "logits/rejected": -3.089996576309204, "logps/chosen": -100.21189880371094, "logps/rejected": -216.85714721679688, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.36439475417137146, "rewards/margins": 4.454877853393555, "rewards/rejected": -4.819272518157959, "step": 862 }, { "epoch": 0.13, "learning_rate": 1.3514375226930857e-05, "logits/chosen": -2.4979496002197266, "logits/rejected": -3.2597150802612305, "logps/chosen": -25.919408798217773, "logps/rejected": -289.5625, "loss": 0.072, "rewards/accuracies": 1.0, "rewards/chosen": -0.9624961018562317, "rewards/margins": 3.025367259979248, "rewards/rejected": -3.987863063812256, "step": 863 }, { "epoch": 0.13, "learning_rate": 1.351364178639971e-05, "logits/chosen": -1.5929118394851685, "logits/rejected": -3.0465755462646484, "logps/chosen": -250.47616577148438, "logps/rejected": -483.1080322265625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.7780876159667969, "rewards/margins": 6.759931564331055, "rewards/rejected": -7.538018703460693, "step": 864 }, { "epoch": 0.13, "learning_rate": 1.3512908345868561e-05, "logits/chosen": -1.3373782634735107, "logits/rejected": -3.0059971809387207, "logps/chosen": -104.05091857910156, "logps/rejected": -389.74932861328125, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.8272693753242493, "rewards/margins": 5.24953556060791, "rewards/rejected": -6.076805114746094, "step": 865 }, { "epoch": 0.13, "learning_rate": 1.3512174905337413e-05, "logits/chosen": -2.603492021560669, "logits/rejected": -3.0562903881073, "logps/chosen": -106.72298431396484, "logps/rejected": -205.25131225585938, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -1.4032440185546875, "rewards/margins": 3.962629795074463, "rewards/rejected": -5.36587381362915, "step": 866 }, { "epoch": 0.13, "learning_rate": 1.3511441464806265e-05, "logits/chosen": -2.5600409507751465, "logits/rejected": -2.956617832183838, "logps/chosen": -190.06600952148438, "logps/rejected": -355.8603515625, "loss": 0.0667, "rewards/accuracies": 1.0, "rewards/chosen": -0.8524320721626282, "rewards/margins": 2.679046154022217, "rewards/rejected": -3.5314784049987793, "step": 867 }, { "epoch": 0.13, "learning_rate": 1.3510708024275118e-05, "logits/chosen": -2.737795114517212, "logits/rejected": -1.6697887182235718, "logps/chosen": -415.55322265625, "logps/rejected": -315.255126953125, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -1.8344497680664062, "rewards/margins": 4.221192836761475, "rewards/rejected": -6.055642604827881, "step": 868 }, { "epoch": 0.14, "learning_rate": 1.350997458374397e-05, "logits/chosen": -3.050696611404419, "logits/rejected": -2.9453630447387695, "logps/chosen": -221.35086059570312, "logps/rejected": -288.16925048828125, "loss": 0.1585, "rewards/accuracies": 1.0, "rewards/chosen": -2.0126047134399414, "rewards/margins": 2.2632901668548584, "rewards/rejected": -4.275895118713379, "step": 869 }, { "epoch": 0.14, "learning_rate": 1.3509241143212824e-05, "logits/chosen": -1.8156176805496216, "logits/rejected": -3.009460210800171, "logps/chosen": -171.6968994140625, "logps/rejected": -428.5555725097656, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -1.4275023937225342, "rewards/margins": 3.712155342102051, "rewards/rejected": -5.139657974243164, "step": 870 }, { "epoch": 0.14, "learning_rate": 1.3508507702681676e-05, "logits/chosen": -2.358325242996216, "logits/rejected": -2.9684128761291504, "logps/chosen": -383.4056396484375, "logps/rejected": -262.18707275390625, "loss": 4.6935, "rewards/accuracies": 0.5, "rewards/chosen": -6.885612487792969, "rewards/margins": -2.125694751739502, "rewards/rejected": -4.759917259216309, "step": 871 }, { "epoch": 0.14, "learning_rate": 1.3507774262150528e-05, "logits/chosen": -2.873166799545288, "logits/rejected": -1.7987009286880493, "logps/chosen": -233.9575653076172, "logps/rejected": -167.68508911132812, "loss": 3.2352, "rewards/accuracies": 0.5, "rewards/chosen": -4.809174060821533, "rewards/margins": 1.5426931381225586, "rewards/rejected": -6.351867198944092, "step": 872 }, { "epoch": 0.14, "learning_rate": 1.350704082161938e-05, "logits/chosen": -1.3051997423171997, "logits/rejected": -2.0423388481140137, "logps/chosen": -114.84444427490234, "logps/rejected": -289.740478515625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.4859215021133423, "rewards/margins": 6.152910232543945, "rewards/rejected": -6.638831615447998, "step": 873 }, { "epoch": 0.14, "learning_rate": 1.3506307381088231e-05, "logits/chosen": -2.9436283111572266, "logits/rejected": -2.8546202182769775, "logps/chosen": -179.27232360839844, "logps/rejected": -143.219482421875, "loss": 1.6741, "rewards/accuracies": 0.5, "rewards/chosen": -2.6592137813568115, "rewards/margins": -0.13169264793395996, "rewards/rejected": -2.5275211334228516, "step": 874 }, { "epoch": 0.14, "learning_rate": 1.3505573940557083e-05, "logits/chosen": -2.211043357849121, "logits/rejected": -3.1173501014709473, "logps/chosen": -25.446949005126953, "logps/rejected": -286.890380859375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.3193238079547882, "rewards/margins": 6.517579078674316, "rewards/rejected": -6.836902618408203, "step": 875 }, { "epoch": 0.14, "learning_rate": 1.3504840500025935e-05, "logits/chosen": -2.3545448780059814, "logits/rejected": -2.848532199859619, "logps/chosen": -664.3816528320312, "logps/rejected": -716.460205078125, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.5561516284942627, "rewards/margins": 3.853870391845703, "rewards/rejected": -5.410021781921387, "step": 876 }, { "epoch": 0.14, "learning_rate": 1.3504107059494787e-05, "logits/chosen": -3.1740047931671143, "logits/rejected": -2.958606243133545, "logps/chosen": -206.92514038085938, "logps/rejected": -173.05718994140625, "loss": 2.3563, "rewards/accuracies": 0.5, "rewards/chosen": -2.689157009124756, "rewards/margins": 0.052457571029663086, "rewards/rejected": -2.741614580154419, "step": 877 }, { "epoch": 0.14, "learning_rate": 1.3503373618963639e-05, "logits/chosen": -1.7324522733688354, "logits/rejected": -0.8558692336082458, "logps/chosen": -206.07725524902344, "logps/rejected": -142.4596405029297, "loss": 2.8312, "rewards/accuracies": 0.5, "rewards/chosen": -3.7499098777770996, "rewards/margins": -0.26419830322265625, "rewards/rejected": -3.4857115745544434, "step": 878 }, { "epoch": 0.14, "learning_rate": 1.3502640178432492e-05, "logits/chosen": -2.679379940032959, "logits/rejected": -3.23197865486145, "logps/chosen": -48.424922943115234, "logps/rejected": -245.97592163085938, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -0.6829906702041626, "rewards/margins": 3.8335118293762207, "rewards/rejected": -4.516502380371094, "step": 879 }, { "epoch": 0.14, "learning_rate": 1.3501906737901344e-05, "logits/chosen": -3.122587203979492, "logits/rejected": -2.9971628189086914, "logps/chosen": -641.7272338867188, "logps/rejected": -548.3536376953125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.99960458278656, "rewards/margins": 6.609476089477539, "rewards/rejected": -8.609081268310547, "step": 880 }, { "epoch": 0.14, "learning_rate": 1.3501173297370196e-05, "logits/chosen": -3.0846762657165527, "logits/rejected": -2.368896007537842, "logps/chosen": -359.51153564453125, "logps/rejected": -164.72515869140625, "loss": 4.0292, "rewards/accuracies": 0.5, "rewards/chosen": -5.099785804748535, "rewards/margins": -1.9494249820709229, "rewards/rejected": -3.1503608226776123, "step": 881 }, { "epoch": 0.14, "learning_rate": 1.3500439856839048e-05, "logits/chosen": -2.491450071334839, "logits/rejected": -2.850142478942871, "logps/chosen": -137.93040466308594, "logps/rejected": -224.1862030029297, "loss": 0.2048, "rewards/accuracies": 1.0, "rewards/chosen": -0.8031371831893921, "rewards/margins": 2.239123582839966, "rewards/rejected": -3.0422608852386475, "step": 882 }, { "epoch": 0.14, "learning_rate": 1.34997064163079e-05, "logits/chosen": -1.7129414081573486, "logits/rejected": -2.7773332595825195, "logps/chosen": -171.2457275390625, "logps/rejected": -467.9065246582031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6107659339904785, "rewards/margins": 8.769251823425293, "rewards/rejected": -10.38001823425293, "step": 883 }, { "epoch": 0.14, "learning_rate": 1.3498972975776752e-05, "logits/chosen": -2.3116297721862793, "logits/rejected": -2.9685115814208984, "logps/chosen": -504.25732421875, "logps/rejected": -693.8276977539062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2450180053710938, "rewards/margins": 10.143196105957031, "rewards/rejected": -11.388214111328125, "step": 884 }, { "epoch": 0.14, "learning_rate": 1.3498239535245604e-05, "logits/chosen": -2.7335736751556396, "logits/rejected": -3.0114104747772217, "logps/chosen": -371.07305908203125, "logps/rejected": -396.80535888671875, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.578668236732483, "rewards/margins": 4.652161598205566, "rewards/rejected": -6.23082971572876, "step": 885 }, { "epoch": 0.14, "learning_rate": 1.3497506094714456e-05, "logits/chosen": -3.337080240249634, "logits/rejected": -2.3628718852996826, "logps/chosen": -641.3125610351562, "logps/rejected": -307.89361572265625, "loss": 2.369, "rewards/accuracies": 0.5, "rewards/chosen": -4.074850559234619, "rewards/margins": 0.759547233581543, "rewards/rejected": -4.834397792816162, "step": 886 }, { "epoch": 0.14, "learning_rate": 1.3496772654183307e-05, "logits/chosen": -2.6213109493255615, "logits/rejected": -3.0455007553100586, "logps/chosen": -23.370555877685547, "logps/rejected": -121.345458984375, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.36795496940612793, "rewards/margins": 4.453891754150391, "rewards/rejected": -4.8218464851379395, "step": 887 }, { "epoch": 0.14, "learning_rate": 1.3496039213652161e-05, "logits/chosen": -2.736532211303711, "logits/rejected": -2.2391107082366943, "logps/chosen": -226.15048217773438, "logps/rejected": -286.00592041015625, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": -1.0097824335098267, "rewards/margins": 3.670362949371338, "rewards/rejected": -4.680145263671875, "step": 888 }, { "epoch": 0.14, "learning_rate": 1.3495305773121013e-05, "logits/chosen": -2.7360544204711914, "logits/rejected": -1.9651262760162354, "logps/chosen": -219.8034210205078, "logps/rejected": -202.90408325195312, "loss": 3.0385, "rewards/accuracies": 0.5, "rewards/chosen": -3.2109768390655518, "rewards/margins": 1.370250940322876, "rewards/rejected": -4.581227779388428, "step": 889 }, { "epoch": 0.14, "learning_rate": 1.3494572332589865e-05, "logits/chosen": -2.7597134113311768, "logits/rejected": -1.1659667491912842, "logps/chosen": -160.83665466308594, "logps/rejected": -158.6962432861328, "loss": 0.4901, "rewards/accuracies": 0.5, "rewards/chosen": -1.328106164932251, "rewards/margins": 2.7322003841400146, "rewards/rejected": -4.060306549072266, "step": 890 }, { "epoch": 0.14, "learning_rate": 1.3493838892058717e-05, "logits/chosen": -2.6795551776885986, "logits/rejected": -2.7359354496002197, "logps/chosen": -59.454200744628906, "logps/rejected": -177.39332580566406, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/chosen": -1.1046572923660278, "rewards/margins": 2.820770263671875, "rewards/rejected": -3.9254274368286133, "step": 891 }, { "epoch": 0.14, "learning_rate": 1.3493105451527569e-05, "logits/chosen": -3.1315622329711914, "logits/rejected": -2.6965107917785645, "logps/chosen": -337.31494140625, "logps/rejected": -254.76101684570312, "loss": 1.9081, "rewards/accuracies": 0.5, "rewards/chosen": -5.275430679321289, "rewards/margins": 0.9938522577285767, "rewards/rejected": -6.269282817840576, "step": 892 }, { "epoch": 0.14, "learning_rate": 1.349237201099642e-05, "logits/chosen": -2.91105055809021, "logits/rejected": -2.228395462036133, "logps/chosen": -136.68894958496094, "logps/rejected": -200.52850341796875, "loss": 2.5488, "rewards/accuracies": 0.5, "rewards/chosen": -3.911571979522705, "rewards/margins": 1.7180933952331543, "rewards/rejected": -5.629665374755859, "step": 893 }, { "epoch": 0.14, "learning_rate": 1.3491638570465272e-05, "logits/chosen": -2.59995698928833, "logits/rejected": -3.1786344051361084, "logps/chosen": -99.3850326538086, "logps/rejected": -400.5623779296875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.1268284320831299, "rewards/margins": 6.67130708694458, "rewards/rejected": -7.798135757446289, "step": 894 }, { "epoch": 0.14, "learning_rate": 1.3490905129934124e-05, "logits/chosen": -2.5238559246063232, "logits/rejected": -2.9529500007629395, "logps/chosen": -655.2555541992188, "logps/rejected": -597.4674072265625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.0309998989105225, "rewards/margins": 5.413937568664551, "rewards/rejected": -7.444937229156494, "step": 895 }, { "epoch": 0.14, "learning_rate": 1.3490171689402976e-05, "logits/chosen": -2.64329195022583, "logits/rejected": -1.4282394647598267, "logps/chosen": -758.6257934570312, "logps/rejected": -389.83624267578125, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.9779632091522217, "rewards/margins": 4.5831217765808105, "rewards/rejected": -6.561084747314453, "step": 896 }, { "epoch": 0.14, "learning_rate": 1.348943824887183e-05, "logits/chosen": -2.386087417602539, "logits/rejected": -3.0661368370056152, "logps/chosen": -253.0013427734375, "logps/rejected": -409.35687255859375, "loss": 2.9219, "rewards/accuracies": 0.5, "rewards/chosen": -4.194749355316162, "rewards/margins": 0.4498720169067383, "rewards/rejected": -4.6446213722229, "step": 897 }, { "epoch": 0.14, "learning_rate": 1.3488704808340682e-05, "logits/chosen": -2.688655138015747, "logits/rejected": -2.8774373531341553, "logps/chosen": -834.95947265625, "logps/rejected": -610.6904296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.6787476539611816, "rewards/margins": 9.610261917114258, "rewards/rejected": -11.289009094238281, "step": 898 }, { "epoch": 0.14, "learning_rate": 1.3487971367809533e-05, "logits/chosen": -1.8636846542358398, "logits/rejected": -2.683724880218506, "logps/chosen": -98.13330841064453, "logps/rejected": -166.56781005859375, "loss": 1.7872, "rewards/accuracies": 0.5, "rewards/chosen": -2.225741386413574, "rewards/margins": 1.061482310295105, "rewards/rejected": -3.2872238159179688, "step": 899 }, { "epoch": 0.14, "learning_rate": 1.3487237927278385e-05, "logits/chosen": -3.1390862464904785, "logits/rejected": -2.6120314598083496, "logps/chosen": -411.220947265625, "logps/rejected": -228.6145782470703, "loss": 0.3142, "rewards/accuracies": 1.0, "rewards/chosen": -0.6953781247138977, "rewards/margins": 2.773878574371338, "rewards/rejected": -3.469256639480591, "step": 900 }, { "epoch": 0.14, "learning_rate": 1.3486504486747237e-05, "logits/chosen": -2.4815070629119873, "logits/rejected": -2.993145227432251, "logps/chosen": -127.35367584228516, "logps/rejected": -341.71063232421875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.3470563888549805, "rewards/margins": 4.715518474578857, "rewards/rejected": -6.062574863433838, "step": 901 }, { "epoch": 0.14, "learning_rate": 1.348577104621609e-05, "logits/chosen": -2.9047250747680664, "logits/rejected": -3.2613632678985596, "logps/chosen": -220.83255004882812, "logps/rejected": -431.68402099609375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.8004440069198608, "rewards/margins": 6.073655605316162, "rewards/rejected": -7.8740997314453125, "step": 902 }, { "epoch": 0.14, "learning_rate": 1.3485037605684943e-05, "logits/chosen": -2.5981783866882324, "logits/rejected": -2.188103675842285, "logps/chosen": -442.06744384765625, "logps/rejected": -293.64398193359375, "loss": 3.6841, "rewards/accuracies": 0.5, "rewards/chosen": -4.505364894866943, "rewards/margins": -1.7853655815124512, "rewards/rejected": -2.719999313354492, "step": 903 }, { "epoch": 0.14, "learning_rate": 1.3484304165153794e-05, "logits/chosen": -3.2070226669311523, "logits/rejected": -3.4326834678649902, "logps/chosen": -43.797752380371094, "logps/rejected": -141.193359375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.3230839967727661, "rewards/margins": 4.699647903442383, "rewards/rejected": -6.022731781005859, "step": 904 }, { "epoch": 0.14, "learning_rate": 1.3483570724622648e-05, "logits/chosen": -2.515660285949707, "logits/rejected": -3.154555082321167, "logps/chosen": -81.37104034423828, "logps/rejected": -231.65887451171875, "loss": 1.8962, "rewards/accuracies": 0.5, "rewards/chosen": -2.4334216117858887, "rewards/margins": 2.144782543182373, "rewards/rejected": -4.578204154968262, "step": 905 }, { "epoch": 0.14, "learning_rate": 1.34828372840915e-05, "logits/chosen": -1.7542338371276855, "logits/rejected": -2.9168825149536133, "logps/chosen": -66.64753723144531, "logps/rejected": -411.9107666015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3327687978744507, "rewards/margins": 9.634284019470215, "rewards/rejected": -10.967053413391113, "step": 906 }, { "epoch": 0.14, "learning_rate": 1.3482103843560352e-05, "logits/chosen": -1.258231520652771, "logits/rejected": -2.993967056274414, "logps/chosen": -206.54412841796875, "logps/rejected": -520.0006713867188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3729530572891235, "rewards/margins": 7.532103538513184, "rewards/rejected": -8.905056953430176, "step": 907 }, { "epoch": 0.14, "learning_rate": 1.3481370403029204e-05, "logits/chosen": -2.5534958839416504, "logits/rejected": -3.189016819000244, "logps/chosen": -880.7362060546875, "logps/rejected": -831.54443359375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.3512786626815796, "rewards/margins": 5.087713718414307, "rewards/rejected": -6.438992500305176, "step": 908 }, { "epoch": 0.14, "learning_rate": 1.3480636962498056e-05, "logits/chosen": -3.1945626735687256, "logits/rejected": -2.101076602935791, "logps/chosen": -318.50531005859375, "logps/rejected": -94.81991577148438, "loss": 5.1509, "rewards/accuracies": 0.0, "rewards/chosen": -6.7409868240356445, "rewards/margins": -5.119182586669922, "rewards/rejected": -1.6218039989471436, "step": 909 }, { "epoch": 0.14, "learning_rate": 1.3479903521966907e-05, "logits/chosen": -2.7961645126342773, "logits/rejected": -2.9773666858673096, "logps/chosen": -64.81084442138672, "logps/rejected": -195.94569396972656, "loss": 0.1057, "rewards/accuracies": 1.0, "rewards/chosen": -1.0506689548492432, "rewards/margins": 4.230600357055664, "rewards/rejected": -5.281269550323486, "step": 910 }, { "epoch": 0.14, "learning_rate": 1.347917008143576e-05, "logits/chosen": -1.212597131729126, "logits/rejected": -3.099388360977173, "logps/chosen": -26.551055908203125, "logps/rejected": -429.505859375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.4508820176124573, "rewards/margins": 7.2650909423828125, "rewards/rejected": -7.715972900390625, "step": 911 }, { "epoch": 0.14, "learning_rate": 1.3478436640904611e-05, "logits/chosen": -2.0154025554656982, "logits/rejected": -2.8108291625976562, "logps/chosen": -54.20710372924805, "logps/rejected": -129.23739624023438, "loss": 1.5134, "rewards/accuracies": 0.5, "rewards/chosen": -2.0433096885681152, "rewards/margins": -0.1475001573562622, "rewards/rejected": -1.8958096504211426, "step": 912 }, { "epoch": 0.14, "learning_rate": 1.3477703200373463e-05, "logits/chosen": -2.978705406188965, "logits/rejected": -3.049329996109009, "logps/chosen": -151.32720947265625, "logps/rejected": -246.92230224609375, "loss": 1.6139, "rewards/accuracies": 0.5, "rewards/chosen": -3.1878862380981445, "rewards/margins": 0.8111864328384399, "rewards/rejected": -3.999072551727295, "step": 913 }, { "epoch": 0.14, "learning_rate": 1.3476969759842317e-05, "logits/chosen": -2.849118232727051, "logits/rejected": -2.94744610786438, "logps/chosen": -171.3568115234375, "logps/rejected": -372.822509765625, "loss": 2.6823, "rewards/accuracies": 0.5, "rewards/chosen": -3.1790552139282227, "rewards/margins": 1.2434170246124268, "rewards/rejected": -4.4224724769592285, "step": 914 }, { "epoch": 0.14, "learning_rate": 1.3476236319311169e-05, "logits/chosen": -2.599102735519409, "logits/rejected": -2.811466932296753, "logps/chosen": -369.4811706542969, "logps/rejected": -542.4786376953125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.0064384937286377, "rewards/margins": 6.265397071838379, "rewards/rejected": -8.271835327148438, "step": 915 }, { "epoch": 0.14, "learning_rate": 1.347550287878002e-05, "logits/chosen": -2.599740505218506, "logits/rejected": -2.540135145187378, "logps/chosen": -210.92913818359375, "logps/rejected": -337.84716796875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.1745251417160034, "rewards/margins": 5.954028129577637, "rewards/rejected": -7.12855339050293, "step": 916 }, { "epoch": 0.14, "learning_rate": 1.3474769438248872e-05, "logits/chosen": -2.5561482906341553, "logits/rejected": -2.9793758392333984, "logps/chosen": -90.59268188476562, "logps/rejected": -267.0997619628906, "loss": 0.0651, "rewards/accuracies": 1.0, "rewards/chosen": -0.9219363927841187, "rewards/margins": 4.838703155517578, "rewards/rejected": -5.760639190673828, "step": 917 }, { "epoch": 0.14, "learning_rate": 1.3474035997717724e-05, "logits/chosen": -2.108588695526123, "logits/rejected": -3.012122392654419, "logps/chosen": -183.58616638183594, "logps/rejected": -379.4048767089844, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0059044361114502, "rewards/margins": 5.8022027015686035, "rewards/rejected": -6.808107376098633, "step": 918 }, { "epoch": 0.14, "learning_rate": 1.3473302557186576e-05, "logits/chosen": -2.774170160293579, "logits/rejected": -3.153580665588379, "logps/chosen": -78.50936126708984, "logps/rejected": -217.15731811523438, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.3803742527961731, "rewards/margins": 4.450078964233398, "rewards/rejected": -4.830453872680664, "step": 919 }, { "epoch": 0.14, "learning_rate": 1.3472569116655428e-05, "logits/chosen": -1.4432289600372314, "logits/rejected": -2.9400746822357178, "logps/chosen": -82.4706802368164, "logps/rejected": -299.6236877441406, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -0.7833194732666016, "rewards/margins": 4.928524971008301, "rewards/rejected": -5.711844444274902, "step": 920 }, { "epoch": 0.14, "learning_rate": 1.347183567612428e-05, "logits/chosen": -2.628571033477783, "logits/rejected": -3.008136034011841, "logps/chosen": -134.0687255859375, "logps/rejected": -420.0018310546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6336963176727295, "rewards/margins": 9.840511322021484, "rewards/rejected": -10.474206924438477, "step": 921 }, { "epoch": 0.14, "learning_rate": 1.3471102235593132e-05, "logits/chosen": -2.3866496086120605, "logits/rejected": -3.068891763687134, "logps/chosen": -361.25, "logps/rejected": -538.8970947265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.3840034604072571, "rewards/margins": 8.543448448181152, "rewards/rejected": -8.927452087402344, "step": 922 }, { "epoch": 0.14, "learning_rate": 1.3470368795061985e-05, "logits/chosen": -2.728497266769409, "logits/rejected": -2.3022851943969727, "logps/chosen": -234.3130340576172, "logps/rejected": -192.1012420654297, "loss": 2.736, "rewards/accuracies": 0.5, "rewards/chosen": -4.206261157989502, "rewards/margins": -0.34779787063598633, "rewards/rejected": -3.8584632873535156, "step": 923 }, { "epoch": 0.14, "learning_rate": 1.3469635354530837e-05, "logits/chosen": -1.7957541942596436, "logits/rejected": -2.8816018104553223, "logps/chosen": -90.45774841308594, "logps/rejected": -306.3368835449219, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.1922180652618408, "rewards/margins": 3.82389760017395, "rewards/rejected": -5.016115665435791, "step": 924 }, { "epoch": 0.14, "learning_rate": 1.3468901913999689e-05, "logits/chosen": -2.9483911991119385, "logits/rejected": -2.1990177631378174, "logps/chosen": -178.7302703857422, "logps/rejected": -125.3716812133789, "loss": 0.0602, "rewards/accuracies": 1.0, "rewards/chosen": -1.9970645904541016, "rewards/margins": 2.853604793548584, "rewards/rejected": -4.8506693840026855, "step": 925 }, { "epoch": 0.14, "learning_rate": 1.3468168473468541e-05, "logits/chosen": -2.833436965942383, "logits/rejected": -3.106954336166382, "logps/chosen": -107.4397964477539, "logps/rejected": -124.35266876220703, "loss": 0.5995, "rewards/accuracies": 0.5, "rewards/chosen": -1.5254853963851929, "rewards/margins": 1.7313768863677979, "rewards/rejected": -3.2568624019622803, "step": 926 }, { "epoch": 0.14, "learning_rate": 1.3467435032937393e-05, "logits/chosen": -2.90059494972229, "logits/rejected": -2.9067139625549316, "logps/chosen": -696.424072265625, "logps/rejected": -692.070556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1680694818496704, "rewards/margins": 9.4512939453125, "rewards/rejected": -10.619363784790039, "step": 927 }, { "epoch": 0.14, "learning_rate": 1.3466701592406245e-05, "logits/chosen": -3.110563039779663, "logits/rejected": -2.975609064102173, "logps/chosen": -672.1834716796875, "logps/rejected": -475.03131103515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.6766173839569092, "rewards/margins": 6.721219062805176, "rewards/rejected": -8.397836685180664, "step": 928 }, { "epoch": 0.14, "learning_rate": 1.3465968151875097e-05, "logits/chosen": -2.4330036640167236, "logits/rejected": -2.883267402648926, "logps/chosen": -125.609375, "logps/rejected": -355.2847900390625, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -0.6061224937438965, "rewards/margins": 6.944454669952393, "rewards/rejected": -7.550577163696289, "step": 929 }, { "epoch": 0.14, "learning_rate": 1.3465234711343948e-05, "logits/chosen": -2.0341243743896484, "logits/rejected": -2.8555147647857666, "logps/chosen": -239.49166870117188, "logps/rejected": -311.57000732421875, "loss": 0.7229, "rewards/accuracies": 0.5, "rewards/chosen": -2.2136483192443848, "rewards/margins": 2.0761940479278564, "rewards/rejected": -4.289842128753662, "step": 930 }, { "epoch": 0.14, "learning_rate": 1.34645012708128e-05, "logits/chosen": -2.5076136589050293, "logits/rejected": -2.9793968200683594, "logps/chosen": -83.05509948730469, "logps/rejected": -336.80865478515625, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.1179015636444092, "rewards/margins": 6.024679183959961, "rewards/rejected": -7.142580986022949, "step": 931 }, { "epoch": 0.14, "learning_rate": 1.3463767830281654e-05, "logits/chosen": -1.836448311805725, "logits/rejected": -2.7107291221618652, "logps/chosen": -174.3320770263672, "logps/rejected": -313.078857421875, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -2.178297996520996, "rewards/margins": 3.660407781600952, "rewards/rejected": -5.838705539703369, "step": 932 }, { "epoch": 0.15, "learning_rate": 1.3463034389750506e-05, "logits/chosen": -2.5512583255767822, "logits/rejected": -2.798067331314087, "logps/chosen": -76.53994750976562, "logps/rejected": -191.57681274414062, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.5359686613082886, "rewards/margins": 4.500213623046875, "rewards/rejected": -6.036182403564453, "step": 933 }, { "epoch": 0.15, "learning_rate": 1.3462300949219358e-05, "logits/chosen": -2.705512285232544, "logits/rejected": -3.1934621334075928, "logps/chosen": -184.19810485839844, "logps/rejected": -434.4061279296875, "loss": 2.9531, "rewards/accuracies": 0.5, "rewards/chosen": -3.1207687854766846, "rewards/margins": -1.2382835149765015, "rewards/rejected": -1.882485270500183, "step": 934 }, { "epoch": 0.15, "learning_rate": 1.346156750868821e-05, "logits/chosen": -2.7796285152435303, "logits/rejected": -2.6383519172668457, "logps/chosen": -124.69261932373047, "logps/rejected": -210.19378662109375, "loss": 0.1278, "rewards/accuracies": 1.0, "rewards/chosen": -0.8553266525268555, "rewards/margins": 3.8024792671203613, "rewards/rejected": -4.657805919647217, "step": 935 }, { "epoch": 0.15, "learning_rate": 1.3460834068157063e-05, "logits/chosen": -2.3891682624816895, "logits/rejected": -3.0713117122650146, "logps/chosen": -611.083740234375, "logps/rejected": -652.941162109375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3053268194198608, "rewards/margins": 7.51637077331543, "rewards/rejected": -8.821698188781738, "step": 936 }, { "epoch": 0.15, "learning_rate": 1.3460100627625915e-05, "logits/chosen": -2.8304123878479004, "logits/rejected": -2.8078157901763916, "logps/chosen": -401.17333984375, "logps/rejected": -436.2171630859375, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.7742729187011719, "rewards/margins": 5.006085395812988, "rewards/rejected": -6.78035831451416, "step": 937 }, { "epoch": 0.15, "learning_rate": 1.3459367187094767e-05, "logits/chosen": -2.794218063354492, "logits/rejected": -2.845200777053833, "logps/chosen": -39.14246368408203, "logps/rejected": -165.77935791015625, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -0.5915629267692566, "rewards/margins": 3.837986946105957, "rewards/rejected": -4.429549694061279, "step": 938 }, { "epoch": 0.15, "learning_rate": 1.3458633746563619e-05, "logits/chosen": -2.833512544631958, "logits/rejected": -2.635935068130493, "logps/chosen": -184.91262817382812, "logps/rejected": -190.9403076171875, "loss": 1.7398, "rewards/accuracies": 0.5, "rewards/chosen": -2.5547478199005127, "rewards/margins": -0.4265265464782715, "rewards/rejected": -2.128221035003662, "step": 939 }, { "epoch": 0.15, "learning_rate": 1.345790030603247e-05, "logits/chosen": -1.3069053888320923, "logits/rejected": -2.9659156799316406, "logps/chosen": -127.81492614746094, "logps/rejected": -354.3089599609375, "loss": 0.0767, "rewards/accuracies": 1.0, "rewards/chosen": -1.0267903804779053, "rewards/margins": 4.668363094329834, "rewards/rejected": -5.69515323638916, "step": 940 }, { "epoch": 0.15, "learning_rate": 1.3457166865501324e-05, "logits/chosen": -2.16024112701416, "logits/rejected": -2.857990264892578, "logps/chosen": -304.5317077636719, "logps/rejected": -394.48272705078125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7429291009902954, "rewards/margins": 7.506830215454102, "rewards/rejected": -8.249759674072266, "step": 941 }, { "epoch": 0.15, "learning_rate": 1.3456433424970176e-05, "logits/chosen": -2.33886981010437, "logits/rejected": -2.7405900955200195, "logps/chosen": -51.460323333740234, "logps/rejected": -127.90948486328125, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.533703625202179, "rewards/margins": 3.9745850563049316, "rewards/rejected": -4.508288383483887, "step": 942 }, { "epoch": 0.15, "learning_rate": 1.3455699984439028e-05, "logits/chosen": -2.60796856880188, "logits/rejected": -1.4618951082229614, "logps/chosen": -438.12579345703125, "logps/rejected": -326.9403076171875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.2131222486495972, "rewards/margins": 4.478527069091797, "rewards/rejected": -5.691648960113525, "step": 943 }, { "epoch": 0.15, "learning_rate": 1.345496654390788e-05, "logits/chosen": -2.9421308040618896, "logits/rejected": -2.527538537979126, "logps/chosen": -497.59234619140625, "logps/rejected": -339.25946044921875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.4271087646484375, "rewards/margins": 6.1830244064331055, "rewards/rejected": -7.610133171081543, "step": 944 }, { "epoch": 0.15, "learning_rate": 1.3454233103376732e-05, "logits/chosen": -3.1348793506622314, "logits/rejected": -1.8187273740768433, "logps/chosen": -246.07913208007812, "logps/rejected": -23.317567825317383, "loss": 4.8189, "rewards/accuracies": 0.0, "rewards/chosen": -4.902273654937744, "rewards/margins": -4.804009914398193, "rewards/rejected": -0.0982636958360672, "step": 945 }, { "epoch": 0.15, "learning_rate": 1.3453499662845584e-05, "logits/chosen": -2.639307975769043, "logits/rejected": -2.953557252883911, "logps/chosen": -286.3525085449219, "logps/rejected": -291.99725341796875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.8895832300186157, "rewards/margins": 6.132360458374023, "rewards/rejected": -8.021944046020508, "step": 946 }, { "epoch": 0.15, "learning_rate": 1.3452766222314435e-05, "logits/chosen": -2.781100273132324, "logits/rejected": -2.7935807704925537, "logps/chosen": -346.06280517578125, "logps/rejected": -597.0236206054688, "loss": 3.2169, "rewards/accuracies": 0.5, "rewards/chosen": -4.399148464202881, "rewards/margins": -0.5267822742462158, "rewards/rejected": -3.872366428375244, "step": 947 }, { "epoch": 0.15, "learning_rate": 1.3452032781783287e-05, "logits/chosen": -2.946560859680176, "logits/rejected": -3.0052106380462646, "logps/chosen": -167.93614196777344, "logps/rejected": -249.9822540283203, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -0.3805997967720032, "rewards/margins": 5.469979763031006, "rewards/rejected": -5.850579738616943, "step": 948 }, { "epoch": 0.15, "learning_rate": 1.345129934125214e-05, "logits/chosen": -1.9907094240188599, "logits/rejected": -2.7367138862609863, "logps/chosen": -87.95307922363281, "logps/rejected": -300.9972839355469, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.232667326927185, "rewards/margins": 5.524797439575195, "rewards/rejected": -6.757464408874512, "step": 949 }, { "epoch": 0.15, "learning_rate": 1.3450565900720993e-05, "logits/chosen": -2.852355718612671, "logits/rejected": -2.882845401763916, "logps/chosen": -207.00009155273438, "logps/rejected": -272.670654296875, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -1.8587127923965454, "rewards/margins": 3.7420034408569336, "rewards/rejected": -5.600716590881348, "step": 950 }, { "epoch": 0.15, "learning_rate": 1.3449832460189845e-05, "logits/chosen": -1.4871912002563477, "logits/rejected": -2.7691283226013184, "logps/chosen": -88.79927825927734, "logps/rejected": -405.91650390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.1822742223739624, "rewards/margins": 7.854809761047363, "rewards/rejected": -9.037083625793457, "step": 951 }, { "epoch": 0.15, "learning_rate": 1.3449099019658696e-05, "logits/chosen": -2.2111713886260986, "logits/rejected": -2.7840394973754883, "logps/chosen": -502.72906494140625, "logps/rejected": -596.5838012695312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.224226474761963, "rewards/margins": 8.056831359863281, "rewards/rejected": -9.281057357788086, "step": 952 }, { "epoch": 0.15, "learning_rate": 1.3448365579127548e-05, "logits/chosen": -2.7375776767730713, "logits/rejected": -3.004340887069702, "logps/chosen": -366.28326416015625, "logps/rejected": -340.8092346191406, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.744480848312378, "rewards/margins": 6.069136142730713, "rewards/rejected": -7.813616752624512, "step": 953 }, { "epoch": 0.15, "learning_rate": 1.34476321385964e-05, "logits/chosen": -1.9338939189910889, "logits/rejected": -2.7675507068634033, "logps/chosen": -348.2846374511719, "logps/rejected": -473.0613708496094, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.552159547805786, "rewards/margins": 6.8661041259765625, "rewards/rejected": -9.418264389038086, "step": 954 }, { "epoch": 0.15, "learning_rate": 1.3446898698065252e-05, "logits/chosen": -2.109196662902832, "logits/rejected": -2.9751482009887695, "logps/chosen": -98.34870910644531, "logps/rejected": -232.1868438720703, "loss": 0.1325, "rewards/accuracies": 1.0, "rewards/chosen": -1.1418735980987549, "rewards/margins": 3.122697114944458, "rewards/rejected": -4.264570713043213, "step": 955 }, { "epoch": 0.15, "learning_rate": 1.3446165257534104e-05, "logits/chosen": -2.7518692016601562, "logits/rejected": -3.0757813453674316, "logps/chosen": -51.96339797973633, "logps/rejected": -176.3565673828125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.21082477271556854, "rewards/margins": 5.03537130355835, "rewards/rejected": -5.2461957931518555, "step": 956 }, { "epoch": 0.15, "learning_rate": 1.3445431817002956e-05, "logits/chosen": -2.8905081748962402, "logits/rejected": -2.2407355308532715, "logps/chosen": -153.54527282714844, "logps/rejected": -269.287109375, "loss": 2.1261, "rewards/accuracies": 0.5, "rewards/chosen": -3.0661680698394775, "rewards/margins": 1.8702762126922607, "rewards/rejected": -4.936444282531738, "step": 957 }, { "epoch": 0.15, "learning_rate": 1.3444698376471808e-05, "logits/chosen": -2.6474997997283936, "logits/rejected": -2.598999261856079, "logps/chosen": -462.57208251953125, "logps/rejected": -579.002197265625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.113452196121216, "rewards/margins": 9.036931037902832, "rewards/rejected": -11.150382995605469, "step": 958 }, { "epoch": 0.15, "learning_rate": 1.3443964935940661e-05, "logits/chosen": -3.063567876815796, "logits/rejected": -3.0432262420654297, "logps/chosen": -121.27388000488281, "logps/rejected": -159.63662719726562, "loss": 0.1767, "rewards/accuracies": 1.0, "rewards/chosen": 0.07836951315402985, "rewards/margins": 3.5109376907348633, "rewards/rejected": -3.432568311691284, "step": 959 }, { "epoch": 0.15, "learning_rate": 1.3443231495409513e-05, "logits/chosen": -2.6398673057556152, "logits/rejected": -2.6089417934417725, "logps/chosen": -269.3028564453125, "logps/rejected": -404.19854736328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7005999088287354, "rewards/margins": 7.886380195617676, "rewards/rejected": -9.586979866027832, "step": 960 }, { "epoch": 0.15, "learning_rate": 1.3442498054878365e-05, "logits/chosen": -2.9985408782958984, "logits/rejected": -1.9330878257751465, "logps/chosen": -232.95541381835938, "logps/rejected": -85.3980712890625, "loss": 3.2047, "rewards/accuracies": 0.5, "rewards/chosen": -4.0811920166015625, "rewards/margins": -1.9873069524765015, "rewards/rejected": -2.0938851833343506, "step": 961 }, { "epoch": 0.15, "learning_rate": 1.3441764614347217e-05, "logits/chosen": -2.219686269760132, "logits/rejected": -2.9833011627197266, "logps/chosen": -146.04226684570312, "logps/rejected": -253.43045043945312, "loss": 1.0862, "rewards/accuracies": 0.5, "rewards/chosen": -1.8160938024520874, "rewards/margins": 0.22213250398635864, "rewards/rejected": -2.038226366043091, "step": 962 }, { "epoch": 0.15, "learning_rate": 1.3441031173816069e-05, "logits/chosen": -2.406060218811035, "logits/rejected": -2.6140072345733643, "logps/chosen": -56.327613830566406, "logps/rejected": -130.43978881835938, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": -0.8561275601387024, "rewards/margins": 2.7567057609558105, "rewards/rejected": -3.6128334999084473, "step": 963 }, { "epoch": 0.15, "learning_rate": 1.344029773328492e-05, "logits/chosen": -2.536283493041992, "logits/rejected": -1.559506893157959, "logps/chosen": -362.2259216308594, "logps/rejected": -215.29354858398438, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -1.2087218761444092, "rewards/margins": 4.964747428894043, "rewards/rejected": -6.173469543457031, "step": 964 }, { "epoch": 0.15, "learning_rate": 1.3439564292753773e-05, "logits/chosen": -0.7406277656555176, "logits/rejected": -2.141869306564331, "logps/chosen": -188.21966552734375, "logps/rejected": -332.5962219238281, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": -0.26392287015914917, "rewards/margins": 2.9558684825897217, "rewards/rejected": -3.2197914123535156, "step": 965 }, { "epoch": 0.15, "learning_rate": 1.3438830852222624e-05, "logits/chosen": -2.48765230178833, "logits/rejected": -3.0833499431610107, "logps/chosen": -526.1481323242188, "logps/rejected": -514.8070068359375, "loss": 1.0442, "rewards/accuracies": 0.5, "rewards/chosen": -1.9832375049591064, "rewards/margins": 4.137433052062988, "rewards/rejected": -6.120670795440674, "step": 966 }, { "epoch": 0.15, "learning_rate": 1.3438097411691476e-05, "logits/chosen": -1.8944156169891357, "logits/rejected": -2.701282024383545, "logps/chosen": -216.89674377441406, "logps/rejected": -317.73260498046875, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.4111998975276947, "rewards/margins": 5.279674530029297, "rewards/rejected": -5.6908745765686035, "step": 967 }, { "epoch": 0.15, "learning_rate": 1.343736397116033e-05, "logits/chosen": -2.818526029586792, "logits/rejected": -3.1761157512664795, "logps/chosen": -44.313785552978516, "logps/rejected": -126.82783508300781, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -0.6794894337654114, "rewards/margins": 4.277616024017334, "rewards/rejected": -4.95710563659668, "step": 968 }, { "epoch": 0.15, "learning_rate": 1.3436630530629182e-05, "logits/chosen": -1.2019388675689697, "logits/rejected": -2.7777469158172607, "logps/chosen": -243.890869140625, "logps/rejected": -564.7157592773438, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -0.1865074187517166, "rewards/margins": 7.503586769104004, "rewards/rejected": -7.690093994140625, "step": 969 }, { "epoch": 0.15, "learning_rate": 1.3435897090098035e-05, "logits/chosen": -2.96165132522583, "logits/rejected": -2.1531481742858887, "logps/chosen": -197.0987548828125, "logps/rejected": -178.82232666015625, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -1.1124420166015625, "rewards/margins": 3.6419849395751953, "rewards/rejected": -4.754426956176758, "step": 970 }, { "epoch": 0.15, "learning_rate": 1.3435163649566887e-05, "logits/chosen": -1.2257788181304932, "logits/rejected": -2.633054256439209, "logps/chosen": -214.43075561523438, "logps/rejected": -456.0462646484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2941299676895142, "rewards/margins": 7.287689208984375, "rewards/rejected": -8.581819534301758, "step": 971 }, { "epoch": 0.15, "learning_rate": 1.3434430209035739e-05, "logits/chosen": -2.935551404953003, "logits/rejected": -2.8030941486358643, "logps/chosen": -355.6245422363281, "logps/rejected": -339.1649169921875, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": -1.6751316785812378, "rewards/margins": 5.1480512619018555, "rewards/rejected": -6.823183059692383, "step": 972 }, { "epoch": 0.15, "learning_rate": 1.3433696768504591e-05, "logits/chosen": -1.3924455642700195, "logits/rejected": -2.6191818714141846, "logps/chosen": -54.12678909301758, "logps/rejected": -249.5855712890625, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.6248529553413391, "rewards/margins": 5.839882850646973, "rewards/rejected": -6.464735507965088, "step": 973 }, { "epoch": 0.15, "learning_rate": 1.3432963327973443e-05, "logits/chosen": -2.7947559356689453, "logits/rejected": -2.508848190307617, "logps/chosen": -196.95956420898438, "logps/rejected": -164.59820556640625, "loss": 3.8598, "rewards/accuracies": 0.5, "rewards/chosen": -4.552156925201416, "rewards/margins": 0.24748015403747559, "rewards/rejected": -4.799637317657471, "step": 974 }, { "epoch": 0.15, "learning_rate": 1.3432229887442295e-05, "logits/chosen": -2.719942331314087, "logits/rejected": -2.193081855773926, "logps/chosen": -199.305419921875, "logps/rejected": -212.97036743164062, "loss": 3.7211, "rewards/accuracies": 0.5, "rewards/chosen": -3.5221309661865234, "rewards/margins": 1.1605806350708008, "rewards/rejected": -4.682711124420166, "step": 975 }, { "epoch": 0.15, "learning_rate": 1.3431496446911147e-05, "logits/chosen": -1.9680830240249634, "logits/rejected": -2.722325563430786, "logps/chosen": -126.03297424316406, "logps/rejected": -262.6666259765625, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.306999921798706, "rewards/margins": 7.135979652404785, "rewards/rejected": -8.44297981262207, "step": 976 }, { "epoch": 0.15, "learning_rate": 1.343076300638e-05, "logits/chosen": -2.8547699451446533, "logits/rejected": -1.692138433456421, "logps/chosen": -183.002685546875, "logps/rejected": -86.91351318359375, "loss": 3.1568, "rewards/accuracies": 0.5, "rewards/chosen": -4.151974678039551, "rewards/margins": -2.7139041423797607, "rewards/rejected": -1.4380706548690796, "step": 977 }, { "epoch": 0.15, "learning_rate": 1.3430029565848852e-05, "logits/chosen": -2.1984150409698486, "logits/rejected": -2.724823474884033, "logps/chosen": -239.84512329101562, "logps/rejected": -232.89346313476562, "loss": 3.2374, "rewards/accuracies": 0.5, "rewards/chosen": -3.995194673538208, "rewards/margins": 0.3821451663970947, "rewards/rejected": -4.377339839935303, "step": 978 }, { "epoch": 0.15, "learning_rate": 1.3429296125317704e-05, "logits/chosen": -1.7500406503677368, "logits/rejected": -2.70011043548584, "logps/chosen": -89.11063385009766, "logps/rejected": -238.60968017578125, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.2506788671016693, "rewards/margins": 5.995216369628906, "rewards/rejected": -6.2458953857421875, "step": 979 }, { "epoch": 0.15, "learning_rate": 1.3428562684786556e-05, "logits/chosen": -2.266387939453125, "logits/rejected": -2.6428980827331543, "logps/chosen": -82.87545776367188, "logps/rejected": -176.7517852783203, "loss": 0.0706, "rewards/accuracies": 1.0, "rewards/chosen": -0.06498794257640839, "rewards/margins": 4.465424537658691, "rewards/rejected": -4.530412673950195, "step": 980 }, { "epoch": 0.15, "learning_rate": 1.3427829244255408e-05, "logits/chosen": -2.8128581047058105, "logits/rejected": -1.3754698038101196, "logps/chosen": -620.680908203125, "logps/rejected": -247.54637145996094, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -2.810786485671997, "rewards/margins": 4.740415573120117, "rewards/rejected": -7.551201820373535, "step": 981 }, { "epoch": 0.15, "learning_rate": 1.342709580372426e-05, "logits/chosen": -2.68520450592041, "logits/rejected": -1.7157307863235474, "logps/chosen": -336.4656982421875, "logps/rejected": -235.0604248046875, "loss": 0.4536, "rewards/accuracies": 0.5, "rewards/chosen": -1.142061710357666, "rewards/margins": 3.72011137008667, "rewards/rejected": -4.862173080444336, "step": 982 }, { "epoch": 0.15, "learning_rate": 1.3426362363193111e-05, "logits/chosen": -2.808506727218628, "logits/rejected": -2.0021092891693115, "logps/chosen": -608.6399536132812, "logps/rejected": -356.2757568359375, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -2.169276475906372, "rewards/margins": 3.3925986289978027, "rewards/rejected": -5.561875343322754, "step": 983 }, { "epoch": 0.15, "learning_rate": 1.3425628922661963e-05, "logits/chosen": -2.3502678871154785, "logits/rejected": -2.7739272117614746, "logps/chosen": -154.49208068847656, "logps/rejected": -155.0908203125, "loss": 2.5911, "rewards/accuracies": 0.5, "rewards/chosen": -3.394874095916748, "rewards/margins": 0.6908271312713623, "rewards/rejected": -4.0857014656066895, "step": 984 }, { "epoch": 0.15, "learning_rate": 1.3424895482130815e-05, "logits/chosen": -2.738442897796631, "logits/rejected": -1.298517107963562, "logps/chosen": -887.6917114257812, "logps/rejected": -321.4881286621094, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.79355388879776, "rewards/margins": 6.715566635131836, "rewards/rejected": -7.509120941162109, "step": 985 }, { "epoch": 0.15, "learning_rate": 1.3424162041599669e-05, "logits/chosen": -2.765416145324707, "logits/rejected": -2.8218538761138916, "logps/chosen": -639.44921875, "logps/rejected": -621.33447265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8300430774688721, "rewards/margins": 8.623320579528809, "rewards/rejected": -9.453363418579102, "step": 986 }, { "epoch": 0.15, "learning_rate": 1.342342860106852e-05, "logits/chosen": -2.854088068008423, "logits/rejected": -2.75626277923584, "logps/chosen": -271.578125, "logps/rejected": -175.11618041992188, "loss": 4.0135, "rewards/accuracies": 0.5, "rewards/chosen": -4.165829181671143, "rewards/margins": -2.545362949371338, "rewards/rejected": -1.620466709136963, "step": 987 }, { "epoch": 0.15, "learning_rate": 1.3422695160537373e-05, "logits/chosen": -2.495293617248535, "logits/rejected": -2.7489781379699707, "logps/chosen": -482.4706726074219, "logps/rejected": -364.7039794921875, "loss": 2.6954, "rewards/accuracies": 0.5, "rewards/chosen": -3.6223907470703125, "rewards/margins": 1.0212132930755615, "rewards/rejected": -4.643603801727295, "step": 988 }, { "epoch": 0.15, "learning_rate": 1.3421961720006224e-05, "logits/chosen": -2.6947004795074463, "logits/rejected": -2.478189706802368, "logps/chosen": -359.51153564453125, "logps/rejected": -230.60780334472656, "loss": 2.6122, "rewards/accuracies": 0.5, "rewards/chosen": -3.1791489124298096, "rewards/margins": 1.313081979751587, "rewards/rejected": -4.4922308921813965, "step": 989 }, { "epoch": 0.15, "learning_rate": 1.3421228279475076e-05, "logits/chosen": -1.9829351902008057, "logits/rejected": -1.3009617328643799, "logps/chosen": -871.4218139648438, "logps/rejected": -334.2985534667969, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.8841400146484375, "rewards/margins": 5.821324348449707, "rewards/rejected": -6.7054643630981445, "step": 990 }, { "epoch": 0.15, "learning_rate": 1.3420494838943928e-05, "logits/chosen": -2.7497916221618652, "logits/rejected": -1.9085696935653687, "logps/chosen": -460.19012451171875, "logps/rejected": -338.57440185546875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.3237769603729248, "rewards/margins": 6.045872688293457, "rewards/rejected": -7.369649887084961, "step": 991 }, { "epoch": 0.15, "learning_rate": 1.341976139841278e-05, "logits/chosen": -2.432884931564331, "logits/rejected": -2.9676673412323, "logps/chosen": -26.701099395751953, "logps/rejected": -158.02040100097656, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 0.0945466011762619, "rewards/margins": 4.881837368011475, "rewards/rejected": -4.787290573120117, "step": 992 }, { "epoch": 0.15, "learning_rate": 1.3419027957881632e-05, "logits/chosen": -3.2698683738708496, "logits/rejected": -1.9339247941970825, "logps/chosen": -258.1646728515625, "logps/rejected": -121.47714233398438, "loss": 2.1122, "rewards/accuracies": 0.0, "rewards/chosen": -3.2717621326446533, "rewards/margins": -1.8832318782806396, "rewards/rejected": -1.3885303735733032, "step": 993 }, { "epoch": 0.15, "learning_rate": 1.3418294517350486e-05, "logits/chosen": -2.9671812057495117, "logits/rejected": -2.653566598892212, "logps/chosen": -158.84060668945312, "logps/rejected": -263.36669921875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.3058887720108032, "rewards/margins": 5.880498886108398, "rewards/rejected": -7.186387538909912, "step": 994 }, { "epoch": 0.15, "learning_rate": 1.3417561076819337e-05, "logits/chosen": -1.3019996881484985, "logits/rejected": -2.7607879638671875, "logps/chosen": -131.82406616210938, "logps/rejected": -453.3896789550781, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.5138351321220398, "rewards/margins": 6.402442932128906, "rewards/rejected": -5.888607978820801, "step": 995 }, { "epoch": 0.15, "learning_rate": 1.341682763628819e-05, "logits/chosen": -2.646655559539795, "logits/rejected": -1.47126305103302, "logps/chosen": -343.62310791015625, "logps/rejected": -212.12844848632812, "loss": 3.8845, "rewards/accuracies": 0.5, "rewards/chosen": -3.961590051651001, "rewards/margins": -0.9682269096374512, "rewards/rejected": -2.99336314201355, "step": 996 }, { "epoch": 0.16, "learning_rate": 1.3416094195757041e-05, "logits/chosen": -2.4204113483428955, "logits/rejected": -2.7442514896392822, "logps/chosen": -109.88353729248047, "logps/rejected": -119.72825622558594, "loss": 2.0286, "rewards/accuracies": 0.5, "rewards/chosen": -1.9656049013137817, "rewards/margins": 0.48421311378479004, "rewards/rejected": -2.4498178958892822, "step": 997 }, { "epoch": 0.16, "learning_rate": 1.3415360755225893e-05, "logits/chosen": -2.4547030925750732, "logits/rejected": -2.8847029209136963, "logps/chosen": -49.554080963134766, "logps/rejected": -269.83477783203125, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.45339155197143555, "rewards/margins": 5.8301849365234375, "rewards/rejected": -6.283576965332031, "step": 998 }, { "epoch": 0.16, "learning_rate": 1.3414627314694745e-05, "logits/chosen": -2.319581985473633, "logits/rejected": -2.941023111343384, "logps/chosen": -171.25350952148438, "logps/rejected": -233.92218017578125, "loss": 2.7639, "rewards/accuracies": 0.5, "rewards/chosen": -2.8713014125823975, "rewards/margins": -0.6197183132171631, "rewards/rejected": -2.2515830993652344, "step": 999 }, { "epoch": 0.16, "learning_rate": 1.3413893874163597e-05, "logits/chosen": -2.187657594680786, "logits/rejected": -2.721571683883667, "logps/chosen": -184.99021911621094, "logps/rejected": -165.9358673095703, "loss": 2.7251, "rewards/accuracies": 0.5, "rewards/chosen": -3.0027225017547607, "rewards/margins": -0.7946467399597168, "rewards/rejected": -2.208075761795044, "step": 1000 }, { "epoch": 0.16, "learning_rate": 1.3413160433632449e-05, "logits/chosen": -2.2215378284454346, "logits/rejected": -2.9695327281951904, "logps/chosen": -425.1692810058594, "logps/rejected": -486.66229248046875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.7257156372070312, "rewards/margins": 5.6083984375, "rewards/rejected": -7.334114074707031, "step": 1001 }, { "epoch": 0.16, "learning_rate": 1.3412426993101302e-05, "logits/chosen": -2.3818130493164062, "logits/rejected": -2.7769463062286377, "logps/chosen": -444.4256286621094, "logps/rejected": -572.9364624023438, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.2233249694108963, "rewards/margins": 7.754724025726318, "rewards/rejected": -7.978049278259277, "step": 1002 }, { "epoch": 0.16, "learning_rate": 1.3411693552570154e-05, "logits/chosen": -3.0616376399993896, "logits/rejected": -1.5998884439468384, "logps/chosen": -722.5615844726562, "logps/rejected": -303.9442443847656, "loss": 0.9022, "rewards/accuracies": 0.5, "rewards/chosen": -2.304535150527954, "rewards/margins": 1.7388619184494019, "rewards/rejected": -4.043396949768066, "step": 1003 }, { "epoch": 0.16, "learning_rate": 1.3410960112039008e-05, "logits/chosen": -2.438495635986328, "logits/rejected": -2.8304550647735596, "logps/chosen": -83.35399627685547, "logps/rejected": -138.08041381835938, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -0.11766605079174042, "rewards/margins": 3.189307689666748, "rewards/rejected": -3.306973695755005, "step": 1004 }, { "epoch": 0.16, "learning_rate": 1.341022667150786e-05, "logits/chosen": -2.5564770698547363, "logits/rejected": -2.7735116481781006, "logps/chosen": -211.8121337890625, "logps/rejected": -237.11300659179688, "loss": 1.5519, "rewards/accuracies": 0.5, "rewards/chosen": -1.974683403968811, "rewards/margins": 2.6313319206237793, "rewards/rejected": -4.606015682220459, "step": 1005 }, { "epoch": 0.16, "learning_rate": 1.3409493230976711e-05, "logits/chosen": -2.797755718231201, "logits/rejected": -3.2976300716400146, "logps/chosen": -156.77005004882812, "logps/rejected": -303.63580322265625, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": 0.2020954191684723, "rewards/margins": 3.0530788898468018, "rewards/rejected": -2.8509836196899414, "step": 1006 }, { "epoch": 0.16, "learning_rate": 1.3408759790445563e-05, "logits/chosen": -2.9097931385040283, "logits/rejected": -3.102999448776245, "logps/chosen": -120.50009155273438, "logps/rejected": -166.5863037109375, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -1.233637809753418, "rewards/margins": 4.178119659423828, "rewards/rejected": -5.411757469177246, "step": 1007 }, { "epoch": 0.16, "learning_rate": 1.3408026349914415e-05, "logits/chosen": -2.5628604888916016, "logits/rejected": -2.9108357429504395, "logps/chosen": -87.65446472167969, "logps/rejected": -210.60098266601562, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.09969662129878998, "rewards/margins": 5.088695526123047, "rewards/rejected": -5.188392162322998, "step": 1008 }, { "epoch": 0.16, "learning_rate": 1.3407292909383267e-05, "logits/chosen": -3.0067501068115234, "logits/rejected": -2.3221919536590576, "logps/chosen": -254.6654052734375, "logps/rejected": -433.2508239746094, "loss": 1.3013, "rewards/accuracies": 0.5, "rewards/chosen": -1.517099380493164, "rewards/margins": 0.8931272029876709, "rewards/rejected": -2.410226583480835, "step": 1009 }, { "epoch": 0.16, "learning_rate": 1.3406559468852119e-05, "logits/chosen": -2.9544975757598877, "logits/rejected": -3.018066644668579, "logps/chosen": -580.4241333007812, "logps/rejected": -600.5741577148438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.02386474609375, "rewards/margins": 9.102262496948242, "rewards/rejected": -8.078397750854492, "step": 1010 }, { "epoch": 0.16, "learning_rate": 1.3405826028320971e-05, "logits/chosen": -2.7855615615844727, "logits/rejected": -2.7705178260803223, "logps/chosen": -287.2688903808594, "logps/rejected": -365.1719055175781, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.2616257667541504, "rewards/margins": 4.823697090148926, "rewards/rejected": -6.085322380065918, "step": 1011 }, { "epoch": 0.16, "learning_rate": 1.3405092587789824e-05, "logits/chosen": -2.6208431720733643, "logits/rejected": -2.840363025665283, "logps/chosen": -23.290931701660156, "logps/rejected": -164.32901000976562, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -0.16238442063331604, "rewards/margins": 5.25147819519043, "rewards/rejected": -5.413862705230713, "step": 1012 }, { "epoch": 0.16, "learning_rate": 1.3404359147258676e-05, "logits/chosen": -2.695437431335449, "logits/rejected": -2.662466526031494, "logps/chosen": -171.52867126464844, "logps/rejected": -513.3970336914062, "loss": 1.0234, "rewards/accuracies": 0.0, "rewards/chosen": -2.071153163909912, "rewards/margins": -0.5449159741401672, "rewards/rejected": -1.5262372493743896, "step": 1013 }, { "epoch": 0.16, "learning_rate": 1.3403625706727528e-05, "logits/chosen": -2.7120535373687744, "logits/rejected": -3.097043514251709, "logps/chosen": -113.28925323486328, "logps/rejected": -358.41845703125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": 1.0896278619766235, "rewards/margins": 7.876863479614258, "rewards/rejected": -6.787235260009766, "step": 1014 }, { "epoch": 0.16, "learning_rate": 1.340289226619638e-05, "logits/chosen": -1.7664546966552734, "logits/rejected": -2.5821642875671387, "logps/chosen": -169.19253540039062, "logps/rejected": -315.3579406738281, "loss": 1.7399, "rewards/accuracies": 0.5, "rewards/chosen": -1.7961795330047607, "rewards/margins": 2.7764978408813477, "rewards/rejected": -4.5726776123046875, "step": 1015 }, { "epoch": 0.16, "learning_rate": 1.3402158825665232e-05, "logits/chosen": -1.8079577684402466, "logits/rejected": -3.0306568145751953, "logps/chosen": -44.085227966308594, "logps/rejected": -237.2449493408203, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": -0.14192436635494232, "rewards/margins": 3.284372568130493, "rewards/rejected": -3.4262969493865967, "step": 1016 }, { "epoch": 0.16, "learning_rate": 1.3401425385134084e-05, "logits/chosen": -3.135939598083496, "logits/rejected": -2.3712737560272217, "logps/chosen": -354.20867919921875, "logps/rejected": -266.70709228515625, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -0.6504837274551392, "rewards/margins": 4.011133193969727, "rewards/rejected": -4.661616802215576, "step": 1017 }, { "epoch": 0.16, "learning_rate": 1.3400691944602936e-05, "logits/chosen": -3.07486891746521, "logits/rejected": -2.5038914680480957, "logps/chosen": -335.44036865234375, "logps/rejected": -246.96368408203125, "loss": 0.8948, "rewards/accuracies": 0.5, "rewards/chosen": -2.233013868331909, "rewards/margins": 1.9198579788208008, "rewards/rejected": -4.152871608734131, "step": 1018 }, { "epoch": 0.16, "learning_rate": 1.3399958504071788e-05, "logits/chosen": -1.3602344989776611, "logits/rejected": -2.4258508682250977, "logps/chosen": -282.5267028808594, "logps/rejected": -336.80670166015625, "loss": 3.5915, "rewards/accuracies": 0.5, "rewards/chosen": -4.126977443695068, "rewards/margins": 1.515984058380127, "rewards/rejected": -5.642961502075195, "step": 1019 }, { "epoch": 0.16, "learning_rate": 1.339922506354064e-05, "logits/chosen": -3.09791898727417, "logits/rejected": -2.2610740661621094, "logps/chosen": -463.1657409667969, "logps/rejected": -210.09884643554688, "loss": 1.3724, "rewards/accuracies": 0.5, "rewards/chosen": -1.6735962629318237, "rewards/margins": 0.6970221996307373, "rewards/rejected": -2.3706185817718506, "step": 1020 }, { "epoch": 0.16, "learning_rate": 1.3398491623009493e-05, "logits/chosen": -2.609034538269043, "logits/rejected": -3.1343002319335938, "logps/chosen": -57.94166564941406, "logps/rejected": -257.86602783203125, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": 0.9256613254547119, "rewards/margins": 4.388497829437256, "rewards/rejected": -3.462836503982544, "step": 1021 }, { "epoch": 0.16, "learning_rate": 1.3397758182478345e-05, "logits/chosen": -2.8297646045684814, "logits/rejected": -2.933440923690796, "logps/chosen": -589.5969848632812, "logps/rejected": -323.78411865234375, "loss": 4.1779, "rewards/accuracies": 0.5, "rewards/chosen": -4.23394775390625, "rewards/margins": -1.5635623931884766, "rewards/rejected": -2.6703853607177734, "step": 1022 }, { "epoch": 0.16, "learning_rate": 1.3397024741947197e-05, "logits/chosen": -2.733569383621216, "logits/rejected": -3.0211000442504883, "logps/chosen": -109.10816955566406, "logps/rejected": -227.75503540039062, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 0.9409145712852478, "rewards/margins": 4.174395561218262, "rewards/rejected": -3.2334811687469482, "step": 1023 }, { "epoch": 0.16, "learning_rate": 1.3396291301416049e-05, "logits/chosen": -2.293530225753784, "logits/rejected": -1.8368891477584839, "logps/chosen": -467.9642639160156, "logps/rejected": -386.40655517578125, "loss": 3.1811, "rewards/accuracies": 0.5, "rewards/chosen": -4.530301094055176, "rewards/margins": -1.983562707901001, "rewards/rejected": -2.546738386154175, "step": 1024 }, { "epoch": 0.16, "learning_rate": 1.33955578608849e-05, "logits/chosen": -1.0433911085128784, "logits/rejected": -3.036283016204834, "logps/chosen": -49.00526428222656, "logps/rejected": -454.5509033203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.2472580075263977, "rewards/margins": 6.65222692489624, "rewards/rejected": -6.899485111236572, "step": 1025 }, { "epoch": 0.16, "learning_rate": 1.3394824420353752e-05, "logits/chosen": -1.8050620555877686, "logits/rejected": -2.6933274269104004, "logps/chosen": -152.764892578125, "logps/rejected": -160.76666259765625, "loss": 1.6654, "rewards/accuracies": 0.5, "rewards/chosen": -2.447502374649048, "rewards/margins": 0.7064806222915649, "rewards/rejected": -3.1539828777313232, "step": 1026 }, { "epoch": 0.16, "learning_rate": 1.3394090979822604e-05, "logits/chosen": -2.455199956893921, "logits/rejected": -3.2105202674865723, "logps/chosen": -160.23912048339844, "logps/rejected": -282.6870422363281, "loss": 1.7964, "rewards/accuracies": 0.5, "rewards/chosen": -0.90604168176651, "rewards/margins": 0.43179595470428467, "rewards/rejected": -1.33783757686615, "step": 1027 }, { "epoch": 0.16, "learning_rate": 1.3393357539291456e-05, "logits/chosen": -2.072887659072876, "logits/rejected": -2.867086887359619, "logps/chosen": -70.60317993164062, "logps/rejected": -232.7711944580078, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 0.5864682793617249, "rewards/margins": 5.74050235748291, "rewards/rejected": -5.15403413772583, "step": 1028 }, { "epoch": 0.16, "learning_rate": 1.3392624098760308e-05, "logits/chosen": -2.1289517879486084, "logits/rejected": -2.6328349113464355, "logps/chosen": -65.20872497558594, "logps/rejected": -146.28094482421875, "loss": 0.0843, "rewards/accuracies": 1.0, "rewards/chosen": 0.256417453289032, "rewards/margins": 3.83152174949646, "rewards/rejected": -3.575104236602783, "step": 1029 }, { "epoch": 0.16, "learning_rate": 1.3391890658229162e-05, "logits/chosen": -2.8926289081573486, "logits/rejected": -2.1844189167022705, "logps/chosen": -292.82830810546875, "logps/rejected": -121.71665954589844, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": 0.2600931227207184, "rewards/margins": 4.026698112487793, "rewards/rejected": -3.7666049003601074, "step": 1030 }, { "epoch": 0.16, "learning_rate": 1.3391157217698014e-05, "logits/chosen": -1.5552864074707031, "logits/rejected": -2.7294793128967285, "logps/chosen": -116.35371398925781, "logps/rejected": -175.21908569335938, "loss": 0.1311, "rewards/accuracies": 1.0, "rewards/chosen": -0.18350505828857422, "rewards/margins": 2.882007598876953, "rewards/rejected": -3.0655126571655273, "step": 1031 }, { "epoch": 0.16, "learning_rate": 1.3390423777166865e-05, "logits/chosen": -2.7737231254577637, "logits/rejected": -3.225823163986206, "logps/chosen": -90.57738494873047, "logps/rejected": -249.45018005371094, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.08308863639831543, "rewards/margins": 6.055976867675781, "rewards/rejected": -5.972887992858887, "step": 1032 }, { "epoch": 0.16, "learning_rate": 1.3389690336635717e-05, "logits/chosen": -2.860888957977295, "logits/rejected": -2.3275303840637207, "logps/chosen": -339.9845275878906, "logps/rejected": -409.7838134765625, "loss": 1.6536, "rewards/accuracies": 0.5, "rewards/chosen": -2.754748582839966, "rewards/margins": 0.9643172025680542, "rewards/rejected": -3.7190659046173096, "step": 1033 }, { "epoch": 0.16, "learning_rate": 1.3388956896104569e-05, "logits/chosen": -1.1592493057250977, "logits/rejected": -2.7009103298187256, "logps/chosen": -89.24527740478516, "logps/rejected": -404.0456848144531, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.038165271282196045, "rewards/margins": 5.834658622741699, "rewards/rejected": -5.7964935302734375, "step": 1034 }, { "epoch": 0.16, "learning_rate": 1.3388223455573421e-05, "logits/chosen": -2.845824718475342, "logits/rejected": -3.3311517238616943, "logps/chosen": -81.93959045410156, "logps/rejected": -224.56005859375, "loss": 0.0684, "rewards/accuracies": 1.0, "rewards/chosen": -0.7744969129562378, "rewards/margins": 2.6955957412719727, "rewards/rejected": -3.4700927734375, "step": 1035 }, { "epoch": 0.16, "learning_rate": 1.3387490015042275e-05, "logits/chosen": -1.998475432395935, "logits/rejected": -2.7217836380004883, "logps/chosen": -98.06209564208984, "logps/rejected": -231.000244140625, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.2058357298374176, "rewards/margins": 4.761646747589111, "rewards/rejected": -4.967482566833496, "step": 1036 }, { "epoch": 0.16, "learning_rate": 1.3386756574511126e-05, "logits/chosen": -2.9902079105377197, "logits/rejected": -3.191455125808716, "logps/chosen": -170.1753692626953, "logps/rejected": -238.3734588623047, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.49025899171829224, "rewards/margins": 5.437506198883057, "rewards/rejected": -5.927764892578125, "step": 1037 }, { "epoch": 0.16, "learning_rate": 1.3386023133979978e-05, "logits/chosen": -2.3768258094787598, "logits/rejected": -2.9010367393493652, "logps/chosen": -502.4754638671875, "logps/rejected": -451.70159912109375, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.24652865529060364, "rewards/margins": 4.917162895202637, "rewards/rejected": -5.163691520690918, "step": 1038 }, { "epoch": 0.16, "learning_rate": 1.3385289693448832e-05, "logits/chosen": -3.0653443336486816, "logits/rejected": -2.2447474002838135, "logps/chosen": -205.33773803710938, "logps/rejected": -58.0682373046875, "loss": 3.7827, "rewards/accuracies": 0.0, "rewards/chosen": -2.9651875495910645, "rewards/margins": -3.474623441696167, "rewards/rejected": 0.5094359517097473, "step": 1039 }, { "epoch": 0.16, "learning_rate": 1.3384556252917684e-05, "logits/chosen": -2.054699420928955, "logits/rejected": -2.7174692153930664, "logps/chosen": -30.342247009277344, "logps/rejected": -151.8946075439453, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": 0.15375083684921265, "rewards/margins": 3.9592978954315186, "rewards/rejected": -3.8055472373962402, "step": 1040 }, { "epoch": 0.16, "learning_rate": 1.3383822812386536e-05, "logits/chosen": -2.755204916000366, "logits/rejected": -0.6630797982215881, "logps/chosen": -404.66021728515625, "logps/rejected": -85.77136993408203, "loss": 5.3613, "rewards/accuracies": 0.0, "rewards/chosen": -5.673323631286621, "rewards/margins": -5.3565192222595215, "rewards/rejected": -0.31680452823638916, "step": 1041 }, { "epoch": 0.16, "learning_rate": 1.3383089371855388e-05, "logits/chosen": -0.9503471851348877, "logits/rejected": -2.1946799755096436, "logps/chosen": -185.39947509765625, "logps/rejected": -434.907470703125, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.5002815127372742, "rewards/margins": 4.867472171783447, "rewards/rejected": -5.367753982543945, "step": 1042 }, { "epoch": 0.16, "learning_rate": 1.338235593132424e-05, "logits/chosen": -3.0649259090423584, "logits/rejected": -2.6193947792053223, "logps/chosen": -369.0599365234375, "logps/rejected": -243.44700622558594, "loss": 3.5008, "rewards/accuracies": 0.5, "rewards/chosen": -3.3409087657928467, "rewards/margins": -0.6921148300170898, "rewards/rejected": -2.648793935775757, "step": 1043 }, { "epoch": 0.16, "learning_rate": 1.3381622490793091e-05, "logits/chosen": -1.5685462951660156, "logits/rejected": -3.1172385215759277, "logps/chosen": -81.96338653564453, "logps/rejected": -527.9881591796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.6746208667755127, "rewards/margins": 7.3287458419799805, "rewards/rejected": -6.6541242599487305, "step": 1044 }, { "epoch": 0.16, "learning_rate": 1.3380889050261943e-05, "logits/chosen": -2.603283405303955, "logits/rejected": -1.9228594303131104, "logps/chosen": -232.22286987304688, "logps/rejected": -254.0535125732422, "loss": 2.5223, "rewards/accuracies": 0.5, "rewards/chosen": -3.9206371307373047, "rewards/margins": 0.6199405193328857, "rewards/rejected": -4.5405778884887695, "step": 1045 }, { "epoch": 0.16, "learning_rate": 1.3380155609730795e-05, "logits/chosen": -2.362100839614868, "logits/rejected": -2.824101209640503, "logps/chosen": -263.9604187011719, "logps/rejected": -263.52783203125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.9619662761688232, "rewards/margins": 4.487338542938232, "rewards/rejected": -5.449305057525635, "step": 1046 }, { "epoch": 0.16, "learning_rate": 1.3379422169199647e-05, "logits/chosen": -1.7651925086975098, "logits/rejected": -3.066840648651123, "logps/chosen": -135.6787109375, "logps/rejected": -464.1007385253906, "loss": 0.3652, "rewards/accuracies": 0.5, "rewards/chosen": -0.17670556902885437, "rewards/margins": 3.4394214153289795, "rewards/rejected": -3.6161270141601562, "step": 1047 }, { "epoch": 0.16, "learning_rate": 1.33786887286685e-05, "logits/chosen": -2.0448102951049805, "logits/rejected": -3.039050579071045, "logps/chosen": -180.67576599121094, "logps/rejected": -235.62747192382812, "loss": 3.0292, "rewards/accuracies": 0.5, "rewards/chosen": -3.0877764225006104, "rewards/margins": -0.5466775894165039, "rewards/rejected": -2.5410988330841064, "step": 1048 }, { "epoch": 0.16, "learning_rate": 1.3377955288137352e-05, "logits/chosen": -1.3472155332565308, "logits/rejected": -2.7326111793518066, "logps/chosen": -96.296875, "logps/rejected": -277.997802734375, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -0.4987579584121704, "rewards/margins": 4.056745529174805, "rewards/rejected": -4.5555033683776855, "step": 1049 }, { "epoch": 0.16, "learning_rate": 1.3377221847606204e-05, "logits/chosen": -2.9244091510772705, "logits/rejected": -2.38415789604187, "logps/chosen": -478.53125, "logps/rejected": -374.6407470703125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 0.66021728515625, "rewards/margins": 5.933906555175781, "rewards/rejected": -5.273689270019531, "step": 1050 }, { "epoch": 0.16, "learning_rate": 1.3376488407075056e-05, "logits/chosen": -2.6913561820983887, "logits/rejected": -1.8370356559753418, "logps/chosen": -139.89129638671875, "logps/rejected": -135.48194885253906, "loss": 1.0005, "rewards/accuracies": 0.5, "rewards/chosen": -1.7529398202896118, "rewards/margins": 1.2635358572006226, "rewards/rejected": -3.0164756774902344, "step": 1051 }, { "epoch": 0.16, "learning_rate": 1.3375754966543908e-05, "logits/chosen": -2.692082166671753, "logits/rejected": -3.147670030593872, "logps/chosen": -47.747013092041016, "logps/rejected": -174.1695098876953, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 0.09828996658325195, "rewards/margins": 5.218155860900879, "rewards/rejected": -5.119865894317627, "step": 1052 }, { "epoch": 0.16, "learning_rate": 1.337502152601276e-05, "logits/chosen": -2.1836278438568115, "logits/rejected": -2.921555280685425, "logps/chosen": -80.02751159667969, "logps/rejected": -233.87887573242188, "loss": 0.0868, "rewards/accuracies": 1.0, "rewards/chosen": 0.27806106209754944, "rewards/margins": 2.468869924545288, "rewards/rejected": -2.1908087730407715, "step": 1053 }, { "epoch": 0.16, "learning_rate": 1.3374288085481612e-05, "logits/chosen": -2.460883617401123, "logits/rejected": -2.9644277095794678, "logps/chosen": -61.01013946533203, "logps/rejected": -225.6861572265625, "loss": 0.0987, "rewards/accuracies": 1.0, "rewards/chosen": 0.05354928970336914, "rewards/margins": 3.6262896060943604, "rewards/rejected": -3.572740316390991, "step": 1054 }, { "epoch": 0.16, "learning_rate": 1.3373554644950464e-05, "logits/chosen": -2.4656646251678467, "logits/rejected": -2.898401975631714, "logps/chosen": -84.92707061767578, "logps/rejected": -357.130615234375, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 0.6161321997642517, "rewards/margins": 5.067728042602539, "rewards/rejected": -4.451596260070801, "step": 1055 }, { "epoch": 0.16, "learning_rate": 1.3372821204419316e-05, "logits/chosen": -3.1386306285858154, "logits/rejected": -3.0004470348358154, "logps/chosen": -126.80543518066406, "logps/rejected": -190.98728942871094, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": 0.4750393033027649, "rewards/margins": 4.674471855163574, "rewards/rejected": -4.199432849884033, "step": 1056 }, { "epoch": 0.16, "learning_rate": 1.3372087763888169e-05, "logits/chosen": -2.476364850997925, "logits/rejected": -2.750749349594116, "logps/chosen": -32.435630798339844, "logps/rejected": -237.63137817382812, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 1.097107172012329, "rewards/margins": 4.922269821166992, "rewards/rejected": -3.825162410736084, "step": 1057 }, { "epoch": 0.16, "learning_rate": 1.3371354323357021e-05, "logits/chosen": -1.6861826181411743, "logits/rejected": -2.6879608631134033, "logps/chosen": -270.07281494140625, "logps/rejected": -587.1402587890625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.42438429594039917, "rewards/margins": 5.507051467895508, "rewards/rejected": -5.931436061859131, "step": 1058 }, { "epoch": 0.16, "learning_rate": 1.3370620882825873e-05, "logits/chosen": -2.278907537460327, "logits/rejected": -2.976900815963745, "logps/chosen": -20.491626739501953, "logps/rejected": -215.15573120117188, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 0.5574206709861755, "rewards/margins": 6.352499008178711, "rewards/rejected": -5.795078277587891, "step": 1059 }, { "epoch": 0.16, "learning_rate": 1.3369887442294725e-05, "logits/chosen": -2.625209331512451, "logits/rejected": -2.6950345039367676, "logps/chosen": -301.4205017089844, "logps/rejected": -324.4646911621094, "loss": 0.0756, "rewards/accuracies": 1.0, "rewards/chosen": 0.2913833558559418, "rewards/margins": 3.8209152221679688, "rewards/rejected": -3.529531955718994, "step": 1060 }, { "epoch": 0.17, "learning_rate": 1.3369154001763577e-05, "logits/chosen": -1.8031247854232788, "logits/rejected": -2.6730170249938965, "logps/chosen": -251.16934204101562, "logps/rejected": -415.2401123046875, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 0.5480644106864929, "rewards/margins": 4.785416603088379, "rewards/rejected": -4.237351894378662, "step": 1061 }, { "epoch": 0.17, "learning_rate": 1.3368420561232429e-05, "logits/chosen": -2.946445941925049, "logits/rejected": -2.338968276977539, "logps/chosen": -382.81695556640625, "logps/rejected": -345.61090087890625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.0898224115371704, "rewards/margins": 5.32060432434082, "rewards/rejected": -6.410426139831543, "step": 1062 }, { "epoch": 0.17, "learning_rate": 1.336768712070128e-05, "logits/chosen": -1.480309247970581, "logits/rejected": -3.0444109439849854, "logps/chosen": -66.56504821777344, "logps/rejected": -343.8252868652344, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.33114662766456604, "rewards/margins": 5.979786396026611, "rewards/rejected": -6.3109331130981445, "step": 1063 }, { "epoch": 0.17, "learning_rate": 1.3366953680170132e-05, "logits/chosen": -2.49941349029541, "logits/rejected": -2.5074193477630615, "logps/chosen": -148.9888153076172, "logps/rejected": -254.00148010253906, "loss": 1.6659, "rewards/accuracies": 0.5, "rewards/chosen": -1.9226794242858887, "rewards/margins": 2.007052421569824, "rewards/rejected": -3.929731845855713, "step": 1064 }, { "epoch": 0.17, "learning_rate": 1.3366220239638984e-05, "logits/chosen": -2.901404619216919, "logits/rejected": -2.268737554550171, "logps/chosen": -341.4078674316406, "logps/rejected": -225.04165649414062, "loss": 2.3953, "rewards/accuracies": 0.5, "rewards/chosen": -1.8381531238555908, "rewards/margins": 0.64681077003479, "rewards/rejected": -2.484963893890381, "step": 1065 }, { "epoch": 0.17, "learning_rate": 1.3365486799107838e-05, "logits/chosen": -3.0951685905456543, "logits/rejected": -2.9067981243133545, "logps/chosen": -483.24249267578125, "logps/rejected": -402.2177429199219, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.9914975166320801, "rewards/margins": 7.44540548324585, "rewards/rejected": -6.4539079666137695, "step": 1066 }, { "epoch": 0.17, "learning_rate": 1.336475335857669e-05, "logits/chosen": -2.78300404548645, "logits/rejected": -3.1412224769592285, "logps/chosen": -60.32331085205078, "logps/rejected": -177.976806640625, "loss": 0.8536, "rewards/accuracies": 0.5, "rewards/chosen": -0.02797389030456543, "rewards/margins": 0.11427617073059082, "rewards/rejected": -0.14225003123283386, "step": 1067 }, { "epoch": 0.17, "learning_rate": 1.3364019918045541e-05, "logits/chosen": -1.2804769277572632, "logits/rejected": -2.7396738529205322, "logps/chosen": -63.027435302734375, "logps/rejected": -321.5777893066406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.44875335693359375, "rewards/margins": 6.54530143737793, "rewards/rejected": -6.096548080444336, "step": 1068 }, { "epoch": 0.17, "learning_rate": 1.3363286477514393e-05, "logits/chosen": -2.7612500190734863, "logits/rejected": -3.207174777984619, "logps/chosen": -267.7718505859375, "logps/rejected": -314.3663330078125, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -0.4409976005554199, "rewards/margins": 3.901762008666992, "rewards/rejected": -4.34276008605957, "step": 1069 }, { "epoch": 0.17, "learning_rate": 1.3362553036983247e-05, "logits/chosen": -1.9196770191192627, "logits/rejected": -2.6931862831115723, "logps/chosen": -363.77069091796875, "logps/rejected": -699.9266967773438, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.0012386441230773926, "rewards/margins": 7.063100337982178, "rewards/rejected": -7.064338684082031, "step": 1070 }, { "epoch": 0.17, "learning_rate": 1.3361819596452099e-05, "logits/chosen": -2.6497745513916016, "logits/rejected": -2.8795166015625, "logps/chosen": -128.086669921875, "logps/rejected": -121.70487976074219, "loss": 0.8692, "rewards/accuracies": 0.5, "rewards/chosen": -0.05538332462310791, "rewards/margins": -0.12453693151473999, "rewards/rejected": 0.06915360689163208, "step": 1071 }, { "epoch": 0.17, "learning_rate": 1.336108615592095e-05, "logits/chosen": -2.554318904876709, "logits/rejected": -2.852583408355713, "logps/chosen": -47.91037368774414, "logps/rejected": -267.7034912109375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.6070553064346313, "rewards/margins": 5.510408401489258, "rewards/rejected": -6.117464065551758, "step": 1072 }, { "epoch": 0.17, "learning_rate": 1.3360352715389803e-05, "logits/chosen": -3.0711708068847656, "logits/rejected": -3.23677396774292, "logps/chosen": -111.1103515625, "logps/rejected": -212.34286499023438, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.1096431314945221, "rewards/margins": 5.0848469734191895, "rewards/rejected": -4.975203990936279, "step": 1073 }, { "epoch": 0.17, "learning_rate": 1.3359619274858654e-05, "logits/chosen": -2.8406808376312256, "logits/rejected": -2.058375120162964, "logps/chosen": -276.0155029296875, "logps/rejected": -156.75624084472656, "loss": 1.8457, "rewards/accuracies": 0.5, "rewards/chosen": -2.6770989894866943, "rewards/margins": 1.1466466188430786, "rewards/rejected": -3.8237457275390625, "step": 1074 }, { "epoch": 0.17, "learning_rate": 1.3358885834327508e-05, "logits/chosen": -2.394711971282959, "logits/rejected": -2.885871171951294, "logps/chosen": -33.115177154541016, "logps/rejected": -170.46304321289062, "loss": 0.101, "rewards/accuracies": 1.0, "rewards/chosen": -0.4638041853904724, "rewards/margins": 4.564029693603516, "rewards/rejected": -5.027833938598633, "step": 1075 }, { "epoch": 0.17, "learning_rate": 1.335815239379636e-05, "logits/chosen": -2.3800671100616455, "logits/rejected": -2.4725091457366943, "logps/chosen": -172.07241821289062, "logps/rejected": -327.38104248046875, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": 1.224452018737793, "rewards/margins": 4.071226119995117, "rewards/rejected": -2.8467743396759033, "step": 1076 }, { "epoch": 0.17, "learning_rate": 1.3357418953265212e-05, "logits/chosen": -2.445356607437134, "logits/rejected": -2.899820566177368, "logps/chosen": -62.515323638916016, "logps/rejected": -179.87738037109375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -0.22486621141433716, "rewards/margins": 4.1053361892700195, "rewards/rejected": -4.330202102661133, "step": 1077 }, { "epoch": 0.17, "learning_rate": 1.3356685512734064e-05, "logits/chosen": -2.424921989440918, "logits/rejected": -2.8176827430725098, "logps/chosen": -37.587799072265625, "logps/rejected": -155.93325805664062, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 0.7361209988594055, "rewards/margins": 4.806589126586914, "rewards/rejected": -4.070467948913574, "step": 1078 }, { "epoch": 0.17, "learning_rate": 1.3355952072202916e-05, "logits/chosen": -1.4964470863342285, "logits/rejected": -2.583160638809204, "logps/chosen": -130.81153869628906, "logps/rejected": -407.52679443359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.09372025728225708, "rewards/margins": 9.22671127319336, "rewards/rejected": -9.132990837097168, "step": 1079 }, { "epoch": 0.17, "learning_rate": 1.3355218631671767e-05, "logits/chosen": -1.7161496877670288, "logits/rejected": -2.7914879322052, "logps/chosen": -151.1309051513672, "logps/rejected": -526.939697265625, "loss": 2.8006, "rewards/accuracies": 0.5, "rewards/chosen": -4.245680809020996, "rewards/margins": 0.9264171123504639, "rewards/rejected": -5.172098159790039, "step": 1080 }, { "epoch": 0.17, "learning_rate": 1.335448519114062e-05, "logits/chosen": -2.9046106338500977, "logits/rejected": -2.878154993057251, "logps/chosen": -409.82073974609375, "logps/rejected": -382.7302551269531, "loss": 0.0346, "rewards/accuracies": 1.0, "rewards/chosen": -1.311974287033081, "rewards/margins": 4.205122470855713, "rewards/rejected": -5.517096519470215, "step": 1081 }, { "epoch": 0.17, "learning_rate": 1.3353751750609471e-05, "logits/chosen": -2.9862325191497803, "logits/rejected": -2.0859158039093018, "logps/chosen": -442.25421142578125, "logps/rejected": -240.10006713867188, "loss": 1.2398, "rewards/accuracies": 0.5, "rewards/chosen": -1.7006187438964844, "rewards/margins": 1.1496460437774658, "rewards/rejected": -2.85026478767395, "step": 1082 }, { "epoch": 0.17, "learning_rate": 1.3353018310078323e-05, "logits/chosen": -1.1557502746582031, "logits/rejected": -2.7624638080596924, "logps/chosen": -112.33903503417969, "logps/rejected": -344.84979248046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.9935478568077087, "rewards/margins": 6.8590240478515625, "rewards/rejected": -5.865476608276367, "step": 1083 }, { "epoch": 0.17, "learning_rate": 1.3352284869547177e-05, "logits/chosen": -2.285140037536621, "logits/rejected": -2.724736213684082, "logps/chosen": -10.054636001586914, "logps/rejected": -138.54959106445312, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": 0.6165934205055237, "rewards/margins": 6.840572357177734, "rewards/rejected": -6.2239789962768555, "step": 1084 }, { "epoch": 0.17, "learning_rate": 1.3351551429016028e-05, "logits/chosen": -2.0315778255462646, "logits/rejected": -2.830949306488037, "logps/chosen": -355.9502868652344, "logps/rejected": -289.53759765625, "loss": 2.2182, "rewards/accuracies": 0.5, "rewards/chosen": -2.6981139183044434, "rewards/margins": 1.3118185997009277, "rewards/rejected": -4.009932994842529, "step": 1085 }, { "epoch": 0.17, "learning_rate": 1.335081798848488e-05, "logits/chosen": -2.174880266189575, "logits/rejected": -2.5205769538879395, "logps/chosen": -334.2839660644531, "logps/rejected": -377.1538391113281, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7945137023925781, "rewards/margins": 4.990505218505859, "rewards/rejected": -5.7850189208984375, "step": 1086 }, { "epoch": 0.17, "learning_rate": 1.3350084547953732e-05, "logits/chosen": -3.141314744949341, "logits/rejected": -3.0733025074005127, "logps/chosen": -520.0362548828125, "logps/rejected": -347.69317626953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.3100704550743103, "rewards/margins": 7.363722324371338, "rewards/rejected": -7.673792839050293, "step": 1087 }, { "epoch": 0.17, "learning_rate": 1.3349351107422584e-05, "logits/chosen": -1.4146779775619507, "logits/rejected": -3.0293548107147217, "logps/chosen": -38.01091003417969, "logps/rejected": -389.93853759765625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.23345902562141418, "rewards/margins": 7.311793327331543, "rewards/rejected": -7.078334331512451, "step": 1088 }, { "epoch": 0.17, "learning_rate": 1.3348617666891436e-05, "logits/chosen": -2.010256767272949, "logits/rejected": -2.8096506595611572, "logps/chosen": -198.05886840820312, "logps/rejected": -329.3875427246094, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0450156927108765, "rewards/margins": 6.39119291305542, "rewards/rejected": -7.436208724975586, "step": 1089 }, { "epoch": 0.17, "learning_rate": 1.3347884226360288e-05, "logits/chosen": -1.8833794593811035, "logits/rejected": -2.879554271697998, "logps/chosen": -195.9417724609375, "logps/rejected": -320.30206298828125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.207529902458191, "rewards/margins": 7.94255256652832, "rewards/rejected": -9.1500825881958, "step": 1090 }, { "epoch": 0.17, "learning_rate": 1.334715078582914e-05, "logits/chosen": -1.1534297466278076, "logits/rejected": -2.1091368198394775, "logps/chosen": -159.4868927001953, "logps/rejected": -585.9171142578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.2034240961074829, "rewards/margins": 7.775795936584473, "rewards/rejected": -7.572371482849121, "step": 1091 }, { "epoch": 0.17, "learning_rate": 1.3346417345297993e-05, "logits/chosen": -1.3935539722442627, "logits/rejected": -2.8026492595672607, "logps/chosen": -117.7943344116211, "logps/rejected": -378.2640075683594, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.4118751585483551, "rewards/margins": 6.917165756225586, "rewards/rejected": -7.32904052734375, "step": 1092 }, { "epoch": 0.17, "learning_rate": 1.3345683904766845e-05, "logits/chosen": -2.167663335800171, "logits/rejected": -2.567854642868042, "logps/chosen": -328.03839111328125, "logps/rejected": -432.5931396484375, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": -1.7678277492523193, "rewards/margins": 3.214540958404541, "rewards/rejected": -4.982368469238281, "step": 1093 }, { "epoch": 0.17, "learning_rate": 1.3344950464235697e-05, "logits/chosen": -2.119305372238159, "logits/rejected": -2.715879201889038, "logps/chosen": -129.43556213378906, "logps/rejected": -236.5978240966797, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 1.2078793048858643, "rewards/margins": 6.719563961029053, "rewards/rejected": -5.511684894561768, "step": 1094 }, { "epoch": 0.17, "learning_rate": 1.3344217023704549e-05, "logits/chosen": -2.330955982208252, "logits/rejected": -2.777491569519043, "logps/chosen": -159.7777099609375, "logps/rejected": -250.38864135742188, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.18947143852710724, "rewards/margins": 6.464648723602295, "rewards/rejected": -6.275177001953125, "step": 1095 }, { "epoch": 0.17, "learning_rate": 1.3343483583173401e-05, "logits/chosen": -2.5181610584259033, "logits/rejected": -2.6268227100372314, "logps/chosen": -8.173015594482422, "logps/rejected": -247.36642456054688, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": 1.1544150114059448, "rewards/margins": 6.317012310028076, "rewards/rejected": -5.162597179412842, "step": 1096 }, { "epoch": 0.17, "learning_rate": 1.3342750142642253e-05, "logits/chosen": -2.7461016178131104, "logits/rejected": -2.295358657836914, "logps/chosen": -475.90460205078125, "logps/rejected": -311.3037109375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.7313621640205383, "rewards/margins": 6.020638465881348, "rewards/rejected": -6.75200080871582, "step": 1097 }, { "epoch": 0.17, "learning_rate": 1.3342016702111105e-05, "logits/chosen": -0.5289570093154907, "logits/rejected": -2.603921890258789, "logps/chosen": -55.04951858520508, "logps/rejected": -439.88836669921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.20115414261817932, "rewards/margins": 9.36873722076416, "rewards/rejected": -9.167583465576172, "step": 1098 }, { "epoch": 0.17, "learning_rate": 1.3341283261579956e-05, "logits/chosen": -2.491421699523926, "logits/rejected": -2.572904109954834, "logps/chosen": -409.92706298828125, "logps/rejected": -379.060791015625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.8845772743225098, "rewards/margins": 5.303703308105469, "rewards/rejected": -6.18828010559082, "step": 1099 }, { "epoch": 0.17, "learning_rate": 1.3340549821048808e-05, "logits/chosen": -2.55643367767334, "logits/rejected": -1.6828479766845703, "logps/chosen": -294.660400390625, "logps/rejected": -57.30744934082031, "loss": 7.3127, "rewards/accuracies": 0.0, "rewards/chosen": -6.989166259765625, "rewards/margins": -7.312033653259277, "rewards/rejected": 0.32286739349365234, "step": 1100 }, { "epoch": 0.17, "learning_rate": 1.3339816380517662e-05, "logits/chosen": -2.284205436706543, "logits/rejected": -2.824070453643799, "logps/chosen": -101.9334945678711, "logps/rejected": -288.4129638671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.38395312428474426, "rewards/margins": 7.767210960388184, "rewards/rejected": -8.151164054870605, "step": 1101 }, { "epoch": 0.17, "learning_rate": 1.3339082939986514e-05, "logits/chosen": -2.9738001823425293, "logits/rejected": -2.5979652404785156, "logps/chosen": -483.2236328125, "logps/rejected": -379.1543273925781, "loss": 0.1863, "rewards/accuracies": 1.0, "rewards/chosen": -0.22497707605361938, "rewards/margins": 4.534660339355469, "rewards/rejected": -4.759637355804443, "step": 1102 }, { "epoch": 0.17, "learning_rate": 1.3338349499455366e-05, "logits/chosen": -2.609924554824829, "logits/rejected": -2.5721874237060547, "logps/chosen": -214.9992218017578, "logps/rejected": -232.7648468017578, "loss": 2.1805, "rewards/accuracies": 0.5, "rewards/chosen": -2.198885917663574, "rewards/margins": -0.31903076171875, "rewards/rejected": -1.8798549175262451, "step": 1103 }, { "epoch": 0.17, "learning_rate": 1.333761605892422e-05, "logits/chosen": -2.3934690952301025, "logits/rejected": -2.8444149494171143, "logps/chosen": -232.717041015625, "logps/rejected": -330.871826171875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.48691055178642273, "rewards/margins": 5.783566474914551, "rewards/rejected": -6.270477294921875, "step": 1104 }, { "epoch": 0.17, "learning_rate": 1.3336882618393071e-05, "logits/chosen": -2.342489242553711, "logits/rejected": -2.8295083045959473, "logps/chosen": -20.196706771850586, "logps/rejected": -203.52615356445312, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 0.9405767321586609, "rewards/margins": 5.788525581359863, "rewards/rejected": -4.8479485511779785, "step": 1105 }, { "epoch": 0.17, "learning_rate": 1.3336149177861923e-05, "logits/chosen": -2.2282354831695557, "logits/rejected": -2.579421281814575, "logps/chosen": -105.81536102294922, "logps/rejected": -252.21791076660156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.044856071472168, "rewards/margins": 6.827275276184082, "rewards/rejected": -7.87213134765625, "step": 1106 }, { "epoch": 0.17, "learning_rate": 1.3335415737330775e-05, "logits/chosen": -1.696021556854248, "logits/rejected": -2.7987968921661377, "logps/chosen": -51.73244857788086, "logps/rejected": -214.63670349121094, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": 0.5067585110664368, "rewards/margins": 5.2253313064575195, "rewards/rejected": -4.718573093414307, "step": 1107 }, { "epoch": 0.17, "learning_rate": 1.3334682296799627e-05, "logits/chosen": -2.4130630493164062, "logits/rejected": -2.8741934299468994, "logps/chosen": -64.74961853027344, "logps/rejected": -280.1886291503906, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.8517311811447144, "rewards/margins": 7.485816478729248, "rewards/rejected": -6.634085178375244, "step": 1108 }, { "epoch": 0.17, "learning_rate": 1.3333948856268479e-05, "logits/chosen": -2.4930996894836426, "logits/rejected": -2.5160295963287354, "logps/chosen": -143.6721954345703, "logps/rejected": -103.01930236816406, "loss": 3.6709, "rewards/accuracies": 0.5, "rewards/chosen": -3.2722535133361816, "rewards/margins": -2.2903285026550293, "rewards/rejected": -0.9819250106811523, "step": 1109 }, { "epoch": 0.17, "learning_rate": 1.3333215415737332e-05, "logits/chosen": -2.6326940059661865, "logits/rejected": -2.9843413829803467, "logps/chosen": -178.5046844482422, "logps/rejected": -415.3883056640625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.6815544366836548, "rewards/margins": 6.149646759033203, "rewards/rejected": -7.831201553344727, "step": 1110 }, { "epoch": 0.17, "learning_rate": 1.3332481975206184e-05, "logits/chosen": -2.336622476577759, "logits/rejected": -2.7556517124176025, "logps/chosen": -267.320556640625, "logps/rejected": -292.52728271484375, "loss": 3.6886, "rewards/accuracies": 0.5, "rewards/chosen": -4.283169746398926, "rewards/margins": 0.3148007392883301, "rewards/rejected": -4.597970485687256, "step": 1111 }, { "epoch": 0.17, "learning_rate": 1.3331748534675036e-05, "logits/chosen": -1.5226601362228394, "logits/rejected": -2.8321166038513184, "logps/chosen": -90.27404022216797, "logps/rejected": -335.2874755859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7450634241104126, "rewards/margins": 6.569183826446533, "rewards/rejected": -7.314247131347656, "step": 1112 }, { "epoch": 0.17, "learning_rate": 1.3331015094143888e-05, "logits/chosen": -2.149372100830078, "logits/rejected": -2.9190542697906494, "logps/chosen": -114.35096740722656, "logps/rejected": -288.47161865234375, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.9177744388580322, "rewards/margins": 5.390636444091797, "rewards/rejected": -6.30841064453125, "step": 1113 }, { "epoch": 0.17, "learning_rate": 1.333028165361274e-05, "logits/chosen": -2.4793245792388916, "logits/rejected": -2.736211061477661, "logps/chosen": -57.13768768310547, "logps/rejected": -175.67578125, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.8840603828430176, "rewards/margins": 4.872602462768555, "rewards/rejected": -6.7566633224487305, "step": 1114 }, { "epoch": 0.17, "learning_rate": 1.3329548213081592e-05, "logits/chosen": -2.960458517074585, "logits/rejected": -2.193216562271118, "logps/chosen": -287.31878662109375, "logps/rejected": -148.7753448486328, "loss": 1.634, "rewards/accuracies": 0.5, "rewards/chosen": -1.5878570079803467, "rewards/margins": 0.3066253662109375, "rewards/rejected": -1.8944823741912842, "step": 1115 }, { "epoch": 0.17, "learning_rate": 1.3328814772550443e-05, "logits/chosen": -2.684410572052002, "logits/rejected": -2.507657766342163, "logps/chosen": -615.6444091796875, "logps/rejected": -451.2140808105469, "loss": 2.0636, "rewards/accuracies": 0.5, "rewards/chosen": -4.5314788818359375, "rewards/margins": 1.31508469581604, "rewards/rejected": -5.846563339233398, "step": 1116 }, { "epoch": 0.17, "learning_rate": 1.3328081332019295e-05, "logits/chosen": -1.8344476222991943, "logits/rejected": -2.5986592769622803, "logps/chosen": -76.68930053710938, "logps/rejected": -248.50201416015625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.6866397857666016, "rewards/margins": 5.735042095184326, "rewards/rejected": -6.421681880950928, "step": 1117 }, { "epoch": 0.17, "learning_rate": 1.3327347891488147e-05, "logits/chosen": -2.5109384059906006, "logits/rejected": -2.3731906414031982, "logps/chosen": -686.1112670898438, "logps/rejected": -509.4087829589844, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.9470943808555603, "rewards/margins": 4.68752384185791, "rewards/rejected": -5.634617805480957, "step": 1118 }, { "epoch": 0.17, "learning_rate": 1.3326614450957e-05, "logits/chosen": -2.6419363021850586, "logits/rejected": -1.2673704624176025, "logps/chosen": -541.7296142578125, "logps/rejected": -206.3223876953125, "loss": 4.7832, "rewards/accuracies": 0.5, "rewards/chosen": -5.679468154907227, "rewards/margins": -3.101175308227539, "rewards/rejected": -2.5782928466796875, "step": 1119 }, { "epoch": 0.17, "learning_rate": 1.3325881010425853e-05, "logits/chosen": -2.712052345275879, "logits/rejected": -1.8661866188049316, "logps/chosen": -266.6368103027344, "logps/rejected": -160.35626220703125, "loss": 3.4011, "rewards/accuracies": 0.5, "rewards/chosen": -3.042686700820923, "rewards/margins": 1.238616943359375, "rewards/rejected": -4.281303882598877, "step": 1120 }, { "epoch": 0.17, "learning_rate": 1.3325147569894705e-05, "logits/chosen": -2.834404468536377, "logits/rejected": -3.0034966468811035, "logps/chosen": -203.91567993164062, "logps/rejected": -234.80746459960938, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -1.696387529373169, "rewards/margins": 3.802765369415283, "rewards/rejected": -5.499153137207031, "step": 1121 }, { "epoch": 0.17, "learning_rate": 1.3324414129363556e-05, "logits/chosen": -2.833296775817871, "logits/rejected": -2.818108081817627, "logps/chosen": -429.1309509277344, "logps/rejected": -493.3946533203125, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -2.5538086891174316, "rewards/margins": 5.426153659820557, "rewards/rejected": -7.979962348937988, "step": 1122 }, { "epoch": 0.17, "learning_rate": 1.3323680688832408e-05, "logits/chosen": -1.6185888051986694, "logits/rejected": -2.543534278869629, "logps/chosen": -81.25709533691406, "logps/rejected": -274.5604553222656, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.4158489406108856, "rewards/margins": 5.538296699523926, "rewards/rejected": -5.954145908355713, "step": 1123 }, { "epoch": 0.17, "learning_rate": 1.332294724830126e-05, "logits/chosen": -2.143293619155884, "logits/rejected": -2.894199848175049, "logps/chosen": -118.88494110107422, "logps/rejected": -255.80174255371094, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.5311520099639893, "rewards/margins": 5.505606651306152, "rewards/rejected": -4.974454879760742, "step": 1124 }, { "epoch": 0.17, "learning_rate": 1.3322213807770112e-05, "logits/chosen": -2.688188076019287, "logits/rejected": -2.7986562252044678, "logps/chosen": -382.0683288574219, "logps/rejected": -390.09942626953125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -3.6616272926330566, "rewards/margins": 4.364727973937988, "rewards/rejected": -8.026355743408203, "step": 1125 }, { "epoch": 0.18, "learning_rate": 1.3321480367238964e-05, "logits/chosen": -2.433591604232788, "logits/rejected": -2.7573704719543457, "logps/chosen": -125.66930389404297, "logps/rejected": -208.6417999267578, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.6957559585571289, "rewards/margins": 4.987698554992676, "rewards/rejected": -4.291942596435547, "step": 1126 }, { "epoch": 0.18, "learning_rate": 1.3320746926707816e-05, "logits/chosen": -2.587528705596924, "logits/rejected": -2.7987451553344727, "logps/chosen": -72.5331802368164, "logps/rejected": -224.69210815429688, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.9015582799911499, "rewards/margins": 6.055215835571289, "rewards/rejected": -5.15365743637085, "step": 1127 }, { "epoch": 0.18, "learning_rate": 1.332001348617667e-05, "logits/chosen": -2.0212507247924805, "logits/rejected": -3.0471668243408203, "logps/chosen": -30.468416213989258, "logps/rejected": -259.28839111328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.1363476514816284, "rewards/margins": 6.564486503601074, "rewards/rejected": -5.428138732910156, "step": 1128 }, { "epoch": 0.18, "learning_rate": 1.3319280045645521e-05, "logits/chosen": -2.6457488536834717, "logits/rejected": -3.103071928024292, "logps/chosen": -137.75973510742188, "logps/rejected": -272.8785400390625, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": -2.099430561065674, "rewards/margins": 3.740431547164917, "rewards/rejected": -5.839861869812012, "step": 1129 }, { "epoch": 0.18, "learning_rate": 1.3318546605114373e-05, "logits/chosen": -2.0274009704589844, "logits/rejected": -2.7395412921905518, "logps/chosen": -273.2948913574219, "logps/rejected": -509.2200927734375, "loss": 1.9277, "rewards/accuracies": 0.5, "rewards/chosen": -5.207015037536621, "rewards/margins": -0.6591269969940186, "rewards/rejected": -4.547888278961182, "step": 1130 }, { "epoch": 0.18, "learning_rate": 1.3317813164583225e-05, "logits/chosen": -2.8425347805023193, "logits/rejected": -3.0903990268707275, "logps/chosen": -151.14151000976562, "logps/rejected": -316.97320556640625, "loss": 1.6938, "rewards/accuracies": 0.5, "rewards/chosen": -2.969712257385254, "rewards/margins": 1.1275066137313843, "rewards/rejected": -4.097218990325928, "step": 1131 }, { "epoch": 0.18, "learning_rate": 1.3317079724052077e-05, "logits/chosen": -2.469111919403076, "logits/rejected": -2.1762399673461914, "logps/chosen": -158.30674743652344, "logps/rejected": -231.08941650390625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 0.7269223928451538, "rewards/margins": 5.900042533874512, "rewards/rejected": -5.173120021820068, "step": 1132 }, { "epoch": 0.18, "learning_rate": 1.3316346283520929e-05, "logits/chosen": -2.093456506729126, "logits/rejected": -2.632518768310547, "logps/chosen": -248.37332153320312, "logps/rejected": -313.2709045410156, "loss": 1.1558, "rewards/accuracies": 0.5, "rewards/chosen": -2.0446906089782715, "rewards/margins": 2.833608627319336, "rewards/rejected": -4.878299236297607, "step": 1133 }, { "epoch": 0.18, "learning_rate": 1.331561284298978e-05, "logits/chosen": -1.6356768608093262, "logits/rejected": -2.734001398086548, "logps/chosen": -306.2518310546875, "logps/rejected": -431.10150146484375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.5004539489746094, "rewards/margins": 5.428642749786377, "rewards/rejected": -6.9290971755981445, "step": 1134 }, { "epoch": 0.18, "learning_rate": 1.3314879402458633e-05, "logits/chosen": -2.8971145153045654, "logits/rejected": -2.8454384803771973, "logps/chosen": -285.6909484863281, "logps/rejected": -258.10784912109375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.054138749837875366, "rewards/margins": 6.118525505065918, "rewards/rejected": -6.172664165496826, "step": 1135 }, { "epoch": 0.18, "learning_rate": 1.3314145961927484e-05, "logits/chosen": -2.7826995849609375, "logits/rejected": -2.0272741317749023, "logps/chosen": -294.98553466796875, "logps/rejected": -355.00616455078125, "loss": 4.0746, "rewards/accuracies": 0.5, "rewards/chosen": -3.9975767135620117, "rewards/margins": 1.336014747619629, "rewards/rejected": -5.333591461181641, "step": 1136 }, { "epoch": 0.18, "learning_rate": 1.3313412521396338e-05, "logits/chosen": -2.651458740234375, "logits/rejected": -2.6084866523742676, "logps/chosen": -181.43609619140625, "logps/rejected": -235.232421875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.27720949053764343, "rewards/margins": 6.09736442565918, "rewards/rejected": -6.374573707580566, "step": 1137 }, { "epoch": 0.18, "learning_rate": 1.3312679080865192e-05, "logits/chosen": -2.4898481369018555, "logits/rejected": -2.751016855239868, "logps/chosen": -258.9998474121094, "logps/rejected": -435.559326171875, "loss": 3.6406, "rewards/accuracies": 0.5, "rewards/chosen": -3.6974968910217285, "rewards/margins": 1.002945899963379, "rewards/rejected": -4.700442790985107, "step": 1138 }, { "epoch": 0.18, "learning_rate": 1.3311945640334043e-05, "logits/chosen": -2.525132417678833, "logits/rejected": -2.1188290119171143, "logps/chosen": -175.9939422607422, "logps/rejected": -231.0141143798828, "loss": 3.7885, "rewards/accuracies": 0.5, "rewards/chosen": -3.098356246948242, "rewards/margins": -0.836989164352417, "rewards/rejected": -2.261367082595825, "step": 1139 }, { "epoch": 0.18, "learning_rate": 1.3311212199802895e-05, "logits/chosen": -2.240967035293579, "logits/rejected": -2.8043394088745117, "logps/chosen": -258.491455078125, "logps/rejected": -310.7982482910156, "loss": 2.539, "rewards/accuracies": 0.5, "rewards/chosen": -2.0900909900665283, "rewards/margins": -1.8167701959609985, "rewards/rejected": -0.27332085371017456, "step": 1140 }, { "epoch": 0.18, "learning_rate": 1.3310478759271747e-05, "logits/chosen": -2.831955671310425, "logits/rejected": -2.843780755996704, "logps/chosen": -182.29212951660156, "logps/rejected": -310.33013916015625, "loss": 1.8385, "rewards/accuracies": 0.5, "rewards/chosen": -1.8283549547195435, "rewards/margins": 1.9180235862731934, "rewards/rejected": -3.7463784217834473, "step": 1141 }, { "epoch": 0.18, "learning_rate": 1.3309745318740599e-05, "logits/chosen": -2.978961944580078, "logits/rejected": -2.8132665157318115, "logps/chosen": -334.9427490234375, "logps/rejected": -450.1163330078125, "loss": 1.0294, "rewards/accuracies": 0.5, "rewards/chosen": -1.4779709577560425, "rewards/margins": 1.8531615734100342, "rewards/rejected": -3.331132411956787, "step": 1142 }, { "epoch": 0.18, "learning_rate": 1.3309011878209451e-05, "logits/chosen": -1.3941787481307983, "logits/rejected": -2.674631118774414, "logps/chosen": -257.717529296875, "logps/rejected": -526.9996337890625, "loss": 4.1093, "rewards/accuracies": 0.0, "rewards/chosen": -5.339144229888916, "rewards/margins": -4.092787265777588, "rewards/rejected": -1.2463569641113281, "step": 1143 }, { "epoch": 0.18, "learning_rate": 1.3308278437678303e-05, "logits/chosen": -1.97482430934906, "logits/rejected": -2.45119309425354, "logps/chosen": -226.12278747558594, "logps/rejected": -226.89984130859375, "loss": 2.6038, "rewards/accuracies": 0.5, "rewards/chosen": -2.5490241050720215, "rewards/margins": 0.9248137474060059, "rewards/rejected": -3.4738378524780273, "step": 1144 }, { "epoch": 0.18, "learning_rate": 1.3307544997147155e-05, "logits/chosen": -2.238837957382202, "logits/rejected": -2.9718055725097656, "logps/chosen": -208.0948944091797, "logps/rejected": -374.01416015625, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": 0.29030531644821167, "rewards/margins": 3.9754114151000977, "rewards/rejected": -3.685105800628662, "step": 1145 }, { "epoch": 0.18, "learning_rate": 1.3306811556616008e-05, "logits/chosen": -1.1248339414596558, "logits/rejected": -2.7764222621917725, "logps/chosen": -82.54167938232422, "logps/rejected": -339.3565368652344, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.09469255805015564, "rewards/margins": 4.8354058265686035, "rewards/rejected": -4.740713119506836, "step": 1146 }, { "epoch": 0.18, "learning_rate": 1.330607811608486e-05, "logits/chosen": -1.6572494506835938, "logits/rejected": -1.9748883247375488, "logps/chosen": -116.70111083984375, "logps/rejected": -244.64312744140625, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": 0.4960857629776001, "rewards/margins": 3.528742790222168, "rewards/rejected": -3.0326569080352783, "step": 1147 }, { "epoch": 0.18, "learning_rate": 1.3305344675553712e-05, "logits/chosen": -2.544074296951294, "logits/rejected": -2.9401371479034424, "logps/chosen": -160.44476318359375, "logps/rejected": -286.5411071777344, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 0.4244905710220337, "rewards/margins": 5.936090469360352, "rewards/rejected": -5.511599540710449, "step": 1148 }, { "epoch": 0.18, "learning_rate": 1.3304611235022564e-05, "logits/chosen": -1.9002776145935059, "logits/rejected": -2.7308146953582764, "logps/chosen": -99.72854614257812, "logps/rejected": -179.2815399169922, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 0.007802009582519531, "rewards/margins": 3.969895362854004, "rewards/rejected": -3.9620933532714844, "step": 1149 }, { "epoch": 0.18, "learning_rate": 1.3303877794491416e-05, "logits/chosen": -2.667022466659546, "logits/rejected": -3.0038819313049316, "logps/chosen": -481.04339599609375, "logps/rejected": -478.0171813964844, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": -1.0164159536361694, "rewards/margins": 4.749597549438477, "rewards/rejected": -5.7660136222839355, "step": 1150 }, { "epoch": 0.18, "learning_rate": 1.3303144353960268e-05, "logits/chosen": -2.6791625022888184, "logits/rejected": -2.3923842906951904, "logps/chosen": -187.6474609375, "logps/rejected": -62.84033203125, "loss": 4.3154, "rewards/accuracies": 0.5, "rewards/chosen": -4.153716087341309, "rewards/margins": -1.635730266571045, "rewards/rejected": -2.517986297607422, "step": 1151 }, { "epoch": 0.18, "learning_rate": 1.330241091342912e-05, "logits/chosen": -2.3981549739837646, "logits/rejected": -2.7990007400512695, "logps/chosen": -81.2322006225586, "logps/rejected": -140.67971801757812, "loss": 2.0915, "rewards/accuracies": 0.5, "rewards/chosen": -1.6595451831817627, "rewards/margins": 0.5514538288116455, "rewards/rejected": -2.210999011993408, "step": 1152 }, { "epoch": 0.18, "learning_rate": 1.3301677472897971e-05, "logits/chosen": -1.7513363361358643, "logits/rejected": -2.7125396728515625, "logps/chosen": -68.61132049560547, "logps/rejected": -260.1541748046875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.40570068359375, "rewards/margins": 6.711424350738525, "rewards/rejected": -6.305723667144775, "step": 1153 }, { "epoch": 0.18, "learning_rate": 1.3300944032366823e-05, "logits/chosen": -1.1056630611419678, "logits/rejected": -1.545650839805603, "logps/chosen": -137.10205078125, "logps/rejected": -173.677490234375, "loss": 1.8023, "rewards/accuracies": 0.5, "rewards/chosen": -2.056030035018921, "rewards/margins": 0.537987470626831, "rewards/rejected": -2.594017505645752, "step": 1154 }, { "epoch": 0.18, "learning_rate": 1.3300210591835677e-05, "logits/chosen": -2.995391607284546, "logits/rejected": -1.7643483877182007, "logps/chosen": -471.3076477050781, "logps/rejected": -134.41200256347656, "loss": 2.7096, "rewards/accuracies": 0.0, "rewards/chosen": -2.626077175140381, "rewards/margins": -2.6198158264160156, "rewards/rejected": -0.0062614381313323975, "step": 1155 }, { "epoch": 0.18, "learning_rate": 1.3299477151304529e-05, "logits/chosen": -2.795203685760498, "logits/rejected": -2.4663710594177246, "logps/chosen": -147.180908203125, "logps/rejected": -129.77308654785156, "loss": 2.311, "rewards/accuracies": 0.5, "rewards/chosen": -1.8596473932266235, "rewards/margins": 0.2762150764465332, "rewards/rejected": -2.1358625888824463, "step": 1156 }, { "epoch": 0.18, "learning_rate": 1.329874371077338e-05, "logits/chosen": -1.3403555154800415, "logits/rejected": -2.8174257278442383, "logps/chosen": -125.72931671142578, "logps/rejected": -455.54180908203125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.2240852415561676, "rewards/margins": 7.265263080596924, "rewards/rejected": -7.489348411560059, "step": 1157 }, { "epoch": 0.18, "learning_rate": 1.3298010270242233e-05, "logits/chosen": -2.827683925628662, "logits/rejected": -2.366225004196167, "logps/chosen": -138.1732635498047, "logps/rejected": -149.5945587158203, "loss": 4.3812, "rewards/accuracies": 0.5, "rewards/chosen": -3.959031820297241, "rewards/margins": -0.4087536334991455, "rewards/rejected": -3.550278425216675, "step": 1158 }, { "epoch": 0.18, "learning_rate": 1.3297276829711084e-05, "logits/chosen": -2.4386987686157227, "logits/rejected": -2.721773386001587, "logps/chosen": -322.5980224609375, "logps/rejected": -444.034912109375, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": -0.47231751680374146, "rewards/margins": 2.8435869216918945, "rewards/rejected": -3.315904140472412, "step": 1159 }, { "epoch": 0.18, "learning_rate": 1.3296543389179936e-05, "logits/chosen": -2.3905253410339355, "logits/rejected": -2.657712459564209, "logps/chosen": -161.5948028564453, "logps/rejected": -249.79315185546875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 0.7240622043609619, "rewards/margins": 4.721963882446289, "rewards/rejected": -3.9979019165039062, "step": 1160 }, { "epoch": 0.18, "learning_rate": 1.3295809948648788e-05, "logits/chosen": -1.9989992380142212, "logits/rejected": -2.985050916671753, "logps/chosen": -206.4376220703125, "logps/rejected": -536.3961181640625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.6940994262695312, "rewards/margins": 5.643651008605957, "rewards/rejected": -4.949551582336426, "step": 1161 }, { "epoch": 0.18, "learning_rate": 1.329507650811764e-05, "logits/chosen": -2.6943511962890625, "logits/rejected": -3.245903730392456, "logps/chosen": -8.296448707580566, "logps/rejected": -130.3505401611328, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.8789420127868652, "rewards/margins": 4.99183464050293, "rewards/rejected": -4.1128926277160645, "step": 1162 }, { "epoch": 0.18, "learning_rate": 1.3294343067586492e-05, "logits/chosen": -1.8741987943649292, "logits/rejected": -2.6883339881896973, "logps/chosen": -303.0508728027344, "logps/rejected": -388.7236328125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.7954552173614502, "rewards/margins": 5.7331767082214355, "rewards/rejected": -7.528632164001465, "step": 1163 }, { "epoch": 0.18, "learning_rate": 1.3293609627055346e-05, "logits/chosen": -1.8160710334777832, "logits/rejected": -3.0764431953430176, "logps/chosen": -48.10641098022461, "logps/rejected": -370.3319091796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.050585366785526276, "rewards/margins": 7.255739688873291, "rewards/rejected": -7.2051544189453125, "step": 1164 }, { "epoch": 0.18, "learning_rate": 1.3292876186524197e-05, "logits/chosen": -2.6851699352264404, "logits/rejected": -3.066819190979004, "logps/chosen": -367.84759521484375, "logps/rejected": -542.0591430664062, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.4899642765522003, "rewards/margins": 5.357367038726807, "rewards/rejected": -5.8473310470581055, "step": 1165 }, { "epoch": 0.18, "learning_rate": 1.329214274599305e-05, "logits/chosen": -2.6834707260131836, "logits/rejected": -1.3797894716262817, "logps/chosen": -350.28009033203125, "logps/rejected": -167.67819213867188, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -0.6640658974647522, "rewards/margins": 3.2586255073547363, "rewards/rejected": -3.9226913452148438, "step": 1166 }, { "epoch": 0.18, "learning_rate": 1.3291409305461901e-05, "logits/chosen": -2.0998623371124268, "logits/rejected": -3.14068865776062, "logps/chosen": -103.14692687988281, "logps/rejected": -316.60302734375, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": 0.5452917218208313, "rewards/margins": 4.279719352722168, "rewards/rejected": -3.7344276905059814, "step": 1167 }, { "epoch": 0.18, "learning_rate": 1.3290675864930753e-05, "logits/chosen": -2.6548643112182617, "logits/rejected": -1.8202670812606812, "logps/chosen": -489.9671936035156, "logps/rejected": -386.6493835449219, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.623544692993164, "rewards/margins": 5.046148300170898, "rewards/rejected": -6.6696929931640625, "step": 1168 }, { "epoch": 0.18, "learning_rate": 1.3289942424399605e-05, "logits/chosen": -3.2334342002868652, "logits/rejected": -2.6650407314300537, "logps/chosen": -162.44439697265625, "logps/rejected": -64.5323257446289, "loss": 1.7807, "rewards/accuracies": 0.5, "rewards/chosen": -0.7571316361427307, "rewards/margins": 0.3244359493255615, "rewards/rejected": -1.0815675258636475, "step": 1169 }, { "epoch": 0.18, "learning_rate": 1.3289208983868457e-05, "logits/chosen": -1.6802977323532104, "logits/rejected": -2.608673572540283, "logps/chosen": -104.67498016357422, "logps/rejected": -296.63970947265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.378199964761734, "rewards/margins": 7.25214958190918, "rewards/rejected": -6.873950004577637, "step": 1170 }, { "epoch": 0.18, "learning_rate": 1.328847554333731e-05, "logits/chosen": -2.7938783168792725, "logits/rejected": -1.9602493047714233, "logps/chosen": -605.42626953125, "logps/rejected": -314.9354553222656, "loss": 2.0451, "rewards/accuracies": 0.5, "rewards/chosen": -2.208439826965332, "rewards/margins": 0.5972237586975098, "rewards/rejected": -2.8056633472442627, "step": 1171 }, { "epoch": 0.18, "learning_rate": 1.3287742102806162e-05, "logits/chosen": -2.716970920562744, "logits/rejected": -2.141479253768921, "logps/chosen": -235.2631378173828, "logps/rejected": -191.63705444335938, "loss": 4.9566, "rewards/accuracies": 0.5, "rewards/chosen": -4.939653396606445, "rewards/margins": -2.6936612129211426, "rewards/rejected": -2.245992422103882, "step": 1172 }, { "epoch": 0.18, "learning_rate": 1.3287008662275016e-05, "logits/chosen": -2.7025668621063232, "logits/rejected": -2.9797353744506836, "logps/chosen": -129.09042358398438, "logps/rejected": -264.5413818359375, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 0.024767279624938965, "rewards/margins": 4.904097557067871, "rewards/rejected": -4.879330635070801, "step": 1173 }, { "epoch": 0.18, "learning_rate": 1.3286275221743868e-05, "logits/chosen": -2.113853931427002, "logits/rejected": -2.4369771480560303, "logps/chosen": -43.68756103515625, "logps/rejected": -95.97279357910156, "loss": 0.6651, "rewards/accuracies": 0.5, "rewards/chosen": 0.45147761702537537, "rewards/margins": 1.6345584392547607, "rewards/rejected": -1.1830809116363525, "step": 1174 }, { "epoch": 0.18, "learning_rate": 1.328554178121272e-05, "logits/chosen": -1.9042714834213257, "logits/rejected": -2.5777318477630615, "logps/chosen": -595.7745971679688, "logps/rejected": -495.60919189453125, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.8938011527061462, "rewards/margins": 4.605475425720215, "rewards/rejected": -5.499276638031006, "step": 1175 }, { "epoch": 0.18, "learning_rate": 1.3284808340681571e-05, "logits/chosen": -2.7400882244110107, "logits/rejected": -3.150320053100586, "logps/chosen": -353.7742004394531, "logps/rejected": -355.96234130859375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.27557188272476196, "rewards/margins": 5.93972110748291, "rewards/rejected": -6.215292930603027, "step": 1176 }, { "epoch": 0.18, "learning_rate": 1.3284074900150423e-05, "logits/chosen": -2.6196587085723877, "logits/rejected": -2.904446840286255, "logps/chosen": -782.9763793945312, "logps/rejected": -713.1587524414062, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.9145264625549316, "rewards/margins": 7.046897888183594, "rewards/rejected": -9.961423873901367, "step": 1177 }, { "epoch": 0.18, "learning_rate": 1.3283341459619275e-05, "logits/chosen": -1.8706313371658325, "logits/rejected": -2.938495397567749, "logps/chosen": -58.56990432739258, "logps/rejected": -319.25701904296875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 0.9259450435638428, "rewards/margins": 4.623016357421875, "rewards/rejected": -3.697071075439453, "step": 1178 }, { "epoch": 0.18, "learning_rate": 1.3282608019088127e-05, "logits/chosen": -2.984684467315674, "logits/rejected": -2.182413101196289, "logps/chosen": -846.7588500976562, "logps/rejected": -511.19195556640625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.2282257080078125, "rewards/margins": 7.321464538574219, "rewards/rejected": -7.549690246582031, "step": 1179 }, { "epoch": 0.18, "learning_rate": 1.3281874578556979e-05, "logits/chosen": -1.8736926317214966, "logits/rejected": -3.1691930294036865, "logps/chosen": -73.18768310546875, "logps/rejected": -413.7084655761719, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": -0.42878228425979614, "rewards/margins": 2.9206485748291016, "rewards/rejected": -3.349431037902832, "step": 1180 }, { "epoch": 0.18, "learning_rate": 1.3281141138025833e-05, "logits/chosen": -2.165872097015381, "logits/rejected": -2.3360753059387207, "logps/chosen": -103.07272338867188, "logps/rejected": -254.23428344726562, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 0.7221584320068359, "rewards/margins": 4.945914268493652, "rewards/rejected": -4.223755836486816, "step": 1181 }, { "epoch": 0.18, "learning_rate": 1.3280407697494684e-05, "logits/chosen": -1.8749560117721558, "logits/rejected": -3.018562078475952, "logps/chosen": -68.07160949707031, "logps/rejected": -411.8103332519531, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": 0.2546237111091614, "rewards/margins": 4.11604642868042, "rewards/rejected": -3.8614230155944824, "step": 1182 }, { "epoch": 0.18, "learning_rate": 1.3279674256963536e-05, "logits/chosen": -2.8283278942108154, "logits/rejected": -2.9948534965515137, "logps/chosen": -31.162879943847656, "logps/rejected": -122.17379760742188, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 0.7860854268074036, "rewards/margins": 4.507462501525879, "rewards/rejected": -3.721376895904541, "step": 1183 }, { "epoch": 0.18, "learning_rate": 1.3278940816432388e-05, "logits/chosen": -2.916489362716675, "logits/rejected": -2.7042906284332275, "logps/chosen": -123.53717803955078, "logps/rejected": -164.74404907226562, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.09893418103456497, "rewards/margins": 5.828958988189697, "rewards/rejected": -5.730024814605713, "step": 1184 }, { "epoch": 0.18, "learning_rate": 1.327820737590124e-05, "logits/chosen": -2.7188026905059814, "logits/rejected": -2.8206210136413574, "logps/chosen": -176.11192321777344, "logps/rejected": -162.36431884765625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 0.27429264783859253, "rewards/margins": 4.7994704246521, "rewards/rejected": -4.525177955627441, "step": 1185 }, { "epoch": 0.18, "learning_rate": 1.3277473935370092e-05, "logits/chosen": -2.702073335647583, "logits/rejected": -2.141441583633423, "logps/chosen": -265.59033203125, "logps/rejected": -44.30518341064453, "loss": 3.2383, "rewards/accuracies": 0.0, "rewards/chosen": -3.701549530029297, "rewards/margins": -3.149066925048828, "rewards/rejected": -0.5524827241897583, "step": 1186 }, { "epoch": 0.18, "learning_rate": 1.3276740494838944e-05, "logits/chosen": -0.8077782392501831, "logits/rejected": -2.9068188667297363, "logps/chosen": -69.33827209472656, "logps/rejected": -436.08758544921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 1.186530590057373, "rewards/margins": 8.186651229858398, "rewards/rejected": -7.000120162963867, "step": 1187 }, { "epoch": 0.18, "learning_rate": 1.3276007054307796e-05, "logits/chosen": -1.8346675634384155, "logits/rejected": -2.985454797744751, "logps/chosen": -156.0490264892578, "logps/rejected": -349.56304931640625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.5532171726226807, "rewards/margins": 5.352071762084961, "rewards/rejected": -5.9052886962890625, "step": 1188 }, { "epoch": 0.18, "learning_rate": 1.3275273613776648e-05, "logits/chosen": -1.9504238367080688, "logits/rejected": -2.637779474258423, "logps/chosen": -56.19272994995117, "logps/rejected": -157.29115295410156, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.1490306854248047, "rewards/margins": 4.983644485473633, "rewards/rejected": -5.1326751708984375, "step": 1189 }, { "epoch": 0.19, "learning_rate": 1.3274540173245501e-05, "logits/chosen": -3.078404188156128, "logits/rejected": -2.374258518218994, "logps/chosen": -374.73590087890625, "logps/rejected": -294.9012145996094, "loss": 3.9435, "rewards/accuracies": 0.5, "rewards/chosen": -3.297144889831543, "rewards/margins": -2.7650675773620605, "rewards/rejected": -0.5320770144462585, "step": 1190 }, { "epoch": 0.19, "learning_rate": 1.3273806732714353e-05, "logits/chosen": -2.4543726444244385, "logits/rejected": -2.9219958782196045, "logps/chosen": -119.73599243164062, "logps/rejected": -236.44210815429688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.23332902789115906, "rewards/margins": 7.066527366638184, "rewards/rejected": -6.833198070526123, "step": 1191 }, { "epoch": 0.19, "learning_rate": 1.3273073292183205e-05, "logits/chosen": -0.9782890677452087, "logits/rejected": -2.8560500144958496, "logps/chosen": -61.21874237060547, "logps/rejected": -366.6375427246094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 1.122063159942627, "rewards/margins": 6.83958625793457, "rewards/rejected": -5.717523574829102, "step": 1192 }, { "epoch": 0.19, "learning_rate": 1.3272339851652057e-05, "logits/chosen": -1.135124921798706, "logits/rejected": -2.509885311126709, "logps/chosen": -145.00897216796875, "logps/rejected": -323.23822021484375, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.7876964807510376, "rewards/margins": 5.365317344665527, "rewards/rejected": -6.153013706207275, "step": 1193 }, { "epoch": 0.19, "learning_rate": 1.3271606411120909e-05, "logits/chosen": -2.683422803878784, "logits/rejected": -2.5381758213043213, "logps/chosen": -581.067138671875, "logps/rejected": -503.3991394042969, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.4772920310497284, "rewards/margins": 6.036628723144531, "rewards/rejected": -6.513920783996582, "step": 1194 }, { "epoch": 0.19, "learning_rate": 1.327087297058976e-05, "logits/chosen": -2.943779706954956, "logits/rejected": -2.9088363647460938, "logps/chosen": -82.27456665039062, "logps/rejected": -134.73411560058594, "loss": 1.274, "rewards/accuracies": 0.5, "rewards/chosen": -0.9161384105682373, "rewards/margins": 1.9442615509033203, "rewards/rejected": -2.8604001998901367, "step": 1195 }, { "epoch": 0.19, "learning_rate": 1.3270139530058612e-05, "logits/chosen": -1.9748152494430542, "logits/rejected": -2.507046699523926, "logps/chosen": -387.0390319824219, "logps/rejected": -334.8905029296875, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -1.6183182001113892, "rewards/margins": 4.712032318115234, "rewards/rejected": -6.330350875854492, "step": 1196 }, { "epoch": 0.19, "learning_rate": 1.3269406089527464e-05, "logits/chosen": -2.529172658920288, "logits/rejected": -2.209663152694702, "logps/chosen": -635.3244018554688, "logps/rejected": -381.530029296875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.23513954877853394, "rewards/margins": 6.587018013000488, "rewards/rejected": -6.822157382965088, "step": 1197 }, { "epoch": 0.19, "learning_rate": 1.3268672648996316e-05, "logits/chosen": -1.87554132938385, "logits/rejected": -1.787041187286377, "logps/chosen": -835.9124145507812, "logps/rejected": -598.3029174804688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.41042935848236084, "rewards/margins": 6.964144706726074, "rewards/rejected": -7.374574661254883, "step": 1198 }, { "epoch": 0.19, "learning_rate": 1.326793920846517e-05, "logits/chosen": -0.9862815737724304, "logits/rejected": -2.5637600421905518, "logps/chosen": -12.350831985473633, "logps/rejected": -239.22071838378906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.7326380014419556, "rewards/margins": 6.804808616638184, "rewards/rejected": -6.072170257568359, "step": 1199 }, { "epoch": 0.19, "learning_rate": 1.3267205767934022e-05, "logits/chosen": -2.646426200866699, "logits/rejected": -2.91856050491333, "logps/chosen": -268.229736328125, "logps/rejected": -272.0213623046875, "loss": 2.4394, "rewards/accuracies": 0.5, "rewards/chosen": -2.3595101833343506, "rewards/margins": 1.9300477504730225, "rewards/rejected": -4.289557933807373, "step": 1200 }, { "epoch": 0.19, "learning_rate": 1.3266472327402873e-05, "logits/chosen": -2.838683605194092, "logits/rejected": -2.2078089714050293, "logps/chosen": -318.0706787109375, "logps/rejected": -174.5428009033203, "loss": 2.8991, "rewards/accuracies": 0.5, "rewards/chosen": -3.5140151977539062, "rewards/margins": -0.19309186935424805, "rewards/rejected": -3.3209235668182373, "step": 1201 }, { "epoch": 0.19, "learning_rate": 1.3265738886871725e-05, "logits/chosen": -2.2930941581726074, "logits/rejected": -3.0992863178253174, "logps/chosen": -81.75711822509766, "logps/rejected": -414.03424072265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.29186326265335083, "rewards/margins": 7.838309288024902, "rewards/rejected": -7.546446323394775, "step": 1202 }, { "epoch": 0.19, "learning_rate": 1.3265005446340577e-05, "logits/chosen": -2.424783945083618, "logits/rejected": -2.9567041397094727, "logps/chosen": -178.03125, "logps/rejected": -279.0179443359375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 0.7058349847793579, "rewards/margins": 5.386175155639648, "rewards/rejected": -4.680339813232422, "step": 1203 }, { "epoch": 0.19, "learning_rate": 1.3264272005809429e-05, "logits/chosen": -2.040969133377075, "logits/rejected": -2.888239622116089, "logps/chosen": -299.31982421875, "logps/rejected": -329.193115234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7453216910362244, "rewards/margins": 7.659390449523926, "rewards/rejected": -8.404711723327637, "step": 1204 }, { "epoch": 0.19, "learning_rate": 1.3263538565278283e-05, "logits/chosen": -3.268009901046753, "logits/rejected": -2.663731336593628, "logps/chosen": -281.28289794921875, "logps/rejected": -297.0333251953125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.335038959980011, "rewards/margins": 4.031538009643555, "rewards/rejected": -4.3665771484375, "step": 1205 }, { "epoch": 0.19, "learning_rate": 1.3262805124747135e-05, "logits/chosen": -2.5798826217651367, "logits/rejected": -2.9677183628082275, "logps/chosen": -45.43343734741211, "logps/rejected": -228.17520141601562, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": 0.4214872419834137, "rewards/margins": 5.1817450523376465, "rewards/rejected": -4.760257720947266, "step": 1206 }, { "epoch": 0.19, "learning_rate": 1.3262071684215986e-05, "logits/chosen": -1.2925503253936768, "logits/rejected": -2.9171977043151855, "logps/chosen": -172.4393310546875, "logps/rejected": -421.08056640625, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -1.1890288591384888, "rewards/margins": 4.406821250915527, "rewards/rejected": -5.595850467681885, "step": 1207 }, { "epoch": 0.19, "learning_rate": 1.326133824368484e-05, "logits/chosen": -2.8615128993988037, "logits/rejected": -2.8124217987060547, "logps/chosen": -58.74306869506836, "logps/rejected": -183.55612182617188, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 0.5530622005462646, "rewards/margins": 4.006908893585205, "rewards/rejected": -3.4538469314575195, "step": 1208 }, { "epoch": 0.19, "learning_rate": 1.3260604803153692e-05, "logits/chosen": -2.725933313369751, "logits/rejected": -3.0121824741363525, "logps/chosen": -198.44688415527344, "logps/rejected": -297.5378723144531, "loss": 3.3241, "rewards/accuracies": 0.5, "rewards/chosen": -2.6904048919677734, "rewards/margins": -1.7313376665115356, "rewards/rejected": -0.9590672254562378, "step": 1209 }, { "epoch": 0.19, "learning_rate": 1.3259871362622544e-05, "logits/chosen": -1.9431873559951782, "logits/rejected": -2.997156858444214, "logps/chosen": -266.777099609375, "logps/rejected": -431.05596923828125, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": 1.3357712030410767, "rewards/margins": 6.982043266296387, "rewards/rejected": -5.6462721824646, "step": 1210 }, { "epoch": 0.19, "learning_rate": 1.3259137922091396e-05, "logits/chosen": -2.569807291030884, "logits/rejected": -2.9089770317077637, "logps/chosen": -56.80162048339844, "logps/rejected": -244.1953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.5600700378417969, "rewards/margins": 6.8792924880981445, "rewards/rejected": -6.319222450256348, "step": 1211 }, { "epoch": 0.19, "learning_rate": 1.3258404481560248e-05, "logits/chosen": -1.4869409799575806, "logits/rejected": -2.7183310985565186, "logps/chosen": -206.24188232421875, "logps/rejected": -377.8291015625, "loss": 0.2407, "rewards/accuracies": 1.0, "rewards/chosen": -1.000806450843811, "rewards/margins": 3.2555670738220215, "rewards/rejected": -4.256373405456543, "step": 1212 }, { "epoch": 0.19, "learning_rate": 1.32576710410291e-05, "logits/chosen": -2.428501844406128, "logits/rejected": -2.519012212753296, "logps/chosen": -143.4923095703125, "logps/rejected": -103.17120361328125, "loss": 3.3789, "rewards/accuracies": 0.5, "rewards/chosen": -2.7594821453094482, "rewards/margins": -1.1103086471557617, "rewards/rejected": -1.649173378944397, "step": 1213 }, { "epoch": 0.19, "learning_rate": 1.3256937600497951e-05, "logits/chosen": -2.2982418537139893, "logits/rejected": -2.445465564727783, "logps/chosen": -78.29574584960938, "logps/rejected": -208.70562744140625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.46123990416526794, "rewards/margins": 6.091989994049072, "rewards/rejected": -5.6307501792907715, "step": 1214 }, { "epoch": 0.19, "learning_rate": 1.3256204159966803e-05, "logits/chosen": -2.725802183151245, "logits/rejected": -3.0436999797821045, "logps/chosen": -77.89772033691406, "logps/rejected": -155.92779541015625, "loss": 0.2327, "rewards/accuracies": 1.0, "rewards/chosen": -0.8845397233963013, "rewards/margins": 1.8161237239837646, "rewards/rejected": -2.7006635665893555, "step": 1215 }, { "epoch": 0.19, "learning_rate": 1.3255470719435655e-05, "logits/chosen": -2.6515164375305176, "logits/rejected": -2.8184733390808105, "logps/chosen": -381.2913818359375, "logps/rejected": -255.15618896484375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.2679542303085327, "rewards/margins": 5.506906509399414, "rewards/rejected": -6.774860382080078, "step": 1216 }, { "epoch": 0.19, "learning_rate": 1.3254737278904509e-05, "logits/chosen": -2.441636800765991, "logits/rejected": -2.8615024089813232, "logps/chosen": -11.561628341674805, "logps/rejected": -157.8316650390625, "loss": 0.2084, "rewards/accuracies": 1.0, "rewards/chosen": 0.251089870929718, "rewards/margins": 3.798611879348755, "rewards/rejected": -3.5475220680236816, "step": 1217 }, { "epoch": 0.19, "learning_rate": 1.325400383837336e-05, "logits/chosen": -2.7263338565826416, "logits/rejected": -2.7973458766937256, "logps/chosen": -335.9436950683594, "logps/rejected": -270.2527770996094, "loss": 2.8368, "rewards/accuracies": 0.5, "rewards/chosen": -3.625897169113159, "rewards/margins": -1.8736968040466309, "rewards/rejected": -1.7522003650665283, "step": 1218 }, { "epoch": 0.19, "learning_rate": 1.3253270397842212e-05, "logits/chosen": -2.8624751567840576, "logits/rejected": -2.768733263015747, "logps/chosen": -300.08642578125, "logps/rejected": -328.10919189453125, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.7630223035812378, "rewards/margins": 4.685654640197754, "rewards/rejected": -5.448677062988281, "step": 1219 }, { "epoch": 0.19, "learning_rate": 1.3252536957311064e-05, "logits/chosen": -2.800396680831909, "logits/rejected": -2.729621171951294, "logps/chosen": -226.9416046142578, "logps/rejected": -223.3817138671875, "loss": 2.2777, "rewards/accuracies": 0.5, "rewards/chosen": -2.341663360595703, "rewards/margins": 1.0785222053527832, "rewards/rejected": -3.4201855659484863, "step": 1220 }, { "epoch": 0.19, "learning_rate": 1.3251803516779916e-05, "logits/chosen": -2.56384539604187, "logits/rejected": -2.7185726165771484, "logps/chosen": -110.96435546875, "logps/rejected": -208.90896606445312, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -0.1481494903564453, "rewards/margins": 4.789130210876465, "rewards/rejected": -4.93727970123291, "step": 1221 }, { "epoch": 0.19, "learning_rate": 1.3251070076248768e-05, "logits/chosen": -1.762671709060669, "logits/rejected": -2.5111801624298096, "logps/chosen": -134.8767547607422, "logps/rejected": -162.6807403564453, "loss": 1.9211, "rewards/accuracies": 0.5, "rewards/chosen": -1.0487984418869019, "rewards/margins": 1.8421084880828857, "rewards/rejected": -2.890907049179077, "step": 1222 }, { "epoch": 0.19, "learning_rate": 1.325033663571762e-05, "logits/chosen": -2.7975242137908936, "logits/rejected": -1.6855801343917847, "logps/chosen": -411.66033935546875, "logps/rejected": -64.61318969726562, "loss": 5.8348, "rewards/accuracies": 0.0, "rewards/chosen": -5.580802917480469, "rewards/margins": -5.8188605308532715, "rewards/rejected": 0.23805715143680573, "step": 1223 }, { "epoch": 0.19, "learning_rate": 1.3249603195186472e-05, "logits/chosen": -1.0894478559494019, "logits/rejected": -2.982027053833008, "logps/chosen": -47.257347106933594, "logps/rejected": -535.982177734375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": 0.8060541152954102, "rewards/margins": 6.657040596008301, "rewards/rejected": -5.850986480712891, "step": 1224 }, { "epoch": 0.19, "learning_rate": 1.3248869754655324e-05, "logits/chosen": -2.3114802837371826, "logits/rejected": -2.9423580169677734, "logps/chosen": -39.86812973022461, "logps/rejected": -191.7268829345703, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.4472091794013977, "rewards/margins": 6.21339225769043, "rewards/rejected": -5.766182899475098, "step": 1225 }, { "epoch": 0.19, "learning_rate": 1.3248136314124177e-05, "logits/chosen": -2.326650381088257, "logits/rejected": -2.430554151535034, "logps/chosen": -197.93820190429688, "logps/rejected": -217.95408630371094, "loss": 0.5127, "rewards/accuracies": 0.5, "rewards/chosen": -1.299310326576233, "rewards/margins": 2.063915967941284, "rewards/rejected": -3.3632264137268066, "step": 1226 }, { "epoch": 0.19, "learning_rate": 1.3247402873593029e-05, "logits/chosen": -2.97560715675354, "logits/rejected": -3.263488292694092, "logps/chosen": -153.83616638183594, "logps/rejected": -313.88763427734375, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": 0.04480856657028198, "rewards/margins": 3.7565343379974365, "rewards/rejected": -3.7117257118225098, "step": 1227 }, { "epoch": 0.19, "learning_rate": 1.3246669433061881e-05, "logits/chosen": -1.6497957706451416, "logits/rejected": -2.6926803588867188, "logps/chosen": -85.26481628417969, "logps/rejected": -267.0691833496094, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": 0.2242916077375412, "rewards/margins": 4.209516525268555, "rewards/rejected": -3.985225200653076, "step": 1228 }, { "epoch": 0.19, "learning_rate": 1.3245935992530733e-05, "logits/chosen": -2.254246234893799, "logits/rejected": -2.6736347675323486, "logps/chosen": -76.76024627685547, "logps/rejected": -139.47232055664062, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": 0.7813885807991028, "rewards/margins": 4.087948322296143, "rewards/rejected": -3.3065598011016846, "step": 1229 }, { "epoch": 0.19, "learning_rate": 1.3245202551999585e-05, "logits/chosen": -2.4919867515563965, "logits/rejected": -2.6614840030670166, "logps/chosen": -234.55531311035156, "logps/rejected": -257.62890625, "loss": 1.9219, "rewards/accuracies": 0.5, "rewards/chosen": -1.8489333391189575, "rewards/margins": 2.025395393371582, "rewards/rejected": -3.87432861328125, "step": 1230 }, { "epoch": 0.19, "learning_rate": 1.3244469111468437e-05, "logits/chosen": -2.718502998352051, "logits/rejected": -2.6261117458343506, "logps/chosen": -191.35867309570312, "logps/rejected": -267.5596618652344, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.3729734718799591, "rewards/margins": 5.336617469787598, "rewards/rejected": -5.709590911865234, "step": 1231 }, { "epoch": 0.19, "learning_rate": 1.3243735670937288e-05, "logits/chosen": -1.9748175144195557, "logits/rejected": -2.9741642475128174, "logps/chosen": -104.88834381103516, "logps/rejected": -214.36172485351562, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.7532342076301575, "rewards/margins": 5.538797855377197, "rewards/rejected": -4.7855634689331055, "step": 1232 }, { "epoch": 0.19, "learning_rate": 1.324300223040614e-05, "logits/chosen": -2.9679880142211914, "logits/rejected": -2.9291372299194336, "logps/chosen": -153.55592346191406, "logps/rejected": -132.3386688232422, "loss": 2.4935, "rewards/accuracies": 0.5, "rewards/chosen": -1.3189038038253784, "rewards/margins": 0.18107271194458008, "rewards/rejected": -1.499976396560669, "step": 1233 }, { "epoch": 0.19, "learning_rate": 1.3242268789874992e-05, "logits/chosen": -2.321976900100708, "logits/rejected": -3.00242018699646, "logps/chosen": -61.747642517089844, "logps/rejected": -251.54244995117188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.7535264492034912, "rewards/margins": 6.399743556976318, "rewards/rejected": -5.646217346191406, "step": 1234 }, { "epoch": 0.19, "learning_rate": 1.3241535349343846e-05, "logits/chosen": -2.7704973220825195, "logits/rejected": -3.0137009620666504, "logps/chosen": -101.13369750976562, "logps/rejected": -269.0439758300781, "loss": 3.9016, "rewards/accuracies": 0.5, "rewards/chosen": -2.5705809593200684, "rewards/margins": 1.4386239051818848, "rewards/rejected": -4.009205341339111, "step": 1235 }, { "epoch": 0.19, "learning_rate": 1.3240801908812698e-05, "logits/chosen": -2.544111490249634, "logits/rejected": -1.8999308347702026, "logps/chosen": -71.96215057373047, "logps/rejected": -55.51773452758789, "loss": 2.813, "rewards/accuracies": 0.5, "rewards/chosen": -1.567739725112915, "rewards/margins": -1.5291540622711182, "rewards/rejected": -0.038585662841796875, "step": 1236 }, { "epoch": 0.19, "learning_rate": 1.324006846828155e-05, "logits/chosen": -2.5357208251953125, "logits/rejected": -2.57861065864563, "logps/chosen": -383.7989196777344, "logps/rejected": -615.0653076171875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9225890636444092, "rewards/margins": 5.655725479125977, "rewards/rejected": -7.578314304351807, "step": 1237 }, { "epoch": 0.19, "learning_rate": 1.3239335027750401e-05, "logits/chosen": -2.8166074752807617, "logits/rejected": -2.067859649658203, "logps/chosen": -489.4599609375, "logps/rejected": -397.29656982421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9863983392715454, "rewards/margins": 7.942027568817139, "rewards/rejected": -9.928425788879395, "step": 1238 }, { "epoch": 0.19, "learning_rate": 1.3238601587219255e-05, "logits/chosen": -2.393342971801758, "logits/rejected": -2.3950486183166504, "logps/chosen": -132.05572509765625, "logps/rejected": -99.19078063964844, "loss": 2.0851, "rewards/accuracies": 0.5, "rewards/chosen": -1.4666225910186768, "rewards/margins": 1.5096781253814697, "rewards/rejected": -2.9763007164001465, "step": 1239 }, { "epoch": 0.19, "learning_rate": 1.3237868146688107e-05, "logits/chosen": -1.573672890663147, "logits/rejected": -2.6392602920532227, "logps/chosen": -115.11750030517578, "logps/rejected": -302.7143249511719, "loss": 0.8896, "rewards/accuracies": 0.5, "rewards/chosen": -1.455531358718872, "rewards/margins": 3.6616833209991455, "rewards/rejected": -5.117214202880859, "step": 1240 }, { "epoch": 0.19, "learning_rate": 1.3237134706156959e-05, "logits/chosen": -1.827439308166504, "logits/rejected": -2.587066411972046, "logps/chosen": -235.88522338867188, "logps/rejected": -384.39874267578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.605194091796875, "rewards/margins": 7.271670341491699, "rewards/rejected": -8.876864433288574, "step": 1241 }, { "epoch": 0.19, "learning_rate": 1.323640126562581e-05, "logits/chosen": -1.9261518716812134, "logits/rejected": -2.561249017715454, "logps/chosen": -74.57522583007812, "logps/rejected": -83.29408264160156, "loss": 1.2979, "rewards/accuracies": 0.5, "rewards/chosen": -0.3090580105781555, "rewards/margins": 1.3869898319244385, "rewards/rejected": -1.6960479021072388, "step": 1242 }, { "epoch": 0.19, "learning_rate": 1.3235667825094663e-05, "logits/chosen": -2.4658877849578857, "logits/rejected": -2.9272687435150146, "logps/chosen": -366.2441711425781, "logps/rejected": -530.6209106445312, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.9954925775527954, "rewards/margins": 4.651545524597168, "rewards/rejected": -6.647038459777832, "step": 1243 }, { "epoch": 0.19, "learning_rate": 1.3234934384563516e-05, "logits/chosen": -2.95327091217041, "logits/rejected": -2.9970080852508545, "logps/chosen": -15.63211727142334, "logps/rejected": -61.661376953125, "loss": 0.2282, "rewards/accuracies": 1.0, "rewards/chosen": 0.7783721685409546, "rewards/margins": 2.5797717571258545, "rewards/rejected": -1.8013995885849, "step": 1244 }, { "epoch": 0.19, "learning_rate": 1.3234200944032368e-05, "logits/chosen": -2.305717945098877, "logits/rejected": -3.224027633666992, "logps/chosen": -280.69879150390625, "logps/rejected": -424.7900390625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.171687364578247, "rewards/margins": 5.493927001953125, "rewards/rejected": -6.665614128112793, "step": 1245 }, { "epoch": 0.19, "learning_rate": 1.323346750350122e-05, "logits/chosen": -2.726558208465576, "logits/rejected": -2.9377174377441406, "logps/chosen": -75.62490844726562, "logps/rejected": -163.75953674316406, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": 0.3784729242324829, "rewards/margins": 3.7560110092163086, "rewards/rejected": -3.377537965774536, "step": 1246 }, { "epoch": 0.19, "learning_rate": 1.3232734062970072e-05, "logits/chosen": -2.623589038848877, "logits/rejected": -3.0255355834960938, "logps/chosen": -20.8353328704834, "logps/rejected": -157.194580078125, "loss": 0.1196, "rewards/accuracies": 1.0, "rewards/chosen": 0.6724736094474792, "rewards/margins": 3.9989168643951416, "rewards/rejected": -3.3264431953430176, "step": 1247 }, { "epoch": 0.19, "learning_rate": 1.3232000622438924e-05, "logits/chosen": -2.358095169067383, "logits/rejected": -2.716697931289673, "logps/chosen": -4.625946521759033, "logps/rejected": -117.36424255371094, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": 1.3341665267944336, "rewards/margins": 4.094964981079102, "rewards/rejected": -2.760798454284668, "step": 1248 }, { "epoch": 0.19, "learning_rate": 1.3231267181907775e-05, "logits/chosen": -2.7024149894714355, "logits/rejected": -2.4867286682128906, "logps/chosen": -340.2561340332031, "logps/rejected": -353.8417663574219, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.06591719388961792, "rewards/margins": 4.770739555358887, "rewards/rejected": -4.8366570472717285, "step": 1249 }, { "epoch": 0.19, "learning_rate": 1.3230533741376627e-05, "logits/chosen": -2.331343650817871, "logits/rejected": -2.6491997241973877, "logps/chosen": -80.89647674560547, "logps/rejected": -197.90814208984375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 1.0110934972763062, "rewards/margins": 5.928106307983398, "rewards/rejected": -4.917012691497803, "step": 1250 }, { "epoch": 0.19, "learning_rate": 1.322980030084548e-05, "logits/chosen": -2.0725479125976562, "logits/rejected": -2.7674355506896973, "logps/chosen": -471.8409729003906, "logps/rejected": -532.972900390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.4461436867713928, "rewards/margins": 7.509094715118408, "rewards/rejected": -7.06295108795166, "step": 1251 }, { "epoch": 0.19, "learning_rate": 1.3229066860314331e-05, "logits/chosen": -0.7156574130058289, "logits/rejected": -2.7102267742156982, "logps/chosen": -182.37759399414062, "logps/rejected": -508.2989196777344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.17639313638210297, "rewards/margins": 8.439218521118164, "rewards/rejected": -8.262825012207031, "step": 1252 }, { "epoch": 0.19, "learning_rate": 1.3228333419783185e-05, "logits/chosen": -2.183595657348633, "logits/rejected": -2.883633613586426, "logps/chosen": -119.98991394042969, "logps/rejected": -181.35995483398438, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 0.19061660766601562, "rewards/margins": 4.390597820281982, "rewards/rejected": -4.199981212615967, "step": 1253 }, { "epoch": 0.2, "learning_rate": 1.3227599979252037e-05, "logits/chosen": -2.3143725395202637, "logits/rejected": -2.802022695541382, "logps/chosen": -188.45947265625, "logps/rejected": -363.2092590332031, "loss": 2.4814, "rewards/accuracies": 0.5, "rewards/chosen": -2.1675267219543457, "rewards/margins": 2.4815165996551514, "rewards/rejected": -4.649043560028076, "step": 1254 }, { "epoch": 0.2, "learning_rate": 1.3226866538720888e-05, "logits/chosen": -2.9228272438049316, "logits/rejected": -2.1526033878326416, "logps/chosen": -63.33734893798828, "logps/rejected": -11.36343002319336, "loss": 2.4963, "rewards/accuracies": 0.5, "rewards/chosen": -0.9076815843582153, "rewards/margins": -1.9244166612625122, "rewards/rejected": 1.0167351961135864, "step": 1255 }, { "epoch": 0.2, "learning_rate": 1.322613309818974e-05, "logits/chosen": -1.837085485458374, "logits/rejected": -3.0078365802764893, "logps/chosen": -214.21934509277344, "logps/rejected": -356.87994384765625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.1617477536201477, "rewards/margins": 5.737484931945801, "rewards/rejected": -5.575736999511719, "step": 1256 }, { "epoch": 0.2, "learning_rate": 1.3225399657658592e-05, "logits/chosen": -2.1335678100585938, "logits/rejected": -1.51492178440094, "logps/chosen": -425.519775390625, "logps/rejected": -461.5597839355469, "loss": 6.784, "rewards/accuracies": 0.0, "rewards/chosen": -8.064987182617188, "rewards/margins": -6.7824554443359375, "rewards/rejected": -1.28253173828125, "step": 1257 }, { "epoch": 0.2, "learning_rate": 1.3224666217127444e-05, "logits/chosen": -2.605832815170288, "logits/rejected": -2.562613010406494, "logps/chosen": -395.9336853027344, "logps/rejected": -428.34112548828125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.03258056938648224, "rewards/margins": 6.34939432144165, "rewards/rejected": -6.381975173950195, "step": 1258 }, { "epoch": 0.2, "learning_rate": 1.3223932776596296e-05, "logits/chosen": -2.9019832611083984, "logits/rejected": -2.6569011211395264, "logps/chosen": -342.1790771484375, "logps/rejected": -384.83294677734375, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.0274795293807983, "rewards/margins": 6.059638977050781, "rewards/rejected": -7.087118625640869, "step": 1259 }, { "epoch": 0.2, "learning_rate": 1.3223199336065148e-05, "logits/chosen": -1.3702185153961182, "logits/rejected": -2.393157720565796, "logps/chosen": -174.84500122070312, "logps/rejected": -304.4967346191406, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -0.014199838042259216, "rewards/margins": 5.471625328063965, "rewards/rejected": -5.485825061798096, "step": 1260 }, { "epoch": 0.2, "learning_rate": 1.3222465895534e-05, "logits/chosen": -2.3763556480407715, "logits/rejected": -1.1857179403305054, "logps/chosen": -272.592041015625, "logps/rejected": -270.3318786621094, "loss": 4.4134, "rewards/accuracies": 0.5, "rewards/chosen": -3.5537943840026855, "rewards/margins": -0.45114946365356445, "rewards/rejected": -3.1026451587677, "step": 1261 }, { "epoch": 0.2, "learning_rate": 1.3221732455002853e-05, "logits/chosen": -2.9077913761138916, "logits/rejected": -2.9186556339263916, "logps/chosen": -87.33212280273438, "logps/rejected": -127.80488586425781, "loss": 0.1686, "rewards/accuracies": 1.0, "rewards/chosen": 0.38443148136138916, "rewards/margins": 1.7410091161727905, "rewards/rejected": -1.3565776348114014, "step": 1262 }, { "epoch": 0.2, "learning_rate": 1.3220999014471705e-05, "logits/chosen": -2.285611867904663, "logits/rejected": -2.5716769695281982, "logps/chosen": -227.18603515625, "logps/rejected": -350.5341796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.15286484360694885, "rewards/margins": 6.670469760894775, "rewards/rejected": -6.517604827880859, "step": 1263 }, { "epoch": 0.2, "learning_rate": 1.3220265573940557e-05, "logits/chosen": -2.564096689224243, "logits/rejected": -2.7278199195861816, "logps/chosen": -487.82855224609375, "logps/rejected": -450.33758544921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.0662285089492798, "rewards/margins": 7.771920204162598, "rewards/rejected": -8.838149070739746, "step": 1264 }, { "epoch": 0.2, "learning_rate": 1.3219532133409409e-05, "logits/chosen": -2.3942928314208984, "logits/rejected": -1.8351283073425293, "logps/chosen": -396.4938049316406, "logps/rejected": -324.24420166015625, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -1.7431362867355347, "rewards/margins": 3.370262622833252, "rewards/rejected": -5.113398551940918, "step": 1265 }, { "epoch": 0.2, "learning_rate": 1.321879869287826e-05, "logits/chosen": -2.1373679637908936, "logits/rejected": -2.6051812171936035, "logps/chosen": -143.64952087402344, "logps/rejected": -321.5439453125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.08445548266172409, "rewards/margins": 6.221137046813965, "rewards/rejected": -6.13668155670166, "step": 1266 }, { "epoch": 0.2, "learning_rate": 1.3218065252347113e-05, "logits/chosen": -2.364794969558716, "logits/rejected": -2.6628124713897705, "logps/chosen": -150.0236053466797, "logps/rejected": -109.81190490722656, "loss": 1.6011, "rewards/accuracies": 0.5, "rewards/chosen": -0.8326351642608643, "rewards/margins": 1.5680092573165894, "rewards/rejected": -2.400644302368164, "step": 1267 }, { "epoch": 0.2, "learning_rate": 1.3217331811815965e-05, "logits/chosen": -2.478421211242676, "logits/rejected": -2.3167788982391357, "logps/chosen": -147.81045532226562, "logps/rejected": -164.8692169189453, "loss": 2.8398, "rewards/accuracies": 0.5, "rewards/chosen": -1.7689731121063232, "rewards/margins": 0.3006927967071533, "rewards/rejected": -2.0696659088134766, "step": 1268 }, { "epoch": 0.2, "learning_rate": 1.3216598371284816e-05, "logits/chosen": -2.2211971282958984, "logits/rejected": -2.738945960998535, "logps/chosen": -37.291866302490234, "logps/rejected": -192.71841430664062, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": 1.0068442821502686, "rewards/margins": 3.5615463256835938, "rewards/rejected": -2.554702043533325, "step": 1269 }, { "epoch": 0.2, "learning_rate": 1.3215864930753668e-05, "logits/chosen": -2.45394229888916, "logits/rejected": -1.8967283964157104, "logps/chosen": -130.44375610351562, "logps/rejected": -261.74676513671875, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 0.39939039945602417, "rewards/margins": 4.152374267578125, "rewards/rejected": -3.752984046936035, "step": 1270 }, { "epoch": 0.2, "learning_rate": 1.3215131490222522e-05, "logits/chosen": -2.9991674423217773, "logits/rejected": -2.6293299198150635, "logps/chosen": -300.5726013183594, "logps/rejected": -89.98616790771484, "loss": 4.4158, "rewards/accuracies": 0.0, "rewards/chosen": -4.480103492736816, "rewards/margins": -4.377450942993164, "rewards/rejected": -0.10265231132507324, "step": 1271 }, { "epoch": 0.2, "learning_rate": 1.3214398049691374e-05, "logits/chosen": -1.6329643726348877, "logits/rejected": -2.7315940856933594, "logps/chosen": -40.751548767089844, "logps/rejected": -159.48797607421875, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 0.7304903268814087, "rewards/margins": 4.405143737792969, "rewards/rejected": -3.6746535301208496, "step": 1272 }, { "epoch": 0.2, "learning_rate": 1.3213664609160227e-05, "logits/chosen": -2.3396544456481934, "logits/rejected": -3.222691297531128, "logps/chosen": -109.35726928710938, "logps/rejected": -346.3931884765625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.6902080774307251, "rewards/margins": 5.805192947387695, "rewards/rejected": -5.11498498916626, "step": 1273 }, { "epoch": 0.2, "learning_rate": 1.321293116862908e-05, "logits/chosen": -1.9990448951721191, "logits/rejected": -2.648515462875366, "logps/chosen": -449.4840087890625, "logps/rejected": -451.7430419921875, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.6868488192558289, "rewards/margins": 5.812704086303711, "rewards/rejected": -6.499553203582764, "step": 1274 }, { "epoch": 0.2, "learning_rate": 1.3212197728097931e-05, "logits/chosen": -2.6735544204711914, "logits/rejected": -2.3910670280456543, "logps/chosen": -235.72528076171875, "logps/rejected": -223.8653106689453, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.3813415765762329, "rewards/margins": 4.993819236755371, "rewards/rejected": -4.612477779388428, "step": 1275 }, { "epoch": 0.2, "learning_rate": 1.3211464287566783e-05, "logits/chosen": -2.5290682315826416, "logits/rejected": -3.2064406871795654, "logps/chosen": -332.2897033691406, "logps/rejected": -485.48358154296875, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": 0.8075302839279175, "rewards/margins": 6.156251430511475, "rewards/rejected": -5.348721504211426, "step": 1276 }, { "epoch": 0.2, "learning_rate": 1.3210730847035635e-05, "logits/chosen": -1.5730795860290527, "logits/rejected": -2.6077795028686523, "logps/chosen": -168.05874633789062, "logps/rejected": -162.66616821289062, "loss": 4.235, "rewards/accuracies": 0.5, "rewards/chosen": -3.5815956592559814, "rewards/margins": -2.283561944961548, "rewards/rejected": -1.2980337142944336, "step": 1277 }, { "epoch": 0.2, "learning_rate": 1.3209997406504487e-05, "logits/chosen": -3.0318026542663574, "logits/rejected": -3.0186522006988525, "logps/chosen": -213.04739379882812, "logps/rejected": -281.2795104980469, "loss": 2.0613, "rewards/accuracies": 0.5, "rewards/chosen": -1.3806442022323608, "rewards/margins": 2.682743787765503, "rewards/rejected": -4.063388347625732, "step": 1278 }, { "epoch": 0.2, "learning_rate": 1.320926396597334e-05, "logits/chosen": -2.438072443008423, "logits/rejected": -2.8899710178375244, "logps/chosen": -202.03073120117188, "logps/rejected": -316.112060546875, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": 0.9270744323730469, "rewards/margins": 5.830729007720947, "rewards/rejected": -4.9036545753479, "step": 1279 }, { "epoch": 0.2, "learning_rate": 1.3208530525442192e-05, "logits/chosen": -1.654101014137268, "logits/rejected": -2.5991172790527344, "logps/chosen": -36.46723937988281, "logps/rejected": -195.18673706054688, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 0.3063611090183258, "rewards/margins": 5.227502822875977, "rewards/rejected": -4.921141624450684, "step": 1280 }, { "epoch": 0.2, "learning_rate": 1.3207797084911044e-05, "logits/chosen": -2.287586212158203, "logits/rejected": -3.1727840900421143, "logps/chosen": -101.16744232177734, "logps/rejected": -385.4173278808594, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.5758342146873474, "rewards/margins": 7.039432525634766, "rewards/rejected": -6.463598728179932, "step": 1281 }, { "epoch": 0.2, "learning_rate": 1.3207063644379896e-05, "logits/chosen": -2.062209367752075, "logits/rejected": -2.8850138187408447, "logps/chosen": -111.16476440429688, "logps/rejected": -432.47930908203125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.18669815361499786, "rewards/margins": 6.467708587646484, "rewards/rejected": -6.281010627746582, "step": 1282 }, { "epoch": 0.2, "learning_rate": 1.3206330203848748e-05, "logits/chosen": -3.123351812362671, "logits/rejected": -2.331808090209961, "logps/chosen": -329.32568359375, "logps/rejected": -137.50559997558594, "loss": 7.2163, "rewards/accuracies": 0.0, "rewards/chosen": -6.726297378540039, "rewards/margins": -7.215456008911133, "rewards/rejected": 0.4891592860221863, "step": 1283 }, { "epoch": 0.2, "learning_rate": 1.32055967633176e-05, "logits/chosen": -1.641770362854004, "logits/rejected": -2.6632094383239746, "logps/chosen": -150.75637817382812, "logps/rejected": -255.47264099121094, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 0.14890404045581818, "rewards/margins": 4.268247604370117, "rewards/rejected": -4.1193437576293945, "step": 1284 }, { "epoch": 0.2, "learning_rate": 1.3204863322786452e-05, "logits/chosen": -1.6764247417449951, "logits/rejected": -3.1205005645751953, "logps/chosen": -156.50601196289062, "logps/rejected": -562.6407470703125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.3055320680141449, "rewards/margins": 7.255005836486816, "rewards/rejected": -6.949473857879639, "step": 1285 }, { "epoch": 0.2, "learning_rate": 1.3204129882255303e-05, "logits/chosen": -1.479560375213623, "logits/rejected": -2.625645160675049, "logps/chosen": -72.82373046875, "logps/rejected": -284.9686279296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.19165775179862976, "rewards/margins": 7.139736175537109, "rewards/rejected": -6.948078155517578, "step": 1286 }, { "epoch": 0.2, "learning_rate": 1.3203396441724155e-05, "logits/chosen": -2.8483264446258545, "logits/rejected": -2.9057817459106445, "logps/chosen": -318.5042419433594, "logps/rejected": -206.14898681640625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.4813937246799469, "rewards/margins": 5.947502136230469, "rewards/rejected": -6.428895950317383, "step": 1287 }, { "epoch": 0.2, "learning_rate": 1.3202663001193009e-05, "logits/chosen": -2.8891243934631348, "logits/rejected": -2.750020742416382, "logps/chosen": -96.55284118652344, "logps/rejected": -91.44393157958984, "loss": 1.4825, "rewards/accuracies": 0.5, "rewards/chosen": -0.7928280234336853, "rewards/margins": 1.264470100402832, "rewards/rejected": -2.057297945022583, "step": 1288 }, { "epoch": 0.2, "learning_rate": 1.320192956066186e-05, "logits/chosen": -1.7074217796325684, "logits/rejected": -2.9919490814208984, "logps/chosen": -98.07536315917969, "logps/rejected": -426.38836669921875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.8799282312393188, "rewards/margins": 6.093809127807617, "rewards/rejected": -5.213881015777588, "step": 1289 }, { "epoch": 0.2, "learning_rate": 1.3201196120130713e-05, "logits/chosen": -2.6780340671539307, "logits/rejected": -2.840546131134033, "logps/chosen": -68.75424194335938, "logps/rejected": -167.6358184814453, "loss": 1.9105, "rewards/accuracies": 0.5, "rewards/chosen": -0.8696403503417969, "rewards/margins": 1.7401881217956543, "rewards/rejected": -2.609828472137451, "step": 1290 }, { "epoch": 0.2, "learning_rate": 1.3200462679599565e-05, "logits/chosen": -2.182788848876953, "logits/rejected": -2.74381160736084, "logps/chosen": -101.09207153320312, "logps/rejected": -174.25924682617188, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": 0.2182769775390625, "rewards/margins": 3.198859691619873, "rewards/rejected": -2.9805827140808105, "step": 1291 }, { "epoch": 0.2, "learning_rate": 1.3199729239068416e-05, "logits/chosen": -2.6364097595214844, "logits/rejected": -2.7613680362701416, "logps/chosen": -195.63253784179688, "logps/rejected": -197.34095764160156, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": 0.502183198928833, "rewards/margins": 3.389923095703125, "rewards/rejected": -2.887740135192871, "step": 1292 }, { "epoch": 0.2, "learning_rate": 1.3198995798537268e-05, "logits/chosen": -2.2364773750305176, "logits/rejected": -2.8751091957092285, "logps/chosen": -87.17333221435547, "logps/rejected": -257.1287841796875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.4038814604282379, "rewards/margins": 5.271742820739746, "rewards/rejected": -4.867861270904541, "step": 1293 }, { "epoch": 0.2, "learning_rate": 1.319826235800612e-05, "logits/chosen": -1.140309453010559, "logits/rejected": -2.2190215587615967, "logps/chosen": -248.24085998535156, "logps/rejected": -550.3677978515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8149765133857727, "rewards/margins": 7.054296970367432, "rewards/rejected": -7.8692731857299805, "step": 1294 }, { "epoch": 0.2, "learning_rate": 1.3197528917474972e-05, "logits/chosen": -1.1310579776763916, "logits/rejected": -2.2921926975250244, "logps/chosen": -120.9188003540039, "logps/rejected": -475.58355712890625, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": 0.05984768271446228, "rewards/margins": 4.099419116973877, "rewards/rejected": -4.039571762084961, "step": 1295 }, { "epoch": 0.2, "learning_rate": 1.3196795476943824e-05, "logits/chosen": -2.9884302616119385, "logits/rejected": -2.954448938369751, "logps/chosen": -1134.909423828125, "logps/rejected": -922.4879150390625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.0771453380584717, "rewards/margins": 6.099728584289551, "rewards/rejected": -8.176874160766602, "step": 1296 }, { "epoch": 0.2, "learning_rate": 1.3196062036412678e-05, "logits/chosen": -2.692736864089966, "logits/rejected": -2.0409774780273438, "logps/chosen": -445.2586669921875, "logps/rejected": -358.061767578125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.32690316438674927, "rewards/margins": 5.913017272949219, "rewards/rejected": -5.586113929748535, "step": 1297 }, { "epoch": 0.2, "learning_rate": 1.319532859588153e-05, "logits/chosen": -1.534389853477478, "logits/rejected": -2.869370698928833, "logps/chosen": -113.21987915039062, "logps/rejected": -424.6004333496094, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.8608084917068481, "rewards/margins": 8.074235916137695, "rewards/rejected": -7.213427543640137, "step": 1298 }, { "epoch": 0.2, "learning_rate": 1.3194595155350381e-05, "logits/chosen": -2.3852593898773193, "logits/rejected": -3.0201594829559326, "logps/chosen": -72.48583984375, "logps/rejected": -334.3287048339844, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.4209880828857422, "rewards/margins": 4.979487419128418, "rewards/rejected": -4.558499336242676, "step": 1299 }, { "epoch": 0.2, "learning_rate": 1.3193861714819233e-05, "logits/chosen": -2.145369291305542, "logits/rejected": -2.752715826034546, "logps/chosen": -273.73223876953125, "logps/rejected": -285.52496337890625, "loss": 3.3791, "rewards/accuracies": 0.5, "rewards/chosen": -3.6959781646728516, "rewards/margins": -0.4170033931732178, "rewards/rejected": -3.278974771499634, "step": 1300 }, { "epoch": 0.2, "learning_rate": 1.3193128274288085e-05, "logits/chosen": -1.6625698804855347, "logits/rejected": -3.203040599822998, "logps/chosen": -118.63475036621094, "logps/rejected": -425.04119873046875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 0.8227001428604126, "rewards/margins": 6.302340507507324, "rewards/rejected": -5.479640007019043, "step": 1301 }, { "epoch": 0.2, "learning_rate": 1.3192394833756937e-05, "logits/chosen": -0.9299317598342896, "logits/rejected": -2.2960314750671387, "logps/chosen": -220.73727416992188, "logps/rejected": -416.47564697265625, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.17987290024757385, "rewards/margins": 6.013660907745361, "rewards/rejected": -6.193533897399902, "step": 1302 }, { "epoch": 0.2, "learning_rate": 1.3191661393225789e-05, "logits/chosen": -2.423366069793701, "logits/rejected": -2.839940071105957, "logps/chosen": -118.8989028930664, "logps/rejected": -405.422607421875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.2965284287929535, "rewards/margins": 7.406825065612793, "rewards/rejected": -7.110296726226807, "step": 1303 }, { "epoch": 0.2, "learning_rate": 1.319092795269464e-05, "logits/chosen": -1.0097346305847168, "logits/rejected": -2.8455183506011963, "logps/chosen": -121.34193420410156, "logps/rejected": -316.2539367675781, "loss": 4.8252, "rewards/accuracies": 0.5, "rewards/chosen": -4.217402458190918, "rewards/margins": -0.7819986343383789, "rewards/rejected": -3.435403347015381, "step": 1304 }, { "epoch": 0.2, "learning_rate": 1.3190194512163494e-05, "logits/chosen": -2.1264472007751465, "logits/rejected": -2.691629409790039, "logps/chosen": -163.78074645996094, "logps/rejected": -320.96087646484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.06299248337745667, "rewards/margins": 7.599143028259277, "rewards/rejected": -7.662136077880859, "step": 1305 }, { "epoch": 0.2, "learning_rate": 1.3189461071632346e-05, "logits/chosen": -2.3968825340270996, "logits/rejected": -2.642380714416504, "logps/chosen": -154.57017517089844, "logps/rejected": -254.6812286376953, "loss": 3.0155, "rewards/accuracies": 0.5, "rewards/chosen": -2.9968836307525635, "rewards/margins": 1.5590341091156006, "rewards/rejected": -4.555917739868164, "step": 1306 }, { "epoch": 0.2, "learning_rate": 1.31887276311012e-05, "logits/chosen": -2.9996843338012695, "logits/rejected": -2.0572450160980225, "logps/chosen": -556.1331787109375, "logps/rejected": -376.1025390625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.01412808895111084, "rewards/margins": 4.794622898101807, "rewards/rejected": -4.780494689941406, "step": 1307 }, { "epoch": 0.2, "learning_rate": 1.3187994190570052e-05, "logits/chosen": -2.5907673835754395, "logits/rejected": -2.9719629287719727, "logps/chosen": -114.48434448242188, "logps/rejected": -288.04522705078125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.6889229416847229, "rewards/margins": 7.150026321411133, "rewards/rejected": -6.461103439331055, "step": 1308 }, { "epoch": 0.2, "learning_rate": 1.3187260750038903e-05, "logits/chosen": -2.928407907485962, "logits/rejected": -3.183436155319214, "logps/chosen": -281.1942443847656, "logps/rejected": -287.57354736328125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.793857991695404, "rewards/margins": 5.682736873626709, "rewards/rejected": -6.476594924926758, "step": 1309 }, { "epoch": 0.2, "learning_rate": 1.3186527309507755e-05, "logits/chosen": -2.7273612022399902, "logits/rejected": -2.684093713760376, "logps/chosen": -245.72598266601562, "logps/rejected": -324.0775451660156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 1.146072506904602, "rewards/margins": 7.4286699295043945, "rewards/rejected": -6.282597541809082, "step": 1310 }, { "epoch": 0.2, "learning_rate": 1.3185793868976607e-05, "logits/chosen": -2.560703754425049, "logits/rejected": -2.769030809402466, "logps/chosen": -141.05596923828125, "logps/rejected": -326.19244384765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.08074722439050674, "rewards/margins": 7.684211254119873, "rewards/rejected": -7.603464126586914, "step": 1311 }, { "epoch": 0.2, "learning_rate": 1.3185060428445459e-05, "logits/chosen": -0.8407137393951416, "logits/rejected": -2.696544885635376, "logps/chosen": -113.52625274658203, "logps/rejected": -307.5812072753906, "loss": 1.5863, "rewards/accuracies": 0.5, "rewards/chosen": -1.4151346683502197, "rewards/margins": 3.563264846801758, "rewards/rejected": -4.978399753570557, "step": 1312 }, { "epoch": 0.2, "learning_rate": 1.3184326987914311e-05, "logits/chosen": -3.056535482406616, "logits/rejected": -2.190776824951172, "logps/chosen": -395.73046875, "logps/rejected": -365.87127685546875, "loss": 3.791, "rewards/accuracies": 0.5, "rewards/chosen": -4.272261142730713, "rewards/margins": -1.1004259586334229, "rewards/rejected": -3.17183518409729, "step": 1313 }, { "epoch": 0.2, "learning_rate": 1.3183593547383163e-05, "logits/chosen": -2.1162514686584473, "logits/rejected": -2.8709373474121094, "logps/chosen": -54.85470199584961, "logps/rejected": -208.23316955566406, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.10983524471521378, "rewards/margins": 4.75172233581543, "rewards/rejected": -4.861557960510254, "step": 1314 }, { "epoch": 0.2, "learning_rate": 1.3182860106852016e-05, "logits/chosen": -2.767360210418701, "logits/rejected": -2.2881240844726562, "logps/chosen": -224.9978790283203, "logps/rejected": -298.01324462890625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.8208656907081604, "rewards/margins": 4.189742088317871, "rewards/rejected": -5.010607719421387, "step": 1315 }, { "epoch": 0.2, "learning_rate": 1.3182126666320868e-05, "logits/chosen": -1.7582061290740967, "logits/rejected": -0.9719327092170715, "logps/chosen": -1043.166748046875, "logps/rejected": -329.5370788574219, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": -2.496586322784424, "rewards/margins": 4.6201090812683105, "rewards/rejected": -7.116695404052734, "step": 1316 }, { "epoch": 0.2, "learning_rate": 1.318139322578972e-05, "logits/chosen": -2.3593783378601074, "logits/rejected": -2.2263576984405518, "logps/chosen": -223.5624237060547, "logps/rejected": -214.0685577392578, "loss": 2.6292, "rewards/accuracies": 0.5, "rewards/chosen": -2.0884079933166504, "rewards/margins": -0.2003488540649414, "rewards/rejected": -1.8880592584609985, "step": 1317 }, { "epoch": 0.2, "learning_rate": 1.3180659785258572e-05, "logits/chosen": -2.593968152999878, "logits/rejected": -1.945716142654419, "logps/chosen": -296.7610168457031, "logps/rejected": -361.8435363769531, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.1994576454162598, "rewards/margins": 5.277767181396484, "rewards/rejected": -6.477224826812744, "step": 1318 }, { "epoch": 0.21, "learning_rate": 1.3179926344727424e-05, "logits/chosen": -2.7083213329315186, "logits/rejected": -3.126575231552124, "logps/chosen": -2.543703079223633, "logps/rejected": -86.40524291992188, "loss": 0.0515, "rewards/accuracies": 1.0, "rewards/chosen": 1.2198472023010254, "rewards/margins": 3.325162172317505, "rewards/rejected": -2.1053149700164795, "step": 1319 }, { "epoch": 0.21, "learning_rate": 1.3179192904196276e-05, "logits/chosen": -2.1967506408691406, "logits/rejected": -2.915003776550293, "logps/chosen": -74.71895599365234, "logps/rejected": -403.15679931640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.9471867084503174, "rewards/margins": 6.759482383728027, "rewards/rejected": -5.812295436859131, "step": 1320 }, { "epoch": 0.21, "learning_rate": 1.3178459463665128e-05, "logits/chosen": -2.9245128631591797, "logits/rejected": -2.4337942600250244, "logps/chosen": -231.04937744140625, "logps/rejected": -203.20729064941406, "loss": 2.0884, "rewards/accuracies": 0.5, "rewards/chosen": -2.5438849925994873, "rewards/margins": 0.5081911087036133, "rewards/rejected": -3.0520761013031006, "step": 1321 }, { "epoch": 0.21, "learning_rate": 1.317772602313398e-05, "logits/chosen": -1.6839932203292847, "logits/rejected": -2.8428471088409424, "logps/chosen": -18.83450698852539, "logps/rejected": -355.8360595703125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.580571174621582, "rewards/margins": 7.575027942657471, "rewards/rejected": -6.994456768035889, "step": 1322 }, { "epoch": 0.21, "learning_rate": 1.3176992582602831e-05, "logits/chosen": -1.794219732284546, "logits/rejected": -2.834589719772339, "logps/chosen": -234.96002197265625, "logps/rejected": -490.107177734375, "loss": 0.1652, "rewards/accuracies": 1.0, "rewards/chosen": -1.3825401067733765, "rewards/margins": 2.0782241821289062, "rewards/rejected": -3.4607644081115723, "step": 1323 }, { "epoch": 0.21, "learning_rate": 1.3176259142071685e-05, "logits/chosen": -3.13153338432312, "logits/rejected": -2.9614150524139404, "logps/chosen": -84.44450378417969, "logps/rejected": -97.84735107421875, "loss": 1.3077, "rewards/accuracies": 0.5, "rewards/chosen": -0.6532018184661865, "rewards/margins": 0.3594568967819214, "rewards/rejected": -1.012658715248108, "step": 1324 }, { "epoch": 0.21, "learning_rate": 1.3175525701540537e-05, "logits/chosen": -2.4705636501312256, "logits/rejected": -3.1256043910980225, "logps/chosen": -76.0440902709961, "logps/rejected": -253.88934326171875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 1.2081146240234375, "rewards/margins": 5.277552127838135, "rewards/rejected": -4.069437503814697, "step": 1325 }, { "epoch": 0.21, "learning_rate": 1.3174792261009389e-05, "logits/chosen": -2.483248233795166, "logits/rejected": -2.821629285812378, "logps/chosen": -167.40252685546875, "logps/rejected": -172.51947021484375, "loss": 0.9454, "rewards/accuracies": 0.5, "rewards/chosen": -1.760711669921875, "rewards/margins": 1.693419098854065, "rewards/rejected": -3.4541308879852295, "step": 1326 }, { "epoch": 0.21, "learning_rate": 1.317405882047824e-05, "logits/chosen": -2.6361300945281982, "logits/rejected": -2.9980180263519287, "logps/chosen": -191.8383026123047, "logps/rejected": -357.9113464355469, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.6148780584335327, "rewards/margins": 6.181492805480957, "rewards/rejected": -5.566614627838135, "step": 1327 }, { "epoch": 0.21, "learning_rate": 1.3173325379947093e-05, "logits/chosen": -2.777973175048828, "logits/rejected": -2.9891111850738525, "logps/chosen": -45.0355224609375, "logps/rejected": -138.643310546875, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": 0.8768892288208008, "rewards/margins": 3.7521111965179443, "rewards/rejected": -2.8752219676971436, "step": 1328 }, { "epoch": 0.21, "learning_rate": 1.3172591939415944e-05, "logits/chosen": -2.7316384315490723, "logits/rejected": -1.8867417573928833, "logps/chosen": -478.2593994140625, "logps/rejected": -215.545166015625, "loss": 3.0023, "rewards/accuracies": 0.5, "rewards/chosen": -2.083235263824463, "rewards/margins": 0.49777817726135254, "rewards/rejected": -2.5810134410858154, "step": 1329 }, { "epoch": 0.21, "learning_rate": 1.3171858498884796e-05, "logits/chosen": -2.2759523391723633, "logits/rejected": -2.9596340656280518, "logps/chosen": -115.36077117919922, "logps/rejected": -369.4348449707031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.8470625281333923, "rewards/margins": 7.473345756530762, "rewards/rejected": -6.626283645629883, "step": 1330 }, { "epoch": 0.21, "learning_rate": 1.3171125058353648e-05, "logits/chosen": -2.8080201148986816, "logits/rejected": -3.2190451622009277, "logps/chosen": -108.5127182006836, "logps/rejected": -168.68809509277344, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": 0.09118843078613281, "rewards/margins": 3.7644569873809814, "rewards/rejected": -3.6732685565948486, "step": 1331 }, { "epoch": 0.21, "learning_rate": 1.31703916178225e-05, "logits/chosen": -2.6051297187805176, "logits/rejected": -3.1181178092956543, "logps/chosen": -184.84288024902344, "logps/rejected": -348.6978759765625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 1.1127772331237793, "rewards/margins": 5.842700004577637, "rewards/rejected": -4.729922771453857, "step": 1332 }, { "epoch": 0.21, "learning_rate": 1.3169658177291354e-05, "logits/chosen": -2.652064800262451, "logits/rejected": -3.046332359313965, "logps/chosen": -159.39137268066406, "logps/rejected": -364.27130126953125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.36600741744041443, "rewards/margins": 5.6542768478393555, "rewards/rejected": -5.28826904296875, "step": 1333 }, { "epoch": 0.21, "learning_rate": 1.3168924736760205e-05, "logits/chosen": -2.145016670227051, "logits/rejected": -3.081878185272217, "logps/chosen": -19.20807647705078, "logps/rejected": -261.8592834472656, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 0.4172581732273102, "rewards/margins": 3.8944435119628906, "rewards/rejected": -3.4771852493286133, "step": 1334 }, { "epoch": 0.21, "learning_rate": 1.3168191296229057e-05, "logits/chosen": -3.145920515060425, "logits/rejected": -2.90295672416687, "logps/chosen": -220.69015502929688, "logps/rejected": -231.02268981933594, "loss": 2.8486, "rewards/accuracies": 0.5, "rewards/chosen": -2.8821754455566406, "rewards/margins": 0.3985450267791748, "rewards/rejected": -3.2807204723358154, "step": 1335 }, { "epoch": 0.21, "learning_rate": 1.316745785569791e-05, "logits/chosen": -1.4477109909057617, "logits/rejected": -1.9003472328186035, "logps/chosen": -119.76239776611328, "logps/rejected": -349.1768493652344, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.2273307740688324, "rewards/margins": 7.188388347625732, "rewards/rejected": -6.961057662963867, "step": 1336 }, { "epoch": 0.21, "learning_rate": 1.3166724415166761e-05, "logits/chosen": -2.6497788429260254, "logits/rejected": -2.4833149909973145, "logps/chosen": -368.0257568359375, "logps/rejected": -265.00848388671875, "loss": 0.0645, "rewards/accuracies": 1.0, "rewards/chosen": -0.23602622747421265, "rewards/margins": 4.831268310546875, "rewards/rejected": -5.067294597625732, "step": 1337 }, { "epoch": 0.21, "learning_rate": 1.3165990974635613e-05, "logits/chosen": -2.699986696243286, "logits/rejected": -2.820601224899292, "logps/chosen": -310.175048828125, "logps/rejected": -351.9319152832031, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -0.44886380434036255, "rewards/margins": 3.651211738586426, "rewards/rejected": -4.100075721740723, "step": 1338 }, { "epoch": 0.21, "learning_rate": 1.3165257534104467e-05, "logits/chosen": -2.9534621238708496, "logits/rejected": -2.5396416187286377, "logps/chosen": -355.563232421875, "logps/rejected": -253.26132202148438, "loss": 2.0997, "rewards/accuracies": 0.5, "rewards/chosen": -3.541983127593994, "rewards/margins": 1.3396108150482178, "rewards/rejected": -4.881594181060791, "step": 1339 }, { "epoch": 0.21, "learning_rate": 1.3164524093573318e-05, "logits/chosen": -1.9704500436782837, "logits/rejected": -3.217288017272949, "logps/chosen": -59.36732482910156, "logps/rejected": -415.97027587890625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.795028567314148, "rewards/margins": 5.881647109985352, "rewards/rejected": -5.086618423461914, "step": 1340 }, { "epoch": 0.21, "learning_rate": 1.316379065304217e-05, "logits/chosen": -3.0629184246063232, "logits/rejected": -2.974327802658081, "logps/chosen": -724.98583984375, "logps/rejected": -356.485107421875, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -0.4119361937046051, "rewards/margins": 5.14307975769043, "rewards/rejected": -5.555015563964844, "step": 1341 }, { "epoch": 0.21, "learning_rate": 1.3163057212511024e-05, "logits/chosen": -1.4159598350524902, "logits/rejected": -2.9640512466430664, "logps/chosen": -16.45952033996582, "logps/rejected": -208.51416015625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.5688230991363525, "rewards/margins": 5.609947204589844, "rewards/rejected": -5.04112434387207, "step": 1342 }, { "epoch": 0.21, "learning_rate": 1.3162323771979876e-05, "logits/chosen": -2.6631557941436768, "logits/rejected": -3.296013832092285, "logps/chosen": -274.7383117675781, "logps/rejected": -399.2550048828125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.5540155172348022, "rewards/margins": 5.7003278732299805, "rewards/rejected": -6.254343509674072, "step": 1343 }, { "epoch": 0.21, "learning_rate": 1.3161590331448728e-05, "logits/chosen": -2.562941312789917, "logits/rejected": -2.968942403793335, "logps/chosen": -301.4833068847656, "logps/rejected": -492.26318359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.8046923875808716, "rewards/margins": 8.557119369506836, "rewards/rejected": -10.361812591552734, "step": 1344 }, { "epoch": 0.21, "learning_rate": 1.316085689091758e-05, "logits/chosen": -2.477431535720825, "logits/rejected": -2.905073642730713, "logps/chosen": -81.31434631347656, "logps/rejected": -180.02938842773438, "loss": 2.1488, "rewards/accuracies": 0.5, "rewards/chosen": -2.3207294940948486, "rewards/margins": -0.2508906126022339, "rewards/rejected": -2.069838762283325, "step": 1345 }, { "epoch": 0.21, "learning_rate": 1.3160123450386431e-05, "logits/chosen": -2.4389421939849854, "logits/rejected": -3.0676004886627197, "logps/chosen": -523.3460693359375, "logps/rejected": -522.5731201171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.34638214111328125, "rewards/margins": 8.254703521728516, "rewards/rejected": -8.601085662841797, "step": 1346 }, { "epoch": 0.21, "learning_rate": 1.3159390009855283e-05, "logits/chosen": -2.952627658843994, "logits/rejected": -2.6861250400543213, "logps/chosen": -119.23605346679688, "logps/rejected": -187.53855895996094, "loss": 1.9209, "rewards/accuracies": 0.5, "rewards/chosen": -0.8405983448028564, "rewards/margins": 1.325101375579834, "rewards/rejected": -2.1656997203826904, "step": 1347 }, { "epoch": 0.21, "learning_rate": 1.3158656569324135e-05, "logits/chosen": -2.379964828491211, "logits/rejected": -2.8878965377807617, "logps/chosen": -48.40951919555664, "logps/rejected": -260.54888916015625, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": 0.6009458303451538, "rewards/margins": 5.068636417388916, "rewards/rejected": -4.467690467834473, "step": 1348 }, { "epoch": 0.21, "learning_rate": 1.3157923128792987e-05, "logits/chosen": -2.9178311824798584, "logits/rejected": -2.920313596725464, "logps/chosen": -36.52425003051758, "logps/rejected": -155.40957641601562, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 0.9763805866241455, "rewards/margins": 5.181128978729248, "rewards/rejected": -4.204748153686523, "step": 1349 }, { "epoch": 0.21, "learning_rate": 1.3157189688261839e-05, "logits/chosen": -2.8122167587280273, "logits/rejected": -1.7172572612762451, "logps/chosen": -186.025390625, "logps/rejected": -57.629852294921875, "loss": 1.8277, "rewards/accuracies": 0.5, "rewards/chosen": -1.9880627393722534, "rewards/margins": -1.3667141199111938, "rewards/rejected": -0.6213486194610596, "step": 1350 }, { "epoch": 0.21, "learning_rate": 1.3156456247730692e-05, "logits/chosen": -2.6964919567108154, "logits/rejected": -3.076503276824951, "logps/chosen": -94.09896850585938, "logps/rejected": -288.46881103515625, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9896146655082703, "rewards/margins": 4.18494176864624, "rewards/rejected": -5.174556255340576, "step": 1351 }, { "epoch": 0.21, "learning_rate": 1.3155722807199544e-05, "logits/chosen": -2.9264538288116455, "logits/rejected": -3.0199310779571533, "logps/chosen": -194.01974487304688, "logps/rejected": -270.3768310546875, "loss": 1.7772, "rewards/accuracies": 0.5, "rewards/chosen": -1.757002353668213, "rewards/margins": 1.2247284650802612, "rewards/rejected": -2.9817306995391846, "step": 1352 }, { "epoch": 0.21, "learning_rate": 1.3154989366668396e-05, "logits/chosen": -1.967365026473999, "logits/rejected": -2.7883920669555664, "logps/chosen": -242.31072998046875, "logps/rejected": -373.36920166015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.4331451654434204, "rewards/margins": 6.286199569702148, "rewards/rejected": -6.719344139099121, "step": 1353 }, { "epoch": 0.21, "learning_rate": 1.3154255926137248e-05, "logits/chosen": -2.579871416091919, "logits/rejected": -2.033064365386963, "logps/chosen": -402.68084716796875, "logps/rejected": -280.8214111328125, "loss": 1.0209, "rewards/accuracies": 0.5, "rewards/chosen": -0.34646302461624146, "rewards/margins": 2.42928147315979, "rewards/rejected": -2.7757444381713867, "step": 1354 }, { "epoch": 0.21, "learning_rate": 1.31535224856061e-05, "logits/chosen": -2.7269370555877686, "logits/rejected": -3.106760263442993, "logps/chosen": -80.82685852050781, "logps/rejected": -245.81427001953125, "loss": 0.0924, "rewards/accuracies": 1.0, "rewards/chosen": -0.011991888284683228, "rewards/margins": 4.68801212310791, "rewards/rejected": -4.7000041007995605, "step": 1355 }, { "epoch": 0.21, "learning_rate": 1.3152789045074952e-05, "logits/chosen": -2.7301363945007324, "logits/rejected": -1.3460328578948975, "logps/chosen": -336.2187805175781, "logps/rejected": -120.23603057861328, "loss": 4.5612, "rewards/accuracies": 0.5, "rewards/chosen": -4.289070129394531, "rewards/margins": -2.36728835105896, "rewards/rejected": -1.9217818975448608, "step": 1356 }, { "epoch": 0.21, "learning_rate": 1.3152055604543804e-05, "logits/chosen": -1.7121068239212036, "logits/rejected": -2.4684300422668457, "logps/chosen": -223.7518310546875, "logps/rejected": -454.71063232421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.5923011898994446, "rewards/margins": 8.353364944458008, "rewards/rejected": -7.761064529418945, "step": 1357 }, { "epoch": 0.21, "learning_rate": 1.3151322164012656e-05, "logits/chosen": -2.639160394668579, "logits/rejected": -3.2110157012939453, "logps/chosen": -61.76499557495117, "logps/rejected": -345.718017578125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 1.006697654724121, "rewards/margins": 9.3925142288208, "rewards/rejected": -8.38581657409668, "step": 1358 }, { "epoch": 0.21, "learning_rate": 1.3150588723481508e-05, "logits/chosen": -1.6829255819320679, "logits/rejected": -2.767054557800293, "logps/chosen": -173.38397216796875, "logps/rejected": -328.198486328125, "loss": 1.623, "rewards/accuracies": 0.5, "rewards/chosen": -1.444220781326294, "rewards/margins": 1.125572919845581, "rewards/rejected": -2.569793701171875, "step": 1359 }, { "epoch": 0.21, "learning_rate": 1.3149855282950361e-05, "logits/chosen": -2.1236653327941895, "logits/rejected": -2.857187271118164, "logps/chosen": -341.0748291015625, "logps/rejected": -237.15512084960938, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -0.8517746925354004, "rewards/margins": 3.0948190689086914, "rewards/rejected": -3.946593761444092, "step": 1360 }, { "epoch": 0.21, "learning_rate": 1.3149121842419213e-05, "logits/chosen": -0.9654837250709534, "logits/rejected": -1.801071286201477, "logps/chosen": -257.0789794921875, "logps/rejected": -518.3153076171875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.438690185546875, "rewards/margins": 6.027891635894775, "rewards/rejected": -5.5892014503479, "step": 1361 }, { "epoch": 0.21, "learning_rate": 1.3148388401888065e-05, "logits/chosen": -2.4583852291107178, "logits/rejected": -2.8518569469451904, "logps/chosen": -12.13827133178711, "logps/rejected": -129.30508422851562, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 1.1608686447143555, "rewards/margins": 4.602839469909668, "rewards/rejected": -3.4419713020324707, "step": 1362 }, { "epoch": 0.21, "learning_rate": 1.3147654961356917e-05, "logits/chosen": -2.214783191680908, "logits/rejected": -2.994677782058716, "logps/chosen": -328.5922546386719, "logps/rejected": -382.45819091796875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 0.7557071447372437, "rewards/margins": 5.571715354919434, "rewards/rejected": -4.8160080909729, "step": 1363 }, { "epoch": 0.21, "learning_rate": 1.3146921520825769e-05, "logits/chosen": -2.2150750160217285, "logits/rejected": -2.592820405960083, "logps/chosen": -312.671142578125, "logps/rejected": -309.22930908203125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 0.4088718593120575, "rewards/margins": 5.226103782653809, "rewards/rejected": -4.817232131958008, "step": 1364 }, { "epoch": 0.21, "learning_rate": 1.314618808029462e-05, "logits/chosen": -2.7847723960876465, "logits/rejected": -2.8463644981384277, "logps/chosen": -456.101318359375, "logps/rejected": -532.87353515625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.48910102248191833, "rewards/margins": 6.509167194366455, "rewards/rejected": -6.998268127441406, "step": 1365 }, { "epoch": 0.21, "learning_rate": 1.3145454639763472e-05, "logits/chosen": -2.6463348865509033, "logits/rejected": -2.634552478790283, "logps/chosen": -517.851806640625, "logps/rejected": -379.7403259277344, "loss": 7.0764, "rewards/accuracies": 0.0, "rewards/chosen": -7.185080051422119, "rewards/margins": -7.0751953125, "rewards/rejected": -0.10988503694534302, "step": 1366 }, { "epoch": 0.21, "learning_rate": 1.3144721199232324e-05, "logits/chosen": -2.744048595428467, "logits/rejected": -2.489847421646118, "logps/chosen": -427.4960632324219, "logps/rejected": -424.1197509765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.5010475516319275, "rewards/margins": 7.788470268249512, "rewards/rejected": -7.287422180175781, "step": 1367 }, { "epoch": 0.21, "learning_rate": 1.3143987758701178e-05, "logits/chosen": -2.2264766693115234, "logits/rejected": -2.3331873416900635, "logps/chosen": -201.52944946289062, "logps/rejected": -184.7674560546875, "loss": 2.1064, "rewards/accuracies": 0.5, "rewards/chosen": -2.065399169921875, "rewards/margins": 0.17236566543579102, "rewards/rejected": -2.237764835357666, "step": 1368 }, { "epoch": 0.21, "learning_rate": 1.314325431817003e-05, "logits/chosen": -1.9552453756332397, "logits/rejected": -2.603630304336548, "logps/chosen": -312.7976379394531, "logps/rejected": -472.36865234375, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": -0.21477681398391724, "rewards/margins": 4.709868907928467, "rewards/rejected": -4.92464542388916, "step": 1369 }, { "epoch": 0.21, "learning_rate": 1.3142520877638882e-05, "logits/chosen": -2.8272578716278076, "logits/rejected": -2.780210018157959, "logps/chosen": -111.42334747314453, "logps/rejected": -95.99942016601562, "loss": 2.253, "rewards/accuracies": 0.5, "rewards/chosen": -2.454054355621338, "rewards/margins": 1.1128153800964355, "rewards/rejected": -3.5668697357177734, "step": 1370 }, { "epoch": 0.21, "learning_rate": 1.3141787437107733e-05, "logits/chosen": -3.007934331893921, "logits/rejected": -2.9877569675445557, "logps/chosen": -100.8331298828125, "logps/rejected": -169.7264404296875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.09051399677991867, "rewards/margins": 5.805817604064941, "rewards/rejected": -5.896331787109375, "step": 1371 }, { "epoch": 0.21, "learning_rate": 1.3141053996576585e-05, "logits/chosen": -2.701500654220581, "logits/rejected": -3.0165112018585205, "logps/chosen": -232.05226135253906, "logps/rejected": -219.24996948242188, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -0.20154112577438354, "rewards/margins": 3.830207109451294, "rewards/rejected": -4.031748294830322, "step": 1372 }, { "epoch": 0.21, "learning_rate": 1.3140320556045439e-05, "logits/chosen": -2.3993163108825684, "logits/rejected": -3.174180507659912, "logps/chosen": -206.308349609375, "logps/rejected": -256.11029052734375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 0.27299728989601135, "rewards/margins": 4.466773509979248, "rewards/rejected": -4.1937761306762695, "step": 1373 }, { "epoch": 0.21, "learning_rate": 1.313958711551429e-05, "logits/chosen": -1.8892736434936523, "logits/rejected": -2.979691505432129, "logps/chosen": -101.46755981445312, "logps/rejected": -322.9852600097656, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.10136719048023224, "rewards/margins": 6.483987331390381, "rewards/rejected": -6.382619857788086, "step": 1374 }, { "epoch": 0.21, "learning_rate": 1.3138853674983143e-05, "logits/chosen": -2.5377135276794434, "logits/rejected": -2.6647493839263916, "logps/chosen": -248.2860870361328, "logps/rejected": -297.2911376953125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 1.2764304876327515, "rewards/margins": 6.718180179595947, "rewards/rejected": -5.441749572753906, "step": 1375 }, { "epoch": 0.21, "learning_rate": 1.3138120234451995e-05, "logits/chosen": -2.877063035964966, "logits/rejected": -1.4627537727355957, "logps/chosen": -300.7093505859375, "logps/rejected": -123.41850280761719, "loss": 4.272, "rewards/accuracies": 0.0, "rewards/chosen": -3.3559067249298096, "rewards/margins": -4.257662773132324, "rewards/rejected": 0.9017559289932251, "step": 1376 }, { "epoch": 0.21, "learning_rate": 1.3137386793920848e-05, "logits/chosen": -2.781822919845581, "logits/rejected": -3.0475387573242188, "logps/chosen": -46.30400085449219, "logps/rejected": -137.013671875, "loss": 0.1819, "rewards/accuracies": 1.0, "rewards/chosen": 0.4493617117404938, "rewards/margins": 1.8703150749206543, "rewards/rejected": -1.4209532737731934, "step": 1377 }, { "epoch": 0.21, "learning_rate": 1.31366533533897e-05, "logits/chosen": -2.444485664367676, "logits/rejected": -2.8506758213043213, "logps/chosen": -323.40301513671875, "logps/rejected": -349.4096374511719, "loss": 2.232, "rewards/accuracies": 0.5, "rewards/chosen": -2.609288215637207, "rewards/margins": -0.2810096740722656, "rewards/rejected": -2.3282783031463623, "step": 1378 }, { "epoch": 0.21, "learning_rate": 1.3135919912858552e-05, "logits/chosen": -2.6944453716278076, "logits/rejected": -2.6369669437408447, "logps/chosen": -131.47415161132812, "logps/rejected": -186.34793090820312, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7512447834014893, "rewards/margins": 4.294942855834961, "rewards/rejected": -5.046187400817871, "step": 1379 }, { "epoch": 0.21, "learning_rate": 1.3135186472327404e-05, "logits/chosen": -2.8177835941314697, "logits/rejected": -2.7602856159210205, "logps/chosen": -41.241085052490234, "logps/rejected": -50.619998931884766, "loss": 1.1065, "rewards/accuracies": 0.5, "rewards/chosen": 0.49839916825294495, "rewards/margins": 1.3390851020812988, "rewards/rejected": -0.8406858444213867, "step": 1380 }, { "epoch": 0.21, "learning_rate": 1.3134453031796256e-05, "logits/chosen": -2.458397388458252, "logits/rejected": -2.84458065032959, "logps/chosen": -217.51620483398438, "logps/rejected": -419.99554443359375, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -0.009954839944839478, "rewards/margins": 5.514340400695801, "rewards/rejected": -5.524295330047607, "step": 1381 }, { "epoch": 0.21, "learning_rate": 1.3133719591265107e-05, "logits/chosen": -2.798907518386841, "logits/rejected": -2.8023414611816406, "logps/chosen": -561.81787109375, "logps/rejected": -362.93707275390625, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": -0.9104537963867188, "rewards/margins": 3.423722982406616, "rewards/rejected": -4.334177017211914, "step": 1382 }, { "epoch": 0.22, "learning_rate": 1.313298615073396e-05, "logits/chosen": -1.9739816188812256, "logits/rejected": -2.662368059158325, "logps/chosen": -299.018798828125, "logps/rejected": -401.2398681640625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5148903727531433, "rewards/margins": 6.215358734130859, "rewards/rejected": -6.730249404907227, "step": 1383 }, { "epoch": 0.22, "learning_rate": 1.3132252710202811e-05, "logits/chosen": -2.4708592891693115, "logits/rejected": -3.1788952350616455, "logps/chosen": -78.42630004882812, "logps/rejected": -390.69482421875, "loss": 0.0931, "rewards/accuracies": 1.0, "rewards/chosen": 0.20315304398536682, "rewards/margins": 5.910065174102783, "rewards/rejected": -5.706912040710449, "step": 1384 }, { "epoch": 0.22, "learning_rate": 1.3131519269671663e-05, "logits/chosen": -2.022125005722046, "logits/rejected": -3.0399951934814453, "logps/chosen": -127.80652618408203, "logps/rejected": -286.05517578125, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -0.34741172194480896, "rewards/margins": 4.430733680725098, "rewards/rejected": -4.778145790100098, "step": 1385 }, { "epoch": 0.22, "learning_rate": 1.3130785829140517e-05, "logits/chosen": -2.848637342453003, "logits/rejected": -2.680621862411499, "logps/chosen": -249.37191772460938, "logps/rejected": -250.39451599121094, "loss": 2.0634, "rewards/accuracies": 0.5, "rewards/chosen": -1.4981647729873657, "rewards/margins": 0.05592536926269531, "rewards/rejected": -1.554090142250061, "step": 1386 }, { "epoch": 0.22, "learning_rate": 1.3130052388609369e-05, "logits/chosen": -2.7270257472991943, "logits/rejected": -2.1526923179626465, "logps/chosen": -358.6950988769531, "logps/rejected": -264.2175598144531, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.651496171951294, "rewards/margins": 4.839058876037598, "rewards/rejected": -5.4905548095703125, "step": 1387 }, { "epoch": 0.22, "learning_rate": 1.312931894807822e-05, "logits/chosen": -0.8979350924491882, "logits/rejected": -2.937753677368164, "logps/chosen": -22.969751358032227, "logps/rejected": -495.1669921875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 0.5276703834533691, "rewards/margins": 8.29668140411377, "rewards/rejected": -7.7690110206604, "step": 1388 }, { "epoch": 0.22, "learning_rate": 1.3128585507547072e-05, "logits/chosen": -1.8872414827346802, "logits/rejected": -2.932279348373413, "logps/chosen": -41.93423080444336, "logps/rejected": -259.1504211425781, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": 0.39246684312820435, "rewards/margins": 4.78746223449707, "rewards/rejected": -4.394995212554932, "step": 1389 }, { "epoch": 0.22, "learning_rate": 1.3127852067015924e-05, "logits/chosen": -3.0283312797546387, "logits/rejected": -1.7306523323059082, "logps/chosen": -228.71389770507812, "logps/rejected": -99.8631362915039, "loss": 5.3335, "rewards/accuracies": 0.0, "rewards/chosen": -4.072642803192139, "rewards/margins": -5.327901840209961, "rewards/rejected": 1.2552587985992432, "step": 1390 }, { "epoch": 0.22, "learning_rate": 1.3127118626484776e-05, "logits/chosen": -1.804290771484375, "logits/rejected": -3.1654860973358154, "logps/chosen": -95.23746490478516, "logps/rejected": -330.36212158203125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.7142928838729858, "rewards/margins": 6.301011085510254, "rewards/rejected": -5.5867180824279785, "step": 1391 }, { "epoch": 0.22, "learning_rate": 1.3126385185953628e-05, "logits/chosen": -2.2111451625823975, "logits/rejected": -3.1961851119995117, "logps/chosen": -162.99169921875, "logps/rejected": -371.8189392089844, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": 0.06856153905391693, "rewards/margins": 4.423030853271484, "rewards/rejected": -4.354469299316406, "step": 1392 }, { "epoch": 0.22, "learning_rate": 1.312565174542248e-05, "logits/chosen": -3.0070641040802, "logits/rejected": -2.843780517578125, "logps/chosen": -620.6290893554688, "logps/rejected": -665.238037109375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.637446641921997, "rewards/margins": 8.475438117980957, "rewards/rejected": -10.112884521484375, "step": 1393 }, { "epoch": 0.22, "learning_rate": 1.3124918304891332e-05, "logits/chosen": -1.7054558992385864, "logits/rejected": -2.5352377891540527, "logps/chosen": -113.892333984375, "logps/rejected": -286.2098388671875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 0.48242342472076416, "rewards/margins": 5.110990524291992, "rewards/rejected": -4.628566741943359, "step": 1394 }, { "epoch": 0.22, "learning_rate": 1.3124184864360185e-05, "logits/chosen": -2.660874843597412, "logits/rejected": -3.073066473007202, "logps/chosen": -234.59071350097656, "logps/rejected": -350.132568359375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 0.5884050726890564, "rewards/margins": 5.977337837219238, "rewards/rejected": -5.388933181762695, "step": 1395 }, { "epoch": 0.22, "learning_rate": 1.3123451423829037e-05, "logits/chosen": -2.3067331314086914, "logits/rejected": -2.920011043548584, "logps/chosen": -216.516357421875, "logps/rejected": -269.0302734375, "loss": 2.3069, "rewards/accuracies": 0.5, "rewards/chosen": -2.32243275642395, "rewards/margins": 1.7446634769439697, "rewards/rejected": -4.06709623336792, "step": 1396 }, { "epoch": 0.22, "learning_rate": 1.3122717983297889e-05, "logits/chosen": -2.9061427116394043, "logits/rejected": -3.0471768379211426, "logps/chosen": -205.47207641601562, "logps/rejected": -181.04937744140625, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -0.8026795387268066, "rewards/margins": 4.251725196838379, "rewards/rejected": -5.054404258728027, "step": 1397 }, { "epoch": 0.22, "learning_rate": 1.3121984542766741e-05, "logits/chosen": -1.766588568687439, "logits/rejected": -2.6314797401428223, "logps/chosen": -48.25012969970703, "logps/rejected": -127.78057098388672, "loss": 0.0841, "rewards/accuracies": 1.0, "rewards/chosen": 0.26560893654823303, "rewards/margins": 2.48665189743042, "rewards/rejected": -2.2210428714752197, "step": 1398 }, { "epoch": 0.22, "learning_rate": 1.3121251102235593e-05, "logits/chosen": -2.9275975227355957, "logits/rejected": -2.0722551345825195, "logps/chosen": -181.72970581054688, "logps/rejected": -271.8414306640625, "loss": 2.7637, "rewards/accuracies": 0.5, "rewards/chosen": -1.7561891078948975, "rewards/margins": 1.3923420906066895, "rewards/rejected": -3.148531198501587, "step": 1399 }, { "epoch": 0.22, "learning_rate": 1.3120517661704445e-05, "logits/chosen": -2.5953426361083984, "logits/rejected": -3.119288444519043, "logps/chosen": -114.5537109375, "logps/rejected": -370.40911865234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.19954167306423187, "rewards/margins": 6.906595706939697, "rewards/rejected": -6.707054138183594, "step": 1400 }, { "epoch": 0.22, "learning_rate": 1.3119784221173297e-05, "logits/chosen": -2.9564638137817383, "logits/rejected": -3.1195619106292725, "logps/chosen": -112.83638000488281, "logps/rejected": -309.1676025390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.059401094913482666, "rewards/margins": 7.334556579589844, "rewards/rejected": -7.275155067443848, "step": 1401 }, { "epoch": 0.22, "learning_rate": 1.3119050780642148e-05, "logits/chosen": -2.147228717803955, "logits/rejected": -2.8943212032318115, "logps/chosen": -161.067138671875, "logps/rejected": -307.11163330078125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 0.06310272216796875, "rewards/margins": 5.003827095031738, "rewards/rejected": -4.9407243728637695, "step": 1402 }, { "epoch": 0.22, "learning_rate": 1.3118317340111e-05, "logits/chosen": -0.9438826441764832, "logits/rejected": -2.1603362560272217, "logps/chosen": -111.9267349243164, "logps/rejected": -341.49493408203125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 0.4732414186000824, "rewards/margins": 4.841459274291992, "rewards/rejected": -4.368217468261719, "step": 1403 }, { "epoch": 0.22, "learning_rate": 1.3117583899579854e-05, "logits/chosen": -2.804461717605591, "logits/rejected": -3.1315650939941406, "logps/chosen": -80.02801513671875, "logps/rejected": -358.4313659667969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7048113346099854, "rewards/margins": 8.654855728149414, "rewards/rejected": -9.359667778015137, "step": 1404 }, { "epoch": 0.22, "learning_rate": 1.3116850459048706e-05, "logits/chosen": -2.66816782951355, "logits/rejected": -1.5552647113800049, "logps/chosen": -327.6479187011719, "logps/rejected": -39.65343475341797, "loss": 4.0772, "rewards/accuracies": 0.5, "rewards/chosen": -3.482088565826416, "rewards/margins": -3.726184129714966, "rewards/rejected": 0.2440958023071289, "step": 1405 }, { "epoch": 0.22, "learning_rate": 1.3116117018517558e-05, "logits/chosen": -1.0691677331924438, "logits/rejected": -2.5904757976531982, "logps/chosen": -97.8387451171875, "logps/rejected": -323.0316162109375, "loss": 0.1948, "rewards/accuracies": 1.0, "rewards/chosen": -0.25161486864089966, "rewards/margins": 4.412239074707031, "rewards/rejected": -4.663854122161865, "step": 1406 }, { "epoch": 0.22, "learning_rate": 1.3115383577986411e-05, "logits/chosen": -2.811030387878418, "logits/rejected": -3.0707755088806152, "logps/chosen": -173.4482421875, "logps/rejected": -294.96783447265625, "loss": 3.4157, "rewards/accuracies": 0.5, "rewards/chosen": -2.969050884246826, "rewards/margins": -1.5542473793029785, "rewards/rejected": -1.4148037433624268, "step": 1407 }, { "epoch": 0.22, "learning_rate": 1.3114650137455263e-05, "logits/chosen": -2.031430721282959, "logits/rejected": -2.9786813259124756, "logps/chosen": -101.22821044921875, "logps/rejected": -285.6041259765625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.4797344207763672, "rewards/margins": 5.964120864868164, "rewards/rejected": -5.484386444091797, "step": 1408 }, { "epoch": 0.22, "learning_rate": 1.3113916696924115e-05, "logits/chosen": -2.308521032333374, "logits/rejected": -3.05245041847229, "logps/chosen": -15.037633895874023, "logps/rejected": -190.58016967773438, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": 0.7968558073043823, "rewards/margins": 4.292704105377197, "rewards/rejected": -3.4958481788635254, "step": 1409 }, { "epoch": 0.22, "learning_rate": 1.3113183256392967e-05, "logits/chosen": -2.620016574859619, "logits/rejected": -2.820265293121338, "logps/chosen": -153.92935180664062, "logps/rejected": -45.91960144042969, "loss": 1.8751, "rewards/accuracies": 0.5, "rewards/chosen": -0.4520718455314636, "rewards/margins": -0.7390248775482178, "rewards/rejected": 0.28695306181907654, "step": 1410 }, { "epoch": 0.22, "learning_rate": 1.3112449815861819e-05, "logits/chosen": -1.852640986442566, "logits/rejected": -3.1006815433502197, "logps/chosen": -54.3950080871582, "logps/rejected": -449.9873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.17608827352523804, "rewards/margins": 9.762557029724121, "rewards/rejected": -9.586468696594238, "step": 1411 }, { "epoch": 0.22, "learning_rate": 1.311171637533067e-05, "logits/chosen": -2.7064576148986816, "logits/rejected": -3.155035972595215, "logps/chosen": -239.97067260742188, "logps/rejected": -489.36785888671875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 0.09579998254776001, "rewards/margins": 6.384218215942383, "rewards/rejected": -6.288418769836426, "step": 1412 }, { "epoch": 0.22, "learning_rate": 1.3110982934799524e-05, "logits/chosen": -2.4421446323394775, "logits/rejected": -2.880323648452759, "logps/chosen": -98.69859313964844, "logps/rejected": -203.28172302246094, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.11727219820022583, "rewards/margins": 5.700557231903076, "rewards/rejected": -5.817829608917236, "step": 1413 }, { "epoch": 0.22, "learning_rate": 1.3110249494268376e-05, "logits/chosen": -2.9016456604003906, "logits/rejected": -2.4519901275634766, "logps/chosen": -93.7507095336914, "logps/rejected": -86.09439086914062, "loss": 0.7372, "rewards/accuracies": 0.5, "rewards/chosen": -0.0800376832485199, "rewards/margins": 2.5549354553222656, "rewards/rejected": -2.6349730491638184, "step": 1414 }, { "epoch": 0.22, "learning_rate": 1.3109516053737228e-05, "logits/chosen": -1.8821717500686646, "logits/rejected": -2.83587646484375, "logps/chosen": -187.35960388183594, "logps/rejected": -381.3013000488281, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.6776618957519531, "rewards/margins": 5.238870143890381, "rewards/rejected": -5.916532516479492, "step": 1415 }, { "epoch": 0.22, "learning_rate": 1.310878261320608e-05, "logits/chosen": -2.4174225330352783, "logits/rejected": -2.4244027137756348, "logps/chosen": -374.9976806640625, "logps/rejected": -377.79986572265625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6322517395019531, "rewards/margins": 6.347429275512695, "rewards/rejected": -6.979681491851807, "step": 1416 }, { "epoch": 0.22, "learning_rate": 1.3108049172674932e-05, "logits/chosen": -2.89793062210083, "logits/rejected": -3.035815954208374, "logps/chosen": -97.79727172851562, "logps/rejected": -214.62881469726562, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 0.21459656953811646, "rewards/margins": 6.563046455383301, "rewards/rejected": -6.34844970703125, "step": 1417 }, { "epoch": 0.22, "learning_rate": 1.3107315732143784e-05, "logits/chosen": -2.7239139080047607, "logits/rejected": -2.8998525142669678, "logps/chosen": -176.14376831054688, "logps/rejected": -303.158935546875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": 1.033068060874939, "rewards/margins": 4.928081512451172, "rewards/rejected": -3.8950135707855225, "step": 1418 }, { "epoch": 0.22, "learning_rate": 1.3106582291612635e-05, "logits/chosen": -2.3174095153808594, "logits/rejected": -2.7891178131103516, "logps/chosen": -593.0716552734375, "logps/rejected": -564.6962890625, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -0.05982741713523865, "rewards/margins": 5.552887916564941, "rewards/rejected": -5.612715244293213, "step": 1419 }, { "epoch": 0.22, "learning_rate": 1.3105848851081487e-05, "logits/chosen": -2.5614583492279053, "logits/rejected": -3.0352377891540527, "logps/chosen": -132.98724365234375, "logps/rejected": -249.7065887451172, "loss": 0.0743, "rewards/accuracies": 1.0, "rewards/chosen": 0.25871390104293823, "rewards/margins": 3.8729922771453857, "rewards/rejected": -3.6142783164978027, "step": 1420 }, { "epoch": 0.22, "learning_rate": 1.310511541055034e-05, "logits/chosen": -1.9216033220291138, "logits/rejected": -2.27445650100708, "logps/chosen": -177.27688598632812, "logps/rejected": -310.48687744140625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6109787225723267, "rewards/margins": 6.199215888977051, "rewards/rejected": -6.810194492340088, "step": 1421 }, { "epoch": 0.22, "learning_rate": 1.3104381970019193e-05, "logits/chosen": -2.435793876647949, "logits/rejected": -3.0150067806243896, "logps/chosen": -420.2782287597656, "logps/rejected": -528.9987182617188, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -0.48841592669487, "rewards/margins": 6.67429780960083, "rewards/rejected": -7.162714004516602, "step": 1422 }, { "epoch": 0.22, "learning_rate": 1.3103648529488045e-05, "logits/chosen": -2.1541731357574463, "logits/rejected": -3.2216076850891113, "logps/chosen": -222.87841796875, "logps/rejected": -360.1949462890625, "loss": 3.7245, "rewards/accuracies": 0.5, "rewards/chosen": -4.009152889251709, "rewards/margins": 1.0649495124816895, "rewards/rejected": -5.074102878570557, "step": 1423 }, { "epoch": 0.22, "learning_rate": 1.3102915088956897e-05, "logits/chosen": -1.460288643836975, "logits/rejected": -2.0762128829956055, "logps/chosen": -138.81077575683594, "logps/rejected": -296.1839599609375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.17666666209697723, "rewards/margins": 6.596839904785156, "rewards/rejected": -6.420173168182373, "step": 1424 }, { "epoch": 0.22, "learning_rate": 1.3102181648425748e-05, "logits/chosen": -2.9785096645355225, "logits/rejected": -3.0477960109710693, "logps/chosen": -180.04432678222656, "logps/rejected": -239.35072326660156, "loss": 2.6469, "rewards/accuracies": 0.5, "rewards/chosen": -3.482149600982666, "rewards/margins": 0.14332270622253418, "rewards/rejected": -3.6254723072052, "step": 1425 }, { "epoch": 0.22, "learning_rate": 1.31014482078946e-05, "logits/chosen": -0.7998547554016113, "logits/rejected": -2.7475991249084473, "logps/chosen": -17.00697898864746, "logps/rejected": -503.80072021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.48058658838272095, "rewards/margins": 11.46568775177002, "rewards/rejected": -10.985101699829102, "step": 1426 }, { "epoch": 0.22, "learning_rate": 1.3100714767363452e-05, "logits/chosen": -2.9084393978118896, "logits/rejected": -3.183117151260376, "logps/chosen": -298.79656982421875, "logps/rejected": -294.8321228027344, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 0.6714958548545837, "rewards/margins": 6.725019454956055, "rewards/rejected": -6.053523540496826, "step": 1427 }, { "epoch": 0.22, "learning_rate": 1.3099981326832304e-05, "logits/chosen": -1.7672687768936157, "logits/rejected": -2.6110236644744873, "logps/chosen": -124.64825439453125, "logps/rejected": -331.52880859375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.8403782844543457, "rewards/margins": 7.752437591552734, "rewards/rejected": -6.912059307098389, "step": 1428 }, { "epoch": 0.22, "learning_rate": 1.3099247886301156e-05, "logits/chosen": -2.803907632827759, "logits/rejected": -2.3296689987182617, "logps/chosen": -277.97021484375, "logps/rejected": -236.9123077392578, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": 0.5091078877449036, "rewards/margins": 5.047086238861084, "rewards/rejected": -4.537978172302246, "step": 1429 }, { "epoch": 0.22, "learning_rate": 1.3098514445770008e-05, "logits/chosen": -2.7967605590820312, "logits/rejected": -2.927213191986084, "logps/chosen": -66.00467681884766, "logps/rejected": -237.71194458007812, "loss": 2.5401, "rewards/accuracies": 0.5, "rewards/chosen": -1.2851288318634033, "rewards/margins": 0.803081750869751, "rewards/rejected": -2.0882105827331543, "step": 1430 }, { "epoch": 0.22, "learning_rate": 1.3097781005238861e-05, "logits/chosen": -2.5439400672912598, "logits/rejected": -2.8028440475463867, "logps/chosen": -12.149989128112793, "logps/rejected": -175.0370635986328, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 1.0849206447601318, "rewards/margins": 4.85159969329834, "rewards/rejected": -3.766679286956787, "step": 1431 }, { "epoch": 0.22, "learning_rate": 1.3097047564707713e-05, "logits/chosen": -2.281834840774536, "logits/rejected": -3.1661319732666016, "logps/chosen": -69.49128723144531, "logps/rejected": -299.83502197265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.5322181582450867, "rewards/margins": 7.404356002807617, "rewards/rejected": -6.872137546539307, "step": 1432 }, { "epoch": 0.22, "learning_rate": 1.3096314124176565e-05, "logits/chosen": -2.6111788749694824, "logits/rejected": -2.7386934757232666, "logps/chosen": -139.52928161621094, "logps/rejected": -72.06966400146484, "loss": 3.0059, "rewards/accuracies": 0.5, "rewards/chosen": -2.187837600708008, "rewards/margins": -2.0561392307281494, "rewards/rejected": -0.1316983997821808, "step": 1433 }, { "epoch": 0.22, "learning_rate": 1.3095580683645417e-05, "logits/chosen": -3.0456485748291016, "logits/rejected": -3.329787015914917, "logps/chosen": -175.9173583984375, "logps/rejected": -215.890625, "loss": 2.6873, "rewards/accuracies": 0.5, "rewards/chosen": -1.5546722412109375, "rewards/margins": 1.4431324005126953, "rewards/rejected": -2.997804641723633, "step": 1434 }, { "epoch": 0.22, "learning_rate": 1.3094847243114269e-05, "logits/chosen": -2.6672770977020264, "logits/rejected": -1.6589415073394775, "logps/chosen": -286.8741760253906, "logps/rejected": -198.27139282226562, "loss": 4.0123, "rewards/accuracies": 0.5, "rewards/chosen": -4.574710845947266, "rewards/margins": -1.7528841495513916, "rewards/rejected": -2.8218271732330322, "step": 1435 }, { "epoch": 0.22, "learning_rate": 1.309411380258312e-05, "logits/chosen": -2.756312608718872, "logits/rejected": -3.1709823608398438, "logps/chosen": -181.39088439941406, "logps/rejected": -319.37799072265625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.2935829162597656, "rewards/margins": 7.278036117553711, "rewards/rejected": -6.984453201293945, "step": 1436 }, { "epoch": 0.22, "learning_rate": 1.3093380362051973e-05, "logits/chosen": -1.8571563959121704, "logits/rejected": -2.9314582347869873, "logps/chosen": -38.40566635131836, "logps/rejected": -229.30892944335938, "loss": 0.1475, "rewards/accuracies": 1.0, "rewards/chosen": 0.6330798864364624, "rewards/margins": 2.744150161743164, "rewards/rejected": -2.111070394515991, "step": 1437 }, { "epoch": 0.22, "learning_rate": 1.3092646921520825e-05, "logits/chosen": -2.3420183658599854, "logits/rejected": -2.949192523956299, "logps/chosen": -39.319637298583984, "logps/rejected": -233.92930603027344, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.8471971154212952, "rewards/margins": 6.506113529205322, "rewards/rejected": -5.658916473388672, "step": 1438 }, { "epoch": 0.22, "learning_rate": 1.3091913480989678e-05, "logits/chosen": -2.8021466732025146, "logits/rejected": -3.1462647914886475, "logps/chosen": -366.3472595214844, "logps/rejected": -444.471435546875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.371891736984253, "rewards/margins": 5.903754234313965, "rewards/rejected": -7.275645732879639, "step": 1439 }, { "epoch": 0.22, "learning_rate": 1.309118004045853e-05, "logits/chosen": -2.753185510635376, "logits/rejected": -3.1206772327423096, "logps/chosen": -346.5596008300781, "logps/rejected": -601.8221435546875, "loss": 1.2132, "rewards/accuracies": 0.5, "rewards/chosen": -1.48260498046875, "rewards/margins": 2.3573670387268066, "rewards/rejected": -3.8399720191955566, "step": 1440 }, { "epoch": 0.22, "learning_rate": 1.3090446599927384e-05, "logits/chosen": -1.3677549362182617, "logits/rejected": -2.9211113452911377, "logps/chosen": -59.56380081176758, "logps/rejected": -650.9884033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8566548228263855, "rewards/margins": 10.443404197692871, "rewards/rejected": -9.586750030517578, "step": 1441 }, { "epoch": 0.22, "learning_rate": 1.3089713159396235e-05, "logits/chosen": -2.5213983058929443, "logits/rejected": -2.816087484359741, "logps/chosen": -67.34912109375, "logps/rejected": -282.51568603515625, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.34862691164016724, "rewards/margins": 5.390223026275635, "rewards/rejected": -5.738849639892578, "step": 1442 }, { "epoch": 0.22, "learning_rate": 1.3088979718865087e-05, "logits/chosen": -2.695143699645996, "logits/rejected": -2.451284885406494, "logps/chosen": -108.14312744140625, "logps/rejected": -233.51162719726562, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.34683915972709656, "rewards/margins": 5.3657941818237305, "rewards/rejected": -5.018954753875732, "step": 1443 }, { "epoch": 0.22, "learning_rate": 1.308824627833394e-05, "logits/chosen": -2.3011300563812256, "logits/rejected": -2.8969666957855225, "logps/chosen": -287.56585693359375, "logps/rejected": -463.1131896972656, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7381597757339478, "rewards/margins": 7.292574882507324, "rewards/rejected": -8.03073501586914, "step": 1444 }, { "epoch": 0.22, "learning_rate": 1.3087512837802791e-05, "logits/chosen": -1.7480807304382324, "logits/rejected": -2.8820886611938477, "logps/chosen": -75.51329040527344, "logps/rejected": -242.6271514892578, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.2366763949394226, "rewards/margins": 5.948578834533691, "rewards/rejected": -5.711902141571045, "step": 1445 }, { "epoch": 0.22, "learning_rate": 1.3086779397271643e-05, "logits/chosen": -2.8532698154449463, "logits/rejected": -2.8506526947021484, "logps/chosen": -162.05288696289062, "logps/rejected": -259.0117492675781, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 1.0187828540802002, "rewards/margins": 4.4939985275268555, "rewards/rejected": -3.4752159118652344, "step": 1446 }, { "epoch": 0.23, "learning_rate": 1.3086045956740495e-05, "logits/chosen": -2.948814630508423, "logits/rejected": -3.0927624702453613, "logps/chosen": -183.95620727539062, "logps/rejected": -260.8491516113281, "loss": 2.0552, "rewards/accuracies": 0.5, "rewards/chosen": -2.146855592727661, "rewards/margins": -0.1458728313446045, "rewards/rejected": -2.0009827613830566, "step": 1447 }, { "epoch": 0.23, "learning_rate": 1.3085312516209347e-05, "logits/chosen": -2.682331085205078, "logits/rejected": -2.8095345497131348, "logps/chosen": -67.1321029663086, "logps/rejected": -149.00958251953125, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": 1.080446720123291, "rewards/margins": 4.176021575927734, "rewards/rejected": -3.0955746173858643, "step": 1448 }, { "epoch": 0.23, "learning_rate": 1.30845790756782e-05, "logits/chosen": -2.6015255451202393, "logits/rejected": -2.5692150592803955, "logps/chosen": -147.6839599609375, "logps/rejected": -299.9147033691406, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -0.56036376953125, "rewards/margins": 4.696634292602539, "rewards/rejected": -5.256997585296631, "step": 1449 }, { "epoch": 0.23, "learning_rate": 1.3083845635147052e-05, "logits/chosen": -2.717243194580078, "logits/rejected": -2.837193250656128, "logps/chosen": -121.59056854248047, "logps/rejected": -398.4991149902344, "loss": 1.7425, "rewards/accuracies": 0.5, "rewards/chosen": -1.6978813409805298, "rewards/margins": 4.9384331703186035, "rewards/rejected": -6.636314392089844, "step": 1450 }, { "epoch": 0.23, "learning_rate": 1.3083112194615904e-05, "logits/chosen": -1.596631407737732, "logits/rejected": -2.7520625591278076, "logps/chosen": -124.73959350585938, "logps/rejected": -202.0112762451172, "loss": 1.1148, "rewards/accuracies": 0.5, "rewards/chosen": -0.6945183277130127, "rewards/margins": 2.8606855869293213, "rewards/rejected": -3.555203914642334, "step": 1451 }, { "epoch": 0.23, "learning_rate": 1.3082378754084756e-05, "logits/chosen": -1.3901492357254028, "logits/rejected": -2.988788604736328, "logps/chosen": -137.14999389648438, "logps/rejected": -728.4034423828125, "loss": 1.0626, "rewards/accuracies": 0.5, "rewards/chosen": -2.491196393966675, "rewards/margins": 1.1151206493377686, "rewards/rejected": -3.6063170433044434, "step": 1452 }, { "epoch": 0.23, "learning_rate": 1.3081645313553608e-05, "logits/chosen": -2.246152639389038, "logits/rejected": -3.201000452041626, "logps/chosen": -161.86331176757812, "logps/rejected": -301.8419189453125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 0.7366969585418701, "rewards/margins": 5.342761039733887, "rewards/rejected": -4.6060638427734375, "step": 1453 }, { "epoch": 0.23, "learning_rate": 1.308091187302246e-05, "logits/chosen": -2.622506618499756, "logits/rejected": -2.679985523223877, "logps/chosen": -277.0953063964844, "logps/rejected": -373.513671875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.22689437866210938, "rewards/margins": 6.3530449867248535, "rewards/rejected": -6.579939365386963, "step": 1454 }, { "epoch": 0.23, "learning_rate": 1.3080178432491312e-05, "logits/chosen": -3.2714738845825195, "logits/rejected": -2.9209401607513428, "logps/chosen": -138.65155029296875, "logps/rejected": -211.40896606445312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.41959840059280396, "rewards/margins": 6.335430145263672, "rewards/rejected": -5.915831565856934, "step": 1455 }, { "epoch": 0.23, "learning_rate": 1.3079444991960163e-05, "logits/chosen": -1.5651206970214844, "logits/rejected": -2.982609272003174, "logps/chosen": -16.878707885742188, "logps/rejected": -375.72467041015625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.839967668056488, "rewards/margins": 6.5937981605529785, "rewards/rejected": -5.753830432891846, "step": 1456 }, { "epoch": 0.23, "learning_rate": 1.3078711551429015e-05, "logits/chosen": -2.672555685043335, "logits/rejected": -2.63978910446167, "logps/chosen": -177.8496856689453, "logps/rejected": -74.33283996582031, "loss": 5.1276, "rewards/accuracies": 0.0, "rewards/chosen": -4.292895317077637, "rewards/margins": -5.117072105407715, "rewards/rejected": 0.8241763114929199, "step": 1457 }, { "epoch": 0.23, "learning_rate": 1.3077978110897869e-05, "logits/chosen": -2.962254762649536, "logits/rejected": -2.7774477005004883, "logps/chosen": -564.2410888671875, "logps/rejected": -638.8075561523438, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.5683639645576477, "rewards/margins": 7.298272609710693, "rewards/rejected": -7.866636753082275, "step": 1458 }, { "epoch": 0.23, "learning_rate": 1.307724467036672e-05, "logits/chosen": -3.110992193222046, "logits/rejected": -3.0636415481567383, "logps/chosen": -122.86577606201172, "logps/rejected": -44.88911819458008, "loss": 3.8187, "rewards/accuracies": 0.0, "rewards/chosen": -3.0084710121154785, "rewards/margins": -3.795452117919922, "rewards/rejected": 0.7869812250137329, "step": 1459 }, { "epoch": 0.23, "learning_rate": 1.3076511229835573e-05, "logits/chosen": -2.469036102294922, "logits/rejected": -3.2169766426086426, "logps/chosen": -66.77497100830078, "logps/rejected": -265.6063232421875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 1.110883355140686, "rewards/margins": 6.4060821533203125, "rewards/rejected": -5.295198917388916, "step": 1460 }, { "epoch": 0.23, "learning_rate": 1.3075777789304425e-05, "logits/chosen": -1.4201618432998657, "logits/rejected": -2.1493752002716064, "logps/chosen": -157.3359375, "logps/rejected": -227.1044158935547, "loss": 3.0012, "rewards/accuracies": 0.5, "rewards/chosen": -2.5692849159240723, "rewards/margins": 0.36925745010375977, "rewards/rejected": -2.938542366027832, "step": 1461 }, { "epoch": 0.23, "learning_rate": 1.3075044348773276e-05, "logits/chosen": -2.4072439670562744, "logits/rejected": -3.1385035514831543, "logps/chosen": -22.307464599609375, "logps/rejected": -174.94969177246094, "loss": 0.1051, "rewards/accuracies": 1.0, "rewards/chosen": 0.6340509057044983, "rewards/margins": 2.332592487335205, "rewards/rejected": -1.6985416412353516, "step": 1462 }, { "epoch": 0.23, "learning_rate": 1.3074310908242128e-05, "logits/chosen": -2.8488988876342773, "logits/rejected": -3.2601256370544434, "logps/chosen": -33.925132751464844, "logps/rejected": -195.80575561523438, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": 1.367790699005127, "rewards/margins": 3.7236955165863037, "rewards/rejected": -2.3559048175811768, "step": 1463 }, { "epoch": 0.23, "learning_rate": 1.307357746771098e-05, "logits/chosen": -2.4470033645629883, "logits/rejected": -2.880948066711426, "logps/chosen": -424.491455078125, "logps/rejected": -597.4856567382812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.32855093479156494, "rewards/margins": 8.639333724975586, "rewards/rejected": -8.96788501739502, "step": 1464 }, { "epoch": 0.23, "learning_rate": 1.3072844027179832e-05, "logits/chosen": -2.586575508117676, "logits/rejected": -2.9855728149414062, "logps/chosen": -49.203182220458984, "logps/rejected": -161.02810668945312, "loss": 0.1429, "rewards/accuracies": 1.0, "rewards/chosen": 0.6788021326065063, "rewards/margins": 3.494999647140503, "rewards/rejected": -2.816197395324707, "step": 1465 }, { "epoch": 0.23, "learning_rate": 1.3072110586648686e-05, "logits/chosen": -2.143433094024658, "logits/rejected": -2.9806857109069824, "logps/chosen": -102.84526062011719, "logps/rejected": -239.45828247070312, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.5667438507080078, "rewards/margins": 5.849884033203125, "rewards/rejected": -5.283140182495117, "step": 1466 }, { "epoch": 0.23, "learning_rate": 1.3071377146117537e-05, "logits/chosen": -2.2237331867218018, "logits/rejected": -2.7900750637054443, "logps/chosen": -140.2266845703125, "logps/rejected": -305.48974609375, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": 0.2519804835319519, "rewards/margins": 5.384130954742432, "rewards/rejected": -5.132150650024414, "step": 1467 }, { "epoch": 0.23, "learning_rate": 1.307064370558639e-05, "logits/chosen": -2.540822744369507, "logits/rejected": -3.2629876136779785, "logps/chosen": -282.5979919433594, "logps/rejected": -406.1735534667969, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.6215457916259766, "rewards/margins": 7.434242248535156, "rewards/rejected": -6.8126959800720215, "step": 1468 }, { "epoch": 0.23, "learning_rate": 1.3069910265055241e-05, "logits/chosen": -2.8492584228515625, "logits/rejected": -3.143432378768921, "logps/chosen": -91.1240005493164, "logps/rejected": -328.7773742675781, "loss": 1.0294, "rewards/accuracies": 0.5, "rewards/chosen": -0.3496779203414917, "rewards/margins": 0.8256499767303467, "rewards/rejected": -1.1753278970718384, "step": 1469 }, { "epoch": 0.23, "learning_rate": 1.3069176824524093e-05, "logits/chosen": -2.670623302459717, "logits/rejected": -2.403451681137085, "logps/chosen": -398.5679016113281, "logps/rejected": -352.4627990722656, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 0.09069058299064636, "rewards/margins": 5.971735954284668, "rewards/rejected": -5.881045341491699, "step": 1470 }, { "epoch": 0.23, "learning_rate": 1.3068443383992945e-05, "logits/chosen": -3.274137496948242, "logits/rejected": -3.1439530849456787, "logps/chosen": -556.9216918945312, "logps/rejected": -411.5834655761719, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 0.07880554348230362, "rewards/margins": 5.208330154418945, "rewards/rejected": -5.129524230957031, "step": 1471 }, { "epoch": 0.23, "learning_rate": 1.3067709943461797e-05, "logits/chosen": -2.4990463256835938, "logits/rejected": -2.886935234069824, "logps/chosen": -21.225858688354492, "logps/rejected": -186.24331665039062, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.6644737720489502, "rewards/margins": 6.154653549194336, "rewards/rejected": -5.490180015563965, "step": 1472 }, { "epoch": 0.23, "learning_rate": 1.306697650293065e-05, "logits/chosen": -2.488192558288574, "logits/rejected": -3.0304617881774902, "logps/chosen": -254.12393188476562, "logps/rejected": -296.87481689453125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 1.2669708728790283, "rewards/margins": 5.907218933105469, "rewards/rejected": -4.6402482986450195, "step": 1473 }, { "epoch": 0.23, "learning_rate": 1.3066243062399502e-05, "logits/chosen": -2.7841150760650635, "logits/rejected": -2.394333839416504, "logps/chosen": -979.2001953125, "logps/rejected": -572.1939697265625, "loss": 0.2505, "rewards/accuracies": 1.0, "rewards/chosen": -0.5973083972930908, "rewards/margins": 4.857083320617676, "rewards/rejected": -5.4543914794921875, "step": 1474 }, { "epoch": 0.23, "learning_rate": 1.3065509621868356e-05, "logits/chosen": -2.924628734588623, "logits/rejected": -2.9680330753326416, "logps/chosen": -588.230224609375, "logps/rejected": -604.243408203125, "loss": 0.0768, "rewards/accuracies": 1.0, "rewards/chosen": 0.162750244140625, "rewards/margins": 2.6101059913635254, "rewards/rejected": -2.4473557472229004, "step": 1475 }, { "epoch": 0.23, "learning_rate": 1.3064776181337208e-05, "logits/chosen": -2.873230218887329, "logits/rejected": -3.2627477645874023, "logps/chosen": -611.0140380859375, "logps/rejected": -567.0711669921875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.33969423174858093, "rewards/margins": 6.40964937210083, "rewards/rejected": -6.7493438720703125, "step": 1476 }, { "epoch": 0.23, "learning_rate": 1.306404274080606e-05, "logits/chosen": -2.961495876312256, "logits/rejected": -2.0907742977142334, "logps/chosen": -340.0317687988281, "logps/rejected": -356.4787292480469, "loss": 2.7425, "rewards/accuracies": 0.5, "rewards/chosen": -3.372425079345703, "rewards/margins": 0.3195223808288574, "rewards/rejected": -3.6919474601745605, "step": 1477 }, { "epoch": 0.23, "learning_rate": 1.3063309300274912e-05, "logits/chosen": -2.8341100215911865, "logits/rejected": -2.044095039367676, "logps/chosen": -312.48040771484375, "logps/rejected": -249.29925537109375, "loss": 3.1195, "rewards/accuracies": 0.5, "rewards/chosen": -1.847104787826538, "rewards/margins": 0.34355926513671875, "rewards/rejected": -2.190664052963257, "step": 1478 }, { "epoch": 0.23, "learning_rate": 1.3062575859743763e-05, "logits/chosen": -2.6670069694519043, "logits/rejected": -2.9757885932922363, "logps/chosen": -57.8069953918457, "logps/rejected": -191.3788299560547, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 0.7545328736305237, "rewards/margins": 3.5320167541503906, "rewards/rejected": -2.7774837017059326, "step": 1479 }, { "epoch": 0.23, "learning_rate": 1.3061842419212615e-05, "logits/chosen": -2.3718528747558594, "logits/rejected": -2.917107582092285, "logps/chosen": -139.4407958984375, "logps/rejected": -222.154052734375, "loss": 0.1529, "rewards/accuracies": 1.0, "rewards/chosen": -0.8221756219863892, "rewards/margins": 3.513627290725708, "rewards/rejected": -4.335803031921387, "step": 1480 }, { "epoch": 0.23, "learning_rate": 1.3061108978681467e-05, "logits/chosen": -2.7226791381835938, "logits/rejected": -2.852797746658325, "logps/chosen": -179.31788635253906, "logps/rejected": -191.4311065673828, "loss": 1.8372, "rewards/accuracies": 0.5, "rewards/chosen": -0.5795612931251526, "rewards/margins": 2.3849143981933594, "rewards/rejected": -2.964475631713867, "step": 1481 }, { "epoch": 0.23, "learning_rate": 1.3060375538150319e-05, "logits/chosen": -2.4554622173309326, "logits/rejected": -3.129549026489258, "logps/chosen": -390.4283447265625, "logps/rejected": -472.760009765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.20648041367530823, "rewards/margins": 7.732497215270996, "rewards/rejected": -7.938977241516113, "step": 1482 }, { "epoch": 0.23, "learning_rate": 1.3059642097619171e-05, "logits/chosen": -2.114406108856201, "logits/rejected": -2.1894702911376953, "logps/chosen": -388.6011047363281, "logps/rejected": -325.83392333984375, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -1.1102612018585205, "rewards/margins": 4.010128974914551, "rewards/rejected": -5.12039041519165, "step": 1483 }, { "epoch": 0.23, "learning_rate": 1.3058908657088024e-05, "logits/chosen": -1.3072669506072998, "logits/rejected": -2.812258005142212, "logps/chosen": -55.922935485839844, "logps/rejected": -344.0438537597656, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.4775869846343994, "rewards/margins": 8.017388343811035, "rewards/rejected": -6.539801597595215, "step": 1484 }, { "epoch": 0.23, "learning_rate": 1.3058175216556876e-05, "logits/chosen": -2.181713104248047, "logits/rejected": -2.3165230751037598, "logps/chosen": -137.02935791015625, "logps/rejected": -214.39761352539062, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 1.0376710891723633, "rewards/margins": 6.475111961364746, "rewards/rejected": -5.437440872192383, "step": 1485 }, { "epoch": 0.23, "learning_rate": 1.3057441776025728e-05, "logits/chosen": -2.520292282104492, "logits/rejected": -3.018087863922119, "logps/chosen": -46.66529846191406, "logps/rejected": -335.8047180175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.7763410806655884, "rewards/margins": 9.216928482055664, "rewards/rejected": -7.440587997436523, "step": 1486 }, { "epoch": 0.23, "learning_rate": 1.305670833549458e-05, "logits/chosen": -2.998786211013794, "logits/rejected": -3.3106119632720947, "logps/chosen": -77.0107650756836, "logps/rejected": -290.68359375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.7972064018249512, "rewards/margins": 6.08772087097168, "rewards/rejected": -5.2905144691467285, "step": 1487 }, { "epoch": 0.23, "learning_rate": 1.3055974894963432e-05, "logits/chosen": -2.280069351196289, "logits/rejected": -2.7508022785186768, "logps/chosen": -241.78558349609375, "logps/rejected": -262.0957336425781, "loss": 0.0472, "rewards/accuracies": 1.0, "rewards/chosen": 1.1596742868423462, "rewards/margins": 4.171286582946777, "rewards/rejected": -3.0116124153137207, "step": 1488 }, { "epoch": 0.23, "learning_rate": 1.3055241454432284e-05, "logits/chosen": -3.196681261062622, "logits/rejected": -2.4508941173553467, "logps/chosen": -417.4683837890625, "logps/rejected": -196.6712188720703, "loss": 0.2895, "rewards/accuracies": 1.0, "rewards/chosen": 0.16271516680717468, "rewards/margins": 2.9371414184570312, "rewards/rejected": -2.774426221847534, "step": 1489 }, { "epoch": 0.23, "learning_rate": 1.3054508013901136e-05, "logits/chosen": -2.2878341674804688, "logits/rejected": -2.957552194595337, "logps/chosen": -30.161460876464844, "logps/rejected": -165.18301391601562, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 1.1811738014221191, "rewards/margins": 5.766377925872803, "rewards/rejected": -4.585204124450684, "step": 1490 }, { "epoch": 0.23, "learning_rate": 1.3053774573369988e-05, "logits/chosen": -3.1659703254699707, "logits/rejected": -2.9010634422302246, "logps/chosen": -211.88671875, "logps/rejected": -136.19790649414062, "loss": 2.0118, "rewards/accuracies": 0.5, "rewards/chosen": -1.4719395637512207, "rewards/margins": 1.9581265449523926, "rewards/rejected": -3.4300661087036133, "step": 1491 }, { "epoch": 0.23, "learning_rate": 1.305304113283884e-05, "logits/chosen": -2.249730348587036, "logits/rejected": -3.2744054794311523, "logps/chosen": -113.45503997802734, "logps/rejected": -390.152587890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.7477463483810425, "rewards/margins": 7.987435340881348, "rewards/rejected": -7.239688873291016, "step": 1492 }, { "epoch": 0.23, "learning_rate": 1.3052307692307693e-05, "logits/chosen": -1.3061314821243286, "logits/rejected": -2.737260103225708, "logps/chosen": -183.32461547851562, "logps/rejected": -436.06610107421875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.5352540016174316, "rewards/margins": 7.943000793457031, "rewards/rejected": -6.407747268676758, "step": 1493 }, { "epoch": 0.23, "learning_rate": 1.3051574251776545e-05, "logits/chosen": -0.6595070362091064, "logits/rejected": -2.1907527446746826, "logps/chosen": -186.95669555664062, "logps/rejected": -785.9580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18770675361156464, "rewards/margins": 8.92599105834961, "rewards/rejected": -8.738285064697266, "step": 1494 }, { "epoch": 0.23, "learning_rate": 1.3050840811245397e-05, "logits/chosen": -2.8668324947357178, "logits/rejected": -1.0606125593185425, "logps/chosen": -265.478515625, "logps/rejected": -38.5794563293457, "loss": 4.9935, "rewards/accuracies": 0.0, "rewards/chosen": -4.519685745239258, "rewards/margins": -4.9799346923828125, "rewards/rejected": 0.46024858951568604, "step": 1495 }, { "epoch": 0.23, "learning_rate": 1.3050107370714249e-05, "logits/chosen": -1.674403190612793, "logits/rejected": -2.8630781173706055, "logps/chosen": -166.82373046875, "logps/rejected": -377.5001525878906, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.8213733434677124, "rewards/margins": 5.918920040130615, "rewards/rejected": -5.097546577453613, "step": 1496 }, { "epoch": 0.23, "learning_rate": 1.30493739301831e-05, "logits/chosen": -2.707038164138794, "logits/rejected": -3.075604200363159, "logps/chosen": -140.85653686523438, "logps/rejected": -281.43365478515625, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": 0.07943496853113174, "rewards/margins": 4.957225799560547, "rewards/rejected": -4.877790927886963, "step": 1497 }, { "epoch": 0.23, "learning_rate": 1.3048640489651952e-05, "logits/chosen": -2.8756473064422607, "logits/rejected": -2.711848258972168, "logps/chosen": -72.17581939697266, "logps/rejected": -164.13035583496094, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.07941064983606339, "rewards/margins": 5.36564826965332, "rewards/rejected": -5.445059299468994, "step": 1498 }, { "epoch": 0.23, "learning_rate": 1.3047907049120804e-05, "logits/chosen": -3.0610053539276123, "logits/rejected": -2.501910924911499, "logps/chosen": -305.4958190917969, "logps/rejected": -298.6742248535156, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": 0.019808202981948853, "rewards/margins": 3.9940781593322754, "rewards/rejected": -3.9742698669433594, "step": 1499 }, { "epoch": 0.23, "learning_rate": 1.3047173608589656e-05, "logits/chosen": -2.2189321517944336, "logits/rejected": -2.778032064437866, "logps/chosen": -132.88107299804688, "logps/rejected": -260.2036437988281, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": 0.16079702973365784, "rewards/margins": 6.232344627380371, "rewards/rejected": -6.071547508239746, "step": 1500 }, { "epoch": 0.23, "learning_rate": 1.3046440168058508e-05, "logits/chosen": -1.2142196893692017, "logits/rejected": -2.35701322555542, "logps/chosen": -260.08062744140625, "logps/rejected": -398.8531799316406, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.4329071044921875, "rewards/margins": 5.506758213043213, "rewards/rejected": -5.9396653175354, "step": 1501 }, { "epoch": 0.23, "learning_rate": 1.3045706727527362e-05, "logits/chosen": -1.9943464994430542, "logits/rejected": -2.441575288772583, "logps/chosen": -134.9496612548828, "logps/rejected": -296.7021789550781, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 1.235081434249878, "rewards/margins": 7.355147361755371, "rewards/rejected": -6.120065689086914, "step": 1502 }, { "epoch": 0.23, "learning_rate": 1.3044973286996214e-05, "logits/chosen": -2.508054733276367, "logits/rejected": -3.068067789077759, "logps/chosen": -98.65637969970703, "logps/rejected": -256.7520446777344, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.6850780248641968, "rewards/margins": 5.246645927429199, "rewards/rejected": -4.561567783355713, "step": 1503 }, { "epoch": 0.23, "learning_rate": 1.3044239846465065e-05, "logits/chosen": -2.6558077335357666, "logits/rejected": -2.675319194793701, "logps/chosen": -208.5814208984375, "logps/rejected": -105.08149719238281, "loss": 3.9619, "rewards/accuracies": 0.5, "rewards/chosen": -3.637908697128296, "rewards/margins": -1.6761012077331543, "rewards/rejected": -1.9618074893951416, "step": 1504 }, { "epoch": 0.23, "learning_rate": 1.3043506405933917e-05, "logits/chosen": -3.0592381954193115, "logits/rejected": -2.9343981742858887, "logps/chosen": -410.6434020996094, "logps/rejected": -365.09375, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": 0.7627608776092529, "rewards/margins": 5.160830974578857, "rewards/rejected": -4.398069858551025, "step": 1505 }, { "epoch": 0.23, "learning_rate": 1.304277296540277e-05, "logits/chosen": -1.4095182418823242, "logits/rejected": -2.899376153945923, "logps/chosen": -9.978546142578125, "logps/rejected": -238.50814819335938, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 1.2638567686080933, "rewards/margins": 6.117652893066406, "rewards/rejected": -4.853796482086182, "step": 1506 }, { "epoch": 0.23, "learning_rate": 1.3042039524871623e-05, "logits/chosen": -2.4184532165527344, "logits/rejected": -2.9689085483551025, "logps/chosen": -134.1315460205078, "logps/rejected": -307.8269958496094, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.9475113153457642, "rewards/margins": 6.432832717895508, "rewards/rejected": -5.485321044921875, "step": 1507 }, { "epoch": 0.23, "learning_rate": 1.3041306084340475e-05, "logits/chosen": -2.2116692066192627, "logits/rejected": -2.564481258392334, "logps/chosen": -141.15818786621094, "logps/rejected": -372.64434814453125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.056812092661857605, "rewards/margins": 7.005535125732422, "rewards/rejected": -6.948722839355469, "step": 1508 }, { "epoch": 0.23, "learning_rate": 1.3040572643809327e-05, "logits/chosen": -2.249008893966675, "logits/rejected": -3.2389535903930664, "logps/chosen": -80.13396453857422, "logps/rejected": -472.76611328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 1.0357639789581299, "rewards/margins": 7.439339637756348, "rewards/rejected": -6.403575420379639, "step": 1509 }, { "epoch": 0.23, "learning_rate": 1.3039839203278178e-05, "logits/chosen": -1.831228256225586, "logits/rejected": -2.4903039932250977, "logps/chosen": -54.45277404785156, "logps/rejected": -188.92857360839844, "loss": 0.7671, "rewards/accuracies": 0.5, "rewards/chosen": -0.13503456115722656, "rewards/margins": 3.3341989517211914, "rewards/rejected": -3.469233512878418, "step": 1510 }, { "epoch": 0.23, "learning_rate": 1.3039105762747032e-05, "logits/chosen": -2.308067560195923, "logits/rejected": -2.885906219482422, "logps/chosen": -254.7725372314453, "logps/rejected": -427.282958984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.7152404189109802, "rewards/margins": 7.622390270233154, "rewards/rejected": -6.907149314880371, "step": 1511 }, { "epoch": 0.24, "learning_rate": 1.3038372322215884e-05, "logits/chosen": -2.8015010356903076, "logits/rejected": -3.188115119934082, "logps/chosen": -54.41874694824219, "logps/rejected": -157.93133544921875, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 0.891754150390625, "rewards/margins": 4.240805625915527, "rewards/rejected": -3.3490519523620605, "step": 1512 }, { "epoch": 0.24, "learning_rate": 1.3037638881684736e-05, "logits/chosen": -2.4921658039093018, "logits/rejected": -2.829272985458374, "logps/chosen": -852.037353515625, "logps/rejected": -736.8356323242188, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -1.575531005859375, "rewards/margins": 4.649287223815918, "rewards/rejected": -6.224818229675293, "step": 1513 }, { "epoch": 0.24, "learning_rate": 1.3036905441153588e-05, "logits/chosen": -2.6775155067443848, "logits/rejected": -3.0889806747436523, "logps/chosen": -142.36849975585938, "logps/rejected": -224.32833862304688, "loss": 2.6323, "rewards/accuracies": 0.5, "rewards/chosen": -2.7765274047851562, "rewards/margins": -0.6201276779174805, "rewards/rejected": -2.1563994884490967, "step": 1514 }, { "epoch": 0.24, "learning_rate": 1.303617200062244e-05, "logits/chosen": -1.989439606666565, "logits/rejected": -2.9474968910217285, "logps/chosen": -171.00759887695312, "logps/rejected": -389.87579345703125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.1810699701309204, "rewards/margins": 8.524238586425781, "rewards/rejected": -8.343169212341309, "step": 1515 }, { "epoch": 0.24, "learning_rate": 1.3035438560091291e-05, "logits/chosen": -1.5935027599334717, "logits/rejected": -2.744755983352661, "logps/chosen": -114.44235229492188, "logps/rejected": -408.2662353515625, "loss": 1.8376, "rewards/accuracies": 0.5, "rewards/chosen": -1.0885330438613892, "rewards/margins": 3.2555909156799316, "rewards/rejected": -4.344123840332031, "step": 1516 }, { "epoch": 0.24, "learning_rate": 1.3034705119560143e-05, "logits/chosen": -2.327782392501831, "logits/rejected": -2.9661195278167725, "logps/chosen": -58.903472900390625, "logps/rejected": -230.98757934570312, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.7790751457214355, "rewards/margins": 5.414616584777832, "rewards/rejected": -4.635541915893555, "step": 1517 }, { "epoch": 0.24, "learning_rate": 1.3033971679028995e-05, "logits/chosen": -2.7027649879455566, "logits/rejected": -3.02826189994812, "logps/chosen": -479.0306701660156, "logps/rejected": -501.2037048339844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.5580791234970093, "rewards/margins": 8.110154151916504, "rewards/rejected": -9.668233871459961, "step": 1518 }, { "epoch": 0.24, "learning_rate": 1.3033238238497847e-05, "logits/chosen": -2.105055093765259, "logits/rejected": -2.7123377323150635, "logps/chosen": -77.1356201171875, "logps/rejected": -150.33856201171875, "loss": 2.0078, "rewards/accuracies": 0.5, "rewards/chosen": -1.0471489429473877, "rewards/margins": 1.3703465461730957, "rewards/rejected": -2.4174954891204834, "step": 1519 }, { "epoch": 0.24, "learning_rate": 1.30325047979667e-05, "logits/chosen": -2.4535715579986572, "logits/rejected": -2.9195804595947266, "logps/chosen": -51.31909942626953, "logps/rejected": -329.1641845703125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.7395723462104797, "rewards/margins": 7.9303107261657715, "rewards/rejected": -7.190738677978516, "step": 1520 }, { "epoch": 0.24, "learning_rate": 1.3031771357435552e-05, "logits/chosen": -2.73241925239563, "logits/rejected": -2.730668067932129, "logps/chosen": -371.2907409667969, "logps/rejected": -412.95330810546875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.6149948239326477, "rewards/margins": 7.470271110534668, "rewards/rejected": -6.855276584625244, "step": 1521 }, { "epoch": 0.24, "learning_rate": 1.3031037916904404e-05, "logits/chosen": -1.6055164337158203, "logits/rejected": -2.7309396266937256, "logps/chosen": -51.695594787597656, "logps/rejected": -253.89456176757812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.04826602339744568, "rewards/margins": 7.748373985290527, "rewards/rejected": -7.700107574462891, "step": 1522 }, { "epoch": 0.24, "learning_rate": 1.3030304476373256e-05, "logits/chosen": -2.545499801635742, "logits/rejected": -1.776705026626587, "logps/chosen": -330.1835632324219, "logps/rejected": -375.89788818359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.12494277954101562, "rewards/margins": 8.188407897949219, "rewards/rejected": -8.063464164733887, "step": 1523 }, { "epoch": 0.24, "learning_rate": 1.3029571035842108e-05, "logits/chosen": -1.7242472171783447, "logits/rejected": -2.810544729232788, "logps/chosen": -223.05787658691406, "logps/rejected": -370.6136169433594, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.925478458404541, "rewards/margins": 6.1564435958862305, "rewards/rejected": -8.08192253112793, "step": 1524 }, { "epoch": 0.24, "learning_rate": 1.302883759531096e-05, "logits/chosen": -0.934177041053772, "logits/rejected": -1.0134637355804443, "logps/chosen": -349.5595703125, "logps/rejected": -155.87477111816406, "loss": 5.1956, "rewards/accuracies": 0.5, "rewards/chosen": -5.002354621887207, "rewards/margins": -3.3908095359802246, "rewards/rejected": -1.6115448474884033, "step": 1525 }, { "epoch": 0.24, "learning_rate": 1.3028104154779812e-05, "logits/chosen": -2.6131410598754883, "logits/rejected": -3.210801362991333, "logps/chosen": -40.792213439941406, "logps/rejected": -240.90211486816406, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 0.9494194984436035, "rewards/margins": 4.768120765686035, "rewards/rejected": -3.8187007904052734, "step": 1526 }, { "epoch": 0.24, "learning_rate": 1.3027370714248664e-05, "logits/chosen": -3.2463173866271973, "logits/rejected": -3.2402915954589844, "logps/chosen": -298.15728759765625, "logps/rejected": -297.85833740234375, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": -0.755391001701355, "rewards/margins": 3.8061599731445312, "rewards/rejected": -4.561551094055176, "step": 1527 }, { "epoch": 0.24, "learning_rate": 1.3026637273717516e-05, "logits/chosen": -3.183358907699585, "logits/rejected": -3.2881383895874023, "logps/chosen": -241.47230529785156, "logps/rejected": -297.233154296875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.7589011192321777, "rewards/margins": 6.953274726867676, "rewards/rejected": -6.194374084472656, "step": 1528 }, { "epoch": 0.24, "learning_rate": 1.302590383318637e-05, "logits/chosen": -2.596714496612549, "logits/rejected": -2.9549672603607178, "logps/chosen": -60.24940490722656, "logps/rejected": -216.12326049804688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.667510449886322, "rewards/margins": 7.036841869354248, "rewards/rejected": -6.369331359863281, "step": 1529 }, { "epoch": 0.24, "learning_rate": 1.3025170392655221e-05, "logits/chosen": -2.024216651916504, "logits/rejected": -2.9530115127563477, "logps/chosen": -395.68206787109375, "logps/rejected": -531.9263305664062, "loss": 2.3037, "rewards/accuracies": 0.5, "rewards/chosen": -4.2686872482299805, "rewards/margins": 2.142716884613037, "rewards/rejected": -6.411404132843018, "step": 1530 }, { "epoch": 0.24, "learning_rate": 1.3024436952124073e-05, "logits/chosen": -2.416682720184326, "logits/rejected": -2.8500258922576904, "logps/chosen": -117.70310974121094, "logps/rejected": -251.30995178222656, "loss": 2.0097, "rewards/accuracies": 0.5, "rewards/chosen": -1.565131425857544, "rewards/margins": 2.652677059173584, "rewards/rejected": -4.217808723449707, "step": 1531 }, { "epoch": 0.24, "learning_rate": 1.3023703511592925e-05, "logits/chosen": -2.9035937786102295, "logits/rejected": -3.0752861499786377, "logps/chosen": -305.6565246582031, "logps/rejected": -375.0286865234375, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -0.546137273311615, "rewards/margins": 4.011310577392578, "rewards/rejected": -4.557447910308838, "step": 1532 }, { "epoch": 0.24, "learning_rate": 1.3022970071061777e-05, "logits/chosen": -2.8231770992279053, "logits/rejected": -2.9457802772521973, "logps/chosen": -155.92263793945312, "logps/rejected": -275.23944091796875, "loss": 0.0403, "rewards/accuracies": 1.0, "rewards/chosen": -0.004716694355010986, "rewards/margins": 4.173017501831055, "rewards/rejected": -4.177734375, "step": 1533 }, { "epoch": 0.24, "learning_rate": 1.3022236630530629e-05, "logits/chosen": -0.44923272728919983, "logits/rejected": -2.702036142349243, "logps/chosen": -61.9979248046875, "logps/rejected": -725.52685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.9862270355224609, "rewards/margins": 11.153715133666992, "rewards/rejected": -10.167488098144531, "step": 1534 }, { "epoch": 0.24, "learning_rate": 1.302150318999948e-05, "logits/chosen": -2.164949417114258, "logits/rejected": -2.79318904876709, "logps/chosen": -324.76202392578125, "logps/rejected": -350.1361389160156, "loss": 0.8056, "rewards/accuracies": 0.5, "rewards/chosen": -0.27242887020111084, "rewards/margins": 3.772036552429199, "rewards/rejected": -4.0444655418396, "step": 1535 }, { "epoch": 0.24, "learning_rate": 1.3020769749468332e-05, "logits/chosen": -2.237637996673584, "logits/rejected": -2.9178202152252197, "logps/chosen": -348.0279846191406, "logps/rejected": -280.1299743652344, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": 0.2329285591840744, "rewards/margins": 3.0944461822509766, "rewards/rejected": -2.8615174293518066, "step": 1536 }, { "epoch": 0.24, "learning_rate": 1.3020036308937184e-05, "logits/chosen": -1.8189668655395508, "logits/rejected": -2.774951934814453, "logps/chosen": -441.1337585449219, "logps/rejected": -515.1275634765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.20267599821090698, "rewards/margins": 8.322936058044434, "rewards/rejected": -8.525611877441406, "step": 1537 }, { "epoch": 0.24, "learning_rate": 1.3019302868406038e-05, "logits/chosen": -3.117169141769409, "logits/rejected": -2.962658166885376, "logps/chosen": -200.89366149902344, "logps/rejected": -100.58997344970703, "loss": 2.6224, "rewards/accuracies": 0.5, "rewards/chosen": -2.5621721744537354, "rewards/margins": -0.616890549659729, "rewards/rejected": -1.9452816247940063, "step": 1538 }, { "epoch": 0.24, "learning_rate": 1.301856942787489e-05, "logits/chosen": -1.3071246147155762, "logits/rejected": -2.6297318935394287, "logps/chosen": -115.39823913574219, "logps/rejected": -445.947509765625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.4479621648788452, "rewards/margins": 6.2044572830200195, "rewards/rejected": -5.756494998931885, "step": 1539 }, { "epoch": 0.24, "learning_rate": 1.3017835987343742e-05, "logits/chosen": -1.833135724067688, "logits/rejected": -2.634995222091675, "logps/chosen": -113.60220336914062, "logps/rejected": -167.73912048339844, "loss": 1.4367, "rewards/accuracies": 0.5, "rewards/chosen": -0.3169746398925781, "rewards/margins": 2.5416977405548096, "rewards/rejected": -2.8586723804473877, "step": 1540 }, { "epoch": 0.24, "learning_rate": 1.3017102546812593e-05, "logits/chosen": -1.6364325284957886, "logits/rejected": -2.743950128555298, "logps/chosen": -133.1378173828125, "logps/rejected": -432.085205078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.8962082266807556, "rewards/margins": 9.023615837097168, "rewards/rejected": -8.127408027648926, "step": 1541 }, { "epoch": 0.24, "learning_rate": 1.3016369106281447e-05, "logits/chosen": -2.2651114463806152, "logits/rejected": -2.981981039047241, "logps/chosen": -144.263671875, "logps/rejected": -425.8219299316406, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7589935660362244, "rewards/margins": 7.065556526184082, "rewards/rejected": -7.824549674987793, "step": 1542 }, { "epoch": 0.24, "learning_rate": 1.3015635665750299e-05, "logits/chosen": -2.315509557723999, "logits/rejected": -2.7480783462524414, "logps/chosen": -11.818000793457031, "logps/rejected": -129.0349578857422, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 0.9286371469497681, "rewards/margins": 4.441507339477539, "rewards/rejected": -3.5128700733184814, "step": 1543 }, { "epoch": 0.24, "learning_rate": 1.301490222521915e-05, "logits/chosen": -2.6145782470703125, "logits/rejected": -1.7980122566223145, "logps/chosen": -612.9447021484375, "logps/rejected": -255.04229736328125, "loss": 2.9187, "rewards/accuracies": 0.5, "rewards/chosen": -2.554694414138794, "rewards/margins": -1.1789133548736572, "rewards/rejected": -1.3757809400558472, "step": 1544 }, { "epoch": 0.24, "learning_rate": 1.3014168784688003e-05, "logits/chosen": -2.726078748703003, "logits/rejected": -1.0725529193878174, "logps/chosen": -425.2646179199219, "logps/rejected": -112.8192138671875, "loss": 0.1539, "rewards/accuracies": 1.0, "rewards/chosen": -0.650061845779419, "rewards/margins": 1.93047034740448, "rewards/rejected": -2.5805320739746094, "step": 1545 }, { "epoch": 0.24, "learning_rate": 1.3013435344156854e-05, "logits/chosen": -2.99607253074646, "logits/rejected": -2.590298652648926, "logps/chosen": -163.06063842773438, "logps/rejected": -196.56649780273438, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.0991816520690918, "rewards/margins": 6.39351749420166, "rewards/rejected": -7.49269962310791, "step": 1546 }, { "epoch": 0.24, "learning_rate": 1.3012701903625708e-05, "logits/chosen": -2.930976629257202, "logits/rejected": -2.7992804050445557, "logps/chosen": -134.8444366455078, "logps/rejected": -226.68333435058594, "loss": 2.3391, "rewards/accuracies": 0.5, "rewards/chosen": -2.501532793045044, "rewards/margins": -0.653694748878479, "rewards/rejected": -1.8478381633758545, "step": 1547 }, { "epoch": 0.24, "learning_rate": 1.301196846309456e-05, "logits/chosen": -1.4847251176834106, "logits/rejected": -2.577735662460327, "logps/chosen": -134.02978515625, "logps/rejected": -396.13665771484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.5746536254882812, "rewards/margins": 8.579142570495605, "rewards/rejected": -8.004488945007324, "step": 1548 }, { "epoch": 0.24, "learning_rate": 1.3011235022563412e-05, "logits/chosen": -2.6881020069122314, "logits/rejected": -1.6864919662475586, "logps/chosen": -201.2778778076172, "logps/rejected": -230.12368774414062, "loss": 2.9538, "rewards/accuracies": 0.5, "rewards/chosen": -2.7790818214416504, "rewards/margins": 2.254711151123047, "rewards/rejected": -5.033792495727539, "step": 1549 }, { "epoch": 0.24, "learning_rate": 1.3010501582032264e-05, "logits/chosen": -2.7856199741363525, "logits/rejected": -3.082414150238037, "logps/chosen": -362.7206115722656, "logps/rejected": -534.7149047851562, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.3267693519592285, "rewards/margins": 5.348398208618164, "rewards/rejected": -6.675168037414551, "step": 1550 }, { "epoch": 0.24, "learning_rate": 1.3009768141501116e-05, "logits/chosen": -2.653827428817749, "logits/rejected": -2.3495593070983887, "logps/chosen": -400.15118408203125, "logps/rejected": -337.62786865234375, "loss": 0.1486, "rewards/accuracies": 1.0, "rewards/chosen": -2.29826283454895, "rewards/margins": 2.742494821548462, "rewards/rejected": -5.040757656097412, "step": 1551 }, { "epoch": 0.24, "learning_rate": 1.3009034700969967e-05, "logits/chosen": -3.0263733863830566, "logits/rejected": -2.769610643386841, "logps/chosen": -324.9856262207031, "logps/rejected": -135.98008728027344, "loss": 0.052, "rewards/accuracies": 1.0, "rewards/chosen": -1.2287026643753052, "rewards/margins": 3.1357874870300293, "rewards/rejected": -4.364490509033203, "step": 1552 }, { "epoch": 0.24, "learning_rate": 1.300830126043882e-05, "logits/chosen": -0.9870803952217102, "logits/rejected": -1.7521697282791138, "logps/chosen": -162.75827026367188, "logps/rejected": -297.5181884765625, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": 0.3919311463832855, "rewards/margins": 4.638367652893066, "rewards/rejected": -4.246436595916748, "step": 1553 }, { "epoch": 0.24, "learning_rate": 1.3007567819907671e-05, "logits/chosen": -2.6836788654327393, "logits/rejected": -1.5862376689910889, "logps/chosen": -119.55162048339844, "logps/rejected": -105.74246215820312, "loss": 0.6772, "rewards/accuracies": 0.5, "rewards/chosen": -0.22674435377120972, "rewards/margins": 1.4366188049316406, "rewards/rejected": -1.6633632183074951, "step": 1554 }, { "epoch": 0.24, "learning_rate": 1.3006834379376525e-05, "logits/chosen": -2.158780813217163, "logits/rejected": -3.127910852432251, "logps/chosen": -203.87164306640625, "logps/rejected": -363.556884765625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.3255203366279602, "rewards/margins": 6.70214319229126, "rewards/rejected": -6.376623153686523, "step": 1555 }, { "epoch": 0.24, "learning_rate": 1.3006100938845377e-05, "logits/chosen": -2.2208290100097656, "logits/rejected": -3.029620885848999, "logps/chosen": -151.0910186767578, "logps/rejected": -322.881103515625, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 1.4076011180877686, "rewards/margins": 4.9004669189453125, "rewards/rejected": -3.492866039276123, "step": 1556 }, { "epoch": 0.24, "learning_rate": 1.3005367498314229e-05, "logits/chosen": -2.8272528648376465, "logits/rejected": -2.00319242477417, "logps/chosen": -332.77593994140625, "logps/rejected": -141.73963928222656, "loss": 3.9321, "rewards/accuracies": 0.5, "rewards/chosen": -4.759974956512451, "rewards/margins": -1.605905294418335, "rewards/rejected": -3.154069423675537, "step": 1557 }, { "epoch": 0.24, "learning_rate": 1.300463405778308e-05, "logits/chosen": -2.6047630310058594, "logits/rejected": -3.0092010498046875, "logps/chosen": -154.5594024658203, "logps/rejected": -248.36013793945312, "loss": 3.3653, "rewards/accuracies": 0.5, "rewards/chosen": -2.87841796875, "rewards/margins": -1.2054176330566406, "rewards/rejected": -1.6730003356933594, "step": 1558 }, { "epoch": 0.24, "learning_rate": 1.3003900617251932e-05, "logits/chosen": -2.1017720699310303, "logits/rejected": -3.1268162727355957, "logps/chosen": -107.33985137939453, "logps/rejected": -361.483642578125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.34596407413482666, "rewards/margins": 5.308651924133301, "rewards/rejected": -5.654615879058838, "step": 1559 }, { "epoch": 0.24, "learning_rate": 1.3003167176720784e-05, "logits/chosen": -2.7048730850219727, "logits/rejected": -1.4257196187973022, "logps/chosen": -223.7129364013672, "logps/rejected": -158.01156616210938, "loss": 3.5737, "rewards/accuracies": 0.5, "rewards/chosen": -3.2319440841674805, "rewards/margins": -1.5570118427276611, "rewards/rejected": -1.6749321222305298, "step": 1560 }, { "epoch": 0.24, "learning_rate": 1.3002433736189636e-05, "logits/chosen": -1.602673053741455, "logits/rejected": -2.438481092453003, "logps/chosen": -207.24171447753906, "logps/rejected": -287.82177734375, "loss": 0.0774, "rewards/accuracies": 1.0, "rewards/chosen": 0.5014331936836243, "rewards/margins": 3.9531161785125732, "rewards/rejected": -3.4516830444335938, "step": 1561 }, { "epoch": 0.24, "learning_rate": 1.3001700295658488e-05, "logits/chosen": -1.6318014860153198, "logits/rejected": -2.9083824157714844, "logps/chosen": -14.765515327453613, "logps/rejected": -212.82424926757812, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 0.48496097326278687, "rewards/margins": 5.087129592895508, "rewards/rejected": -4.602168083190918, "step": 1562 }, { "epoch": 0.24, "learning_rate": 1.300096685512734e-05, "logits/chosen": -1.524169683456421, "logits/rejected": -2.433992862701416, "logps/chosen": -127.12618255615234, "logps/rejected": -475.3853759765625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.2808818817138672, "rewards/margins": 7.2600202560424805, "rewards/rejected": -6.979138374328613, "step": 1563 }, { "epoch": 0.24, "learning_rate": 1.3000233414596193e-05, "logits/chosen": -2.2034990787506104, "logits/rejected": -2.9241909980773926, "logps/chosen": -618.714599609375, "logps/rejected": -712.6704711914062, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.38121718168258667, "rewards/margins": 7.401154518127441, "rewards/rejected": -7.782371520996094, "step": 1564 }, { "epoch": 0.24, "learning_rate": 1.2999499974065045e-05, "logits/chosen": -2.960867166519165, "logits/rejected": -2.797245502471924, "logps/chosen": -172.9696502685547, "logps/rejected": -109.30421447753906, "loss": 1.6079, "rewards/accuracies": 0.5, "rewards/chosen": -1.8603904247283936, "rewards/margins": -0.7145759463310242, "rewards/rejected": -1.1458145380020142, "step": 1565 }, { "epoch": 0.24, "learning_rate": 1.2998766533533897e-05, "logits/chosen": -2.8341803550720215, "logits/rejected": -2.461686372756958, "logps/chosen": -104.70246887207031, "logps/rejected": -151.90756225585938, "loss": 0.7181, "rewards/accuracies": 0.5, "rewards/chosen": 0.3507870137691498, "rewards/margins": 3.087527275085449, "rewards/rejected": -2.7367401123046875, "step": 1566 }, { "epoch": 0.24, "learning_rate": 1.2998033093002749e-05, "logits/chosen": -1.8701894283294678, "logits/rejected": -2.9192757606506348, "logps/chosen": -292.7211608886719, "logps/rejected": -469.39605712890625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.5144874453544617, "rewards/margins": 4.916049957275391, "rewards/rejected": -5.430537700653076, "step": 1567 }, { "epoch": 0.24, "learning_rate": 1.2997299652471601e-05, "logits/chosen": -2.321153163909912, "logits/rejected": -2.8747591972351074, "logps/chosen": -47.63636779785156, "logps/rejected": -238.45269775390625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 0.6786908507347107, "rewards/margins": 5.328125953674316, "rewards/rejected": -4.649435520172119, "step": 1568 }, { "epoch": 0.24, "learning_rate": 1.2996566211940453e-05, "logits/chosen": -2.92031192779541, "logits/rejected": -2.826962947845459, "logps/chosen": -266.2073669433594, "logps/rejected": -83.02505493164062, "loss": 4.1868, "rewards/accuracies": 0.5, "rewards/chosen": -4.868917942047119, "rewards/margins": -2.4215896129608154, "rewards/rejected": -2.4473280906677246, "step": 1569 }, { "epoch": 0.24, "learning_rate": 1.2995832771409305e-05, "logits/chosen": -1.5257164239883423, "logits/rejected": -2.7143378257751465, "logps/chosen": -64.66566467285156, "logps/rejected": -321.96539306640625, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": 0.7805432081222534, "rewards/margins": 8.318818092346191, "rewards/rejected": -7.538274765014648, "step": 1570 }, { "epoch": 0.24, "learning_rate": 1.2995099330878157e-05, "logits/chosen": -0.9915733933448792, "logits/rejected": -2.267651319503784, "logps/chosen": -158.83314514160156, "logps/rejected": -488.5534973144531, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 0.5499585866928101, "rewards/margins": 5.839405536651611, "rewards/rejected": -5.289446830749512, "step": 1571 }, { "epoch": 0.24, "learning_rate": 1.2994365890347008e-05, "logits/chosen": -2.023827075958252, "logits/rejected": -2.733163356781006, "logps/chosen": -20.413700103759766, "logps/rejected": -282.10662841796875, "loss": 0.9016, "rewards/accuracies": 0.5, "rewards/chosen": 0.9412658214569092, "rewards/margins": 2.2414956092834473, "rewards/rejected": -1.3002296686172485, "step": 1572 }, { "epoch": 0.24, "learning_rate": 1.2993632449815862e-05, "logits/chosen": -2.8105270862579346, "logits/rejected": -3.0095036029815674, "logps/chosen": -125.44220733642578, "logps/rejected": -210.7432861328125, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 1.291182518005371, "rewards/margins": 4.3310136795043945, "rewards/rejected": -3.0398311614990234, "step": 1573 }, { "epoch": 0.24, "learning_rate": 1.2992899009284714e-05, "logits/chosen": -2.674837112426758, "logits/rejected": -2.0125224590301514, "logps/chosen": -666.4641723632812, "logps/rejected": -568.896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.22220009565353394, "rewards/margins": 10.276716232299805, "rewards/rejected": -10.054516792297363, "step": 1574 }, { "epoch": 0.24, "learning_rate": 1.2992165568753566e-05, "logits/chosen": -1.297019362449646, "logits/rejected": -2.6513607501983643, "logps/chosen": -172.33352661132812, "logps/rejected": -386.8281555175781, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.3891395330429077, "rewards/margins": 5.975231170654297, "rewards/rejected": -6.364370822906494, "step": 1575 }, { "epoch": 0.25, "learning_rate": 1.299143212822242e-05, "logits/chosen": -3.249901056289673, "logits/rejected": -2.279137372970581, "logps/chosen": -427.1036071777344, "logps/rejected": -70.52359771728516, "loss": 6.1095, "rewards/accuracies": 0.0, "rewards/chosen": -5.267919540405273, "rewards/margins": -6.107280254364014, "rewards/rejected": 0.8393609523773193, "step": 1576 }, { "epoch": 0.25, "learning_rate": 1.2990698687691271e-05, "logits/chosen": -2.152604341506958, "logits/rejected": -2.817291259765625, "logps/chosen": -177.3590850830078, "logps/rejected": -273.58477783203125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 1.8720759153366089, "rewards/margins": 6.513169288635254, "rewards/rejected": -4.6410932540893555, "step": 1577 }, { "epoch": 0.25, "learning_rate": 1.2989965247160123e-05, "logits/chosen": -2.959862470626831, "logits/rejected": -3.3192708492279053, "logps/chosen": -278.4689025878906, "logps/rejected": -428.58758544921875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.7931241989135742, "rewards/margins": 7.526403903961182, "rewards/rejected": -6.733279705047607, "step": 1578 }, { "epoch": 0.25, "learning_rate": 1.2989231806628975e-05, "logits/chosen": -3.024181842803955, "logits/rejected": -2.8372342586517334, "logps/chosen": -635.46875, "logps/rejected": -565.1268310546875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.9406830072402954, "rewards/margins": 5.600183010101318, "rewards/rejected": -6.540865898132324, "step": 1579 }, { "epoch": 0.25, "learning_rate": 1.2988498366097827e-05, "logits/chosen": -2.8261642456054688, "logits/rejected": -3.1066126823425293, "logps/chosen": -26.046789169311523, "logps/rejected": -175.5442352294922, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": 0.5895676016807556, "rewards/margins": 4.024009704589844, "rewards/rejected": -3.4344420433044434, "step": 1580 }, { "epoch": 0.25, "learning_rate": 1.2987764925566679e-05, "logits/chosen": -2.0190751552581787, "logits/rejected": -2.863778591156006, "logps/chosen": -254.10443115234375, "logps/rejected": -332.175048828125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.06528167426586151, "rewards/margins": 4.985520362854004, "rewards/rejected": -4.920238494873047, "step": 1581 }, { "epoch": 0.25, "learning_rate": 1.2987031485035532e-05, "logits/chosen": -2.5675528049468994, "logits/rejected": -2.7433316707611084, "logps/chosen": -757.4981689453125, "logps/rejected": -588.041015625, "loss": 2.9869, "rewards/accuracies": 0.5, "rewards/chosen": -4.266902446746826, "rewards/margins": 1.2493500709533691, "rewards/rejected": -5.516252517700195, "step": 1582 }, { "epoch": 0.25, "learning_rate": 1.2986298044504384e-05, "logits/chosen": -3.337099552154541, "logits/rejected": -2.7356557846069336, "logps/chosen": -672.69921875, "logps/rejected": -400.8160705566406, "loss": 1.0615, "rewards/accuracies": 0.5, "rewards/chosen": -2.4401657581329346, "rewards/margins": 2.0323326587677, "rewards/rejected": -4.472498416900635, "step": 1583 }, { "epoch": 0.25, "learning_rate": 1.2985564603973236e-05, "logits/chosen": -2.1790380477905273, "logits/rejected": -3.2204489707946777, "logps/chosen": -89.50909423828125, "logps/rejected": -290.5213623046875, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -0.008528143167495728, "rewards/margins": 3.560408115386963, "rewards/rejected": -3.568936347961426, "step": 1584 }, { "epoch": 0.25, "learning_rate": 1.2984831163442088e-05, "logits/chosen": -2.495047092437744, "logits/rejected": -2.0309181213378906, "logps/chosen": -222.31613159179688, "logps/rejected": -323.4781494140625, "loss": 2.3951, "rewards/accuracies": 0.5, "rewards/chosen": -1.7525925636291504, "rewards/margins": 0.461029052734375, "rewards/rejected": -2.2136216163635254, "step": 1585 }, { "epoch": 0.25, "learning_rate": 1.298409772291094e-05, "logits/chosen": -2.9704325199127197, "logits/rejected": -3.0123841762542725, "logps/chosen": -107.53899383544922, "logps/rejected": -157.51377868652344, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": 0.24786525964736938, "rewards/margins": 3.7190492153167725, "rewards/rejected": -3.4711837768554688, "step": 1586 }, { "epoch": 0.25, "learning_rate": 1.2983364282379792e-05, "logits/chosen": -2.555832624435425, "logits/rejected": -2.572521924972534, "logps/chosen": -109.0248794555664, "logps/rejected": -225.8429718017578, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": 0.056653380393981934, "rewards/margins": 3.884047508239746, "rewards/rejected": -3.8273942470550537, "step": 1587 }, { "epoch": 0.25, "learning_rate": 1.2982630841848644e-05, "logits/chosen": -2.7870192527770996, "logits/rejected": -3.1765730381011963, "logps/chosen": -266.7107238769531, "logps/rejected": -370.6065979003906, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.11138686537742615, "rewards/margins": 7.693090438842773, "rewards/rejected": -7.804477691650391, "step": 1588 }, { "epoch": 0.25, "learning_rate": 1.2981897401317495e-05, "logits/chosen": -1.8275434970855713, "logits/rejected": -2.8014206886291504, "logps/chosen": -89.5889892578125, "logps/rejected": -236.4391632080078, "loss": 0.1586, "rewards/accuracies": 1.0, "rewards/chosen": 0.984511137008667, "rewards/margins": 2.5656182765960693, "rewards/rejected": -1.5811071395874023, "step": 1589 }, { "epoch": 0.25, "learning_rate": 1.2981163960786347e-05, "logits/chosen": -2.601341485977173, "logits/rejected": -2.9852495193481445, "logps/chosen": -54.42584228515625, "logps/rejected": -215.6188507080078, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.6866364479064941, "rewards/margins": 4.420768737792969, "rewards/rejected": -3.7341322898864746, "step": 1590 }, { "epoch": 0.25, "learning_rate": 1.2980430520255201e-05, "logits/chosen": -2.6871144771575928, "logits/rejected": -2.2633578777313232, "logps/chosen": -480.0692443847656, "logps/rejected": -501.69012451171875, "loss": 3.2011, "rewards/accuracies": 0.5, "rewards/chosen": -4.6801557540893555, "rewards/margins": -0.8809192180633545, "rewards/rejected": -3.799236297607422, "step": 1591 }, { "epoch": 0.25, "learning_rate": 1.2979697079724053e-05, "logits/chosen": -2.593440532684326, "logits/rejected": -3.1391196250915527, "logps/chosen": -25.665096282958984, "logps/rejected": -179.09075927734375, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": 0.4581946134567261, "rewards/margins": 4.0935139656066895, "rewards/rejected": -3.635319232940674, "step": 1592 }, { "epoch": 0.25, "learning_rate": 1.2978963639192905e-05, "logits/chosen": -3.0995264053344727, "logits/rejected": -2.9135494232177734, "logps/chosen": -261.2821960449219, "logps/rejected": -272.5551452636719, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.6867287158966064, "rewards/margins": 5.240118026733398, "rewards/rejected": -5.926846504211426, "step": 1593 }, { "epoch": 0.25, "learning_rate": 1.2978230198661757e-05, "logits/chosen": -2.7513210773468018, "logits/rejected": -2.4636552333831787, "logps/chosen": -630.6693115234375, "logps/rejected": -565.5469360351562, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.5297329425811768, "rewards/margins": 7.601984977722168, "rewards/rejected": -7.07225227355957, "step": 1594 }, { "epoch": 0.25, "learning_rate": 1.2977496758130608e-05, "logits/chosen": -2.1634883880615234, "logits/rejected": -2.871115207672119, "logps/chosen": -82.53340911865234, "logps/rejected": -219.5330047607422, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.5183641314506531, "rewards/margins": 4.239331245422363, "rewards/rejected": -3.7209672927856445, "step": 1595 }, { "epoch": 0.25, "learning_rate": 1.297676331759946e-05, "logits/chosen": -2.5897603034973145, "logits/rejected": -2.959296941757202, "logps/chosen": -111.196533203125, "logps/rejected": -239.79183959960938, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": 0.448709100484848, "rewards/margins": 4.794585227966309, "rewards/rejected": -4.345876216888428, "step": 1596 }, { "epoch": 0.25, "learning_rate": 1.2976029877068312e-05, "logits/chosen": -2.9135794639587402, "logits/rejected": -2.8565239906311035, "logps/chosen": -392.34613037109375, "logps/rejected": -315.73974609375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": 0.9180053472518921, "rewards/margins": 5.731603622436523, "rewards/rejected": -4.8135986328125, "step": 1597 }, { "epoch": 0.25, "learning_rate": 1.2975296436537164e-05, "logits/chosen": -1.6557183265686035, "logits/rejected": -2.79181170463562, "logps/chosen": -436.3518371582031, "logps/rejected": -379.1228332519531, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.9524322748184204, "rewards/margins": 7.247840404510498, "rewards/rejected": -6.295408248901367, "step": 1598 }, { "epoch": 0.25, "learning_rate": 1.2974562996006016e-05, "logits/chosen": -2.8380043506622314, "logits/rejected": -2.6398847103118896, "logps/chosen": -118.43922424316406, "logps/rejected": -147.33352661132812, "loss": 2.5389, "rewards/accuracies": 0.5, "rewards/chosen": -1.814090371131897, "rewards/margins": -1.1019536256790161, "rewards/rejected": -0.7121368050575256, "step": 1599 }, { "epoch": 0.25, "learning_rate": 1.297382955547487e-05, "logits/chosen": -2.4924168586730957, "logits/rejected": -2.5823469161987305, "logps/chosen": -158.95480346679688, "logps/rejected": -250.4244842529297, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.4784111976623535, "rewards/margins": 5.926836967468262, "rewards/rejected": -5.448425769805908, "step": 1600 }, { "epoch": 0.25, "learning_rate": 1.2973096114943721e-05, "logits/chosen": -2.925826072692871, "logits/rejected": -2.9897561073303223, "logps/chosen": -86.81315612792969, "logps/rejected": -62.35218811035156, "loss": 2.9349, "rewards/accuracies": 0.5, "rewards/chosen": -1.7980766296386719, "rewards/margins": 0.009672164916992188, "rewards/rejected": -1.807748794555664, "step": 1601 }, { "epoch": 0.25, "learning_rate": 1.2972362674412573e-05, "logits/chosen": -2.041766881942749, "logits/rejected": -2.8643863201141357, "logps/chosen": -142.2993927001953, "logps/rejected": -246.7030029296875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.25948992371559143, "rewards/margins": 5.436156272888184, "rewards/rejected": -5.176666259765625, "step": 1602 }, { "epoch": 0.25, "learning_rate": 1.2971629233881425e-05, "logits/chosen": -1.51535165309906, "logits/rejected": -2.836775064468384, "logps/chosen": -78.05889892578125, "logps/rejected": -292.5198974609375, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": 0.23213157057762146, "rewards/margins": 4.5724101066589355, "rewards/rejected": -4.340278625488281, "step": 1603 }, { "epoch": 0.25, "learning_rate": 1.2970895793350277e-05, "logits/chosen": -2.7105581760406494, "logits/rejected": -3.1624481678009033, "logps/chosen": -101.20516967773438, "logps/rejected": -318.6199951171875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.06757837533950806, "rewards/margins": 5.245909690856934, "rewards/rejected": -5.313488006591797, "step": 1604 }, { "epoch": 0.25, "learning_rate": 1.2970162352819129e-05, "logits/chosen": -2.528660297393799, "logits/rejected": -3.273252248764038, "logps/chosen": -60.64853286743164, "logps/rejected": -194.28173828125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": 0.03818865120410919, "rewards/margins": 4.917180061340332, "rewards/rejected": -4.87899112701416, "step": 1605 }, { "epoch": 0.25, "learning_rate": 1.296942891228798e-05, "logits/chosen": -1.2397373914718628, "logits/rejected": -3.0353026390075684, "logps/chosen": -217.99534606933594, "logps/rejected": -612.89404296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.33236241340637207, "rewards/margins": 9.52787971496582, "rewards/rejected": -9.195517539978027, "step": 1606 }, { "epoch": 0.25, "learning_rate": 1.2968695471756833e-05, "logits/chosen": -2.885124683380127, "logits/rejected": -2.331636667251587, "logps/chosen": -583.408935546875, "logps/rejected": -340.8980407714844, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.19339293241500854, "rewards/margins": 6.4683027267456055, "rewards/rejected": -6.274909973144531, "step": 1607 }, { "epoch": 0.25, "learning_rate": 1.2967962031225686e-05, "logits/chosen": -3.3172662258148193, "logits/rejected": -3.2712771892547607, "logps/chosen": -95.2169189453125, "logps/rejected": -92.04517364501953, "loss": 2.2024, "rewards/accuracies": 0.5, "rewards/chosen": -1.6946899890899658, "rewards/margins": -0.1511695384979248, "rewards/rejected": -1.5435206890106201, "step": 1608 }, { "epoch": 0.25, "learning_rate": 1.2967228590694538e-05, "logits/chosen": -1.0427954196929932, "logits/rejected": -2.684776544570923, "logps/chosen": -93.77470397949219, "logps/rejected": -423.3585205078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.46599310636520386, "rewards/margins": 7.141745567321777, "rewards/rejected": -6.675752639770508, "step": 1609 }, { "epoch": 0.25, "learning_rate": 1.2966495150163392e-05, "logits/chosen": -3.0279860496520996, "logits/rejected": -2.9531214237213135, "logps/chosen": -379.1832580566406, "logps/rejected": -390.99041748046875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.28597742319107056, "rewards/margins": 6.5696001052856445, "rewards/rejected": -6.85557746887207, "step": 1610 }, { "epoch": 0.25, "learning_rate": 1.2965761709632244e-05, "logits/chosen": -2.1668670177459717, "logits/rejected": -2.982635259628296, "logps/chosen": -292.40240478515625, "logps/rejected": -494.6607360839844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.4325927793979645, "rewards/margins": 9.234725952148438, "rewards/rejected": -8.802133560180664, "step": 1611 }, { "epoch": 0.25, "learning_rate": 1.2965028269101095e-05, "logits/chosen": -2.775919198989868, "logits/rejected": -3.1122756004333496, "logps/chosen": -226.32179260253906, "logps/rejected": -332.35986328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.03758352994918823, "rewards/margins": 7.596409320831299, "rewards/rejected": -7.558825492858887, "step": 1612 }, { "epoch": 0.25, "learning_rate": 1.2964294828569947e-05, "logits/chosen": -2.2196974754333496, "logits/rejected": -2.848766803741455, "logps/chosen": -197.52813720703125, "logps/rejected": -513.0111694335938, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.5562406778335571, "rewards/margins": 8.145673751831055, "rewards/rejected": -8.70191478729248, "step": 1613 }, { "epoch": 0.25, "learning_rate": 1.29635613880388e-05, "logits/chosen": -2.7079834938049316, "logits/rejected": -2.880485773086548, "logps/chosen": -76.54280853271484, "logps/rejected": -207.43258666992188, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.9107747077941895, "rewards/margins": 6.305175304412842, "rewards/rejected": -5.394400596618652, "step": 1614 }, { "epoch": 0.25, "learning_rate": 1.2962827947507651e-05, "logits/chosen": -2.4873902797698975, "logits/rejected": -3.0568408966064453, "logps/chosen": -184.74652099609375, "logps/rejected": -368.11737060546875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.5161590576171875, "rewards/margins": 5.029367446899414, "rewards/rejected": -6.545526504516602, "step": 1615 }, { "epoch": 0.25, "learning_rate": 1.2962094506976503e-05, "logits/chosen": -2.5874218940734863, "logits/rejected": -2.931229591369629, "logps/chosen": -234.58636474609375, "logps/rejected": -247.2703094482422, "loss": 1.3532, "rewards/accuracies": 0.5, "rewards/chosen": -1.3719069957733154, "rewards/margins": 2.8042726516723633, "rewards/rejected": -4.1761794090271, "step": 1616 }, { "epoch": 0.25, "learning_rate": 1.2961361066445355e-05, "logits/chosen": -2.9450912475585938, "logits/rejected": -2.9887447357177734, "logps/chosen": -76.38178253173828, "logps/rejected": -160.62387084960938, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.7401569485664368, "rewards/margins": 7.035777568817139, "rewards/rejected": -6.295620918273926, "step": 1617 }, { "epoch": 0.25, "learning_rate": 1.2960627625914208e-05, "logits/chosen": -2.7678592205047607, "logits/rejected": -2.701205253601074, "logps/chosen": -145.55816650390625, "logps/rejected": -191.0059814453125, "loss": 0.1177, "rewards/accuracies": 1.0, "rewards/chosen": 0.23850783705711365, "rewards/margins": 4.1052565574646, "rewards/rejected": -3.866748809814453, "step": 1618 }, { "epoch": 0.25, "learning_rate": 1.295989418538306e-05, "logits/chosen": -2.530712366104126, "logits/rejected": -2.8085145950317383, "logps/chosen": -138.2724151611328, "logps/rejected": -73.55514526367188, "loss": 2.0912, "rewards/accuracies": 0.5, "rewards/chosen": -1.4828267097473145, "rewards/margins": 0.1494450569152832, "rewards/rejected": -1.6322718858718872, "step": 1619 }, { "epoch": 0.25, "learning_rate": 1.2959160744851912e-05, "logits/chosen": -2.5114810466766357, "logits/rejected": -2.5549163818359375, "logps/chosen": -161.958251953125, "logps/rejected": -319.8990783691406, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -0.6980827450752258, "rewards/margins": 5.048844337463379, "rewards/rejected": -5.746926784515381, "step": 1620 }, { "epoch": 0.25, "learning_rate": 1.2958427304320764e-05, "logits/chosen": -2.500481605529785, "logits/rejected": -2.9915945529937744, "logps/chosen": -198.3826141357422, "logps/rejected": -392.691650390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2352173328399658, "rewards/margins": 6.774144172668457, "rewards/rejected": -8.009361267089844, "step": 1621 }, { "epoch": 0.25, "learning_rate": 1.2957693863789616e-05, "logits/chosen": -2.686833381652832, "logits/rejected": -2.751133441925049, "logps/chosen": -105.12033081054688, "logps/rejected": -109.35884857177734, "loss": 0.6854, "rewards/accuracies": 0.5, "rewards/chosen": -0.44034481048583984, "rewards/margins": 2.1445415019989014, "rewards/rejected": -2.5848865509033203, "step": 1622 }, { "epoch": 0.25, "learning_rate": 1.2956960423258468e-05, "logits/chosen": -2.2187139987945557, "logits/rejected": -3.0577330589294434, "logps/chosen": -150.52963256835938, "logps/rejected": -320.4869384765625, "loss": 0.0531, "rewards/accuracies": 1.0, "rewards/chosen": -1.6156296730041504, "rewards/margins": 4.166375637054443, "rewards/rejected": -5.782005310058594, "step": 1623 }, { "epoch": 0.25, "learning_rate": 1.295622698272732e-05, "logits/chosen": -2.5445199012756348, "logits/rejected": -2.7537105083465576, "logps/chosen": -82.55357360839844, "logps/rejected": -216.1666717529297, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.8316131830215454, "rewards/margins": 6.114239692687988, "rewards/rejected": -5.282626152038574, "step": 1624 }, { "epoch": 0.25, "learning_rate": 1.2955493542196172e-05, "logits/chosen": -2.2199573516845703, "logits/rejected": -2.618626594543457, "logps/chosen": -68.21873474121094, "logps/rejected": -172.18731689453125, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": 0.4750700294971466, "rewards/margins": 4.47996187210083, "rewards/rejected": -4.004891872406006, "step": 1625 }, { "epoch": 0.25, "learning_rate": 1.2954760101665023e-05, "logits/chosen": -2.756143569946289, "logits/rejected": -2.7830774784088135, "logps/chosen": -377.4329833984375, "logps/rejected": -308.56146240234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0997329950332642, "rewards/margins": 7.186798095703125, "rewards/rejected": -8.286531448364258, "step": 1626 }, { "epoch": 0.25, "learning_rate": 1.2954026661133877e-05, "logits/chosen": -2.5952131748199463, "logits/rejected": -2.8854877948760986, "logps/chosen": -123.49528503417969, "logps/rejected": -173.4549102783203, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 0.27731743454933167, "rewards/margins": 4.66414213180542, "rewards/rejected": -4.386824607849121, "step": 1627 }, { "epoch": 0.25, "learning_rate": 1.2953293220602729e-05, "logits/chosen": -2.898534059524536, "logits/rejected": -2.730192184448242, "logps/chosen": -71.27284240722656, "logps/rejected": -156.892822265625, "loss": 1.8109, "rewards/accuracies": 0.5, "rewards/chosen": -0.30811017751693726, "rewards/margins": 1.981531023979187, "rewards/rejected": -2.2896411418914795, "step": 1628 }, { "epoch": 0.25, "learning_rate": 1.295255978007158e-05, "logits/chosen": -2.7033846378326416, "logits/rejected": -2.718027114868164, "logps/chosen": -353.42584228515625, "logps/rejected": -270.76214599609375, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": -0.043285369873046875, "rewards/margins": 3.970771312713623, "rewards/rejected": -4.01405668258667, "step": 1629 }, { "epoch": 0.25, "learning_rate": 1.2951826339540433e-05, "logits/chosen": -2.873347520828247, "logits/rejected": -2.01023006439209, "logps/chosen": -367.4784851074219, "logps/rejected": -195.3734130859375, "loss": 1.4072, "rewards/accuracies": 0.5, "rewards/chosen": -3.5240044593811035, "rewards/margins": 1.1108050346374512, "rewards/rejected": -4.634809494018555, "step": 1630 }, { "epoch": 0.25, "learning_rate": 1.2951092899009284e-05, "logits/chosen": -2.9523205757141113, "logits/rejected": -2.8846917152404785, "logps/chosen": -204.91030883789062, "logps/rejected": -441.37298583984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.0479682981967926, "rewards/margins": 9.954730987548828, "rewards/rejected": -9.906763076782227, "step": 1631 }, { "epoch": 0.25, "learning_rate": 1.2950359458478136e-05, "logits/chosen": -3.2985124588012695, "logits/rejected": -2.902437925338745, "logps/chosen": -592.3409423828125, "logps/rejected": -432.501953125, "loss": 5.0983, "rewards/accuracies": 0.5, "rewards/chosen": -6.811413764953613, "rewards/margins": -1.8916702270507812, "rewards/rejected": -4.919743537902832, "step": 1632 }, { "epoch": 0.25, "learning_rate": 1.2949626017946988e-05, "logits/chosen": -1.6810457706451416, "logits/rejected": -2.9185538291931152, "logps/chosen": -148.28778076171875, "logps/rejected": -284.41131591796875, "loss": 0.0719, "rewards/accuracies": 1.0, "rewards/chosen": -0.4445739984512329, "rewards/margins": 3.9382262229919434, "rewards/rejected": -4.382800102233887, "step": 1633 }, { "epoch": 0.25, "learning_rate": 1.294889257741584e-05, "logits/chosen": -2.4097611904144287, "logits/rejected": -2.6364190578460693, "logps/chosen": -606.8679809570312, "logps/rejected": -490.55059814453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.08734285831451416, "rewards/margins": 9.042795181274414, "rewards/rejected": -9.130138397216797, "step": 1634 }, { "epoch": 0.25, "learning_rate": 1.2948159136884692e-05, "logits/chosen": -2.551239490509033, "logits/rejected": -2.9211978912353516, "logps/chosen": -203.8063507080078, "logps/rejected": -302.5110778808594, "loss": 3.5905, "rewards/accuracies": 0.5, "rewards/chosen": -3.2897214889526367, "rewards/margins": 2.637388229370117, "rewards/rejected": -5.927109718322754, "step": 1635 }, { "epoch": 0.25, "learning_rate": 1.2947425696353546e-05, "logits/chosen": -2.8856661319732666, "logits/rejected": -2.981663942337036, "logps/chosen": -95.2523193359375, "logps/rejected": -202.7074432373047, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.252297043800354, "rewards/margins": 4.7262773513793945, "rewards/rejected": -4.978574752807617, "step": 1636 }, { "epoch": 0.25, "learning_rate": 1.2946692255822397e-05, "logits/chosen": -2.5373568534851074, "logits/rejected": -0.9746599197387695, "logps/chosen": -210.18377685546875, "logps/rejected": -9.257014274597168, "loss": 3.4983, "rewards/accuracies": 0.0, "rewards/chosen": -2.138789415359497, "rewards/margins": -3.413506031036377, "rewards/rejected": 1.2747166156768799, "step": 1637 }, { "epoch": 0.25, "learning_rate": 1.294595881529125e-05, "logits/chosen": -0.9156205654144287, "logits/rejected": -2.5492160320281982, "logps/chosen": -81.74919128417969, "logps/rejected": -377.4676818847656, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 0.5176658630371094, "rewards/margins": 5.754311561584473, "rewards/rejected": -5.236645698547363, "step": 1638 }, { "epoch": 0.25, "learning_rate": 1.2945225374760101e-05, "logits/chosen": -0.9428320527076721, "logits/rejected": -2.761169195175171, "logps/chosen": -22.5306339263916, "logps/rejected": -208.18011474609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.7644113302230835, "rewards/margins": 7.328789710998535, "rewards/rejected": -6.56437873840332, "step": 1639 }, { "epoch": 0.26, "learning_rate": 1.2944491934228953e-05, "logits/chosen": -2.491989850997925, "logits/rejected": -3.0956695079803467, "logps/chosen": -350.7406921386719, "logps/rejected": -330.3112487792969, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.44749295711517334, "rewards/margins": 6.543400764465332, "rewards/rejected": -6.990893363952637, "step": 1640 }, { "epoch": 0.26, "learning_rate": 1.2943758493697805e-05, "logits/chosen": -1.3778382539749146, "logits/rejected": -2.7372052669525146, "logps/chosen": -62.87036895751953, "logps/rejected": -346.3360290527344, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.27374184131622314, "rewards/margins": 7.35118293762207, "rewards/rejected": -7.0774407386779785, "step": 1641 }, { "epoch": 0.26, "learning_rate": 1.2943025053166659e-05, "logits/chosen": -2.458101511001587, "logits/rejected": -2.9834141731262207, "logps/chosen": -100.89540100097656, "logps/rejected": -225.8934326171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.10610237717628479, "rewards/margins": 6.574263572692871, "rewards/rejected": -6.680365562438965, "step": 1642 }, { "epoch": 0.26, "learning_rate": 1.294229161263551e-05, "logits/chosen": -2.4434690475463867, "logits/rejected": -2.9554443359375, "logps/chosen": -57.63789367675781, "logps/rejected": -221.6780548095703, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 0.13814613223075867, "rewards/margins": 5.069609642028809, "rewards/rejected": -4.931463718414307, "step": 1643 }, { "epoch": 0.26, "learning_rate": 1.2941558172104362e-05, "logits/chosen": -2.454210042953491, "logits/rejected": -3.1283626556396484, "logps/chosen": -226.4233856201172, "logps/rejected": -388.37420654296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.41985607147216797, "rewards/margins": 8.025814056396484, "rewards/rejected": -7.60595703125, "step": 1644 }, { "epoch": 0.26, "learning_rate": 1.2940824731573216e-05, "logits/chosen": -2.8664963245391846, "logits/rejected": -1.891586184501648, "logps/chosen": -625.7926025390625, "logps/rejected": -256.504638671875, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -0.7734138369560242, "rewards/margins": 3.3974738121032715, "rewards/rejected": -4.1708879470825195, "step": 1645 }, { "epoch": 0.26, "learning_rate": 1.2940091291042068e-05, "logits/chosen": -2.6860249042510986, "logits/rejected": -2.6184656620025635, "logps/chosen": -55.03940963745117, "logps/rejected": -121.01396942138672, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 1.1727838516235352, "rewards/margins": 4.710371017456055, "rewards/rejected": -3.5375866889953613, "step": 1646 }, { "epoch": 0.26, "learning_rate": 1.293935785051092e-05, "logits/chosen": -2.47832989692688, "logits/rejected": -2.8335607051849365, "logps/chosen": -131.60964965820312, "logps/rejected": -257.2695007324219, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.1479167938232422, "rewards/margins": 7.363569259643555, "rewards/rejected": -7.2156524658203125, "step": 1647 }, { "epoch": 0.26, "learning_rate": 1.2938624409979771e-05, "logits/chosen": -2.111868381500244, "logits/rejected": -2.8160300254821777, "logps/chosen": -522.9502563476562, "logps/rejected": -393.8677673339844, "loss": 3.1439, "rewards/accuracies": 0.5, "rewards/chosen": -2.779097080230713, "rewards/margins": 1.58315110206604, "rewards/rejected": -4.362247943878174, "step": 1648 }, { "epoch": 0.26, "learning_rate": 1.2937890969448623e-05, "logits/chosen": -2.988811492919922, "logits/rejected": -2.7662477493286133, "logps/chosen": -795.2930908203125, "logps/rejected": -291.110595703125, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": 0.4009445309638977, "rewards/margins": 3.80208158493042, "rewards/rejected": -3.401136875152588, "step": 1649 }, { "epoch": 0.26, "learning_rate": 1.2937157528917475e-05, "logits/chosen": -1.726966142654419, "logits/rejected": -2.5393941402435303, "logps/chosen": -108.03340148925781, "logps/rejected": -209.2583770751953, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": 0.12364885210990906, "rewards/margins": 4.051985740661621, "rewards/rejected": -3.9283368587493896, "step": 1650 }, { "epoch": 0.26, "learning_rate": 1.2936424088386327e-05, "logits/chosen": -2.4739129543304443, "logits/rejected": -2.8815269470214844, "logps/chosen": -195.008544921875, "logps/rejected": -414.250244140625, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": -0.3805748224258423, "rewards/margins": 5.4426655769348145, "rewards/rejected": -5.823240756988525, "step": 1651 }, { "epoch": 0.26, "learning_rate": 1.2935690647855179e-05, "logits/chosen": -2.6273715496063232, "logits/rejected": -1.8241477012634277, "logps/chosen": -329.0730895996094, "logps/rejected": -275.8061828613281, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 1.220573902130127, "rewards/margins": 6.050644874572754, "rewards/rejected": -4.830071449279785, "step": 1652 }, { "epoch": 0.26, "learning_rate": 1.2934957207324033e-05, "logits/chosen": -1.437223196029663, "logits/rejected": -2.5089266300201416, "logps/chosen": -68.89148712158203, "logps/rejected": -250.18511962890625, "loss": 0.129, "rewards/accuracies": 1.0, "rewards/chosen": -0.7520368695259094, "rewards/margins": 3.443902015686035, "rewards/rejected": -4.195939064025879, "step": 1653 }, { "epoch": 0.26, "learning_rate": 1.2934223766792884e-05, "logits/chosen": -2.845430374145508, "logits/rejected": -2.803741216659546, "logps/chosen": -182.8993377685547, "logps/rejected": -258.2973937988281, "loss": 1.4466, "rewards/accuracies": 0.5, "rewards/chosen": -1.1170822381973267, "rewards/margins": 1.5350395441055298, "rewards/rejected": -2.6521217823028564, "step": 1654 }, { "epoch": 0.26, "learning_rate": 1.2933490326261736e-05, "logits/chosen": -2.937156915664673, "logits/rejected": -2.884787082672119, "logps/chosen": -159.8600311279297, "logps/rejected": -241.5281982421875, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": 0.1678485870361328, "rewards/margins": 4.6482319831848145, "rewards/rejected": -4.480383396148682, "step": 1655 }, { "epoch": 0.26, "learning_rate": 1.2932756885730588e-05, "logits/chosen": -3.0692005157470703, "logits/rejected": -2.896836042404175, "logps/chosen": -326.8744812011719, "logps/rejected": -91.00462341308594, "loss": 6.3242, "rewards/accuracies": 0.0, "rewards/chosen": -5.792815208435059, "rewards/margins": -6.321610450744629, "rewards/rejected": 0.5287955403327942, "step": 1656 }, { "epoch": 0.26, "learning_rate": 1.293202344519944e-05, "logits/chosen": -2.7276923656463623, "logits/rejected": -1.7823193073272705, "logps/chosen": -275.4156494140625, "logps/rejected": -285.49200439453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.8707821369171143, "rewards/margins": 7.884086608886719, "rewards/rejected": -7.013304233551025, "step": 1657 }, { "epoch": 0.26, "learning_rate": 1.2931290004668292e-05, "logits/chosen": -2.862902879714966, "logits/rejected": -2.687591791152954, "logps/chosen": -340.913818359375, "logps/rejected": -452.6360778808594, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.5522987842559814, "rewards/margins": 4.472179412841797, "rewards/rejected": -6.024477958679199, "step": 1658 }, { "epoch": 0.26, "learning_rate": 1.2930556564137144e-05, "logits/chosen": -2.7809526920318604, "logits/rejected": -2.576831817626953, "logps/chosen": -98.77688598632812, "logps/rejected": -110.17607116699219, "loss": 1.3424, "rewards/accuracies": 0.5, "rewards/chosen": -0.4276689887046814, "rewards/margins": 1.5936375856399536, "rewards/rejected": -2.0213065147399902, "step": 1659 }, { "epoch": 0.26, "learning_rate": 1.2929823123605996e-05, "logits/chosen": -1.953912615776062, "logits/rejected": -2.958590269088745, "logps/chosen": -42.60413360595703, "logps/rejected": -178.43736267089844, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": 0.36380088329315186, "rewards/margins": 4.961845397949219, "rewards/rejected": -4.598044395446777, "step": 1660 }, { "epoch": 0.26, "learning_rate": 1.2929089683074848e-05, "logits/chosen": -2.947273015975952, "logits/rejected": -1.7408777475357056, "logps/chosen": -241.14254760742188, "logps/rejected": -56.578697204589844, "loss": 1.6415, "rewards/accuracies": 0.5, "rewards/chosen": -1.4152050018310547, "rewards/margins": 0.3424302339553833, "rewards/rejected": -1.757635235786438, "step": 1661 }, { "epoch": 0.26, "learning_rate": 1.2928356242543701e-05, "logits/chosen": -1.8471168279647827, "logits/rejected": -2.9122259616851807, "logps/chosen": -75.75824737548828, "logps/rejected": -272.827392578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.5269895792007446, "rewards/margins": 7.483525276184082, "rewards/rejected": -6.956535339355469, "step": 1662 }, { "epoch": 0.26, "learning_rate": 1.2927622802012553e-05, "logits/chosen": -2.368915557861328, "logits/rejected": -3.0784945487976074, "logps/chosen": -96.86540222167969, "logps/rejected": -328.27734375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.9528261423110962, "rewards/margins": 7.475825309753418, "rewards/rejected": -6.522998809814453, "step": 1663 }, { "epoch": 0.26, "learning_rate": 1.2926889361481405e-05, "logits/chosen": -0.736530065536499, "logits/rejected": -2.5883994102478027, "logps/chosen": -2.6144723892211914, "logps/rejected": -144.0316925048828, "loss": 0.1131, "rewards/accuracies": 1.0, "rewards/chosen": 1.0492796897888184, "rewards/margins": 3.3653321266174316, "rewards/rejected": -2.3160524368286133, "step": 1664 }, { "epoch": 0.26, "learning_rate": 1.2926155920950257e-05, "logits/chosen": -1.6064634323120117, "logits/rejected": -2.838205575942993, "logps/chosen": -410.54144287109375, "logps/rejected": -657.1347045898438, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.4128234386444092, "rewards/margins": 7.713685989379883, "rewards/rejected": -9.126509666442871, "step": 1665 }, { "epoch": 0.26, "learning_rate": 1.2925422480419109e-05, "logits/chosen": -2.4926626682281494, "logits/rejected": -2.9419326782226562, "logps/chosen": -63.58632278442383, "logps/rejected": -220.8446502685547, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 0.9839044809341431, "rewards/margins": 5.278805732727051, "rewards/rejected": -4.294901371002197, "step": 1666 }, { "epoch": 0.26, "learning_rate": 1.292468903988796e-05, "logits/chosen": -2.5097317695617676, "logits/rejected": -1.901311993598938, "logps/chosen": -278.03900146484375, "logps/rejected": -188.19163513183594, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -0.03933257609605789, "rewards/margins": 3.253988742828369, "rewards/rejected": -3.293321132659912, "step": 1667 }, { "epoch": 0.26, "learning_rate": 1.2923955599356812e-05, "logits/chosen": -2.3574211597442627, "logits/rejected": -3.1916377544403076, "logps/chosen": -308.5408630371094, "logps/rejected": -704.0816650390625, "loss": 4.0699, "rewards/accuracies": 0.5, "rewards/chosen": -3.7629051208496094, "rewards/margins": -1.2022147178649902, "rewards/rejected": -2.560690402984619, "step": 1668 }, { "epoch": 0.26, "learning_rate": 1.2923222158825664e-05, "logits/chosen": -2.6065409183502197, "logits/rejected": -3.264845848083496, "logps/chosen": -162.43472290039062, "logps/rejected": -354.39166259765625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.042812347412109375, "rewards/margins": 5.299447059631348, "rewards/rejected": -5.256634712219238, "step": 1669 }, { "epoch": 0.26, "learning_rate": 1.2922488718294516e-05, "logits/chosen": -2.758303642272949, "logits/rejected": -2.0844924449920654, "logps/chosen": -550.4024658203125, "logps/rejected": -412.1339111328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1233534812927246, "rewards/margins": 7.442631244659424, "rewards/rejected": -8.565984725952148, "step": 1670 }, { "epoch": 0.26, "learning_rate": 1.292175527776337e-05, "logits/chosen": -2.362915277481079, "logits/rejected": -2.6851894855499268, "logps/chosen": -161.35067749023438, "logps/rejected": -278.78570556640625, "loss": 2.6787, "rewards/accuracies": 0.5, "rewards/chosen": -2.0273756980895996, "rewards/margins": 1.0084748268127441, "rewards/rejected": -3.0358505249023438, "step": 1671 }, { "epoch": 0.26, "learning_rate": 1.2921021837232222e-05, "logits/chosen": -2.6504101753234863, "logits/rejected": -2.850959539413452, "logps/chosen": -337.2112731933594, "logps/rejected": -425.8068542480469, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4779376983642578, "rewards/margins": 6.912788391113281, "rewards/rejected": -7.390726089477539, "step": 1672 }, { "epoch": 0.26, "learning_rate": 1.2920288396701074e-05, "logits/chosen": -2.8408591747283936, "logits/rejected": -1.7455763816833496, "logps/chosen": -548.634765625, "logps/rejected": -368.67803955078125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.3818420469760895, "rewards/margins": 4.935652256011963, "rewards/rejected": -5.3174943923950195, "step": 1673 }, { "epoch": 0.26, "learning_rate": 1.2919554956169925e-05, "logits/chosen": -2.856436014175415, "logits/rejected": -2.327331781387329, "logps/chosen": -213.8955078125, "logps/rejected": -163.1544952392578, "loss": 3.2588, "rewards/accuracies": 0.5, "rewards/chosen": -3.7617976665496826, "rewards/margins": -1.5765101909637451, "rewards/rejected": -2.1852874755859375, "step": 1674 }, { "epoch": 0.26, "learning_rate": 1.2918821515638777e-05, "logits/chosen": -2.1976449489593506, "logits/rejected": -2.9142332077026367, "logps/chosen": -202.53054809570312, "logps/rejected": -322.2803955078125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.604663074016571, "rewards/margins": 5.133505344390869, "rewards/rejected": -5.738168716430664, "step": 1675 }, { "epoch": 0.26, "learning_rate": 1.2918088075107631e-05, "logits/chosen": -2.6691200733184814, "logits/rejected": -2.801933526992798, "logps/chosen": -115.45028686523438, "logps/rejected": -135.13490295410156, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -0.2813582122325897, "rewards/margins": 3.710688591003418, "rewards/rejected": -3.99204683303833, "step": 1676 }, { "epoch": 0.26, "learning_rate": 1.2917354634576483e-05, "logits/chosen": -2.726187229156494, "logits/rejected": -2.7184441089630127, "logps/chosen": -128.5639190673828, "logps/rejected": -133.6844482421875, "loss": 1.7817, "rewards/accuracies": 0.5, "rewards/chosen": -0.5712504386901855, "rewards/margins": 0.6644635200500488, "rewards/rejected": -1.2357139587402344, "step": 1677 }, { "epoch": 0.26, "learning_rate": 1.2916621194045335e-05, "logits/chosen": -2.083089828491211, "logits/rejected": -2.8604183197021484, "logps/chosen": -21.609634399414062, "logps/rejected": -258.1024475097656, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": 0.645645022392273, "rewards/margins": 6.037217617034912, "rewards/rejected": -5.391572952270508, "step": 1678 }, { "epoch": 0.26, "learning_rate": 1.2915887753514186e-05, "logits/chosen": -1.7127364873886108, "logits/rejected": -2.959688425064087, "logps/chosen": -380.4757080078125, "logps/rejected": -623.5770263671875, "loss": 2.9416, "rewards/accuracies": 0.5, "rewards/chosen": -3.8319122791290283, "rewards/margins": 1.7325687408447266, "rewards/rejected": -5.564480781555176, "step": 1679 }, { "epoch": 0.26, "learning_rate": 1.291515431298304e-05, "logits/chosen": -2.663252115249634, "logits/rejected": -2.743645191192627, "logps/chosen": -68.15203857421875, "logps/rejected": -186.80287170410156, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 0.8413518071174622, "rewards/margins": 5.780145645141602, "rewards/rejected": -4.938794136047363, "step": 1680 }, { "epoch": 0.26, "learning_rate": 1.2914420872451892e-05, "logits/chosen": -2.3451716899871826, "logits/rejected": -2.7634665966033936, "logps/chosen": -133.10386657714844, "logps/rejected": -236.07595825195312, "loss": 0.0668, "rewards/accuracies": 1.0, "rewards/chosen": -0.45700952410697937, "rewards/margins": 4.521213531494141, "rewards/rejected": -4.978222846984863, "step": 1681 }, { "epoch": 0.26, "learning_rate": 1.2913687431920744e-05, "logits/chosen": -3.24930477142334, "logits/rejected": -2.978480339050293, "logps/chosen": -122.14642333984375, "logps/rejected": -89.45732879638672, "loss": 2.2365, "rewards/accuracies": 0.5, "rewards/chosen": -1.2233209609985352, "rewards/margins": -0.45670604705810547, "rewards/rejected": -0.7666149139404297, "step": 1682 }, { "epoch": 0.26, "learning_rate": 1.2912953991389596e-05, "logits/chosen": -2.641059398651123, "logits/rejected": -2.972553253173828, "logps/chosen": -18.4390811920166, "logps/rejected": -166.2918243408203, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.053955912590026855, "rewards/margins": 5.382499694824219, "rewards/rejected": -5.328543663024902, "step": 1683 }, { "epoch": 0.26, "learning_rate": 1.2912220550858448e-05, "logits/chosen": -2.6471774578094482, "logits/rejected": -3.03790545463562, "logps/chosen": -395.08740234375, "logps/rejected": -315.74176025390625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": 0.9179947376251221, "rewards/margins": 4.235361099243164, "rewards/rejected": -3.317366123199463, "step": 1684 }, { "epoch": 0.26, "learning_rate": 1.29114871103273e-05, "logits/chosen": -1.9500044584274292, "logits/rejected": -3.057903289794922, "logps/chosen": -37.77276611328125, "logps/rejected": -195.55116271972656, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -0.5778404474258423, "rewards/margins": 4.46600341796875, "rewards/rejected": -5.043844223022461, "step": 1685 }, { "epoch": 0.26, "learning_rate": 1.2910753669796151e-05, "logits/chosen": -2.7529516220092773, "logits/rejected": -3.0346479415893555, "logps/chosen": -292.0836486816406, "logps/rejected": -320.5729675292969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.28180810809135437, "rewards/margins": 8.129944801330566, "rewards/rejected": -7.848136901855469, "step": 1686 }, { "epoch": 0.26, "learning_rate": 1.2910020229265003e-05, "logits/chosen": -2.775162696838379, "logits/rejected": -2.28338623046875, "logps/chosen": -346.4573669433594, "logps/rejected": -322.3621826171875, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": 0.13804593682289124, "rewards/margins": 5.3878607749938965, "rewards/rejected": -5.249814510345459, "step": 1687 }, { "epoch": 0.26, "learning_rate": 1.2909286788733855e-05, "logits/chosen": -2.072385549545288, "logits/rejected": -2.919754981994629, "logps/chosen": -62.466304779052734, "logps/rejected": -240.8387908935547, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 1.3107836246490479, "rewards/margins": 4.595766067504883, "rewards/rejected": -3.284982204437256, "step": 1688 }, { "epoch": 0.26, "learning_rate": 1.2908553348202709e-05, "logits/chosen": -2.5074098110198975, "logits/rejected": -2.4728291034698486, "logps/chosen": -97.39989471435547, "logps/rejected": -225.13998413085938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.40897324681282043, "rewards/margins": 6.513898849487305, "rewards/rejected": -6.104926109313965, "step": 1689 }, { "epoch": 0.26, "learning_rate": 1.290781990767156e-05, "logits/chosen": -2.073657512664795, "logits/rejected": -2.199960231781006, "logps/chosen": -176.15069580078125, "logps/rejected": -461.978271484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.0028964877128601074, "rewards/margins": 8.079044342041016, "rewards/rejected": -8.07614803314209, "step": 1690 }, { "epoch": 0.26, "learning_rate": 1.2907086467140412e-05, "logits/chosen": -1.624121069908142, "logits/rejected": -2.7768197059631348, "logps/chosen": -53.87317657470703, "logps/rejected": -263.61785888671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.8645204901695251, "rewards/margins": 7.695340156555176, "rewards/rejected": -6.830819606781006, "step": 1691 }, { "epoch": 0.26, "learning_rate": 1.2906353026609264e-05, "logits/chosen": -2.5220282077789307, "logits/rejected": -3.06423020362854, "logps/chosen": -299.45574951171875, "logps/rejected": -334.33526611328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.46287304162979126, "rewards/margins": 7.000799179077148, "rewards/rejected": -6.537925720214844, "step": 1692 }, { "epoch": 0.26, "learning_rate": 1.2905619586078116e-05, "logits/chosen": -2.4775924682617188, "logits/rejected": -3.187476873397827, "logps/chosen": -56.59646987915039, "logps/rejected": -308.8135070800781, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.4110003113746643, "rewards/margins": 6.54248046875, "rewards/rejected": -6.1314802169799805, "step": 1693 }, { "epoch": 0.26, "learning_rate": 1.2904886145546968e-05, "logits/chosen": -1.5124237537384033, "logits/rejected": -2.8768086433410645, "logps/chosen": -68.2511978149414, "logps/rejected": -281.3298034667969, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.45074501633644104, "rewards/margins": 7.150974273681641, "rewards/rejected": -6.700228691101074, "step": 1694 }, { "epoch": 0.26, "learning_rate": 1.290415270501582e-05, "logits/chosen": -2.7817165851593018, "logits/rejected": -3.150791883468628, "logps/chosen": -78.59004211425781, "logps/rejected": -336.4101867675781, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.8231841325759888, "rewards/margins": 8.86699104309082, "rewards/rejected": -8.043807029724121, "step": 1695 }, { "epoch": 0.26, "learning_rate": 1.2903419264484672e-05, "logits/chosen": -2.7667553424835205, "logits/rejected": -2.652632236480713, "logps/chosen": -114.28599548339844, "logps/rejected": -131.55958557128906, "loss": 0.3417, "rewards/accuracies": 1.0, "rewards/chosen": -0.5294170379638672, "rewards/margins": 3.7125065326690674, "rewards/rejected": -4.241923809051514, "step": 1696 }, { "epoch": 0.26, "learning_rate": 1.2902685823953524e-05, "logits/chosen": -2.8032329082489014, "logits/rejected": -2.83188796043396, "logps/chosen": -255.93222045898438, "logps/rejected": -220.50839233398438, "loss": 1.0675, "rewards/accuracies": 0.5, "rewards/chosen": -0.5511009693145752, "rewards/margins": 2.2892608642578125, "rewards/rejected": -2.8403618335723877, "step": 1697 }, { "epoch": 0.26, "learning_rate": 1.2901952383422377e-05, "logits/chosen": -2.0043511390686035, "logits/rejected": -2.848410129547119, "logps/chosen": -67.22380828857422, "logps/rejected": -208.49560546875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.27569618821144104, "rewards/margins": 5.096321105957031, "rewards/rejected": -4.820624828338623, "step": 1698 }, { "epoch": 0.26, "learning_rate": 1.2901218942891229e-05, "logits/chosen": -1.8576949834823608, "logits/rejected": -2.8480873107910156, "logps/chosen": -132.33905029296875, "logps/rejected": -325.4984436035156, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.7215092182159424, "rewards/margins": 5.822471618652344, "rewards/rejected": -6.543980598449707, "step": 1699 }, { "epoch": 0.26, "learning_rate": 1.2900485502360081e-05, "logits/chosen": -2.8862698078155518, "logits/rejected": -2.7812440395355225, "logps/chosen": -229.10208129882812, "logps/rejected": -211.28826904296875, "loss": 3.7195, "rewards/accuracies": 0.5, "rewards/chosen": -3.6549599170684814, "rewards/margins": -0.8294291496276855, "rewards/rejected": -2.825530767440796, "step": 1700 }, { "epoch": 0.26, "learning_rate": 1.2899752061828933e-05, "logits/chosen": -0.7396894097328186, "logits/rejected": -1.6981922388076782, "logps/chosen": -181.29827880859375, "logps/rejected": -455.5227355957031, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.9362716674804688, "rewards/margins": 8.300735473632812, "rewards/rejected": -7.364463806152344, "step": 1701 }, { "epoch": 0.26, "learning_rate": 1.2899018621297785e-05, "logits/chosen": -2.7260122299194336, "logits/rejected": -3.1953446865081787, "logps/chosen": -336.27655029296875, "logps/rejected": -509.81109619140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.3323104977607727, "rewards/margins": 7.460144519805908, "rewards/rejected": -7.127833843231201, "step": 1702 }, { "epoch": 0.26, "learning_rate": 1.2898285180766637e-05, "logits/chosen": -2.809715509414673, "logits/rejected": -1.1756561994552612, "logps/chosen": -261.9870910644531, "logps/rejected": -162.961181640625, "loss": 1.4962, "rewards/accuracies": 0.5, "rewards/chosen": -1.7602583169937134, "rewards/margins": 2.0235490798950195, "rewards/rejected": -3.7838072776794434, "step": 1703 }, { "epoch": 0.27, "learning_rate": 1.2897551740235489e-05, "logits/chosen": -2.5400259494781494, "logits/rejected": -2.986811637878418, "logps/chosen": -65.47933197021484, "logps/rejected": -313.86407470703125, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 0.5302688479423523, "rewards/margins": 6.176949501037598, "rewards/rejected": -5.64668083190918, "step": 1704 }, { "epoch": 0.27, "learning_rate": 1.289681829970434e-05, "logits/chosen": -2.769864797592163, "logits/rejected": -1.8967432975769043, "logps/chosen": -233.19467163085938, "logps/rejected": -131.31422424316406, "loss": 3.5303, "rewards/accuracies": 0.5, "rewards/chosen": -3.2588512897491455, "rewards/margins": -1.3517274856567383, "rewards/rejected": -1.9071239233016968, "step": 1705 }, { "epoch": 0.27, "learning_rate": 1.2896084859173192e-05, "logits/chosen": -2.536649465560913, "logits/rejected": -2.6123239994049072, "logps/chosen": -67.17566680908203, "logps/rejected": -175.5801239013672, "loss": 0.2016, "rewards/accuracies": 1.0, "rewards/chosen": 0.4022493362426758, "rewards/margins": 3.587263345718384, "rewards/rejected": -3.185014009475708, "step": 1706 }, { "epoch": 0.27, "learning_rate": 1.2895351418642046e-05, "logits/chosen": -2.7492737770080566, "logits/rejected": -3.0873889923095703, "logps/chosen": -71.90081024169922, "logps/rejected": -188.0072479248047, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": 0.11957541108131409, "rewards/margins": 4.107292175292969, "rewards/rejected": -3.9877166748046875, "step": 1707 }, { "epoch": 0.27, "learning_rate": 1.2894617978110898e-05, "logits/chosen": -1.7133984565734863, "logits/rejected": -2.9610543251037598, "logps/chosen": -75.56169128417969, "logps/rejected": -388.09326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.8552033305168152, "rewards/margins": 10.366767883300781, "rewards/rejected": -9.511564254760742, "step": 1708 }, { "epoch": 0.27, "learning_rate": 1.289388453757975e-05, "logits/chosen": -2.8779196739196777, "logits/rejected": -2.910569190979004, "logps/chosen": -142.93527221679688, "logps/rejected": -302.27569580078125, "loss": 0.3866, "rewards/accuracies": 0.5, "rewards/chosen": -0.650753378868103, "rewards/margins": 3.1690237522125244, "rewards/rejected": -3.819777011871338, "step": 1709 }, { "epoch": 0.27, "learning_rate": 1.2893151097048603e-05, "logits/chosen": -2.807596206665039, "logits/rejected": -1.681352138519287, "logps/chosen": -124.6976318359375, "logps/rejected": -246.918701171875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 0.37228432297706604, "rewards/margins": 6.908762454986572, "rewards/rejected": -6.536478042602539, "step": 1710 }, { "epoch": 0.27, "learning_rate": 1.2892417656517455e-05, "logits/chosen": -2.316713571548462, "logits/rejected": -3.1565675735473633, "logps/chosen": -44.34111404418945, "logps/rejected": -302.2107238769531, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": 1.0360338687896729, "rewards/margins": 4.26096773147583, "rewards/rejected": -3.224933624267578, "step": 1711 }, { "epoch": 0.27, "learning_rate": 1.2891684215986307e-05, "logits/chosen": -1.4245001077651978, "logits/rejected": -3.0211522579193115, "logps/chosen": -127.24943542480469, "logps/rejected": -413.28497314453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.7665982246398926, "rewards/margins": 7.83686637878418, "rewards/rejected": -7.070268630981445, "step": 1712 }, { "epoch": 0.27, "learning_rate": 1.2890950775455159e-05, "logits/chosen": -2.869576930999756, "logits/rejected": -2.8992486000061035, "logps/chosen": -394.4626770019531, "logps/rejected": -446.0463562011719, "loss": 1.4057, "rewards/accuracies": 0.5, "rewards/chosen": -1.3662253618240356, "rewards/margins": 1.6486694812774658, "rewards/rejected": -3.014894723892212, "step": 1713 }, { "epoch": 0.27, "learning_rate": 1.289021733492401e-05, "logits/chosen": -2.774118185043335, "logits/rejected": -2.8792965412139893, "logps/chosen": -91.604248046875, "logps/rejected": -207.06027221679688, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": 0.6457763910293579, "rewards/margins": 4.624350070953369, "rewards/rejected": -3.978573799133301, "step": 1714 }, { "epoch": 0.27, "learning_rate": 1.2889483894392863e-05, "logits/chosen": -1.7949069738388062, "logits/rejected": -2.6861705780029297, "logps/chosen": -252.9480743408203, "logps/rejected": -297.2268981933594, "loss": 0.0995, "rewards/accuracies": 1.0, "rewards/chosen": 0.6299488544464111, "rewards/margins": 4.501214981079102, "rewards/rejected": -3.8712663650512695, "step": 1715 }, { "epoch": 0.27, "learning_rate": 1.2888750453861716e-05, "logits/chosen": -2.678126811981201, "logits/rejected": -1.683072805404663, "logps/chosen": -288.494140625, "logps/rejected": -306.92181396484375, "loss": 1.8729, "rewards/accuracies": 0.5, "rewards/chosen": -2.8843634128570557, "rewards/margins": 3.5342183113098145, "rewards/rejected": -6.418581485748291, "step": 1716 }, { "epoch": 0.27, "learning_rate": 1.2888017013330568e-05, "logits/chosen": -2.331624984741211, "logits/rejected": -2.5344812870025635, "logps/chosen": -133.4988250732422, "logps/rejected": -210.66078186035156, "loss": 0.3039, "rewards/accuracies": 1.0, "rewards/chosen": 0.6254135370254517, "rewards/margins": 2.9595422744750977, "rewards/rejected": -2.3341286182403564, "step": 1717 }, { "epoch": 0.27, "learning_rate": 1.288728357279942e-05, "logits/chosen": -2.8908915519714355, "logits/rejected": -3.1725714206695557, "logps/chosen": -385.5264587402344, "logps/rejected": -363.8542175292969, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": 0.7855331301689148, "rewards/margins": 6.989022254943848, "rewards/rejected": -6.203489303588867, "step": 1718 }, { "epoch": 0.27, "learning_rate": 1.2886550132268272e-05, "logits/chosen": -2.7444872856140137, "logits/rejected": -2.42533016204834, "logps/chosen": -221.2644500732422, "logps/rejected": -308.0252685546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.2496528625488281, "rewards/margins": 7.229917526245117, "rewards/rejected": -8.479570388793945, "step": 1719 }, { "epoch": 0.27, "learning_rate": 1.2885816691737124e-05, "logits/chosen": -2.7168643474578857, "logits/rejected": -3.153778553009033, "logps/chosen": -290.8067321777344, "logps/rejected": -320.994873046875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.023694366216659546, "rewards/margins": 6.8903303146362305, "rewards/rejected": -6.91402530670166, "step": 1720 }, { "epoch": 0.27, "learning_rate": 1.2885083251205976e-05, "logits/chosen": -2.0919976234436035, "logits/rejected": -2.8013803958892822, "logps/chosen": -106.38653564453125, "logps/rejected": -203.14366149902344, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.0718330442905426, "rewards/margins": 5.486839294433594, "rewards/rejected": -5.5586724281311035, "step": 1721 }, { "epoch": 0.27, "learning_rate": 1.2884349810674827e-05, "logits/chosen": -1.5737661123275757, "logits/rejected": -2.9916675090789795, "logps/chosen": -25.544095993041992, "logps/rejected": -237.83836364746094, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 0.5488130450248718, "rewards/margins": 5.953770637512207, "rewards/rejected": -5.4049577713012695, "step": 1722 }, { "epoch": 0.27, "learning_rate": 1.288361637014368e-05, "logits/chosen": -2.2019052505493164, "logits/rejected": -3.187340021133423, "logps/chosen": -79.42459106445312, "logps/rejected": -332.47686767578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9992343187332153, "rewards/margins": 8.621284484863281, "rewards/rejected": -7.6220502853393555, "step": 1723 }, { "epoch": 0.27, "learning_rate": 1.2882882929612531e-05, "logits/chosen": -1.968427300453186, "logits/rejected": -3.3950610160827637, "logps/chosen": -75.40937805175781, "logps/rejected": -409.9275817871094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.0663858652114868, "rewards/margins": 7.357852935791016, "rewards/rejected": -6.291467666625977, "step": 1724 }, { "epoch": 0.27, "learning_rate": 1.2882149489081385e-05, "logits/chosen": -2.5007026195526123, "logits/rejected": -2.880078077316284, "logps/chosen": -229.84014892578125, "logps/rejected": -221.208740234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.5123695135116577, "rewards/margins": 6.807016849517822, "rewards/rejected": -6.294647216796875, "step": 1725 }, { "epoch": 0.27, "learning_rate": 1.2881416048550237e-05, "logits/chosen": -2.6404693126678467, "logits/rejected": -2.883474111557007, "logps/chosen": -2.210789203643799, "logps/rejected": -113.81641387939453, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 1.2867436408996582, "rewards/margins": 5.133669376373291, "rewards/rejected": -3.8469254970550537, "step": 1726 }, { "epoch": 0.27, "learning_rate": 1.2880682608019089e-05, "logits/chosen": -2.901304244995117, "logits/rejected": -2.8068652153015137, "logps/chosen": -219.06729125976562, "logps/rejected": -299.64306640625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.019280239939689636, "rewards/margins": 5.856040954589844, "rewards/rejected": -5.875321388244629, "step": 1727 }, { "epoch": 0.27, "learning_rate": 1.287994916748794e-05, "logits/chosen": -2.4000725746154785, "logits/rejected": -2.744502067565918, "logps/chosen": -79.07859802246094, "logps/rejected": -266.4768371582031, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.121252417564392, "rewards/margins": 7.426960468292236, "rewards/rejected": -8.548213005065918, "step": 1728 }, { "epoch": 0.27, "learning_rate": 1.2879215726956792e-05, "logits/chosen": -1.6512410640716553, "logits/rejected": -2.5758895874023438, "logps/chosen": -31.928508758544922, "logps/rejected": -140.87548828125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 0.9859812259674072, "rewards/margins": 5.148394584655762, "rewards/rejected": -4.162413597106934, "step": 1729 }, { "epoch": 0.27, "learning_rate": 1.2878482286425644e-05, "logits/chosen": -2.612051010131836, "logits/rejected": -2.011021852493286, "logps/chosen": -196.41751098632812, "logps/rejected": -268.79058837890625, "loss": 2.752, "rewards/accuracies": 0.5, "rewards/chosen": -2.3746728897094727, "rewards/margins": 2.666522264480591, "rewards/rejected": -5.041194915771484, "step": 1730 }, { "epoch": 0.27, "learning_rate": 1.2877748845894496e-05, "logits/chosen": -3.0396130084991455, "logits/rejected": -2.3158411979675293, "logps/chosen": -382.1328430175781, "logps/rejected": -243.454345703125, "loss": 2.69, "rewards/accuracies": 0.5, "rewards/chosen": -2.310406446456909, "rewards/margins": 0.1078026294708252, "rewards/rejected": -2.4182090759277344, "step": 1731 }, { "epoch": 0.27, "learning_rate": 1.2877015405363348e-05, "logits/chosen": -2.447573661804199, "logits/rejected": -3.1736373901367188, "logps/chosen": -209.08135986328125, "logps/rejected": -287.941162109375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.2562451362609863, "rewards/margins": 5.7154974937438965, "rewards/rejected": -7.971742630004883, "step": 1732 }, { "epoch": 0.27, "learning_rate": 1.28762819648322e-05, "logits/chosen": -2.7638490200042725, "logits/rejected": -3.029024124145508, "logps/chosen": -67.01848602294922, "logps/rejected": -291.0119934082031, "loss": 2.3763, "rewards/accuracies": 0.5, "rewards/chosen": -1.5682618618011475, "rewards/margins": 4.420405864715576, "rewards/rejected": -5.9886674880981445, "step": 1733 }, { "epoch": 0.27, "learning_rate": 1.2875548524301053e-05, "logits/chosen": -1.798041820526123, "logits/rejected": -3.1562469005584717, "logps/chosen": -52.82541275024414, "logps/rejected": -328.6889343261719, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.5719265937805176, "rewards/margins": 7.612843036651611, "rewards/rejected": -7.040916442871094, "step": 1734 }, { "epoch": 0.27, "learning_rate": 1.2874815083769905e-05, "logits/chosen": -2.6070449352264404, "logits/rejected": -2.2513229846954346, "logps/chosen": -202.6553497314453, "logps/rejected": -173.61276245117188, "loss": 0.5118, "rewards/accuracies": 0.5, "rewards/chosen": -0.09901905059814453, "rewards/margins": 3.531341791152954, "rewards/rejected": -3.6303608417510986, "step": 1735 }, { "epoch": 0.27, "learning_rate": 1.2874081643238757e-05, "logits/chosen": -2.9027276039123535, "logits/rejected": -3.16688871383667, "logps/chosen": -211.09490966796875, "logps/rejected": -456.3377380371094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.15177002549171448, "rewards/margins": 8.565629959106445, "rewards/rejected": -8.413859367370605, "step": 1736 }, { "epoch": 0.27, "learning_rate": 1.2873348202707609e-05, "logits/chosen": -2.87156081199646, "logits/rejected": -2.6716103553771973, "logps/chosen": -455.55670166015625, "logps/rejected": -205.60093688964844, "loss": 3.2322, "rewards/accuracies": 0.5, "rewards/chosen": -3.01869535446167, "rewards/margins": 0.8397266864776611, "rewards/rejected": -3.858422040939331, "step": 1737 }, { "epoch": 0.27, "learning_rate": 1.2872614762176461e-05, "logits/chosen": -2.5506513118743896, "logits/rejected": -3.030670166015625, "logps/chosen": -235.48187255859375, "logps/rejected": -384.25872802734375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.2792701721191406, "rewards/margins": 4.810066223144531, "rewards/rejected": -5.089336395263672, "step": 1738 }, { "epoch": 0.27, "learning_rate": 1.2871881321645313e-05, "logits/chosen": -2.9979546070098877, "logits/rejected": -2.334325075149536, "logps/chosen": -129.6394500732422, "logps/rejected": -203.02340698242188, "loss": 1.6306, "rewards/accuracies": 0.5, "rewards/chosen": -2.2550735473632812, "rewards/margins": 0.5329698324203491, "rewards/rejected": -2.78804349899292, "step": 1739 }, { "epoch": 0.27, "learning_rate": 1.2871147881114165e-05, "logits/chosen": -2.663222074508667, "logits/rejected": -2.43953537940979, "logps/chosen": -673.3134155273438, "logps/rejected": -524.55517578125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.20053157210350037, "rewards/margins": 6.756377696990967, "rewards/rejected": -6.9569091796875, "step": 1740 }, { "epoch": 0.27, "learning_rate": 1.2870414440583017e-05, "logits/chosen": -3.013568162918091, "logits/rejected": -2.807894468307495, "logps/chosen": -222.44683837890625, "logps/rejected": -149.96737670898438, "loss": 3.7476, "rewards/accuracies": 0.5, "rewards/chosen": -4.20935583114624, "rewards/margins": -1.4055087566375732, "rewards/rejected": -2.803847312927246, "step": 1741 }, { "epoch": 0.27, "learning_rate": 1.286968100005187e-05, "logits/chosen": -1.3138315677642822, "logits/rejected": -2.7686867713928223, "logps/chosen": -44.51592254638672, "logps/rejected": -325.5574035644531, "loss": 0.0956, "rewards/accuracies": 1.0, "rewards/chosen": 0.0005993843078613281, "rewards/margins": 7.0516557693481445, "rewards/rejected": -7.051056861877441, "step": 1742 }, { "epoch": 0.27, "learning_rate": 1.2868947559520722e-05, "logits/chosen": -2.800675392150879, "logits/rejected": -2.045124053955078, "logps/chosen": -148.5615997314453, "logps/rejected": -39.27452850341797, "loss": 6.5361, "rewards/accuracies": 0.0, "rewards/chosen": -5.533808708190918, "rewards/margins": -6.533514976501465, "rewards/rejected": 0.9997060298919678, "step": 1743 }, { "epoch": 0.27, "learning_rate": 1.2868214118989576e-05, "logits/chosen": -1.2590528726577759, "logits/rejected": -2.2552223205566406, "logps/chosen": -58.00861358642578, "logps/rejected": -260.6685485839844, "loss": 0.052, "rewards/accuracies": 1.0, "rewards/chosen": -0.33573153614997864, "rewards/margins": 5.39829158782959, "rewards/rejected": -5.734023094177246, "step": 1744 }, { "epoch": 0.27, "learning_rate": 1.2867480678458427e-05, "logits/chosen": -2.6168315410614014, "logits/rejected": -3.237522602081299, "logps/chosen": -164.679931640625, "logps/rejected": -328.2519226074219, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.3115737736225128, "rewards/margins": 5.693425178527832, "rewards/rejected": -6.004999160766602, "step": 1745 }, { "epoch": 0.27, "learning_rate": 1.286674723792728e-05, "logits/chosen": -2.87656569480896, "logits/rejected": -2.575740098953247, "logps/chosen": -250.75238037109375, "logps/rejected": -193.18707275390625, "loss": 2.4325, "rewards/accuracies": 0.5, "rewards/chosen": -3.7096500396728516, "rewards/margins": 0.5446028709411621, "rewards/rejected": -4.254252910614014, "step": 1746 }, { "epoch": 0.27, "learning_rate": 1.2866013797396131e-05, "logits/chosen": -2.2753915786743164, "logits/rejected": -3.1647698879241943, "logps/chosen": -163.28921508789062, "logps/rejected": -460.0018615722656, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.4549300968647003, "rewards/margins": 6.030508518218994, "rewards/rejected": -6.485438346862793, "step": 1747 }, { "epoch": 0.27, "learning_rate": 1.2865280356864983e-05, "logits/chosen": -2.7493324279785156, "logits/rejected": -3.2686479091644287, "logps/chosen": -23.96563720703125, "logps/rejected": -256.7435302734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.3671274185180664, "rewards/margins": 6.675605773925781, "rewards/rejected": -5.308478355407715, "step": 1748 }, { "epoch": 0.27, "learning_rate": 1.2864546916333835e-05, "logits/chosen": -1.1043365001678467, "logits/rejected": -2.247619390487671, "logps/chosen": -52.07195281982422, "logps/rejected": -280.38311767578125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 0.8582338690757751, "rewards/margins": 5.640861511230469, "rewards/rejected": -4.782628059387207, "step": 1749 }, { "epoch": 0.27, "learning_rate": 1.2863813475802687e-05, "logits/chosen": -2.808260917663574, "logits/rejected": -2.6312382221221924, "logps/chosen": -534.9576416015625, "logps/rejected": -384.8804016113281, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -1.149133324623108, "rewards/margins": 3.606623649597168, "rewards/rejected": -4.755756855010986, "step": 1750 }, { "epoch": 0.27, "learning_rate": 1.286308003527154e-05, "logits/chosen": -2.397942304611206, "logits/rejected": -2.085953712463379, "logps/chosen": -670.66552734375, "logps/rejected": -441.77886962890625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.5891908407211304, "rewards/margins": 4.806178092956543, "rewards/rejected": -6.395369052886963, "step": 1751 }, { "epoch": 0.27, "learning_rate": 1.2862346594740392e-05, "logits/chosen": -2.90362811088562, "logits/rejected": -1.9224722385406494, "logps/chosen": -368.74981689453125, "logps/rejected": -171.16104125976562, "loss": 5.0403, "rewards/accuracies": 0.0, "rewards/chosen": -5.8248186111450195, "rewards/margins": -5.032413482666016, "rewards/rejected": -0.7924045920372009, "step": 1752 }, { "epoch": 0.27, "learning_rate": 1.2861613154209244e-05, "logits/chosen": -3.0143020153045654, "logits/rejected": -3.1227736473083496, "logps/chosen": -332.3330383300781, "logps/rejected": -391.7698669433594, "loss": 4.2897, "rewards/accuracies": 0.5, "rewards/chosen": -4.498024940490723, "rewards/margins": -0.6113612651824951, "rewards/rejected": -3.8866639137268066, "step": 1753 }, { "epoch": 0.27, "learning_rate": 1.2860879713678096e-05, "logits/chosen": -2.963557004928589, "logits/rejected": -1.8645538091659546, "logps/chosen": -219.31866455078125, "logps/rejected": -128.70880126953125, "loss": 2.3099, "rewards/accuracies": 0.5, "rewards/chosen": -2.223445177078247, "rewards/margins": -0.9562544822692871, "rewards/rejected": -1.267190933227539, "step": 1754 }, { "epoch": 0.27, "learning_rate": 1.2860146273146948e-05, "logits/chosen": -3.0676729679107666, "logits/rejected": -3.14178204536438, "logps/chosen": -27.769012451171875, "logps/rejected": -115.89815521240234, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 0.47793838381767273, "rewards/margins": 4.261876106262207, "rewards/rejected": -3.783937454223633, "step": 1755 }, { "epoch": 0.27, "learning_rate": 1.28594128326158e-05, "logits/chosen": -3.0667781829833984, "logits/rejected": -3.102959156036377, "logps/chosen": -131.78651428222656, "logps/rejected": -189.5799560546875, "loss": 0.5696, "rewards/accuracies": 0.5, "rewards/chosen": 0.4559534192085266, "rewards/margins": 1.6124753952026367, "rewards/rejected": -1.1565220355987549, "step": 1756 }, { "epoch": 0.27, "learning_rate": 1.2858679392084652e-05, "logits/chosen": -2.700486183166504, "logits/rejected": -3.3079497814178467, "logps/chosen": -203.14451599121094, "logps/rejected": -297.69000244140625, "loss": 0.0502, "rewards/accuracies": 1.0, "rewards/chosen": -0.38047945499420166, "rewards/margins": 2.9740898609161377, "rewards/rejected": -3.354569435119629, "step": 1757 }, { "epoch": 0.27, "learning_rate": 1.2857945951553504e-05, "logits/chosen": -2.3391335010528564, "logits/rejected": -2.7959609031677246, "logps/chosen": -78.8091049194336, "logps/rejected": -112.42877197265625, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": 0.8851257562637329, "rewards/margins": 3.6040048599243164, "rewards/rejected": -2.718879222869873, "step": 1758 }, { "epoch": 0.27, "learning_rate": 1.2857212511022355e-05, "logits/chosen": -2.517343282699585, "logits/rejected": -2.7997727394104004, "logps/chosen": -425.6843566894531, "logps/rejected": -587.2618408203125, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": 0.5196723937988281, "rewards/margins": 5.360047340393066, "rewards/rejected": -4.840374946594238, "step": 1759 }, { "epoch": 0.27, "learning_rate": 1.2856479070491209e-05, "logits/chosen": -2.7827115058898926, "logits/rejected": -2.533196210861206, "logps/chosen": -218.8359832763672, "logps/rejected": -183.03814697265625, "loss": 0.1246, "rewards/accuracies": 1.0, "rewards/chosen": -1.0290673971176147, "rewards/margins": 3.224644899368286, "rewards/rejected": -4.2537126541137695, "step": 1760 }, { "epoch": 0.27, "learning_rate": 1.2855745629960061e-05, "logits/chosen": -2.1673429012298584, "logits/rejected": -3.137115478515625, "logps/chosen": -79.10812377929688, "logps/rejected": -358.74102783203125, "loss": 0.1343, "rewards/accuracies": 1.0, "rewards/chosen": 0.04825134575366974, "rewards/margins": 4.766587257385254, "rewards/rejected": -4.71833610534668, "step": 1761 }, { "epoch": 0.27, "learning_rate": 1.2855012189428913e-05, "logits/chosen": -2.260573625564575, "logits/rejected": -2.910353422164917, "logps/chosen": -62.68105697631836, "logps/rejected": -355.76031494140625, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": 0.1845586895942688, "rewards/margins": 7.232594013214111, "rewards/rejected": -7.048035621643066, "step": 1762 }, { "epoch": 0.27, "learning_rate": 1.2854278748897765e-05, "logits/chosen": -1.7453484535217285, "logits/rejected": -2.6786577701568604, "logps/chosen": -190.5099334716797, "logps/rejected": -243.4866485595703, "loss": 1.5042, "rewards/accuracies": 0.5, "rewards/chosen": -1.9364007711410522, "rewards/margins": 1.1482820510864258, "rewards/rejected": -3.0846827030181885, "step": 1763 }, { "epoch": 0.27, "learning_rate": 1.2853545308366616e-05, "logits/chosen": -2.1798226833343506, "logits/rejected": -2.561739921569824, "logps/chosen": -239.84921264648438, "logps/rejected": -407.90692138671875, "loss": 2.1062, "rewards/accuracies": 0.5, "rewards/chosen": -2.1097922325134277, "rewards/margins": 3.9782238006591797, "rewards/rejected": -6.088016033172607, "step": 1764 }, { "epoch": 0.27, "learning_rate": 1.2852811867835468e-05, "logits/chosen": -2.8637712001800537, "logits/rejected": -2.7969110012054443, "logps/chosen": -475.35577392578125, "logps/rejected": -229.37144470214844, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.8211941719055176, "rewards/margins": 5.620948791503906, "rewards/rejected": -4.7997541427612305, "step": 1765 }, { "epoch": 0.27, "learning_rate": 1.285207842730432e-05, "logits/chosen": -2.711643934249878, "logits/rejected": -2.8596670627593994, "logps/chosen": -62.39417266845703, "logps/rejected": -74.31189727783203, "loss": 1.187, "rewards/accuracies": 0.5, "rewards/chosen": -0.22036957740783691, "rewards/margins": 0.18996036052703857, "rewards/rejected": -0.4103298783302307, "step": 1766 }, { "epoch": 0.27, "learning_rate": 1.2851344986773172e-05, "logits/chosen": -2.157271146774292, "logits/rejected": -3.0050125122070312, "logps/chosen": -113.75589752197266, "logps/rejected": -419.2488708496094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.4865437150001526, "rewards/margins": 8.936147689819336, "rewards/rejected": -8.449604988098145, "step": 1767 }, { "epoch": 0.27, "learning_rate": 1.2850611546242024e-05, "logits/chosen": -2.4300696849823, "logits/rejected": -3.044085741043091, "logps/chosen": -235.40623474121094, "logps/rejected": -417.7122802734375, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 0.1359855681657791, "rewards/margins": 4.532402038574219, "rewards/rejected": -4.396416664123535, "step": 1768 }, { "epoch": 0.28, "learning_rate": 1.2849878105710878e-05, "logits/chosen": -1.72352135181427, "logits/rejected": -2.1118109226226807, "logps/chosen": -167.44064331054688, "logps/rejected": -256.9432373046875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.042413145303726196, "rewards/margins": 6.142032146453857, "rewards/rejected": -6.099618911743164, "step": 1769 }, { "epoch": 0.28, "learning_rate": 1.284914466517973e-05, "logits/chosen": -1.9925678968429565, "logits/rejected": -2.457453489303589, "logps/chosen": -150.41799926757812, "logps/rejected": -235.96102905273438, "loss": 3.4137, "rewards/accuracies": 0.5, "rewards/chosen": -2.408280611038208, "rewards/margins": -0.22664904594421387, "rewards/rejected": -2.181631565093994, "step": 1770 }, { "epoch": 0.28, "learning_rate": 1.2848411224648581e-05, "logits/chosen": -3.1284167766571045, "logits/rejected": -2.364988327026367, "logps/chosen": -630.2489013671875, "logps/rejected": -389.3254089355469, "loss": 2.6519, "rewards/accuracies": 0.5, "rewards/chosen": -3.162156820297241, "rewards/margins": -1.55265212059021, "rewards/rejected": -1.6095046997070312, "step": 1771 }, { "epoch": 0.28, "learning_rate": 1.2847677784117433e-05, "logits/chosen": -1.7432574033737183, "logits/rejected": -0.9382105469703674, "logps/chosen": -494.01800537109375, "logps/rejected": -265.76812744140625, "loss": 2.7688, "rewards/accuracies": 0.5, "rewards/chosen": -2.9199326038360596, "rewards/margins": -0.31250977516174316, "rewards/rejected": -2.6074228286743164, "step": 1772 }, { "epoch": 0.28, "learning_rate": 1.2846944343586285e-05, "logits/chosen": -2.8633604049682617, "logits/rejected": -3.27083420753479, "logps/chosen": -267.1269836425781, "logps/rejected": -278.33331298828125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.24655914306640625, "rewards/margins": 4.968471527099609, "rewards/rejected": -5.215030670166016, "step": 1773 }, { "epoch": 0.28, "learning_rate": 1.2846210903055137e-05, "logits/chosen": -2.9692323207855225, "logits/rejected": -3.1647133827209473, "logps/chosen": -104.32157897949219, "logps/rejected": -210.9191131591797, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": 0.9287731647491455, "rewards/margins": 3.95823335647583, "rewards/rejected": -3.0294601917266846, "step": 1774 }, { "epoch": 0.28, "learning_rate": 1.2845477462523989e-05, "logits/chosen": -2.845330238342285, "logits/rejected": -2.8978755474090576, "logps/chosen": -172.59161376953125, "logps/rejected": -271.62127685546875, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": -0.3464195430278778, "rewards/margins": 3.5052695274353027, "rewards/rejected": -3.851689100265503, "step": 1775 }, { "epoch": 0.28, "learning_rate": 1.2844744021992842e-05, "logits/chosen": -2.5986244678497314, "logits/rejected": -3.1610963344573975, "logps/chosen": -264.4215087890625, "logps/rejected": -311.0956726074219, "loss": 2.0299, "rewards/accuracies": 0.5, "rewards/chosen": -2.5694591999053955, "rewards/margins": -1.3036730289459229, "rewards/rejected": -1.2657859325408936, "step": 1776 }, { "epoch": 0.28, "learning_rate": 1.2844010581461694e-05, "logits/chosen": -0.8672170639038086, "logits/rejected": -2.64193058013916, "logps/chosen": -156.95896911621094, "logps/rejected": -407.86260986328125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 0.4681053161621094, "rewards/margins": 6.220077037811279, "rewards/rejected": -5.75197172164917, "step": 1777 }, { "epoch": 0.28, "learning_rate": 1.2843277140930548e-05, "logits/chosen": -2.8673133850097656, "logits/rejected": -1.9544055461883545, "logps/chosen": -142.8564910888672, "logps/rejected": -191.7744140625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.13637809455394745, "rewards/margins": 4.955268383026123, "rewards/rejected": -5.091646671295166, "step": 1778 }, { "epoch": 0.28, "learning_rate": 1.28425437003994e-05, "logits/chosen": -2.398909568786621, "logits/rejected": -3.389657735824585, "logps/chosen": -5.191827297210693, "logps/rejected": -215.00576782226562, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": 1.0243654251098633, "rewards/margins": 3.3068885803222656, "rewards/rejected": -2.2825231552124023, "step": 1779 }, { "epoch": 0.28, "learning_rate": 1.2841810259868252e-05, "logits/chosen": -2.347554922103882, "logits/rejected": -2.923380136489868, "logps/chosen": -93.36634826660156, "logps/rejected": -430.0555419921875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 0.8380584716796875, "rewards/margins": 7.892584800720215, "rewards/rejected": -7.054526329040527, "step": 1780 }, { "epoch": 0.28, "learning_rate": 1.2841076819337103e-05, "logits/chosen": -1.778092384338379, "logits/rejected": -2.768219232559204, "logps/chosen": -75.38327026367188, "logps/rejected": -288.9404296875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 1.0997668504714966, "rewards/margins": 5.485419750213623, "rewards/rejected": -4.385653018951416, "step": 1781 }, { "epoch": 0.28, "learning_rate": 1.2840343378805955e-05, "logits/chosen": -1.0360922813415527, "logits/rejected": -3.0701568126678467, "logps/chosen": -34.728965759277344, "logps/rejected": -444.9533386230469, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 0.1775432527065277, "rewards/margins": 6.2805495262146, "rewards/rejected": -6.103006362915039, "step": 1782 }, { "epoch": 0.28, "learning_rate": 1.2839609938274807e-05, "logits/chosen": -2.8456828594207764, "logits/rejected": -2.1890268325805664, "logps/chosen": -446.4527893066406, "logps/rejected": -313.0992431640625, "loss": 5.7264, "rewards/accuracies": 0.5, "rewards/chosen": -5.895474433898926, "rewards/margins": -2.186573028564453, "rewards/rejected": -3.7089014053344727, "step": 1783 }, { "epoch": 0.28, "learning_rate": 1.2838876497743659e-05, "logits/chosen": -2.1288843154907227, "logits/rejected": -2.760052442550659, "logps/chosen": -645.4931640625, "logps/rejected": -638.3282470703125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.5443596243858337, "rewards/margins": 6.104841232299805, "rewards/rejected": -5.560481071472168, "step": 1784 }, { "epoch": 0.28, "learning_rate": 1.2838143057212511e-05, "logits/chosen": -0.9987652897834778, "logits/rejected": -2.8391361236572266, "logps/chosen": -160.21002197265625, "logps/rejected": -449.53009033203125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.36655277013778687, "rewards/margins": 4.61397647857666, "rewards/rejected": -4.2474236488342285, "step": 1785 }, { "epoch": 0.28, "learning_rate": 1.2837409616681363e-05, "logits/chosen": -2.6063263416290283, "logits/rejected": -3.1655433177948, "logps/chosen": -149.5064697265625, "logps/rejected": -241.8638458251953, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.683880627155304, "rewards/margins": 6.455971717834473, "rewards/rejected": -5.772090911865234, "step": 1786 }, { "epoch": 0.28, "learning_rate": 1.2836676176150216e-05, "logits/chosen": -1.872057557106018, "logits/rejected": -2.861891984939575, "logps/chosen": -9.959341049194336, "logps/rejected": -252.01663208007812, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.47200441360473633, "rewards/margins": 6.09794807434082, "rewards/rejected": -5.625943660736084, "step": 1787 }, { "epoch": 0.28, "learning_rate": 1.2835942735619068e-05, "logits/chosen": -1.3654539585113525, "logits/rejected": -1.4569605588912964, "logps/chosen": -342.7225646972656, "logps/rejected": -362.9320983886719, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": 0.1930881142616272, "rewards/margins": 4.8336286544799805, "rewards/rejected": -4.640540599822998, "step": 1788 }, { "epoch": 0.28, "learning_rate": 1.283520929508792e-05, "logits/chosen": -1.6202272176742554, "logits/rejected": -2.647033452987671, "logps/chosen": -135.1835479736328, "logps/rejected": -250.06016540527344, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": 1.1129052639007568, "rewards/margins": 3.966688632965088, "rewards/rejected": -2.853783369064331, "step": 1789 }, { "epoch": 0.28, "learning_rate": 1.2834475854556772e-05, "logits/chosen": -2.8208580017089844, "logits/rejected": -2.699139356613159, "logps/chosen": -374.50531005859375, "logps/rejected": -373.322509765625, "loss": 0.097, "rewards/accuracies": 1.0, "rewards/chosen": -0.2731720209121704, "rewards/margins": 3.2629165649414062, "rewards/rejected": -3.536088705062866, "step": 1790 }, { "epoch": 0.28, "learning_rate": 1.2833742414025624e-05, "logits/chosen": -2.4256441593170166, "logits/rejected": -2.823676824569702, "logps/chosen": -41.6519775390625, "logps/rejected": -122.07710266113281, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": 0.4228001534938812, "rewards/margins": 3.3516783714294434, "rewards/rejected": -2.92887806892395, "step": 1791 }, { "epoch": 0.28, "learning_rate": 1.2833008973494476e-05, "logits/chosen": -2.9933879375457764, "logits/rejected": -2.42252516746521, "logps/chosen": -266.2558288574219, "logps/rejected": -260.6639099121094, "loss": 3.1117, "rewards/accuracies": 0.5, "rewards/chosen": -2.665271759033203, "rewards/margins": -0.05009961128234863, "rewards/rejected": -2.6151721477508545, "step": 1792 }, { "epoch": 0.28, "learning_rate": 1.2832275532963328e-05, "logits/chosen": -2.817610263824463, "logits/rejected": -3.341646432876587, "logps/chosen": -21.49394416809082, "logps/rejected": -143.17657470703125, "loss": 0.1184, "rewards/accuracies": 1.0, "rewards/chosen": 0.8459669351577759, "rewards/margins": 3.614365816116333, "rewards/rejected": -2.7683990001678467, "step": 1793 }, { "epoch": 0.28, "learning_rate": 1.283154209243218e-05, "logits/chosen": -2.5661306381225586, "logits/rejected": -2.868422508239746, "logps/chosen": -151.4638671875, "logps/rejected": -141.98251342773438, "loss": 2.8018, "rewards/accuracies": 0.5, "rewards/chosen": -2.2583513259887695, "rewards/margins": -0.20900750160217285, "rewards/rejected": -2.0493438243865967, "step": 1794 }, { "epoch": 0.28, "learning_rate": 1.2830808651901031e-05, "logits/chosen": -2.7170259952545166, "logits/rejected": -2.936145067214966, "logps/chosen": -146.36802673339844, "logps/rejected": -273.3447265625, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": 0.46894052624702454, "rewards/margins": 3.772634744644165, "rewards/rejected": -3.303694248199463, "step": 1795 }, { "epoch": 0.28, "learning_rate": 1.2830075211369885e-05, "logits/chosen": -2.487957000732422, "logits/rejected": -2.9177260398864746, "logps/chosen": -35.586769104003906, "logps/rejected": -173.07070922851562, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": 0.4773290455341339, "rewards/margins": 5.162986755371094, "rewards/rejected": -4.685657978057861, "step": 1796 }, { "epoch": 0.28, "learning_rate": 1.2829341770838737e-05, "logits/chosen": -1.5151259899139404, "logits/rejected": -3.12707257270813, "logps/chosen": -114.1384506225586, "logps/rejected": -426.23016357421875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.023161888122558594, "rewards/margins": 5.280457496643066, "rewards/rejected": -5.303619384765625, "step": 1797 }, { "epoch": 0.28, "learning_rate": 1.2828608330307589e-05, "logits/chosen": -2.4726643562316895, "logits/rejected": -1.509195327758789, "logps/chosen": -201.63314819335938, "logps/rejected": -149.80043029785156, "loss": 2.871, "rewards/accuracies": 0.5, "rewards/chosen": -3.3304216861724854, "rewards/margins": -1.6704368591308594, "rewards/rejected": -1.6599849462509155, "step": 1798 }, { "epoch": 0.28, "learning_rate": 1.282787488977644e-05, "logits/chosen": -2.739699602127075, "logits/rejected": -1.1365002393722534, "logps/chosen": -321.3259582519531, "logps/rejected": -198.96405029296875, "loss": 3.4769, "rewards/accuracies": 0.5, "rewards/chosen": -3.545259952545166, "rewards/margins": -1.739698052406311, "rewards/rejected": -1.805561900138855, "step": 1799 }, { "epoch": 0.28, "learning_rate": 1.2827141449245293e-05, "logits/chosen": -3.1098690032958984, "logits/rejected": -2.0666909217834473, "logps/chosen": -200.56710815429688, "logps/rejected": -97.92813873291016, "loss": 2.8211, "rewards/accuracies": 0.5, "rewards/chosen": -1.883543610572815, "rewards/margins": -0.7867794036865234, "rewards/rejected": -1.0967642068862915, "step": 1800 }, { "epoch": 0.28, "learning_rate": 1.2826408008714144e-05, "logits/chosen": -2.921283721923828, "logits/rejected": -2.6596920490264893, "logps/chosen": -100.62965393066406, "logps/rejected": -64.44161987304688, "loss": 2.6575, "rewards/accuracies": 0.5, "rewards/chosen": -1.3510558605194092, "rewards/margins": -2.2054319381713867, "rewards/rejected": 0.8543760180473328, "step": 1801 }, { "epoch": 0.28, "learning_rate": 1.2825674568182996e-05, "logits/chosen": -2.800564765930176, "logits/rejected": -2.7072386741638184, "logps/chosen": -79.56745910644531, "logps/rejected": -290.25482177734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.7964857220649719, "rewards/margins": 7.000846862792969, "rewards/rejected": -6.2043609619140625, "step": 1802 }, { "epoch": 0.28, "learning_rate": 1.2824941127651848e-05, "logits/chosen": -3.0831804275512695, "logits/rejected": -2.94789981842041, "logps/chosen": -338.81939697265625, "logps/rejected": -340.2712097167969, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.17322367429733276, "rewards/margins": 4.5570149421691895, "rewards/rejected": -4.730238437652588, "step": 1803 }, { "epoch": 0.28, "learning_rate": 1.28242076871207e-05, "logits/chosen": -3.1246113777160645, "logits/rejected": -2.2210772037506104, "logps/chosen": -402.821533203125, "logps/rejected": -221.872314453125, "loss": 2.2182, "rewards/accuracies": 0.5, "rewards/chosen": -2.6350457668304443, "rewards/margins": 0.9905295372009277, "rewards/rejected": -3.625575304031372, "step": 1804 }, { "epoch": 0.28, "learning_rate": 1.2823474246589554e-05, "logits/chosen": -1.323159098625183, "logits/rejected": -2.684748888015747, "logps/chosen": -253.19650268554688, "logps/rejected": -317.68927001953125, "loss": 1.2842, "rewards/accuracies": 0.5, "rewards/chosen": -2.4085190296173096, "rewards/margins": 2.6422605514526367, "rewards/rejected": -5.050779342651367, "step": 1805 }, { "epoch": 0.28, "learning_rate": 1.2822740806058406e-05, "logits/chosen": -2.9303507804870605, "logits/rejected": -2.803295850753784, "logps/chosen": -112.67743682861328, "logps/rejected": -304.7891540527344, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -0.34910428524017334, "rewards/margins": 5.264401912689209, "rewards/rejected": -5.613506317138672, "step": 1806 }, { "epoch": 0.28, "learning_rate": 1.2822007365527257e-05, "logits/chosen": -3.292996644973755, "logits/rejected": -3.4331374168395996, "logps/chosen": -13.68626594543457, "logps/rejected": -144.52972412109375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": 0.505101203918457, "rewards/margins": 4.609413146972656, "rewards/rejected": -4.104311943054199, "step": 1807 }, { "epoch": 0.28, "learning_rate": 1.282127392499611e-05, "logits/chosen": -2.7455079555511475, "logits/rejected": -3.2531015872955322, "logps/chosen": -128.78140258789062, "logps/rejected": -287.9796142578125, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.5920440554618835, "rewards/margins": 4.692497253417969, "rewards/rejected": -5.284541130065918, "step": 1808 }, { "epoch": 0.28, "learning_rate": 1.2820540484464961e-05, "logits/chosen": -2.6153998374938965, "logits/rejected": -2.9162676334381104, "logps/chosen": -58.29682159423828, "logps/rejected": -152.6468505859375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.0656936764717102, "rewards/margins": 5.303238391876221, "rewards/rejected": -5.368931770324707, "step": 1809 }, { "epoch": 0.28, "learning_rate": 1.2819807043933815e-05, "logits/chosen": -1.9193007946014404, "logits/rejected": -2.4520668983459473, "logps/chosen": -395.5884704589844, "logps/rejected": -479.49957275390625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.24355387687683105, "rewards/margins": 4.733306407928467, "rewards/rejected": -4.976860046386719, "step": 1810 }, { "epoch": 0.28, "learning_rate": 1.2819073603402667e-05, "logits/chosen": -3.0704431533813477, "logits/rejected": -2.358898878097534, "logps/chosen": -151.13775634765625, "logps/rejected": -108.68692779541016, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": 0.5854759216308594, "rewards/margins": 2.514604091644287, "rewards/rejected": -1.9291282892227173, "step": 1811 }, { "epoch": 0.28, "learning_rate": 1.2818340162871518e-05, "logits/chosen": -2.9004273414611816, "logits/rejected": -3.017277479171753, "logps/chosen": -143.17420959472656, "logps/rejected": -217.89266967773438, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.17425191402435303, "rewards/margins": 6.442149639129639, "rewards/rejected": -6.267897605895996, "step": 1812 }, { "epoch": 0.28, "learning_rate": 1.281760672234037e-05, "logits/chosen": -3.081434965133667, "logits/rejected": -2.7311153411865234, "logps/chosen": -550.7122802734375, "logps/rejected": -553.5384521484375, "loss": 3.7448, "rewards/accuracies": 0.5, "rewards/chosen": -4.364744663238525, "rewards/margins": -1.6479661464691162, "rewards/rejected": -2.7167787551879883, "step": 1813 }, { "epoch": 0.28, "learning_rate": 1.2816873281809224e-05, "logits/chosen": -2.7784762382507324, "logits/rejected": -2.8794753551483154, "logps/chosen": -335.4397277832031, "logps/rejected": -231.57322692871094, "loss": 1.1467, "rewards/accuracies": 0.5, "rewards/chosen": -2.3120651245117188, "rewards/margins": 3.8708198070526123, "rewards/rejected": -6.18288516998291, "step": 1814 }, { "epoch": 0.28, "learning_rate": 1.2816139841278076e-05, "logits/chosen": -2.9665749073028564, "logits/rejected": -3.07423734664917, "logps/chosen": -49.10266876220703, "logps/rejected": -155.49539184570312, "loss": 0.1868, "rewards/accuracies": 1.0, "rewards/chosen": -1.3663859367370605, "rewards/margins": 2.5739002227783203, "rewards/rejected": -3.940286159515381, "step": 1815 }, { "epoch": 0.28, "learning_rate": 1.2815406400746928e-05, "logits/chosen": -0.9121135473251343, "logits/rejected": -3.19827938079834, "logps/chosen": -9.40711784362793, "logps/rejected": -430.8587951660156, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.4188086986541748, "rewards/margins": 5.552425384521484, "rewards/rejected": -5.133616924285889, "step": 1816 }, { "epoch": 0.28, "learning_rate": 1.281467296021578e-05, "logits/chosen": -2.5314857959747314, "logits/rejected": -2.8808510303497314, "logps/chosen": -133.3312225341797, "logps/rejected": -157.04995727539062, "loss": 1.3772, "rewards/accuracies": 0.5, "rewards/chosen": -2.2977421283721924, "rewards/margins": 1.0166175365447998, "rewards/rejected": -3.314359664916992, "step": 1817 }, { "epoch": 0.28, "learning_rate": 1.2813939519684631e-05, "logits/chosen": -2.9999372959136963, "logits/rejected": -3.090407371520996, "logps/chosen": -201.71800231933594, "logps/rejected": -262.5539855957031, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.16779273748397827, "rewards/margins": 6.260883331298828, "rewards/rejected": -6.428675651550293, "step": 1818 }, { "epoch": 0.28, "learning_rate": 1.2813206079153483e-05, "logits/chosen": -3.214113235473633, "logits/rejected": -2.6359360218048096, "logps/chosen": -1039.517333984375, "logps/rejected": -741.7003173828125, "loss": 1.0169, "rewards/accuracies": 0.5, "rewards/chosen": -1.5602188110351562, "rewards/margins": 0.8225845098495483, "rewards/rejected": -2.382803201675415, "step": 1819 }, { "epoch": 0.28, "learning_rate": 1.2812472638622335e-05, "logits/chosen": -2.491380453109741, "logits/rejected": -3.267946481704712, "logps/chosen": -360.46533203125, "logps/rejected": -548.4774169921875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.2815498113632202, "rewards/margins": 8.315873146057129, "rewards/rejected": -8.034322738647461, "step": 1820 }, { "epoch": 0.28, "learning_rate": 1.2811739198091187e-05, "logits/chosen": -2.9036037921905518, "logits/rejected": -3.0023927688598633, "logps/chosen": -128.04811096191406, "logps/rejected": -249.02931213378906, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 0.07882995158433914, "rewards/margins": 4.472790718078613, "rewards/rejected": -4.393960952758789, "step": 1821 }, { "epoch": 0.28, "learning_rate": 1.2811005757560039e-05, "logits/chosen": -2.2025609016418457, "logits/rejected": -2.8314390182495117, "logps/chosen": -142.98182678222656, "logps/rejected": -139.42544555664062, "loss": 0.5094, "rewards/accuracies": 0.5, "rewards/chosen": -0.8753048181533813, "rewards/margins": 2.598790168762207, "rewards/rejected": -3.474095106124878, "step": 1822 }, { "epoch": 0.28, "learning_rate": 1.2810272317028893e-05, "logits/chosen": -3.2128913402557373, "logits/rejected": -3.252788543701172, "logps/chosen": -457.5976867675781, "logps/rejected": -388.48748779296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.18601208925247192, "rewards/margins": 7.2341156005859375, "rewards/rejected": -7.420127868652344, "step": 1823 }, { "epoch": 0.28, "learning_rate": 1.2809538876497744e-05, "logits/chosen": -1.6435915231704712, "logits/rejected": -3.199892997741699, "logps/chosen": -90.7398910522461, "logps/rejected": -397.2132568359375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.27762383222579956, "rewards/margins": 6.450708866119385, "rewards/rejected": -6.72833251953125, "step": 1824 }, { "epoch": 0.28, "learning_rate": 1.2808805435966596e-05, "logits/chosen": -1.8234246969223022, "logits/rejected": -2.651738405227661, "logps/chosen": -205.42868041992188, "logps/rejected": -225.6092071533203, "loss": 2.3466, "rewards/accuracies": 0.5, "rewards/chosen": -2.809650421142578, "rewards/margins": -1.1057569980621338, "rewards/rejected": -1.7038933038711548, "step": 1825 }, { "epoch": 0.28, "learning_rate": 1.2808071995435448e-05, "logits/chosen": -2.601062536239624, "logits/rejected": -2.986717700958252, "logps/chosen": -98.71347045898438, "logps/rejected": -230.2288818359375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.010369494557380676, "rewards/margins": 4.737510681152344, "rewards/rejected": -4.747879981994629, "step": 1826 }, { "epoch": 0.28, "learning_rate": 1.28073385549043e-05, "logits/chosen": -1.9089035987854004, "logits/rejected": -3.226879358291626, "logps/chosen": -98.32548522949219, "logps/rejected": -386.2835693359375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.3388260006904602, "rewards/margins": 6.7661027908325195, "rewards/rejected": -6.427276611328125, "step": 1827 }, { "epoch": 0.28, "learning_rate": 1.2806605114373152e-05, "logits/chosen": -2.881514549255371, "logits/rejected": -3.0280535221099854, "logps/chosen": -219.85366821289062, "logps/rejected": -210.71051025390625, "loss": 1.6661, "rewards/accuracies": 0.5, "rewards/chosen": -1.346367597579956, "rewards/margins": 1.2420090436935425, "rewards/rejected": -2.588376760482788, "step": 1828 }, { "epoch": 0.28, "learning_rate": 1.2805871673842004e-05, "logits/chosen": -2.4398903846740723, "logits/rejected": -2.821082830429077, "logps/chosen": -594.5092163085938, "logps/rejected": -720.7806396484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5839920043945312, "rewards/margins": 8.738990783691406, "rewards/rejected": -9.322982788085938, "step": 1829 }, { "epoch": 0.28, "learning_rate": 1.2805138233310856e-05, "logits/chosen": -2.616184711456299, "logits/rejected": -2.7518527507781982, "logps/chosen": -68.06177520751953, "logps/rejected": -133.85911560058594, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -0.8904865384101868, "rewards/margins": 4.496642112731934, "rewards/rejected": -5.3871283531188965, "step": 1830 }, { "epoch": 0.28, "learning_rate": 1.2804404792779708e-05, "logits/chosen": -1.3983763456344604, "logits/rejected": -3.0871851444244385, "logps/chosen": -188.86822509765625, "logps/rejected": -678.5220336914062, "loss": 1.9593, "rewards/accuracies": 0.5, "rewards/chosen": -2.882830858230591, "rewards/margins": -0.4713340997695923, "rewards/rejected": -2.411496639251709, "step": 1831 }, { "epoch": 0.28, "learning_rate": 1.2803671352248561e-05, "logits/chosen": -1.4668699502944946, "logits/rejected": -2.899634599685669, "logps/chosen": -124.90711975097656, "logps/rejected": -306.8511962890625, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 0.0003963559865951538, "rewards/margins": 4.143093585968018, "rewards/rejected": -4.142697334289551, "step": 1832 }, { "epoch": 0.29, "learning_rate": 1.2802937911717413e-05, "logits/chosen": -2.656733512878418, "logits/rejected": -2.529444456100464, "logps/chosen": -238.6673126220703, "logps/rejected": -285.59674072265625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.6644973754882812, "rewards/margins": 6.246148586273193, "rewards/rejected": -6.910645961761475, "step": 1833 }, { "epoch": 0.29, "learning_rate": 1.2802204471186265e-05, "logits/chosen": -3.069100856781006, "logits/rejected": -2.929924488067627, "logps/chosen": -737.1268310546875, "logps/rejected": -407.5406799316406, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": 0.4234481751918793, "rewards/margins": 4.0223259925842285, "rewards/rejected": -3.5988779067993164, "step": 1834 }, { "epoch": 0.29, "learning_rate": 1.2801471030655117e-05, "logits/chosen": -2.150895357131958, "logits/rejected": -2.9085161685943604, "logps/chosen": -399.7776794433594, "logps/rejected": -523.53662109375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.1255752593278885, "rewards/margins": 5.574161052703857, "rewards/rejected": -5.448585510253906, "step": 1835 }, { "epoch": 0.29, "learning_rate": 1.2800737590123969e-05, "logits/chosen": -3.0293397903442383, "logits/rejected": -3.037522792816162, "logps/chosen": -61.548423767089844, "logps/rejected": -90.88203430175781, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": -0.0528353750705719, "rewards/margins": 2.9842820167541504, "rewards/rejected": -3.0371174812316895, "step": 1836 }, { "epoch": 0.29, "learning_rate": 1.280000414959282e-05, "logits/chosen": -3.0926434993743896, "logits/rejected": -2.720165491104126, "logps/chosen": -195.85787963867188, "logps/rejected": -114.65821838378906, "loss": 0.3597, "rewards/accuracies": 0.5, "rewards/chosen": -0.4176124632358551, "rewards/margins": 2.7552342414855957, "rewards/rejected": -3.172846555709839, "step": 1837 }, { "epoch": 0.29, "learning_rate": 1.2799270709061672e-05, "logits/chosen": -2.9138810634613037, "logits/rejected": -1.9968770742416382, "logps/chosen": -307.09710693359375, "logps/rejected": -156.8795928955078, "loss": 2.5217, "rewards/accuracies": 0.0, "rewards/chosen": -4.0575714111328125, "rewards/margins": -2.421112537384033, "rewards/rejected": -1.6364586353302002, "step": 1838 }, { "epoch": 0.29, "learning_rate": 1.2798537268530524e-05, "logits/chosen": -2.416499376296997, "logits/rejected": -2.7936580181121826, "logps/chosen": -196.2914581298828, "logps/rejected": -508.42132568359375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.7171478271484375, "rewards/margins": 5.774418830871582, "rewards/rejected": -6.4915666580200195, "step": 1839 }, { "epoch": 0.29, "learning_rate": 1.2797803827999378e-05, "logits/chosen": -2.5864601135253906, "logits/rejected": -3.0626065731048584, "logps/chosen": -37.195465087890625, "logps/rejected": -176.846923828125, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -0.3259486258029938, "rewards/margins": 4.3965044021606445, "rewards/rejected": -4.722452640533447, "step": 1840 }, { "epoch": 0.29, "learning_rate": 1.279707038746823e-05, "logits/chosen": -1.5955859422683716, "logits/rejected": -2.94240665435791, "logps/chosen": -70.2293930053711, "logps/rejected": -224.38372802734375, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -0.48712217807769775, "rewards/margins": 4.952155113220215, "rewards/rejected": -5.439276695251465, "step": 1841 }, { "epoch": 0.29, "learning_rate": 1.2796336946937082e-05, "logits/chosen": -1.7520978450775146, "logits/rejected": -2.854898691177368, "logps/chosen": -55.49814224243164, "logps/rejected": -218.277099609375, "loss": 0.2752, "rewards/accuracies": 1.0, "rewards/chosen": -0.5518719553947449, "rewards/margins": 2.0162782669067383, "rewards/rejected": -2.568150281906128, "step": 1842 }, { "epoch": 0.29, "learning_rate": 1.2795603506405934e-05, "logits/chosen": -2.7556886672973633, "logits/rejected": -1.86732017993927, "logps/chosen": -300.3213806152344, "logps/rejected": -254.27456665039062, "loss": 1.0493, "rewards/accuracies": 0.5, "rewards/chosen": -2.0738344192504883, "rewards/margins": -0.23119109869003296, "rewards/rejected": -1.8426433801651, "step": 1843 }, { "epoch": 0.29, "learning_rate": 1.2794870065874787e-05, "logits/chosen": -2.7562925815582275, "logits/rejected": -2.5928351879119873, "logps/chosen": -79.91668701171875, "logps/rejected": -257.2414855957031, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.07656598091125488, "rewards/margins": 4.130565643310547, "rewards/rejected": -4.207131385803223, "step": 1844 }, { "epoch": 0.29, "learning_rate": 1.2794136625343639e-05, "logits/chosen": -2.8358476161956787, "logits/rejected": -2.9694197177886963, "logps/chosen": -262.7568359375, "logps/rejected": -371.86260986328125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.5241370797157288, "rewards/margins": 6.196521759033203, "rewards/rejected": -6.720659255981445, "step": 1845 }, { "epoch": 0.29, "learning_rate": 1.279340318481249e-05, "logits/chosen": -2.9081573486328125, "logits/rejected": -2.939588785171509, "logps/chosen": -427.4540100097656, "logps/rejected": -419.6051940917969, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": -2.1973190307617188, "rewards/margins": 3.3402390480041504, "rewards/rejected": -5.537558078765869, "step": 1846 }, { "epoch": 0.29, "learning_rate": 1.2792669744281343e-05, "logits/chosen": -2.7009670734405518, "logits/rejected": -1.9494273662567139, "logps/chosen": -120.48832702636719, "logps/rejected": -127.58570861816406, "loss": 4.3545, "rewards/accuracies": 0.5, "rewards/chosen": -4.333062171936035, "rewards/margins": -0.36414337158203125, "rewards/rejected": -3.968919038772583, "step": 1847 }, { "epoch": 0.29, "learning_rate": 1.2791936303750195e-05, "logits/chosen": -2.222329616546631, "logits/rejected": -3.0194132328033447, "logps/chosen": -234.0774688720703, "logps/rejected": -436.0023193359375, "loss": 2.0976, "rewards/accuracies": 0.5, "rewards/chosen": -3.0333642959594727, "rewards/margins": 2.549333095550537, "rewards/rejected": -5.58269739151001, "step": 1848 }, { "epoch": 0.29, "learning_rate": 1.2791202863219048e-05, "logits/chosen": -1.5138853788375854, "logits/rejected": -2.836993455886841, "logps/chosen": -154.38125610351562, "logps/rejected": -287.62847900390625, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": -1.1148109436035156, "rewards/margins": 3.1854135990142822, "rewards/rejected": -4.300224781036377, "step": 1849 }, { "epoch": 0.29, "learning_rate": 1.27904694226879e-05, "logits/chosen": -1.2514989376068115, "logits/rejected": -2.7489380836486816, "logps/chosen": -98.48998260498047, "logps/rejected": -481.7452697753906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6099841594696045, "rewards/margins": 8.01456069946289, "rewards/rejected": -8.624544143676758, "step": 1850 }, { "epoch": 0.29, "learning_rate": 1.2789735982156752e-05, "logits/chosen": -1.026090145111084, "logits/rejected": -2.8380656242370605, "logps/chosen": -63.8199462890625, "logps/rejected": -497.7055358886719, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.0981893539428711, "rewards/margins": 5.7456583976745605, "rewards/rejected": -5.843847751617432, "step": 1851 }, { "epoch": 0.29, "learning_rate": 1.2789002541625604e-05, "logits/chosen": -1.8407117128372192, "logits/rejected": -3.01499342918396, "logps/chosen": -16.204383850097656, "logps/rejected": -122.17489624023438, "loss": 0.3544, "rewards/accuracies": 1.0, "rewards/chosen": -0.2287585288286209, "rewards/margins": 1.5323835611343384, "rewards/rejected": -1.7611420154571533, "step": 1852 }, { "epoch": 0.29, "learning_rate": 1.2788269101094456e-05, "logits/chosen": -2.5144810676574707, "logits/rejected": -2.9838168621063232, "logps/chosen": -79.40115356445312, "logps/rejected": -261.931640625, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": -0.04215306043624878, "rewards/margins": 6.556026458740234, "rewards/rejected": -6.598179817199707, "step": 1853 }, { "epoch": 0.29, "learning_rate": 1.2787535660563308e-05, "logits/chosen": -0.9982916712760925, "logits/rejected": -2.722895860671997, "logps/chosen": -60.08580780029297, "logps/rejected": -407.0062255859375, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.0031574219465255737, "rewards/margins": 6.612506866455078, "rewards/rejected": -6.615664482116699, "step": 1854 }, { "epoch": 0.29, "learning_rate": 1.278680222003216e-05, "logits/chosen": -1.9486749172210693, "logits/rejected": -2.8022704124450684, "logps/chosen": -199.75155639648438, "logps/rejected": -387.9244384765625, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.3966575860977173, "rewards/margins": 5.773622989654541, "rewards/rejected": -7.170280456542969, "step": 1855 }, { "epoch": 0.29, "learning_rate": 1.2786068779501011e-05, "logits/chosen": -3.0441324710845947, "logits/rejected": -2.0461323261260986, "logps/chosen": -179.8719482421875, "logps/rejected": -108.66343688964844, "loss": 2.5599, "rewards/accuracies": 0.5, "rewards/chosen": -1.8434597253799438, "rewards/margins": -0.40378546714782715, "rewards/rejected": -1.4396741390228271, "step": 1856 }, { "epoch": 0.29, "learning_rate": 1.2785335338969863e-05, "logits/chosen": -2.7998390197753906, "logits/rejected": -3.2708373069763184, "logps/chosen": -209.20384216308594, "logps/rejected": -399.6222229003906, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -0.8010124564170837, "rewards/margins": 3.915787696838379, "rewards/rejected": -4.716800212860107, "step": 1857 }, { "epoch": 0.29, "learning_rate": 1.2784601898438717e-05, "logits/chosen": -3.113600492477417, "logits/rejected": -3.050014019012451, "logps/chosen": -118.31893920898438, "logps/rejected": -148.17076110839844, "loss": 2.9246, "rewards/accuracies": 0.5, "rewards/chosen": -3.058164596557617, "rewards/margins": -0.710867166519165, "rewards/rejected": -2.347297430038452, "step": 1858 }, { "epoch": 0.29, "learning_rate": 1.2783868457907569e-05, "logits/chosen": -2.3560125827789307, "logits/rejected": -3.0145950317382812, "logps/chosen": -111.75740814208984, "logps/rejected": -270.81304931640625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.06462249904870987, "rewards/margins": 5.115098476409912, "rewards/rejected": -5.05047607421875, "step": 1859 }, { "epoch": 0.29, "learning_rate": 1.278313501737642e-05, "logits/chosen": -2.923151731491089, "logits/rejected": -2.1727051734924316, "logps/chosen": -207.44654846191406, "logps/rejected": -43.661373138427734, "loss": 2.3532, "rewards/accuracies": 0.5, "rewards/chosen": -1.3175065517425537, "rewards/margins": -1.6516913175582886, "rewards/rejected": 0.33418476581573486, "step": 1860 }, { "epoch": 0.29, "learning_rate": 1.2782401576845272e-05, "logits/chosen": -3.017425298690796, "logits/rejected": -2.9612011909484863, "logps/chosen": -371.74700927734375, "logps/rejected": -355.30450439453125, "loss": 0.1308, "rewards/accuracies": 1.0, "rewards/chosen": -1.2991180419921875, "rewards/margins": 4.525792121887207, "rewards/rejected": -5.8249101638793945, "step": 1861 }, { "epoch": 0.29, "learning_rate": 1.2781668136314124e-05, "logits/chosen": -3.395024061203003, "logits/rejected": -3.4183309078216553, "logps/chosen": -71.54068756103516, "logps/rejected": -135.03057861328125, "loss": 1.8435, "rewards/accuracies": 0.5, "rewards/chosen": -1.636564016342163, "rewards/margins": 0.61583411693573, "rewards/rejected": -2.2523980140686035, "step": 1862 }, { "epoch": 0.29, "learning_rate": 1.2780934695782976e-05, "logits/chosen": -2.540099620819092, "logits/rejected": -3.089871644973755, "logps/chosen": -85.0448989868164, "logps/rejected": -247.43955993652344, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -1.0124115943908691, "rewards/margins": 3.3074378967285156, "rewards/rejected": -4.319849491119385, "step": 1863 }, { "epoch": 0.29, "learning_rate": 1.2780201255251828e-05, "logits/chosen": -1.9195555448532104, "logits/rejected": -2.0785865783691406, "logps/chosen": -193.47450256347656, "logps/rejected": -202.72451782226562, "loss": 1.2908, "rewards/accuracies": 0.5, "rewards/chosen": -1.2990970611572266, "rewards/margins": 2.279369592666626, "rewards/rejected": -3.5784666538238525, "step": 1864 }, { "epoch": 0.29, "learning_rate": 1.277946781472068e-05, "logits/chosen": -2.9243369102478027, "logits/rejected": -2.9941012859344482, "logps/chosen": -72.65755462646484, "logps/rejected": -77.74300384521484, "loss": 1.987, "rewards/accuracies": 0.5, "rewards/chosen": -2.6593692302703857, "rewards/margins": -0.4972977638244629, "rewards/rejected": -2.1620712280273438, "step": 1865 }, { "epoch": 0.29, "learning_rate": 1.2778734374189532e-05, "logits/chosen": -2.1511802673339844, "logits/rejected": -2.790371894836426, "logps/chosen": -245.70709228515625, "logps/rejected": -449.66351318359375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.20001259446144104, "rewards/margins": 7.655296325683594, "rewards/rejected": -7.455284118652344, "step": 1866 }, { "epoch": 0.29, "learning_rate": 1.2778000933658385e-05, "logits/chosen": -2.8396265506744385, "logits/rejected": -2.5105581283569336, "logps/chosen": -248.58534240722656, "logps/rejected": -386.5517272949219, "loss": 3.023, "rewards/accuracies": 0.5, "rewards/chosen": -3.897505283355713, "rewards/margins": 2.0494258403778076, "rewards/rejected": -5.946930885314941, "step": 1867 }, { "epoch": 0.29, "learning_rate": 1.2777267493127237e-05, "logits/chosen": -2.0665886402130127, "logits/rejected": -2.8950719833374023, "logps/chosen": -26.28401756286621, "logps/rejected": -333.1195983886719, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": 0.1955426186323166, "rewards/margins": 6.959473609924316, "rewards/rejected": -6.7639312744140625, "step": 1868 }, { "epoch": 0.29, "learning_rate": 1.2776534052596089e-05, "logits/chosen": -2.488765001296997, "logits/rejected": -2.93601393699646, "logps/chosen": -421.9241943359375, "logps/rejected": -460.32977294921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.08268585801124573, "rewards/margins": 7.686166286468506, "rewards/rejected": -7.768852233886719, "step": 1869 }, { "epoch": 0.29, "learning_rate": 1.2775800612064941e-05, "logits/chosen": -2.88283634185791, "logits/rejected": -2.8146440982818604, "logps/chosen": -151.44085693359375, "logps/rejected": -340.9581298828125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5875098705291748, "rewards/margins": 6.746706962585449, "rewards/rejected": -7.334216594696045, "step": 1870 }, { "epoch": 0.29, "learning_rate": 1.2775067171533793e-05, "logits/chosen": -3.1052024364471436, "logits/rejected": -3.1418213844299316, "logps/chosen": -325.3897399902344, "logps/rejected": -464.75482177734375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1106669902801514, "rewards/margins": 7.0687360763549805, "rewards/rejected": -8.179403305053711, "step": 1871 }, { "epoch": 0.29, "learning_rate": 1.2774333731002645e-05, "logits/chosen": -2.4583945274353027, "logits/rejected": -2.9020867347717285, "logps/chosen": -117.56155395507812, "logps/rejected": -239.95262145996094, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": 0.24645614624023438, "rewards/margins": 4.409472942352295, "rewards/rejected": -4.1630167961120605, "step": 1872 }, { "epoch": 0.29, "learning_rate": 1.2773600290471497e-05, "logits/chosen": -3.135406255722046, "logits/rejected": -3.1364543437957764, "logps/chosen": -380.9083251953125, "logps/rejected": -332.1920471191406, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.5224627256393433, "rewards/margins": 4.8131184577941895, "rewards/rejected": -6.335581302642822, "step": 1873 }, { "epoch": 0.29, "learning_rate": 1.2772866849940349e-05, "logits/chosen": -3.1905648708343506, "logits/rejected": -3.452863931655884, "logps/chosen": -22.612037658691406, "logps/rejected": -173.69627380371094, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.05011829733848572, "rewards/margins": 4.74540901184082, "rewards/rejected": -4.79552698135376, "step": 1874 }, { "epoch": 0.29, "learning_rate": 1.27721334094092e-05, "logits/chosen": -2.491344928741455, "logits/rejected": -3.1793808937072754, "logps/chosen": -95.77872467041016, "logps/rejected": -236.00108337402344, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.9568912982940674, "rewards/margins": 7.333275318145752, "rewards/rejected": -6.3763837814331055, "step": 1875 }, { "epoch": 0.29, "learning_rate": 1.2771399968878054e-05, "logits/chosen": -2.8793742656707764, "logits/rejected": -2.9004836082458496, "logps/chosen": -745.8392333984375, "logps/rejected": -636.787353515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5131927728652954, "rewards/margins": 7.669041633605957, "rewards/rejected": -8.182233810424805, "step": 1876 }, { "epoch": 0.29, "learning_rate": 1.2770666528346906e-05, "logits/chosen": -2.0299129486083984, "logits/rejected": -2.8991639614105225, "logps/chosen": -214.82037353515625, "logps/rejected": -460.78912353515625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 0.23492126166820526, "rewards/margins": 5.354034423828125, "rewards/rejected": -5.119112968444824, "step": 1877 }, { "epoch": 0.29, "learning_rate": 1.276993308781576e-05, "logits/chosen": -2.058882474899292, "logits/rejected": -2.254373550415039, "logps/chosen": -283.80517578125, "logps/rejected": -327.5038757324219, "loss": 2.5642, "rewards/accuracies": 0.5, "rewards/chosen": -3.318485975265503, "rewards/margins": 3.21942138671875, "rewards/rejected": -6.537907123565674, "step": 1878 }, { "epoch": 0.29, "learning_rate": 1.2769199647284611e-05, "logits/chosen": -2.879474401473999, "logits/rejected": -2.9100594520568848, "logps/chosen": -62.431053161621094, "logps/rejected": -172.74517822265625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.8079889416694641, "rewards/margins": 5.190751552581787, "rewards/rejected": -5.9987406730651855, "step": 1879 }, { "epoch": 0.29, "learning_rate": 1.2768466206753463e-05, "logits/chosen": -1.9978653192520142, "logits/rejected": -3.0505709648132324, "logps/chosen": -233.74478149414062, "logps/rejected": -374.9508056640625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.34536057710647583, "rewards/margins": 5.751229286193848, "rewards/rejected": -5.4058685302734375, "step": 1880 }, { "epoch": 0.29, "learning_rate": 1.2767732766222315e-05, "logits/chosen": -2.8799197673797607, "logits/rejected": -2.323119640350342, "logps/chosen": -269.3410339355469, "logps/rejected": -242.35299682617188, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -1.4772933721542358, "rewards/margins": 4.471017837524414, "rewards/rejected": -5.948310852050781, "step": 1881 }, { "epoch": 0.29, "learning_rate": 1.2766999325691167e-05, "logits/chosen": -2.8607804775238037, "logits/rejected": -2.6323137283325195, "logps/chosen": -173.20008850097656, "logps/rejected": -151.1327362060547, "loss": 3.0968, "rewards/accuracies": 0.0, "rewards/chosen": -2.8857262134552, "rewards/margins": -3.0445566177368164, "rewards/rejected": 0.15883034467697144, "step": 1882 }, { "epoch": 0.29, "learning_rate": 1.2766265885160019e-05, "logits/chosen": -2.7201366424560547, "logits/rejected": -2.4660677909851074, "logps/chosen": -159.14161682128906, "logps/rejected": -219.98974609375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.9159904718399048, "rewards/margins": 5.3805036544799805, "rewards/rejected": -6.296494483947754, "step": 1883 }, { "epoch": 0.29, "learning_rate": 1.276553244462887e-05, "logits/chosen": -1.651121973991394, "logits/rejected": -2.8827056884765625, "logps/chosen": -31.692317962646484, "logps/rejected": -299.97906494140625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.2989133894443512, "rewards/margins": 5.016778469085693, "rewards/rejected": -4.717864990234375, "step": 1884 }, { "epoch": 0.29, "learning_rate": 1.2764799004097724e-05, "logits/chosen": -1.715606927871704, "logits/rejected": -2.7789435386657715, "logps/chosen": -58.618770599365234, "logps/rejected": -227.04852294921875, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": 0.5091149806976318, "rewards/margins": 5.176326274871826, "rewards/rejected": -4.667211532592773, "step": 1885 }, { "epoch": 0.29, "learning_rate": 1.2764065563566576e-05, "logits/chosen": -2.620676040649414, "logits/rejected": -3.189980983734131, "logps/chosen": -231.23187255859375, "logps/rejected": -378.1664733886719, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.1522839516401291, "rewards/margins": 6.256903648376465, "rewards/rejected": -6.409187316894531, "step": 1886 }, { "epoch": 0.29, "learning_rate": 1.2763332123035428e-05, "logits/chosen": -2.017014980316162, "logits/rejected": -2.903494358062744, "logps/chosen": -114.07220458984375, "logps/rejected": -275.51654052734375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.3216209411621094, "rewards/margins": 5.5008745193481445, "rewards/rejected": -5.822495460510254, "step": 1887 }, { "epoch": 0.29, "learning_rate": 1.276259868250428e-05, "logits/chosen": -2.5956549644470215, "logits/rejected": -3.156723737716675, "logps/chosen": -150.126953125, "logps/rejected": -428.3329162597656, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.2178516387939453, "rewards/margins": 8.214802742004395, "rewards/rejected": -8.432653427124023, "step": 1888 }, { "epoch": 0.29, "learning_rate": 1.2761865241973132e-05, "logits/chosen": -1.948124647140503, "logits/rejected": -2.538372039794922, "logps/chosen": -179.33746337890625, "logps/rejected": -233.8578643798828, "loss": 1.1024, "rewards/accuracies": 0.5, "rewards/chosen": -1.5359299182891846, "rewards/margins": 3.3801000118255615, "rewards/rejected": -4.916029930114746, "step": 1889 }, { "epoch": 0.29, "learning_rate": 1.2761131801441984e-05, "logits/chosen": -2.494584798812866, "logits/rejected": -3.025571346282959, "logps/chosen": -166.18756103515625, "logps/rejected": -193.98587036132812, "loss": 2.3119, "rewards/accuracies": 0.5, "rewards/chosen": -2.192262887954712, "rewards/margins": 0.5838730335235596, "rewards/rejected": -2.7761359214782715, "step": 1890 }, { "epoch": 0.29, "learning_rate": 1.2760398360910836e-05, "logits/chosen": -1.1314592361450195, "logits/rejected": -2.6189088821411133, "logps/chosen": -14.412240028381348, "logps/rejected": -309.7805480957031, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.28163471817970276, "rewards/margins": 6.7727460861206055, "rewards/rejected": -6.491110801696777, "step": 1891 }, { "epoch": 0.29, "learning_rate": 1.2759664920379687e-05, "logits/chosen": -1.8776614665985107, "logits/rejected": -2.459442615509033, "logps/chosen": -115.79043579101562, "logps/rejected": -197.2719268798828, "loss": 2.4642, "rewards/accuracies": 0.5, "rewards/chosen": -2.282832384109497, "rewards/margins": 2.069127082824707, "rewards/rejected": -4.351959228515625, "step": 1892 }, { "epoch": 0.29, "learning_rate": 1.275893147984854e-05, "logits/chosen": -2.06907320022583, "logits/rejected": -2.360889434814453, "logps/chosen": -186.5653533935547, "logps/rejected": -232.47555541992188, "loss": 3.0583, "rewards/accuracies": 0.5, "rewards/chosen": -3.1270809173583984, "rewards/margins": -1.8727326393127441, "rewards/rejected": -1.2543483972549438, "step": 1893 }, { "epoch": 0.29, "learning_rate": 1.2758198039317393e-05, "logits/chosen": -2.40143084526062, "logits/rejected": -2.9383933544158936, "logps/chosen": -316.4156494140625, "logps/rejected": -336.6020812988281, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 0.2867431640625, "rewards/margins": 4.995883941650391, "rewards/rejected": -4.709140777587891, "step": 1894 }, { "epoch": 0.29, "learning_rate": 1.2757464598786245e-05, "logits/chosen": -2.103170394897461, "logits/rejected": -3.177924156188965, "logps/chosen": -138.35678100585938, "logps/rejected": -361.32342529296875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.09092263877391815, "rewards/margins": 6.358628273010254, "rewards/rejected": -6.267705917358398, "step": 1895 }, { "epoch": 0.29, "learning_rate": 1.2756731158255097e-05, "logits/chosen": -1.661880373954773, "logits/rejected": -2.8498423099517822, "logps/chosen": -126.17753601074219, "logps/rejected": -473.51171875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.09471359848976135, "rewards/margins": 8.070565223693848, "rewards/rejected": -7.975852012634277, "step": 1896 }, { "epoch": 0.3, "learning_rate": 1.2755997717723948e-05, "logits/chosen": -2.4105072021484375, "logits/rejected": -2.942831039428711, "logps/chosen": -104.39479064941406, "logps/rejected": -312.7503967285156, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.18503743410110474, "rewards/margins": 5.454327583312988, "rewards/rejected": -5.639364719390869, "step": 1897 }, { "epoch": 0.3, "learning_rate": 1.27552642771928e-05, "logits/chosen": -1.8652536869049072, "logits/rejected": -2.7305099964141846, "logps/chosen": -160.631591796875, "logps/rejected": -484.6360778808594, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.35106581449508667, "rewards/margins": 8.643889427185059, "rewards/rejected": -8.292823791503906, "step": 1898 }, { "epoch": 0.3, "learning_rate": 1.2754530836661652e-05, "logits/chosen": -2.7440404891967773, "logits/rejected": -3.0324411392211914, "logps/chosen": -146.4879150390625, "logps/rejected": -303.4329833984375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.31987324357032776, "rewards/margins": 7.024311065673828, "rewards/rejected": -6.704437255859375, "step": 1899 }, { "epoch": 0.3, "learning_rate": 1.2753797396130504e-05, "logits/chosen": -2.816545248031616, "logits/rejected": -1.5598560571670532, "logps/chosen": -292.31341552734375, "logps/rejected": -211.8000946044922, "loss": 2.4312, "rewards/accuracies": 0.5, "rewards/chosen": -3.8623194694519043, "rewards/margins": 1.3147664070129395, "rewards/rejected": -5.177085876464844, "step": 1900 }, { "epoch": 0.3, "learning_rate": 1.2753063955599356e-05, "logits/chosen": -1.5873216390609741, "logits/rejected": -2.694094181060791, "logps/chosen": -77.79885864257812, "logps/rejected": -232.7779541015625, "loss": 0.0944, "rewards/accuracies": 1.0, "rewards/chosen": -1.2930784225463867, "rewards/margins": 2.3159775733947754, "rewards/rejected": -3.609055995941162, "step": 1901 }, { "epoch": 0.3, "learning_rate": 1.2752330515068208e-05, "logits/chosen": -1.6206547021865845, "logits/rejected": -3.0326626300811768, "logps/chosen": -107.52508544921875, "logps/rejected": -487.2862854003906, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4085945188999176, "rewards/margins": 6.575988292694092, "rewards/rejected": -6.984582901000977, "step": 1902 }, { "epoch": 0.3, "learning_rate": 1.2751597074537061e-05, "logits/chosen": -2.222010850906372, "logits/rejected": -2.9115076065063477, "logps/chosen": -92.01699829101562, "logps/rejected": -330.79754638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2520294189453125, "rewards/margins": 9.689654350280762, "rewards/rejected": -9.43762493133545, "step": 1903 }, { "epoch": 0.3, "learning_rate": 1.2750863634005913e-05, "logits/chosen": -2.7354509830474854, "logits/rejected": -2.6038811206817627, "logps/chosen": -119.65653991699219, "logps/rejected": -288.08514404296875, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.27092018723487854, "rewards/margins": 4.930023670196533, "rewards/rejected": -5.200943946838379, "step": 1904 }, { "epoch": 0.3, "learning_rate": 1.2750130193474765e-05, "logits/chosen": -3.1260313987731934, "logits/rejected": -2.2131145000457764, "logps/chosen": -344.593994140625, "logps/rejected": -252.8826446533203, "loss": 5.3991, "rewards/accuracies": 0.0, "rewards/chosen": -4.912209510803223, "rewards/margins": -5.393162727355957, "rewards/rejected": 0.4809532165527344, "step": 1905 }, { "epoch": 0.3, "learning_rate": 1.2749396752943617e-05, "logits/chosen": -2.3482277393341064, "logits/rejected": -2.6965324878692627, "logps/chosen": -195.74191284179688, "logps/rejected": -189.7835235595703, "loss": 2.0829, "rewards/accuracies": 0.5, "rewards/chosen": -2.1608517169952393, "rewards/margins": 1.0572340488433838, "rewards/rejected": -3.218085765838623, "step": 1906 }, { "epoch": 0.3, "learning_rate": 1.2748663312412469e-05, "logits/chosen": -2.8868117332458496, "logits/rejected": -1.9678858518600464, "logps/chosen": -220.92835998535156, "logps/rejected": -114.78962707519531, "loss": 2.3842, "rewards/accuracies": 0.5, "rewards/chosen": -2.635448455810547, "rewards/margins": 0.6387135982513428, "rewards/rejected": -3.2741620540618896, "step": 1907 }, { "epoch": 0.3, "learning_rate": 1.274792987188132e-05, "logits/chosen": -2.7868762016296387, "logits/rejected": -2.3054287433624268, "logps/chosen": -97.71219635009766, "logps/rejected": -133.67367553710938, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": 1.0821515321731567, "rewards/margins": 5.605529308319092, "rewards/rejected": -4.523377895355225, "step": 1908 }, { "epoch": 0.3, "learning_rate": 1.2747196431350173e-05, "logits/chosen": -2.7325403690338135, "logits/rejected": -2.6243600845336914, "logps/chosen": -316.50421142578125, "logps/rejected": -340.0467834472656, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -1.0473315715789795, "rewards/margins": 4.27271842956543, "rewards/rejected": -5.320050239562988, "step": 1909 }, { "epoch": 0.3, "learning_rate": 1.2746462990819025e-05, "logits/chosen": -2.9957571029663086, "logits/rejected": -2.2346465587615967, "logps/chosen": -235.146240234375, "logps/rejected": -181.3378448486328, "loss": 1.891, "rewards/accuracies": 0.5, "rewards/chosen": -1.99664306640625, "rewards/margins": 1.2646713256835938, "rewards/rejected": -3.261314630508423, "step": 1910 }, { "epoch": 0.3, "learning_rate": 1.2745729550287878e-05, "logits/chosen": -3.0621278285980225, "logits/rejected": -3.124675989151001, "logps/chosen": -69.60655975341797, "logps/rejected": -146.4108123779297, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": 0.665675163269043, "rewards/margins": 3.8246002197265625, "rewards/rejected": -3.1589252948760986, "step": 1911 }, { "epoch": 0.3, "learning_rate": 1.2744996109756732e-05, "logits/chosen": -2.4256832599639893, "logits/rejected": -3.28851318359375, "logps/chosen": -491.9227600097656, "logps/rejected": -1196.497314453125, "loss": 2.0292, "rewards/accuracies": 0.5, "rewards/chosen": -2.1599855422973633, "rewards/margins": 1.6165342330932617, "rewards/rejected": -3.776519775390625, "step": 1912 }, { "epoch": 0.3, "learning_rate": 1.2744262669225584e-05, "logits/chosen": -1.9129897356033325, "logits/rejected": -2.854964017868042, "logps/chosen": -54.49932098388672, "logps/rejected": -332.4307861328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.36661872267723083, "rewards/margins": 7.349022388458252, "rewards/rejected": -6.982403755187988, "step": 1913 }, { "epoch": 0.3, "learning_rate": 1.2743529228694435e-05, "logits/chosen": -2.3749308586120605, "logits/rejected": -2.801767587661743, "logps/chosen": -290.2384033203125, "logps/rejected": -398.18011474609375, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": 0.3762746751308441, "rewards/margins": 3.7615575790405273, "rewards/rejected": -3.3852829933166504, "step": 1914 }, { "epoch": 0.3, "learning_rate": 1.2742795788163287e-05, "logits/chosen": -2.724708318710327, "logits/rejected": -2.8227756023406982, "logps/chosen": -290.70452880859375, "logps/rejected": -459.4869689941406, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.9142343997955322, "rewards/margins": 4.880702972412109, "rewards/rejected": -5.7949371337890625, "step": 1915 }, { "epoch": 0.3, "learning_rate": 1.274206234763214e-05, "logits/chosen": -2.9839484691619873, "logits/rejected": -2.755091905593872, "logps/chosen": -167.32052612304688, "logps/rejected": -119.0466079711914, "loss": 1.3274, "rewards/accuracies": 0.5, "rewards/chosen": -1.463693380355835, "rewards/margins": 0.5018385648727417, "rewards/rejected": -1.965531826019287, "step": 1916 }, { "epoch": 0.3, "learning_rate": 1.2741328907100991e-05, "logits/chosen": -3.1534786224365234, "logits/rejected": -2.815091848373413, "logps/chosen": -657.3760986328125, "logps/rejected": -643.5636596679688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2617859840393066, "rewards/margins": 8.06341552734375, "rewards/rejected": -9.325201034545898, "step": 1917 }, { "epoch": 0.3, "learning_rate": 1.2740595466569843e-05, "logits/chosen": -2.5040676593780518, "logits/rejected": -3.039217948913574, "logps/chosen": -347.82562255859375, "logps/rejected": -502.36663818359375, "loss": 0.0967, "rewards/accuracies": 1.0, "rewards/chosen": 0.265982061624527, "rewards/margins": 3.374620199203491, "rewards/rejected": -3.108638048171997, "step": 1918 }, { "epoch": 0.3, "learning_rate": 1.2739862026038695e-05, "logits/chosen": -1.7185561656951904, "logits/rejected": -3.09834623336792, "logps/chosen": -141.076171875, "logps/rejected": -568.895263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.03504753112792969, "rewards/margins": 13.076122283935547, "rewards/rejected": -13.111169815063477, "step": 1919 }, { "epoch": 0.3, "learning_rate": 1.2739128585507547e-05, "logits/chosen": -2.130974292755127, "logits/rejected": -2.6811985969543457, "logps/chosen": -337.34466552734375, "logps/rejected": -269.91485595703125, "loss": 1.4396, "rewards/accuracies": 0.5, "rewards/chosen": -1.709578037261963, "rewards/margins": 1.8963489532470703, "rewards/rejected": -3.605926752090454, "step": 1920 }, { "epoch": 0.3, "learning_rate": 1.27383951449764e-05, "logits/chosen": -2.8488991260528564, "logits/rejected": -3.0967824459075928, "logps/chosen": -558.4912719726562, "logps/rejected": -458.8508605957031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.3857051730155945, "rewards/margins": 7.558616638183594, "rewards/rejected": -7.172911167144775, "step": 1921 }, { "epoch": 0.3, "learning_rate": 1.2737661704445252e-05, "logits/chosen": -2.0720083713531494, "logits/rejected": -3.2527430057525635, "logps/chosen": -159.92926025390625, "logps/rejected": -397.135986328125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.4777275323867798, "rewards/margins": 5.769399642944336, "rewards/rejected": -7.247126579284668, "step": 1922 }, { "epoch": 0.3, "learning_rate": 1.2736928263914104e-05, "logits/chosen": -2.5134711265563965, "logits/rejected": -2.982581615447998, "logps/chosen": -163.021240234375, "logps/rejected": -284.18017578125, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -0.5266605615615845, "rewards/margins": 3.9818434715270996, "rewards/rejected": -4.5085039138793945, "step": 1923 }, { "epoch": 0.3, "learning_rate": 1.2736194823382956e-05, "logits/chosen": -2.916259765625, "logits/rejected": -2.904776096343994, "logps/chosen": -81.37736511230469, "logps/rejected": -155.90396118164062, "loss": 1.5021, "rewards/accuracies": 0.5, "rewards/chosen": -1.7016037702560425, "rewards/margins": 0.01449120044708252, "rewards/rejected": -1.716094970703125, "step": 1924 }, { "epoch": 0.3, "learning_rate": 1.2735461382851808e-05, "logits/chosen": -2.3993570804595947, "logits/rejected": -2.620171070098877, "logps/chosen": -237.9419403076172, "logps/rejected": -301.81427001953125, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.3558868169784546, "rewards/margins": 4.219845771789551, "rewards/rejected": -4.575732231140137, "step": 1925 }, { "epoch": 0.3, "learning_rate": 1.273472794232066e-05, "logits/chosen": -2.7355663776397705, "logits/rejected": -1.2767558097839355, "logps/chosen": -255.46580505371094, "logps/rejected": -135.11949157714844, "loss": 3.1522, "rewards/accuracies": 0.5, "rewards/chosen": -2.599186897277832, "rewards/margins": -1.716166615486145, "rewards/rejected": -0.8830203413963318, "step": 1926 }, { "epoch": 0.3, "learning_rate": 1.2733994501789512e-05, "logits/chosen": -2.973200798034668, "logits/rejected": -3.029855489730835, "logps/chosen": -97.30443572998047, "logps/rejected": -160.35549926757812, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.3695562481880188, "rewards/margins": 4.896936416625977, "rewards/rejected": -5.26649284362793, "step": 1927 }, { "epoch": 0.3, "learning_rate": 1.2733261061258363e-05, "logits/chosen": -2.6470468044281006, "logits/rejected": -3.0120081901550293, "logps/chosen": -134.9124755859375, "logps/rejected": -322.5745544433594, "loss": 0.1829, "rewards/accuracies": 1.0, "rewards/chosen": 0.34066545963287354, "rewards/margins": 2.719081163406372, "rewards/rejected": -2.378415584564209, "step": 1928 }, { "epoch": 0.3, "learning_rate": 1.2732527620727217e-05, "logits/chosen": -1.018115520477295, "logits/rejected": -2.612316131591797, "logps/chosen": -48.993385314941406, "logps/rejected": -135.90069580078125, "loss": 1.1029, "rewards/accuracies": 0.5, "rewards/chosen": -1.5061030387878418, "rewards/margins": 0.15062451362609863, "rewards/rejected": -1.6567274332046509, "step": 1929 }, { "epoch": 0.3, "learning_rate": 1.2731794180196069e-05, "logits/chosen": -1.0343374013900757, "logits/rejected": -2.8013103008270264, "logps/chosen": -55.554222106933594, "logps/rejected": -366.5150146484375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.6507061123847961, "rewards/margins": 6.101057052612305, "rewards/rejected": -5.450351238250732, "step": 1930 }, { "epoch": 0.3, "learning_rate": 1.273106073966492e-05, "logits/chosen": -3.0423524379730225, "logits/rejected": -2.5685460567474365, "logps/chosen": -179.93588256835938, "logps/rejected": -169.34344482421875, "loss": 2.8575, "rewards/accuracies": 0.5, "rewards/chosen": -3.4338788986206055, "rewards/margins": -0.20334792137145996, "rewards/rejected": -3.2305309772491455, "step": 1931 }, { "epoch": 0.3, "learning_rate": 1.2730327299133773e-05, "logits/chosen": -2.2019741535186768, "logits/rejected": -3.0008506774902344, "logps/chosen": -469.2987976074219, "logps/rejected": -717.7644653320312, "loss": 0.1537, "rewards/accuracies": 1.0, "rewards/chosen": 1.0540268421173096, "rewards/margins": 2.798753261566162, "rewards/rejected": -1.7447266578674316, "step": 1932 }, { "epoch": 0.3, "learning_rate": 1.2729593858602625e-05, "logits/chosen": -1.6114146709442139, "logits/rejected": -2.776962995529175, "logps/chosen": -165.81463623046875, "logps/rejected": -388.5323181152344, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.8015915155410767, "rewards/margins": 4.732316017150879, "rewards/rejected": -3.9307243824005127, "step": 1933 }, { "epoch": 0.3, "learning_rate": 1.2728860418071476e-05, "logits/chosen": -1.6510741710662842, "logits/rejected": -2.810609817504883, "logps/chosen": -85.28488159179688, "logps/rejected": -477.88494873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.5023509860038757, "rewards/margins": 9.070442199707031, "rewards/rejected": -8.56809139251709, "step": 1934 }, { "epoch": 0.3, "learning_rate": 1.2728126977540328e-05, "logits/chosen": -1.5598747730255127, "logits/rejected": -2.984995126724243, "logps/chosen": -45.39435577392578, "logps/rejected": -235.45396423339844, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -0.29190540313720703, "rewards/margins": 4.654086112976074, "rewards/rejected": -4.945991516113281, "step": 1935 }, { "epoch": 0.3, "learning_rate": 1.272739353700918e-05, "logits/chosen": -2.8446271419525146, "logits/rejected": -1.9014531373977661, "logps/chosen": -249.3275146484375, "logps/rejected": -126.53578186035156, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": 1.1243312358856201, "rewards/margins": 3.6354281902313232, "rewards/rejected": -2.511096954345703, "step": 1936 }, { "epoch": 0.3, "learning_rate": 1.2726660096478032e-05, "logits/chosen": -2.5316405296325684, "logits/rejected": -2.9360427856445312, "logps/chosen": -31.815019607543945, "logps/rejected": -89.58735656738281, "loss": 0.0782, "rewards/accuracies": 1.0, "rewards/chosen": 0.11190300434827805, "rewards/margins": 2.9537036418914795, "rewards/rejected": -2.8418006896972656, "step": 1937 }, { "epoch": 0.3, "learning_rate": 1.2725926655946886e-05, "logits/chosen": -2.1847729682922363, "logits/rejected": -2.90138578414917, "logps/chosen": -367.6282043457031, "logps/rejected": -596.480712890625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.07513731718063354, "rewards/margins": 7.6896562576293945, "rewards/rejected": -7.614519119262695, "step": 1938 }, { "epoch": 0.3, "learning_rate": 1.2725193215415738e-05, "logits/chosen": -3.1807448863983154, "logits/rejected": -2.324467420578003, "logps/chosen": -268.33642578125, "logps/rejected": -69.5434341430664, "loss": 3.0782, "rewards/accuracies": 0.0, "rewards/chosen": -2.4805538654327393, "rewards/margins": -3.0259451866149902, "rewards/rejected": 0.5453913807868958, "step": 1939 }, { "epoch": 0.3, "learning_rate": 1.272445977488459e-05, "logits/chosen": -1.7187827825546265, "logits/rejected": -2.5833699703216553, "logps/chosen": -154.33094787597656, "logps/rejected": -234.18833923339844, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 0.9335640072822571, "rewards/margins": 6.177877426147461, "rewards/rejected": -5.2443132400512695, "step": 1940 }, { "epoch": 0.3, "learning_rate": 1.2723726334353441e-05, "logits/chosen": -1.7851346731185913, "logits/rejected": -3.094193458557129, "logps/chosen": -53.280616760253906, "logps/rejected": -353.6250915527344, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": 0.3668385148048401, "rewards/margins": 5.136130332946777, "rewards/rejected": -4.769291877746582, "step": 1941 }, { "epoch": 0.3, "learning_rate": 1.2722992893822293e-05, "logits/chosen": -1.4092615842819214, "logits/rejected": -2.691169261932373, "logps/chosen": -172.64268493652344, "logps/rejected": -485.509033203125, "loss": 2.3062, "rewards/accuracies": 0.5, "rewards/chosen": -1.8924201726913452, "rewards/margins": 1.794968605041504, "rewards/rejected": -3.6873886585235596, "step": 1942 }, { "epoch": 0.3, "learning_rate": 1.2722259453291145e-05, "logits/chosen": -2.5657312870025635, "logits/rejected": -3.1414711475372314, "logps/chosen": -46.66924285888672, "logps/rejected": -181.1082763671875, "loss": 0.1509, "rewards/accuracies": 1.0, "rewards/chosen": -0.05029268562793732, "rewards/margins": 2.4424633979797363, "rewards/rejected": -2.492755889892578, "step": 1943 }, { "epoch": 0.3, "learning_rate": 1.2721526012759997e-05, "logits/chosen": -2.944288492202759, "logits/rejected": -0.9299918413162231, "logps/chosen": -552.3153076171875, "logps/rejected": -151.82528686523438, "loss": 2.221, "rewards/accuracies": 0.5, "rewards/chosen": -2.3341643810272217, "rewards/margins": 0.6462068557739258, "rewards/rejected": -2.9803712368011475, "step": 1944 }, { "epoch": 0.3, "learning_rate": 1.272079257222885e-05, "logits/chosen": -2.4810147285461426, "logits/rejected": -3.0801618099212646, "logps/chosen": -156.58547973632812, "logps/rejected": -393.4100646972656, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.17362213134765625, "rewards/margins": 6.902054786682129, "rewards/rejected": -7.075676918029785, "step": 1945 }, { "epoch": 0.3, "learning_rate": 1.2720059131697702e-05, "logits/chosen": -2.8179421424865723, "logits/rejected": -2.148394823074341, "logps/chosen": -189.0383758544922, "logps/rejected": -316.9556884765625, "loss": 2.8802, "rewards/accuracies": 0.5, "rewards/chosen": -1.5442066192626953, "rewards/margins": 1.984100103378296, "rewards/rejected": -3.528306722640991, "step": 1946 }, { "epoch": 0.3, "learning_rate": 1.2719325691166556e-05, "logits/chosen": -3.104053497314453, "logits/rejected": -3.236419200897217, "logps/chosen": -173.048583984375, "logps/rejected": -272.61883544921875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.6666069030761719, "rewards/margins": 5.882035255432129, "rewards/rejected": -5.215428352355957, "step": 1947 }, { "epoch": 0.3, "learning_rate": 1.2718592250635408e-05, "logits/chosen": -2.658256769180298, "logits/rejected": -2.4298012256622314, "logps/chosen": -264.7898254394531, "logps/rejected": -324.7538757324219, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -1.2572150230407715, "rewards/margins": 6.073763847351074, "rewards/rejected": -7.330978870391846, "step": 1948 }, { "epoch": 0.3, "learning_rate": 1.271785881010426e-05, "logits/chosen": -2.9764723777770996, "logits/rejected": -3.223083257675171, "logps/chosen": -241.59213256835938, "logps/rejected": -298.01654052734375, "loss": 0.0625, "rewards/accuracies": 1.0, "rewards/chosen": -0.4170929193496704, "rewards/margins": 3.280045509338379, "rewards/rejected": -3.697138547897339, "step": 1949 }, { "epoch": 0.3, "learning_rate": 1.2717125369573112e-05, "logits/chosen": -2.441930055618286, "logits/rejected": -3.007815361022949, "logps/chosen": -71.20343017578125, "logps/rejected": -259.19482421875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.21106110513210297, "rewards/margins": 5.751413822174072, "rewards/rejected": -5.962474822998047, "step": 1950 }, { "epoch": 0.3, "learning_rate": 1.2716391929041963e-05, "logits/chosen": -2.621664524078369, "logits/rejected": -2.8137080669403076, "logps/chosen": -331.49798583984375, "logps/rejected": -387.22369384765625, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.24140587449073792, "rewards/margins": 3.7675552368164062, "rewards/rejected": -4.008961200714111, "step": 1951 }, { "epoch": 0.3, "learning_rate": 1.2715658488510815e-05, "logits/chosen": -1.7991758584976196, "logits/rejected": -2.0836167335510254, "logps/chosen": -444.513671875, "logps/rejected": -407.9678955078125, "loss": 0.0745, "rewards/accuracies": 1.0, "rewards/chosen": -1.8551368713378906, "rewards/margins": 2.5927820205688477, "rewards/rejected": -4.447918891906738, "step": 1952 }, { "epoch": 0.3, "learning_rate": 1.2714925047979667e-05, "logits/chosen": -2.4823451042175293, "logits/rejected": -3.1143929958343506, "logps/chosen": -462.4586486816406, "logps/rejected": -436.5479736328125, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.7660354375839233, "rewards/margins": 4.437019348144531, "rewards/rejected": -5.203054904937744, "step": 1953 }, { "epoch": 0.3, "learning_rate": 1.2714191607448519e-05, "logits/chosen": -2.1730215549468994, "logits/rejected": -2.973581075668335, "logps/chosen": -85.27276611328125, "logps/rejected": -150.13140869140625, "loss": 2.5584, "rewards/accuracies": 0.5, "rewards/chosen": -2.7225112915039062, "rewards/margins": 0.07090067863464355, "rewards/rejected": -2.79341197013855, "step": 1954 }, { "epoch": 0.3, "learning_rate": 1.2713458166917371e-05, "logits/chosen": -0.7952420115470886, "logits/rejected": -2.6907687187194824, "logps/chosen": -31.339582443237305, "logps/rejected": -362.5960388183594, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": 0.20444317162036896, "rewards/margins": 7.3213605880737305, "rewards/rejected": -7.116917610168457, "step": 1955 }, { "epoch": 0.3, "learning_rate": 1.2712724726386225e-05, "logits/chosen": -3.0117111206054688, "logits/rejected": -3.117758274078369, "logps/chosen": -359.15032958984375, "logps/rejected": -303.9795227050781, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": -0.3686836361885071, "rewards/margins": 3.4234812259674072, "rewards/rejected": -3.7921648025512695, "step": 1956 }, { "epoch": 0.3, "learning_rate": 1.2711991285855076e-05, "logits/chosen": -1.4400334358215332, "logits/rejected": -2.263237476348877, "logps/chosen": -126.20265197753906, "logps/rejected": -359.3933410644531, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.26615065336227417, "rewards/margins": 6.401961326599121, "rewards/rejected": -6.668112277984619, "step": 1957 }, { "epoch": 0.3, "learning_rate": 1.2711257845323928e-05, "logits/chosen": -2.457515001296997, "logits/rejected": -2.899357795715332, "logps/chosen": -153.36322021484375, "logps/rejected": -476.1706237792969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.0733200311660767, "rewards/margins": 10.128911018371582, "rewards/rejected": -9.055591583251953, "step": 1958 }, { "epoch": 0.3, "learning_rate": 1.271052440479278e-05, "logits/chosen": -1.4946630001068115, "logits/rejected": -3.1084907054901123, "logps/chosen": -44.475425720214844, "logps/rejected": -342.0898132324219, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.8563187122344971, "rewards/margins": 7.011702060699463, "rewards/rejected": -6.155383110046387, "step": 1959 }, { "epoch": 0.3, "learning_rate": 1.2709790964261632e-05, "logits/chosen": -3.2445192337036133, "logits/rejected": -2.933586359024048, "logps/chosen": -177.91268920898438, "logps/rejected": -173.40969848632812, "loss": 1.3095, "rewards/accuracies": 0.5, "rewards/chosen": -2.743591785430908, "rewards/margins": 1.5025544166564941, "rewards/rejected": -4.246146202087402, "step": 1960 }, { "epoch": 0.3, "learning_rate": 1.2709057523730484e-05, "logits/chosen": -1.4053114652633667, "logits/rejected": -2.8368866443634033, "logps/chosen": -247.9165496826172, "logps/rejected": -391.7955627441406, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.5221859216690063, "rewards/margins": 6.3882012367248535, "rewards/rejected": -7.91038703918457, "step": 1961 }, { "epoch": 0.31, "learning_rate": 1.2708324083199336e-05, "logits/chosen": -3.069657802581787, "logits/rejected": -2.9439094066619873, "logps/chosen": -257.6864318847656, "logps/rejected": -252.73016357421875, "loss": 2.239, "rewards/accuracies": 0.5, "rewards/chosen": -2.31135892868042, "rewards/margins": 1.696197271347046, "rewards/rejected": -4.007555961608887, "step": 1962 }, { "epoch": 0.31, "learning_rate": 1.2707590642668188e-05, "logits/chosen": -2.9642465114593506, "logits/rejected": -2.836707592010498, "logps/chosen": -285.52716064453125, "logps/rejected": -169.89231872558594, "loss": 2.07, "rewards/accuracies": 0.5, "rewards/chosen": -3.7049882411956787, "rewards/margins": -0.6190431118011475, "rewards/rejected": -3.0859451293945312, "step": 1963 }, { "epoch": 0.31, "learning_rate": 1.270685720213704e-05, "logits/chosen": -1.6998975276947021, "logits/rejected": -2.6236624717712402, "logps/chosen": -110.77519226074219, "logps/rejected": -221.30357360839844, "loss": 1.1908, "rewards/accuracies": 0.5, "rewards/chosen": -1.3143514394760132, "rewards/margins": 1.7107502222061157, "rewards/rejected": -3.025101661682129, "step": 1964 }, { "epoch": 0.31, "learning_rate": 1.2706123761605893e-05, "logits/chosen": -2.9020237922668457, "logits/rejected": -3.0359108448028564, "logps/chosen": -59.66413879394531, "logps/rejected": -171.24960327148438, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": 0.6024109125137329, "rewards/margins": 3.958587169647217, "rewards/rejected": -3.3561763763427734, "step": 1965 }, { "epoch": 0.31, "learning_rate": 1.2705390321074745e-05, "logits/chosen": -2.718088388442993, "logits/rejected": -2.6357691287994385, "logps/chosen": -634.916748046875, "logps/rejected": -495.892333984375, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.1152515411376953, "rewards/margins": 4.908437728881836, "rewards/rejected": -6.023689270019531, "step": 1966 }, { "epoch": 0.31, "learning_rate": 1.2704656880543597e-05, "logits/chosen": -1.4914805889129639, "logits/rejected": -2.795240640640259, "logps/chosen": -108.85603332519531, "logps/rejected": -349.1504821777344, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.742357611656189, "rewards/margins": 5.427372932434082, "rewards/rejected": -6.169730186462402, "step": 1967 }, { "epoch": 0.31, "learning_rate": 1.2703923440012449e-05, "logits/chosen": -1.9338245391845703, "logits/rejected": -2.88051700592041, "logps/chosen": -39.34684753417969, "logps/rejected": -245.55389404296875, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": 0.08563882112503052, "rewards/margins": 5.794307231903076, "rewards/rejected": -5.7086687088012695, "step": 1968 }, { "epoch": 0.31, "learning_rate": 1.27031899994813e-05, "logits/chosen": -3.05351185798645, "logits/rejected": -1.7135146856307983, "logps/chosen": -796.531005859375, "logps/rejected": -355.2212829589844, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -1.1624665260314941, "rewards/margins": 3.7755844593048096, "rewards/rejected": -4.938051223754883, "step": 1969 }, { "epoch": 0.31, "learning_rate": 1.2702456558950153e-05, "logits/chosen": -2.6772961616516113, "logits/rejected": -2.866361141204834, "logps/chosen": -119.58937072753906, "logps/rejected": -275.356201171875, "loss": 0.0815, "rewards/accuracies": 1.0, "rewards/chosen": -0.028909504413604736, "rewards/margins": 2.888430118560791, "rewards/rejected": -2.917339563369751, "step": 1970 }, { "epoch": 0.31, "learning_rate": 1.2701723118419004e-05, "logits/chosen": -2.915969133377075, "logits/rejected": -2.643934488296509, "logps/chosen": -469.76934814453125, "logps/rejected": -486.10345458984375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.2638992369174957, "rewards/margins": 6.186234951019287, "rewards/rejected": -6.45013427734375, "step": 1971 }, { "epoch": 0.31, "learning_rate": 1.2700989677887856e-05, "logits/chosen": -2.3828125, "logits/rejected": -3.3459312915802, "logps/chosen": -348.9107666015625, "logps/rejected": -388.2572937011719, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.5082661509513855, "rewards/margins": 4.936618804931641, "rewards/rejected": -5.44488525390625, "step": 1972 }, { "epoch": 0.31, "learning_rate": 1.2700256237356708e-05, "logits/chosen": -2.5709547996520996, "logits/rejected": -2.776521921157837, "logps/chosen": -574.0574340820312, "logps/rejected": -542.91845703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7689918279647827, "rewards/margins": 6.535605430603027, "rewards/rejected": -7.3045973777771, "step": 1973 }, { "epoch": 0.31, "learning_rate": 1.2699522796825562e-05, "logits/chosen": -1.6521687507629395, "logits/rejected": -2.755284070968628, "logps/chosen": -72.48939514160156, "logps/rejected": -270.0237731933594, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.009895667433738708, "rewards/margins": 6.146601676940918, "rewards/rejected": -6.1367058753967285, "step": 1974 }, { "epoch": 0.31, "learning_rate": 1.2698789356294414e-05, "logits/chosen": -2.3902602195739746, "logits/rejected": -3.121894598007202, "logps/chosen": -48.651493072509766, "logps/rejected": -288.95672607421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.3830869197845459, "rewards/margins": 7.33956241607666, "rewards/rejected": -7.722649574279785, "step": 1975 }, { "epoch": 0.31, "learning_rate": 1.2698055915763266e-05, "logits/chosen": -3.0321431159973145, "logits/rejected": -3.1526427268981934, "logps/chosen": -419.7966613769531, "logps/rejected": -571.0155029296875, "loss": 3.8684, "rewards/accuracies": 0.5, "rewards/chosen": -4.8666462898254395, "rewards/margins": -2.2691564559936523, "rewards/rejected": -2.597489833831787, "step": 1976 }, { "epoch": 0.31, "learning_rate": 1.2697322475232117e-05, "logits/chosen": -2.818578004837036, "logits/rejected": -2.7866880893707275, "logps/chosen": -452.572998046875, "logps/rejected": -433.074951171875, "loss": 2.541, "rewards/accuracies": 0.5, "rewards/chosen": -2.8899993896484375, "rewards/margins": 0.03498530387878418, "rewards/rejected": -2.9249846935272217, "step": 1977 }, { "epoch": 0.31, "learning_rate": 1.269658903470097e-05, "logits/chosen": -1.925575613975525, "logits/rejected": -2.7516510486602783, "logps/chosen": -177.34005737304688, "logps/rejected": -450.1086120605469, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -0.026554875075817108, "rewards/margins": 6.011979103088379, "rewards/rejected": -6.038534164428711, "step": 1978 }, { "epoch": 0.31, "learning_rate": 1.2695855594169823e-05, "logits/chosen": -2.7476043701171875, "logits/rejected": -1.7550307512283325, "logps/chosen": -367.5628967285156, "logps/rejected": -303.0601806640625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.1700553894042969, "rewards/margins": 6.804734706878662, "rewards/rejected": -7.974790096282959, "step": 1979 }, { "epoch": 0.31, "learning_rate": 1.2695122153638675e-05, "logits/chosen": -1.5842392444610596, "logits/rejected": -3.132870674133301, "logps/chosen": -214.38487243652344, "logps/rejected": -552.2999267578125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.7187581658363342, "rewards/margins": 6.747594833374023, "rewards/rejected": -7.466352939605713, "step": 1980 }, { "epoch": 0.31, "learning_rate": 1.2694388713107527e-05, "logits/chosen": -2.5877017974853516, "logits/rejected": -3.0797770023345947, "logps/chosen": -199.3700408935547, "logps/rejected": -245.95205688476562, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.33364027738571167, "rewards/margins": 5.368227005004883, "rewards/rejected": -5.70186710357666, "step": 1981 }, { "epoch": 0.31, "learning_rate": 1.2693655272576378e-05, "logits/chosen": -1.5002378225326538, "logits/rejected": -2.869555950164795, "logps/chosen": -22.432079315185547, "logps/rejected": -295.3297424316406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.0040239095687866, "rewards/margins": 8.12198543548584, "rewards/rejected": -7.117961406707764, "step": 1982 }, { "epoch": 0.31, "learning_rate": 1.2692921832045232e-05, "logits/chosen": -2.919322967529297, "logits/rejected": -2.0365138053894043, "logps/chosen": -214.623291015625, "logps/rejected": -194.64443969726562, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -0.9981590509414673, "rewards/margins": 3.724806070327759, "rewards/rejected": -4.722965240478516, "step": 1983 }, { "epoch": 0.31, "learning_rate": 1.2692188391514084e-05, "logits/chosen": -2.8552021980285645, "logits/rejected": -2.907259464263916, "logps/chosen": -408.9208984375, "logps/rejected": -452.54986572265625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.242950439453125, "rewards/margins": 5.714319229125977, "rewards/rejected": -5.957269668579102, "step": 1984 }, { "epoch": 0.31, "learning_rate": 1.2691454950982936e-05, "logits/chosen": -2.637371778488159, "logits/rejected": -2.6557984352111816, "logps/chosen": -513.3170776367188, "logps/rejected": -485.25164794921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.29023823142051697, "rewards/margins": 9.034706115722656, "rewards/rejected": -9.324943542480469, "step": 1985 }, { "epoch": 0.31, "learning_rate": 1.2690721510451788e-05, "logits/chosen": -1.9508048295974731, "logits/rejected": -2.650669813156128, "logps/chosen": -227.9825439453125, "logps/rejected": -436.5884094238281, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 0.49707263708114624, "rewards/margins": 7.938375473022461, "rewards/rejected": -7.44130277633667, "step": 1986 }, { "epoch": 0.31, "learning_rate": 1.268998806992064e-05, "logits/chosen": -2.309979200363159, "logits/rejected": -3.2290587425231934, "logps/chosen": -109.16574096679688, "logps/rejected": -384.73004150390625, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.35236895084381104, "rewards/margins": 4.092988014221191, "rewards/rejected": -4.445356845855713, "step": 1987 }, { "epoch": 0.31, "learning_rate": 1.2689254629389491e-05, "logits/chosen": -2.838829278945923, "logits/rejected": -3.2142603397369385, "logps/chosen": -320.21484375, "logps/rejected": -345.1397705078125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.3272246718406677, "rewards/margins": 5.526798248291016, "rewards/rejected": -5.85402250289917, "step": 1988 }, { "epoch": 0.31, "learning_rate": 1.2688521188858343e-05, "logits/chosen": -2.6322555541992188, "logits/rejected": -3.0006356239318848, "logps/chosen": -435.654541015625, "logps/rejected": -498.23248291015625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 0.06688232719898224, "rewards/margins": 7.528983116149902, "rewards/rejected": -7.462100982666016, "step": 1989 }, { "epoch": 0.31, "learning_rate": 1.2687787748327195e-05, "logits/chosen": -2.95810866355896, "logits/rejected": -3.184262752532959, "logps/chosen": -104.53113555908203, "logps/rejected": -311.3674011230469, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.6299573183059692, "rewards/margins": 4.477115631103516, "rewards/rejected": -5.107072830200195, "step": 1990 }, { "epoch": 0.31, "learning_rate": 1.2687054307796047e-05, "logits/chosen": -2.4012508392333984, "logits/rejected": -2.9885880947113037, "logps/chosen": -608.6608276367188, "logps/rejected": -794.199951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0135574340820312, "rewards/margins": 10.730592727661133, "rewards/rejected": -12.744150161743164, "step": 1991 }, { "epoch": 0.31, "learning_rate": 1.26863208672649e-05, "logits/chosen": -2.8324410915374756, "logits/rejected": -1.739638090133667, "logps/chosen": -599.708984375, "logps/rejected": -298.6774597167969, "loss": 2.0704, "rewards/accuracies": 0.5, "rewards/chosen": -3.731006145477295, "rewards/margins": 2.0045182704925537, "rewards/rejected": -5.7355241775512695, "step": 1992 }, { "epoch": 0.31, "learning_rate": 1.2685587426733753e-05, "logits/chosen": -2.907973051071167, "logits/rejected": -3.0603864192962646, "logps/chosen": -91.03005981445312, "logps/rejected": -277.8623962402344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.1228936910629272, "rewards/margins": 8.434000015258789, "rewards/rejected": -7.3111066818237305, "step": 1993 }, { "epoch": 0.31, "learning_rate": 1.2684853986202604e-05, "logits/chosen": -2.4251153469085693, "logits/rejected": -2.8431355953216553, "logps/chosen": -326.616943359375, "logps/rejected": -531.1676025390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6963337063789368, "rewards/margins": 7.1924543380737305, "rewards/rejected": -7.888788223266602, "step": 1994 }, { "epoch": 0.31, "learning_rate": 1.2684120545671456e-05, "logits/chosen": -1.2865675687789917, "logits/rejected": -2.965597152709961, "logps/chosen": -57.91530990600586, "logps/rejected": -242.762939453125, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.4282824397087097, "rewards/margins": 4.731529235839844, "rewards/rejected": -5.159811496734619, "step": 1995 }, { "epoch": 0.31, "learning_rate": 1.2683387105140308e-05, "logits/chosen": -2.239105224609375, "logits/rejected": -3.0900394916534424, "logps/chosen": -212.0501708984375, "logps/rejected": -358.1922607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.46987685561180115, "rewards/margins": 8.98487663269043, "rewards/rejected": -9.454752922058105, "step": 1996 }, { "epoch": 0.31, "learning_rate": 1.268265366460916e-05, "logits/chosen": -2.741084098815918, "logits/rejected": -2.8227627277374268, "logps/chosen": -38.740203857421875, "logps/rejected": -176.75439453125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.45902833342552185, "rewards/margins": 6.607159614562988, "rewards/rejected": -6.148131370544434, "step": 1997 }, { "epoch": 0.31, "learning_rate": 1.2681920224078012e-05, "logits/chosen": -2.5901870727539062, "logits/rejected": -3.2491824626922607, "logps/chosen": -6.320318222045898, "logps/rejected": -213.86685180664062, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 1.062637448310852, "rewards/margins": 9.05193042755127, "rewards/rejected": -7.989292621612549, "step": 1998 }, { "epoch": 0.31, "learning_rate": 1.2681186783546864e-05, "logits/chosen": -2.6843461990356445, "logits/rejected": -2.7337210178375244, "logps/chosen": -586.9756469726562, "logps/rejected": -819.3717041015625, "loss": 2.4359, "rewards/accuracies": 0.5, "rewards/chosen": -2.9837026596069336, "rewards/margins": 1.116119623184204, "rewards/rejected": -4.099822521209717, "step": 1999 }, { "epoch": 0.31, "learning_rate": 1.2680453343015716e-05, "logits/chosen": -2.5682568550109863, "logits/rejected": -2.858802318572998, "logps/chosen": -29.303333282470703, "logps/rejected": -168.1971435546875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.2881244719028473, "rewards/margins": 6.127768516540527, "rewards/rejected": -6.415893077850342, "step": 2000 }, { "epoch": 0.31, "learning_rate": 1.267971990248457e-05, "logits/chosen": -2.566470146179199, "logits/rejected": -2.749542713165283, "logps/chosen": -198.68814086914062, "logps/rejected": -109.8207778930664, "loss": 2.3815, "rewards/accuracies": 0.5, "rewards/chosen": -2.566699266433716, "rewards/margins": 0.20140838623046875, "rewards/rejected": -2.7681076526641846, "step": 2001 }, { "epoch": 0.31, "learning_rate": 1.2678986461953421e-05, "logits/chosen": -2.4942684173583984, "logits/rejected": -2.923469066619873, "logps/chosen": -275.72283935546875, "logps/rejected": -258.4832763671875, "loss": 3.191, "rewards/accuracies": 0.5, "rewards/chosen": -3.3023159503936768, "rewards/margins": 2.0974204540252686, "rewards/rejected": -5.399735927581787, "step": 2002 }, { "epoch": 0.31, "learning_rate": 1.2678253021422273e-05, "logits/chosen": -2.790794849395752, "logits/rejected": -2.8767223358154297, "logps/chosen": -99.94577026367188, "logps/rejected": -377.06005859375, "loss": 1.4757, "rewards/accuracies": 0.5, "rewards/chosen": -1.104704737663269, "rewards/margins": 4.911870956420898, "rewards/rejected": -6.016575813293457, "step": 2003 }, { "epoch": 0.31, "learning_rate": 1.2677519580891125e-05, "logits/chosen": -2.297760248184204, "logits/rejected": -2.968670606613159, "logps/chosen": -278.1766052246094, "logps/rejected": -552.3615112304688, "loss": 3.5461, "rewards/accuracies": 0.5, "rewards/chosen": -3.521684169769287, "rewards/margins": 0.802708625793457, "rewards/rejected": -4.324392795562744, "step": 2004 }, { "epoch": 0.31, "learning_rate": 1.2676786140359977e-05, "logits/chosen": -2.0180394649505615, "logits/rejected": -2.935666561126709, "logps/chosen": -196.97523498535156, "logps/rejected": -430.57080078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.45407789945602417, "rewards/margins": 6.8740997314453125, "rewards/rejected": -7.328177452087402, "step": 2005 }, { "epoch": 0.31, "learning_rate": 1.2676052699828829e-05, "logits/chosen": -1.5634621381759644, "logits/rejected": -3.1803691387176514, "logps/chosen": -383.946533203125, "logps/rejected": -562.416748046875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.5431793332099915, "rewards/margins": 4.895254611968994, "rewards/rejected": -5.438433647155762, "step": 2006 }, { "epoch": 0.31, "learning_rate": 1.267531925929768e-05, "logits/chosen": -2.323702812194824, "logits/rejected": -2.853883743286133, "logps/chosen": -380.7027282714844, "logps/rejected": -438.6985168457031, "loss": 0.7347, "rewards/accuracies": 0.5, "rewards/chosen": -2.3585023880004883, "rewards/margins": 2.695131778717041, "rewards/rejected": -5.053634166717529, "step": 2007 }, { "epoch": 0.31, "learning_rate": 1.2674585818766532e-05, "logits/chosen": -2.7776074409484863, "logits/rejected": -2.841160535812378, "logps/chosen": -94.29891967773438, "logps/rejected": -150.13916015625, "loss": 1.7475, "rewards/accuracies": 0.5, "rewards/chosen": -1.7165796756744385, "rewards/margins": 1.6801420450210571, "rewards/rejected": -3.396721601486206, "step": 2008 }, { "epoch": 0.31, "learning_rate": 1.2673852378235384e-05, "logits/chosen": -1.1790540218353271, "logits/rejected": -2.7492482662200928, "logps/chosen": -38.02787399291992, "logps/rejected": -164.75082397460938, "loss": 0.4867, "rewards/accuracies": 0.5, "rewards/chosen": -1.2670111656188965, "rewards/margins": 2.4996728897094727, "rewards/rejected": -3.766684055328369, "step": 2009 }, { "epoch": 0.31, "learning_rate": 1.2673118937704238e-05, "logits/chosen": -2.4069314002990723, "logits/rejected": -2.8350627422332764, "logps/chosen": -423.1661071777344, "logps/rejected": -523.119384765625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.1100364923477173, "rewards/margins": 6.3386054039001465, "rewards/rejected": -7.448641777038574, "step": 2010 }, { "epoch": 0.31, "learning_rate": 1.267238549717309e-05, "logits/chosen": -2.7404515743255615, "logits/rejected": -3.0400710105895996, "logps/chosen": -80.70550537109375, "logps/rejected": -161.29742431640625, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.2564736306667328, "rewards/margins": 4.577028274536133, "rewards/rejected": -4.833501815795898, "step": 2011 }, { "epoch": 0.31, "learning_rate": 1.2671652056641942e-05, "logits/chosen": -3.144832134246826, "logits/rejected": -2.722022294998169, "logps/chosen": -508.7405090332031, "logps/rejected": -428.1182861328125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.1631298065185547, "rewards/margins": 9.326467514038086, "rewards/rejected": -10.48959732055664, "step": 2012 }, { "epoch": 0.31, "learning_rate": 1.2670918616110795e-05, "logits/chosen": -1.5374857187271118, "logits/rejected": -2.977102756500244, "logps/chosen": -71.49616241455078, "logps/rejected": -437.86517333984375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.3507733345031738, "rewards/margins": 10.069511413574219, "rewards/rejected": -11.420284271240234, "step": 2013 }, { "epoch": 0.31, "learning_rate": 1.2670185175579647e-05, "logits/chosen": -2.1782114505767822, "logits/rejected": -2.1539652347564697, "logps/chosen": -209.07826232910156, "logps/rejected": -369.11016845703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6341294050216675, "rewards/margins": 7.139059066772461, "rewards/rejected": -8.773188591003418, "step": 2014 }, { "epoch": 0.31, "learning_rate": 1.2669451735048499e-05, "logits/chosen": -2.7101638317108154, "logits/rejected": -2.6728665828704834, "logps/chosen": -98.57453155517578, "logps/rejected": -217.92257690429688, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -0.9513870477676392, "rewards/margins": 4.689891338348389, "rewards/rejected": -5.641278266906738, "step": 2015 }, { "epoch": 0.31, "learning_rate": 1.266871829451735e-05, "logits/chosen": -1.73086416721344, "logits/rejected": -2.63424015045166, "logps/chosen": -132.85433959960938, "logps/rejected": -325.9208984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.088568091392517, "rewards/margins": 7.472569465637207, "rewards/rejected": -8.561138153076172, "step": 2016 }, { "epoch": 0.31, "learning_rate": 1.2667984853986203e-05, "logits/chosen": -2.4717166423797607, "logits/rejected": -2.9901089668273926, "logps/chosen": -43.191734313964844, "logps/rejected": -220.09718322753906, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": -0.4049030542373657, "rewards/margins": 4.248290061950684, "rewards/rejected": -4.65319299697876, "step": 2017 }, { "epoch": 0.31, "learning_rate": 1.2667251413455055e-05, "logits/chosen": -2.757585048675537, "logits/rejected": -2.5262084007263184, "logps/chosen": -628.5830688476562, "logps/rejected": -552.417236328125, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -4.363905429840088, "rewards/margins": 5.389115810394287, "rewards/rejected": -9.753021240234375, "step": 2018 }, { "epoch": 0.31, "learning_rate": 1.2666517972923908e-05, "logits/chosen": -1.9909342527389526, "logits/rejected": -2.751979112625122, "logps/chosen": -238.63519287109375, "logps/rejected": -196.0597686767578, "loss": 3.016, "rewards/accuracies": 0.5, "rewards/chosen": -4.4112114906311035, "rewards/margins": 0.11717724800109863, "rewards/rejected": -4.528388500213623, "step": 2019 }, { "epoch": 0.31, "learning_rate": 1.266578453239276e-05, "logits/chosen": -3.2448408603668213, "logits/rejected": -3.3207032680511475, "logps/chosen": -34.7259521484375, "logps/rejected": -120.97760009765625, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9470678567886353, "rewards/margins": 4.132308006286621, "rewards/rejected": -5.079376220703125, "step": 2020 }, { "epoch": 0.31, "learning_rate": 1.2665051091861612e-05, "logits/chosen": -2.628459930419922, "logits/rejected": -1.0751780271530151, "logps/chosen": -605.5481567382812, "logps/rejected": -376.2384033203125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.9540510177612305, "rewards/margins": 6.23311710357666, "rewards/rejected": -9.18716812133789, "step": 2021 }, { "epoch": 0.31, "learning_rate": 1.2664317651330464e-05, "logits/chosen": -1.8431600332260132, "logits/rejected": -2.7811055183410645, "logps/chosen": -347.201904296875, "logps/rejected": -1157.5224609375, "loss": 3.9555, "rewards/accuracies": 0.5, "rewards/chosen": -6.123253345489502, "rewards/margins": 2.734992504119873, "rewards/rejected": -8.858245849609375, "step": 2022 }, { "epoch": 0.31, "learning_rate": 1.2663584210799316e-05, "logits/chosen": -2.312183141708374, "logits/rejected": -2.7131967544555664, "logps/chosen": -152.53636169433594, "logps/rejected": -201.0384521484375, "loss": 2.6633, "rewards/accuracies": 0.5, "rewards/chosen": -4.40092134475708, "rewards/margins": -1.643657922744751, "rewards/rejected": -2.757263660430908, "step": 2023 }, { "epoch": 0.31, "learning_rate": 1.2662850770268168e-05, "logits/chosen": -2.191628932952881, "logits/rejected": -2.7788338661193848, "logps/chosen": -132.47930908203125, "logps/rejected": -275.4282531738281, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.826072335243225, "rewards/margins": 6.052034378051758, "rewards/rejected": -7.878107070922852, "step": 2024 }, { "epoch": 0.31, "learning_rate": 1.266211732973702e-05, "logits/chosen": -1.4031604528427124, "logits/rejected": -2.8263659477233887, "logps/chosen": -62.96883773803711, "logps/rejected": -227.48013305664062, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.5766489505767822, "rewards/margins": 4.5232720375061035, "rewards/rejected": -6.099921226501465, "step": 2025 }, { "epoch": 0.32, "learning_rate": 1.2661383889205871e-05, "logits/chosen": -1.7653257846832275, "logits/rejected": -2.9237639904022217, "logps/chosen": -93.28316497802734, "logps/rejected": -254.51571655273438, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.8462755680084229, "rewards/margins": 4.296858787536621, "rewards/rejected": -6.143134117126465, "step": 2026 }, { "epoch": 0.32, "learning_rate": 1.2660650448674725e-05, "logits/chosen": -3.3031961917877197, "logits/rejected": -3.3253443241119385, "logps/chosen": -79.835205078125, "logps/rejected": -147.99224853515625, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -1.1843414306640625, "rewards/margins": 3.558277130126953, "rewards/rejected": -4.742618560791016, "step": 2027 }, { "epoch": 0.32, "learning_rate": 1.2659917008143577e-05, "logits/chosen": -2.7770161628723145, "logits/rejected": -2.8151986598968506, "logps/chosen": -24.604318618774414, "logps/rejected": -213.3173828125, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.8579337000846863, "rewards/margins": 6.64303731918335, "rewards/rejected": -7.500970840454102, "step": 2028 }, { "epoch": 0.32, "learning_rate": 1.2659183567612429e-05, "logits/chosen": -2.830968141555786, "logits/rejected": -2.3218226432800293, "logps/chosen": -394.9537048339844, "logps/rejected": -215.29611206054688, "loss": 9.029, "rewards/accuracies": 0.5, "rewards/chosen": -10.70612907409668, "rewards/margins": -7.144231796264648, "rewards/rejected": -3.5618972778320312, "step": 2029 }, { "epoch": 0.32, "learning_rate": 1.265845012708128e-05, "logits/chosen": -3.1038947105407715, "logits/rejected": -3.0775978565216064, "logps/chosen": -158.5301513671875, "logps/rejected": -207.51351928710938, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -1.5103957653045654, "rewards/margins": 4.049699783325195, "rewards/rejected": -5.56009578704834, "step": 2030 }, { "epoch": 0.32, "learning_rate": 1.2657716686550132e-05, "logits/chosen": -1.6190893650054932, "logits/rejected": -3.088271379470825, "logps/chosen": -50.55093765258789, "logps/rejected": -276.90478515625, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -1.5711960792541504, "rewards/margins": 4.043544292449951, "rewards/rejected": -5.614740371704102, "step": 2031 }, { "epoch": 0.32, "learning_rate": 1.2656983246018984e-05, "logits/chosen": -1.0283174514770508, "logits/rejected": -2.4115536212921143, "logps/chosen": -100.35272216796875, "logps/rejected": -396.8299560546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8009306192398071, "rewards/margins": 7.878480911254883, "rewards/rejected": -8.679410934448242, "step": 2032 }, { "epoch": 0.32, "learning_rate": 1.2656249805487836e-05, "logits/chosen": -2.6307590007781982, "logits/rejected": -2.6500563621520996, "logps/chosen": -185.8866424560547, "logps/rejected": -240.68919372558594, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.7503790855407715, "rewards/margins": 4.987024307250977, "rewards/rejected": -6.73740291595459, "step": 2033 }, { "epoch": 0.32, "learning_rate": 1.2655516364956688e-05, "logits/chosen": -2.6651387214660645, "logits/rejected": -3.045095443725586, "logps/chosen": -60.319305419921875, "logps/rejected": -321.5570983886719, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.0384790897369385, "rewards/margins": 9.135690689086914, "rewards/rejected": -10.174169540405273, "step": 2034 }, { "epoch": 0.32, "learning_rate": 1.265478292442554e-05, "logits/chosen": -2.4813687801361084, "logits/rejected": -2.6500580310821533, "logps/chosen": -117.27664184570312, "logps/rejected": -231.32611083984375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.915276288986206, "rewards/margins": 5.591710090637207, "rewards/rejected": -7.506986618041992, "step": 2035 }, { "epoch": 0.32, "learning_rate": 1.2654049483894393e-05, "logits/chosen": -1.105882167816162, "logits/rejected": -3.06249737739563, "logps/chosen": -40.85630798339844, "logps/rejected": -335.9656982421875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.3973004817962646, "rewards/margins": 5.506004333496094, "rewards/rejected": -6.9033050537109375, "step": 2036 }, { "epoch": 0.32, "learning_rate": 1.2653316043363245e-05, "logits/chosen": -2.694448232650757, "logits/rejected": -1.1594091653823853, "logps/chosen": -321.8190612792969, "logps/rejected": -195.25621032714844, "loss": 2.0111, "rewards/accuracies": 0.5, "rewards/chosen": -4.298520088195801, "rewards/margins": 1.751441240310669, "rewards/rejected": -6.049961566925049, "step": 2037 }, { "epoch": 0.32, "learning_rate": 1.2652582602832097e-05, "logits/chosen": -2.950139045715332, "logits/rejected": -3.1780126094818115, "logps/chosen": -119.52311706542969, "logps/rejected": -200.02371215820312, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.1331114768981934, "rewards/margins": 5.194090366363525, "rewards/rejected": -7.327201843261719, "step": 2038 }, { "epoch": 0.32, "learning_rate": 1.2651849162300949e-05, "logits/chosen": -2.7085001468658447, "logits/rejected": -2.8801534175872803, "logps/chosen": -286.44573974609375, "logps/rejected": -605.2182006835938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3022751808166504, "rewards/margins": 11.199041366577148, "rewards/rejected": -12.501317024230957, "step": 2039 }, { "epoch": 0.32, "learning_rate": 1.2651115721769801e-05, "logits/chosen": -1.6883398294448853, "logits/rejected": -2.826690673828125, "logps/chosen": -65.19756317138672, "logps/rejected": -176.97384643554688, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -1.4044840335845947, "rewards/margins": 4.844699859619141, "rewards/rejected": -6.249183654785156, "step": 2040 }, { "epoch": 0.32, "learning_rate": 1.2650382281238653e-05, "logits/chosen": -1.8104900121688843, "logits/rejected": -2.6000912189483643, "logps/chosen": -80.01100158691406, "logps/rejected": -194.9159393310547, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.068422555923462, "rewards/margins": 5.528914451599121, "rewards/rejected": -7.597336769104004, "step": 2041 }, { "epoch": 0.32, "learning_rate": 1.2649648840707505e-05, "logits/chosen": -1.570587158203125, "logits/rejected": -2.892012357711792, "logps/chosen": -105.85601806640625, "logps/rejected": -268.4685363769531, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.699594497680664, "rewards/margins": 6.76605224609375, "rewards/rejected": -8.465646743774414, "step": 2042 }, { "epoch": 0.32, "learning_rate": 1.2648915400176357e-05, "logits/chosen": -2.7946343421936035, "logits/rejected": -1.5738191604614258, "logps/chosen": -461.40765380859375, "logps/rejected": -213.3591766357422, "loss": 2.1156, "rewards/accuracies": 0.5, "rewards/chosen": -4.329646587371826, "rewards/margins": 1.5250704288482666, "rewards/rejected": -5.854716777801514, "step": 2043 }, { "epoch": 0.32, "learning_rate": 1.2648181959645208e-05, "logits/chosen": -2.672586441040039, "logits/rejected": -2.2757670879364014, "logps/chosen": -181.49383544921875, "logps/rejected": -216.3351593017578, "loss": 0.1434, "rewards/accuracies": 1.0, "rewards/chosen": -2.7738821506500244, "rewards/margins": 4.251829147338867, "rewards/rejected": -7.0257110595703125, "step": 2044 }, { "epoch": 0.32, "learning_rate": 1.2647448519114062e-05, "logits/chosen": -2.2095448970794678, "logits/rejected": -2.8924663066864014, "logps/chosen": -322.99505615234375, "logps/rejected": -324.63653564453125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -2.091632127761841, "rewards/margins": 6.006709098815918, "rewards/rejected": -8.09834098815918, "step": 2045 }, { "epoch": 0.32, "learning_rate": 1.2646715078582914e-05, "logits/chosen": -2.360567808151245, "logits/rejected": -2.905766725540161, "logps/chosen": -251.69668579101562, "logps/rejected": -439.679443359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.507643222808838, "rewards/margins": 8.11840534210205, "rewards/rejected": -10.626049041748047, "step": 2046 }, { "epoch": 0.32, "learning_rate": 1.2645981638051767e-05, "logits/chosen": -3.0332345962524414, "logits/rejected": -2.821025848388672, "logps/chosen": -359.75128173828125, "logps/rejected": -443.87255859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.045414924621582, "rewards/margins": 7.445767402648926, "rewards/rejected": -10.491182327270508, "step": 2047 }, { "epoch": 0.32, "learning_rate": 1.264524819752062e-05, "logits/chosen": -1.7645483016967773, "logits/rejected": -2.98107647895813, "logps/chosen": -79.20650482177734, "logps/rejected": -319.506591796875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.1310625076293945, "rewards/margins": 5.786080360412598, "rewards/rejected": -7.917142868041992, "step": 2048 }, { "epoch": 0.32, "learning_rate": 1.2644514756989471e-05, "logits/chosen": -2.590925693511963, "logits/rejected": -2.950925827026367, "logps/chosen": -128.81777954101562, "logps/rejected": -261.8007507324219, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3527836799621582, "rewards/margins": 6.741283416748047, "rewards/rejected": -8.094066619873047, "step": 2049 }, { "epoch": 0.32, "learning_rate": 1.2643781316458323e-05, "logits/chosen": -1.2506614923477173, "logits/rejected": -2.8128607273101807, "logps/chosen": -65.41450500488281, "logps/rejected": -419.600830078125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.9590942859649658, "rewards/margins": 6.742927551269531, "rewards/rejected": -8.702022552490234, "step": 2050 }, { "epoch": 0.32, "learning_rate": 1.2643047875927175e-05, "logits/chosen": -1.596300721168518, "logits/rejected": -2.7299177646636963, "logps/chosen": -184.3592529296875, "logps/rejected": -253.7578887939453, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -3.1245665550231934, "rewards/margins": 6.505340099334717, "rewards/rejected": -9.62990665435791, "step": 2051 }, { "epoch": 0.32, "learning_rate": 1.2642314435396027e-05, "logits/chosen": -2.142648696899414, "logits/rejected": -2.870380401611328, "logps/chosen": -85.32149505615234, "logps/rejected": -478.0631103515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.940691351890564, "rewards/margins": 8.554327011108398, "rewards/rejected": -9.495018005371094, "step": 2052 }, { "epoch": 0.32, "learning_rate": 1.2641580994864879e-05, "logits/chosen": -3.1323890686035156, "logits/rejected": -2.8860104084014893, "logps/chosen": -376.8186340332031, "logps/rejected": -290.92572021484375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -4.092720985412598, "rewards/margins": 5.21573543548584, "rewards/rejected": -9.308456420898438, "step": 2053 }, { "epoch": 0.32, "learning_rate": 1.2640847554333732e-05, "logits/chosen": -3.0587093830108643, "logits/rejected": -2.954054832458496, "logps/chosen": -426.95587158203125, "logps/rejected": -457.88946533203125, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -1.7288872003555298, "rewards/margins": 4.447015762329102, "rewards/rejected": -6.175902843475342, "step": 2054 }, { "epoch": 0.32, "learning_rate": 1.2640114113802584e-05, "logits/chosen": -2.274491310119629, "logits/rejected": -2.7004902362823486, "logps/chosen": -528.8342895507812, "logps/rejected": -330.4144287109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7645723819732666, "rewards/margins": 8.12209701538086, "rewards/rejected": -9.886669158935547, "step": 2055 }, { "epoch": 0.32, "learning_rate": 1.2639380673271436e-05, "logits/chosen": -2.119216203689575, "logits/rejected": -2.520552396774292, "logps/chosen": -72.81819152832031, "logps/rejected": -229.25399780273438, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.471880316734314, "rewards/margins": 7.771913528442383, "rewards/rejected": -9.243793487548828, "step": 2056 }, { "epoch": 0.32, "learning_rate": 1.2638647232740288e-05, "logits/chosen": -1.851230263710022, "logits/rejected": -2.6138651371002197, "logps/chosen": -65.36750793457031, "logps/rejected": -206.83584594726562, "loss": 1.4229, "rewards/accuracies": 0.5, "rewards/chosen": -3.1488521099090576, "rewards/margins": 2.798875093460083, "rewards/rejected": -5.947727203369141, "step": 2057 }, { "epoch": 0.32, "learning_rate": 1.263791379220914e-05, "logits/chosen": -3.069979667663574, "logits/rejected": -2.7345688343048096, "logps/chosen": -1084.38330078125, "logps/rejected": -622.5360717773438, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.5603363513946533, "rewards/margins": 6.820330619812012, "rewards/rejected": -9.380666732788086, "step": 2058 }, { "epoch": 0.32, "learning_rate": 1.2637180351677992e-05, "logits/chosen": -2.7382214069366455, "logits/rejected": -3.0794177055358887, "logps/chosen": -440.2270812988281, "logps/rejected": -714.5662841796875, "loss": 2.0322, "rewards/accuracies": 0.5, "rewards/chosen": -5.227888584136963, "rewards/margins": 0.5546815395355225, "rewards/rejected": -5.782569885253906, "step": 2059 }, { "epoch": 0.32, "learning_rate": 1.2636446911146844e-05, "logits/chosen": -2.764951705932617, "logits/rejected": -2.9547770023345947, "logps/chosen": -66.11827850341797, "logps/rejected": -269.18206787109375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.9945913553237915, "rewards/margins": 6.735757827758789, "rewards/rejected": -7.730349540710449, "step": 2060 }, { "epoch": 0.32, "learning_rate": 1.2635713470615695e-05, "logits/chosen": -2.955376386642456, "logits/rejected": -2.4803051948547363, "logps/chosen": -580.7905883789062, "logps/rejected": -423.8682861328125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.4219460487365723, "rewards/margins": 7.282364845275879, "rewards/rejected": -9.70431137084961, "step": 2061 }, { "epoch": 0.32, "learning_rate": 1.2634980030084547e-05, "logits/chosen": -1.9376291036605835, "logits/rejected": -2.7855358123779297, "logps/chosen": -175.65225219726562, "logps/rejected": -449.2118835449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4277191162109375, "rewards/margins": 11.122003555297852, "rewards/rejected": -13.549722671508789, "step": 2062 }, { "epoch": 0.32, "learning_rate": 1.2634246589553401e-05, "logits/chosen": -2.3170716762542725, "logits/rejected": -2.666738271713257, "logps/chosen": -232.6552734375, "logps/rejected": -356.9025573730469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.1399941444396973, "rewards/margins": 7.374448299407959, "rewards/rejected": -9.514442443847656, "step": 2063 }, { "epoch": 0.32, "learning_rate": 1.2633513149022253e-05, "logits/chosen": -2.050755262374878, "logits/rejected": -2.86482834815979, "logps/chosen": -480.17724609375, "logps/rejected": -602.1260986328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.352813720703125, "rewards/margins": 7.060021877288818, "rewards/rejected": -9.412836074829102, "step": 2064 }, { "epoch": 0.32, "learning_rate": 1.2632779708491105e-05, "logits/chosen": -1.5835283994674683, "logits/rejected": -2.6505744457244873, "logps/chosen": -186.33917236328125, "logps/rejected": -452.61151123046875, "loss": 2.3971, "rewards/accuracies": 0.5, "rewards/chosen": -4.490903854370117, "rewards/margins": 2.9849843978881836, "rewards/rejected": -7.475888252258301, "step": 2065 }, { "epoch": 0.32, "learning_rate": 1.2632046267959957e-05, "logits/chosen": -2.6560351848602295, "logits/rejected": -3.0597586631774902, "logps/chosen": -174.45489501953125, "logps/rejected": -415.2165222167969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.3939309120178223, "rewards/margins": 7.409392833709717, "rewards/rejected": -9.803323745727539, "step": 2066 }, { "epoch": 0.32, "learning_rate": 1.2631312827428808e-05, "logits/chosen": -1.4691011905670166, "logits/rejected": -2.8568224906921387, "logps/chosen": -135.2246856689453, "logps/rejected": -433.26568603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.809657096862793, "rewards/margins": 10.614934921264648, "rewards/rejected": -13.424591064453125, "step": 2067 }, { "epoch": 0.32, "learning_rate": 1.263057938689766e-05, "logits/chosen": -2.8396546840667725, "logits/rejected": -2.57746958732605, "logps/chosen": -426.9767150878906, "logps/rejected": -467.8607177734375, "loss": 3.7021, "rewards/accuracies": 0.0, "rewards/chosen": -6.05547571182251, "rewards/margins": -3.673717498779297, "rewards/rejected": -2.381758213043213, "step": 2068 }, { "epoch": 0.32, "learning_rate": 1.2629845946366512e-05, "logits/chosen": -2.5425097942352295, "logits/rejected": -3.032362461090088, "logps/chosen": -64.85345458984375, "logps/rejected": -197.41563415527344, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.9129620790481567, "rewards/margins": 5.047149658203125, "rewards/rejected": -6.960111618041992, "step": 2069 }, { "epoch": 0.32, "learning_rate": 1.2629112505835364e-05, "logits/chosen": -2.718503475189209, "logits/rejected": -1.955881118774414, "logps/chosen": -485.8073425292969, "logps/rejected": -398.15789794921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.091756820678711, "rewards/margins": 7.934502124786377, "rewards/rejected": -11.02625846862793, "step": 2070 }, { "epoch": 0.32, "learning_rate": 1.2628379065304216e-05, "logits/chosen": -2.6346402168273926, "logits/rejected": -2.9541170597076416, "logps/chosen": -89.780029296875, "logps/rejected": -127.06086730957031, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -2.2573604583740234, "rewards/margins": 3.7907466888427734, "rewards/rejected": -6.048107147216797, "step": 2071 }, { "epoch": 0.32, "learning_rate": 1.262764562477307e-05, "logits/chosen": -1.6011326313018799, "logits/rejected": -2.864013195037842, "logps/chosen": -182.32728576660156, "logps/rejected": -428.402587890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.644728422164917, "rewards/margins": 8.058697700500488, "rewards/rejected": -10.703426361083984, "step": 2072 }, { "epoch": 0.32, "learning_rate": 1.2626912184241921e-05, "logits/chosen": -2.6174941062927246, "logits/rejected": -3.0574288368225098, "logps/chosen": -144.33151245117188, "logps/rejected": -223.97348022460938, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.715155839920044, "rewards/margins": 5.175983428955078, "rewards/rejected": -6.891139030456543, "step": 2073 }, { "epoch": 0.32, "learning_rate": 1.2626178743710773e-05, "logits/chosen": -2.660090208053589, "logits/rejected": -2.057382106781006, "logps/chosen": -422.94140625, "logps/rejected": -418.5015869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6668946743011475, "rewards/margins": 9.792882919311523, "rewards/rejected": -13.459776878356934, "step": 2074 }, { "epoch": 0.32, "learning_rate": 1.2625445303179625e-05, "logits/chosen": -2.8440988063812256, "logits/rejected": -3.0809099674224854, "logps/chosen": -454.3155822753906, "logps/rejected": -502.30902099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.081481456756592, "rewards/margins": 12.874100685119629, "rewards/rejected": -14.955581665039062, "step": 2075 }, { "epoch": 0.32, "learning_rate": 1.2624711862648477e-05, "logits/chosen": -2.6260905265808105, "logits/rejected": -2.9850013256073, "logps/chosen": -274.9197082519531, "logps/rejected": -311.8919372558594, "loss": 3.33, "rewards/accuracies": 0.5, "rewards/chosen": -5.3751983642578125, "rewards/margins": 1.615095615386963, "rewards/rejected": -6.990293979644775, "step": 2076 }, { "epoch": 0.32, "learning_rate": 1.2623978422117329e-05, "logits/chosen": -2.009493827819824, "logits/rejected": -3.0010600090026855, "logps/chosen": -162.73822021484375, "logps/rejected": -335.5971374511719, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3776969909667969, "rewards/margins": 8.232986450195312, "rewards/rejected": -9.61068344116211, "step": 2077 }, { "epoch": 0.32, "learning_rate": 1.262324498158618e-05, "logits/chosen": -1.39255690574646, "logits/rejected": -2.221712112426758, "logps/chosen": -481.57586669921875, "logps/rejected": -449.63653564453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.285946846008301, "rewards/margins": 6.7745771408081055, "rewards/rejected": -10.060523986816406, "step": 2078 }, { "epoch": 0.32, "learning_rate": 1.2622511541055034e-05, "logits/chosen": -1.5113469362258911, "logits/rejected": -2.556626558303833, "logps/chosen": -268.8033752441406, "logps/rejected": -502.08526611328125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.931837558746338, "rewards/margins": 7.38055419921875, "rewards/rejected": -10.31239128112793, "step": 2079 }, { "epoch": 0.32, "learning_rate": 1.2621778100523886e-05, "logits/chosen": -2.8384788036346436, "logits/rejected": -1.673849105834961, "logps/chosen": -336.99237060546875, "logps/rejected": -360.81201171875, "loss": 2.9813, "rewards/accuracies": 0.5, "rewards/chosen": -5.912181854248047, "rewards/margins": 0.38477134704589844, "rewards/rejected": -6.296953201293945, "step": 2080 }, { "epoch": 0.32, "learning_rate": 1.262104465999274e-05, "logits/chosen": -2.0323657989501953, "logits/rejected": -2.5868990421295166, "logps/chosen": -96.19087219238281, "logps/rejected": -189.95965576171875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.4732789993286133, "rewards/margins": 5.845177173614502, "rewards/rejected": -7.318456172943115, "step": 2081 }, { "epoch": 0.32, "learning_rate": 1.2620311219461592e-05, "logits/chosen": -2.769070863723755, "logits/rejected": -2.6937880516052246, "logps/chosen": -128.88401794433594, "logps/rejected": -245.51486206054688, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.1952344179153442, "rewards/margins": 5.614261627197266, "rewards/rejected": -6.80949592590332, "step": 2082 }, { "epoch": 0.32, "learning_rate": 1.2619577778930444e-05, "logits/chosen": -2.5560004711151123, "logits/rejected": -1.97079598903656, "logps/chosen": -381.3566589355469, "logps/rejected": -441.77410888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0627951622009277, "rewards/margins": 12.297087669372559, "rewards/rejected": -15.359882354736328, "step": 2083 }, { "epoch": 0.32, "learning_rate": 1.2618844338399295e-05, "logits/chosen": -1.3209205865859985, "logits/rejected": -2.5971615314483643, "logps/chosen": -92.68016815185547, "logps/rejected": -512.2630615234375, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.9442896842956543, "rewards/margins": 8.634843826293945, "rewards/rejected": -10.579133033752441, "step": 2084 }, { "epoch": 0.32, "learning_rate": 1.2618110897868147e-05, "logits/chosen": -1.6882166862487793, "logits/rejected": -1.1264524459838867, "logps/chosen": -431.1568298339844, "logps/rejected": -261.00189208984375, "loss": 5.819, "rewards/accuracies": 0.0, "rewards/chosen": -7.982048034667969, "rewards/margins": -5.815984725952148, "rewards/rejected": -2.1660633087158203, "step": 2085 }, { "epoch": 0.32, "learning_rate": 1.2617377457337e-05, "logits/chosen": -1.879991054534912, "logits/rejected": -2.543523073196411, "logps/chosen": -126.03756713867188, "logps/rejected": -210.19493103027344, "loss": 1.6503, "rewards/accuracies": 0.5, "rewards/chosen": -3.610596179962158, "rewards/margins": 2.403640031814575, "rewards/rejected": -6.0142364501953125, "step": 2086 }, { "epoch": 0.32, "learning_rate": 1.2616644016805851e-05, "logits/chosen": -2.4784042835235596, "logits/rejected": -1.8235962390899658, "logps/chosen": -544.4928588867188, "logps/rejected": -485.26116943359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.853374481201172, "rewards/margins": 7.030481815338135, "rewards/rejected": -10.883855819702148, "step": 2087 }, { "epoch": 0.32, "learning_rate": 1.2615910576274703e-05, "logits/chosen": -2.363117218017578, "logits/rejected": -2.83813214302063, "logps/chosen": -36.483795166015625, "logps/rejected": -419.5647888183594, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4554752111434937, "rewards/margins": 9.441041946411133, "rewards/rejected": -10.896516799926758, "step": 2088 }, { "epoch": 0.32, "learning_rate": 1.2615177135743555e-05, "logits/chosen": -2.0034565925598145, "logits/rejected": -2.8432252407073975, "logps/chosen": -137.9580078125, "logps/rejected": -212.1309356689453, "loss": 2.6193, "rewards/accuracies": 0.5, "rewards/chosen": -4.264518737792969, "rewards/margins": 2.452204465866089, "rewards/rejected": -6.716723442077637, "step": 2089 }, { "epoch": 0.33, "learning_rate": 1.2614443695212408e-05, "logits/chosen": -1.9737706184387207, "logits/rejected": -2.993520498275757, "logps/chosen": -130.10848999023438, "logps/rejected": -293.3274230957031, "loss": 2.5916, "rewards/accuracies": 0.5, "rewards/chosen": -4.523875713348389, "rewards/margins": 0.9532525539398193, "rewards/rejected": -5.477128028869629, "step": 2090 }, { "epoch": 0.33, "learning_rate": 1.261371025468126e-05, "logits/chosen": -2.7866148948669434, "logits/rejected": -3.098381280899048, "logps/chosen": -332.7039489746094, "logps/rejected": -400.7469482421875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.9727790355682373, "rewards/margins": 5.8510942459106445, "rewards/rejected": -7.823873519897461, "step": 2091 }, { "epoch": 0.33, "learning_rate": 1.2612976814150112e-05, "logits/chosen": -3.2432150840759277, "logits/rejected": -3.3519303798675537, "logps/chosen": -53.60121154785156, "logps/rejected": -152.99807739257812, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -1.121931552886963, "rewards/margins": 5.338784694671631, "rewards/rejected": -6.460716247558594, "step": 2092 }, { "epoch": 0.33, "learning_rate": 1.2612243373618964e-05, "logits/chosen": -2.208305597305298, "logits/rejected": -3.023301839828491, "logps/chosen": -132.43678283691406, "logps/rejected": -295.4962463378906, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.5650465488433838, "rewards/margins": 6.146944046020508, "rewards/rejected": -7.7119903564453125, "step": 2093 }, { "epoch": 0.33, "learning_rate": 1.2611509933087816e-05, "logits/chosen": -1.5639642477035522, "logits/rejected": -2.6844263076782227, "logps/chosen": -313.240234375, "logps/rejected": -732.2030029296875, "loss": 3.7719, "rewards/accuracies": 0.5, "rewards/chosen": -5.666886806488037, "rewards/margins": 0.09166717529296875, "rewards/rejected": -5.758553981781006, "step": 2094 }, { "epoch": 0.33, "learning_rate": 1.2610776492556668e-05, "logits/chosen": -1.547633409500122, "logits/rejected": -2.6029014587402344, "logps/chosen": -159.75546264648438, "logps/rejected": -266.99884033203125, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -3.194700241088867, "rewards/margins": 4.740595817565918, "rewards/rejected": -7.935296058654785, "step": 2095 }, { "epoch": 0.33, "learning_rate": 1.261004305202552e-05, "logits/chosen": -2.8623745441436768, "logits/rejected": -2.4032058715820312, "logps/chosen": -460.5403137207031, "logps/rejected": -446.4787292480469, "loss": 5.7321, "rewards/accuracies": 0.0, "rewards/chosen": -7.251285552978516, "rewards/margins": -5.723355293273926, "rewards/rejected": -1.5279300212860107, "step": 2096 }, { "epoch": 0.33, "learning_rate": 1.2609309611494372e-05, "logits/chosen": -2.3070485591888428, "logits/rejected": -2.7372429370880127, "logps/chosen": -208.30181884765625, "logps/rejected": -201.6854248046875, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -2.5354042053222656, "rewards/margins": 3.5261712074279785, "rewards/rejected": -6.061575889587402, "step": 2097 }, { "epoch": 0.33, "learning_rate": 1.2608576170963223e-05, "logits/chosen": -2.8633220195770264, "logits/rejected": -2.4284565448760986, "logps/chosen": -381.3526916503906, "logps/rejected": -425.9610290527344, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.871225118637085, "rewards/margins": 5.594001770019531, "rewards/rejected": -9.465227127075195, "step": 2098 }, { "epoch": 0.33, "learning_rate": 1.2607842730432077e-05, "logits/chosen": -2.297109603881836, "logits/rejected": -2.838041067123413, "logps/chosen": -228.21388244628906, "logps/rejected": -369.750732421875, "loss": 1.1049, "rewards/accuracies": 0.5, "rewards/chosen": -1.6397655010223389, "rewards/margins": 2.6249983310699463, "rewards/rejected": -4.264763832092285, "step": 2099 }, { "epoch": 0.33, "learning_rate": 1.2607109289900929e-05, "logits/chosen": -2.5098068714141846, "logits/rejected": -3.121354103088379, "logps/chosen": -211.80101013183594, "logps/rejected": -387.00244140625, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -2.3348631858825684, "rewards/margins": 6.923803329467773, "rewards/rejected": -9.258666038513184, "step": 2100 }, { "epoch": 0.33, "learning_rate": 1.260637584936978e-05, "logits/chosen": -2.431490421295166, "logits/rejected": -2.956221580505371, "logps/chosen": -204.02999877929688, "logps/rejected": -435.04833984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.6819586753845215, "rewards/margins": 8.441513061523438, "rewards/rejected": -11.1234712600708, "step": 2101 }, { "epoch": 0.33, "learning_rate": 1.2605642408838633e-05, "logits/chosen": -1.7826564311981201, "logits/rejected": -2.6596477031707764, "logps/chosen": -147.8816375732422, "logps/rejected": -378.1238098144531, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.4545410871505737, "rewards/margins": 6.66831636428833, "rewards/rejected": -8.122858047485352, "step": 2102 }, { "epoch": 0.33, "learning_rate": 1.2604908968307485e-05, "logits/chosen": -2.8489718437194824, "logits/rejected": -2.1591954231262207, "logps/chosen": -264.688720703125, "logps/rejected": -227.49072265625, "loss": 3.2273, "rewards/accuracies": 0.5, "rewards/chosen": -5.954267501831055, "rewards/margins": -1.4995651245117188, "rewards/rejected": -4.454702377319336, "step": 2103 }, { "epoch": 0.33, "learning_rate": 1.2604175527776336e-05, "logits/chosen": -2.2861499786376953, "logits/rejected": -2.9410860538482666, "logps/chosen": -120.6400146484375, "logps/rejected": -487.63470458984375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2978847026824951, "rewards/margins": 5.124422073364258, "rewards/rejected": -6.422307014465332, "step": 2104 }, { "epoch": 0.33, "learning_rate": 1.2603442087245188e-05, "logits/chosen": -2.6352121829986572, "logits/rejected": -2.9499945640563965, "logps/chosen": -29.528661727905273, "logps/rejected": -133.50875854492188, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.5507102012634277, "rewards/margins": 4.081020832061768, "rewards/rejected": -5.631731033325195, "step": 2105 }, { "epoch": 0.33, "learning_rate": 1.260270864671404e-05, "logits/chosen": -2.300502300262451, "logits/rejected": -2.8411753177642822, "logps/chosen": -408.74737548828125, "logps/rejected": -608.7900390625, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -3.458076000213623, "rewards/margins": 3.8106470108032227, "rewards/rejected": -7.2687225341796875, "step": 2106 }, { "epoch": 0.33, "learning_rate": 1.2601975206182892e-05, "logits/chosen": -3.096605062484741, "logits/rejected": -2.392697334289551, "logps/chosen": -158.08615112304688, "logps/rejected": -126.62605285644531, "loss": 1.6293, "rewards/accuracies": 0.5, "rewards/chosen": -4.025224208831787, "rewards/margins": 0.43538808822631836, "rewards/rejected": -4.4606122970581055, "step": 2107 }, { "epoch": 0.33, "learning_rate": 1.2601241765651746e-05, "logits/chosen": -1.979143500328064, "logits/rejected": -2.9552881717681885, "logps/chosen": -162.98252868652344, "logps/rejected": -368.9617919921875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7968578338623047, "rewards/margins": 7.273785591125488, "rewards/rejected": -8.070643424987793, "step": 2108 }, { "epoch": 0.33, "learning_rate": 1.2600508325120598e-05, "logits/chosen": -1.8305915594100952, "logits/rejected": -2.583527088165283, "logps/chosen": -205.89968872070312, "logps/rejected": -351.6512756347656, "loss": 1.9359, "rewards/accuracies": 0.5, "rewards/chosen": -4.027230739593506, "rewards/margins": 2.0224738121032715, "rewards/rejected": -6.049704551696777, "step": 2109 }, { "epoch": 0.33, "learning_rate": 1.259977488458945e-05, "logits/chosen": -1.3873534202575684, "logits/rejected": -2.2568368911743164, "logps/chosen": -84.72511291503906, "logps/rejected": -382.5104064941406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2093493938446045, "rewards/margins": 9.45263385772705, "rewards/rejected": -10.661983489990234, "step": 2110 }, { "epoch": 0.33, "learning_rate": 1.2599041444058301e-05, "logits/chosen": -2.536013126373291, "logits/rejected": -2.6665666103363037, "logps/chosen": -351.62750244140625, "logps/rejected": -420.9764404296875, "loss": 2.2328, "rewards/accuracies": 0.5, "rewards/chosen": -5.289316654205322, "rewards/margins": 2.7474093437194824, "rewards/rejected": -8.036725997924805, "step": 2111 }, { "epoch": 0.33, "learning_rate": 1.2598308003527153e-05, "logits/chosen": -2.4531562328338623, "logits/rejected": -2.578434944152832, "logps/chosen": -195.68331909179688, "logps/rejected": -269.13519287109375, "loss": 2.5247, "rewards/accuracies": 0.5, "rewards/chosen": -5.156834125518799, "rewards/margins": 0.2894101142883301, "rewards/rejected": -5.446244239807129, "step": 2112 }, { "epoch": 0.33, "learning_rate": 1.2597574562996007e-05, "logits/chosen": -3.0078601837158203, "logits/rejected": -2.884746551513672, "logps/chosen": -200.29026794433594, "logps/rejected": -195.72325134277344, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -2.4784462451934814, "rewards/margins": 5.039560794830322, "rewards/rejected": -7.518007278442383, "step": 2113 }, { "epoch": 0.33, "learning_rate": 1.2596841122464859e-05, "logits/chosen": -2.630986452102661, "logits/rejected": -2.132572889328003, "logps/chosen": -132.24171447753906, "logps/rejected": -208.73428344726562, "loss": 0.4197, "rewards/accuracies": 0.5, "rewards/chosen": -1.8115298748016357, "rewards/margins": 4.993706703186035, "rewards/rejected": -6.805236339569092, "step": 2114 }, { "epoch": 0.33, "learning_rate": 1.259610768193371e-05, "logits/chosen": -3.0749897956848145, "logits/rejected": -3.118013381958008, "logps/chosen": -142.46087646484375, "logps/rejected": -95.3343276977539, "loss": 1.3277, "rewards/accuracies": 0.5, "rewards/chosen": -2.9051284790039062, "rewards/margins": 0.1372741460800171, "rewards/rejected": -3.042402744293213, "step": 2115 }, { "epoch": 0.33, "learning_rate": 1.2595374241402564e-05, "logits/chosen": -2.5777339935302734, "logits/rejected": -2.6546878814697266, "logps/chosen": -118.11665344238281, "logps/rejected": -120.04195404052734, "loss": 1.4662, "rewards/accuracies": 0.5, "rewards/chosen": -2.694978713989258, "rewards/margins": 1.0126752853393555, "rewards/rejected": -3.7076539993286133, "step": 2116 }, { "epoch": 0.33, "learning_rate": 1.2594640800871416e-05, "logits/chosen": -2.841230630874634, "logits/rejected": -2.700840473175049, "logps/chosen": -164.09762573242188, "logps/rejected": -286.34320068359375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.7718453407287598, "rewards/margins": 7.017879486083984, "rewards/rejected": -8.789725303649902, "step": 2117 }, { "epoch": 0.33, "learning_rate": 1.2593907360340268e-05, "logits/chosen": -2.883197546005249, "logits/rejected": -2.9631543159484863, "logps/chosen": -145.6005096435547, "logps/rejected": -159.03590393066406, "loss": 1.9292, "rewards/accuracies": 0.5, "rewards/chosen": -3.5827488899230957, "rewards/margins": 0.8928244113922119, "rewards/rejected": -4.475573539733887, "step": 2118 }, { "epoch": 0.33, "learning_rate": 1.259317391980912e-05, "logits/chosen": -3.0239500999450684, "logits/rejected": -3.015645742416382, "logps/chosen": -258.6458435058594, "logps/rejected": -247.93304443359375, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -3.6149682998657227, "rewards/margins": 3.7613778114318848, "rewards/rejected": -7.376346111297607, "step": 2119 }, { "epoch": 0.33, "learning_rate": 1.2592440479277972e-05, "logits/chosen": -2.847537040710449, "logits/rejected": -2.446577787399292, "logps/chosen": -180.31829833984375, "logps/rejected": -195.1053924560547, "loss": 2.3541, "rewards/accuracies": 0.5, "rewards/chosen": -4.235729694366455, "rewards/margins": 0.5616426467895508, "rewards/rejected": -4.797372341156006, "step": 2120 }, { "epoch": 0.33, "learning_rate": 1.2591707038746823e-05, "logits/chosen": -2.050624132156372, "logits/rejected": -3.1117172241210938, "logps/chosen": -102.03711700439453, "logps/rejected": -525.7623901367188, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1945946216583252, "rewards/margins": 10.656320571899414, "rewards/rejected": -11.850915908813477, "step": 2121 }, { "epoch": 0.33, "learning_rate": 1.2590973598215675e-05, "logits/chosen": -1.9126012325286865, "logits/rejected": -2.605257987976074, "logps/chosen": -260.8865661621094, "logps/rejected": -323.94415283203125, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/chosen": -2.0535659790039062, "rewards/margins": 4.180368423461914, "rewards/rejected": -6.23393440246582, "step": 2122 }, { "epoch": 0.33, "learning_rate": 1.2590240157684527e-05, "logits/chosen": -1.2811905145645142, "logits/rejected": -2.6352286338806152, "logps/chosen": -97.03968811035156, "logps/rejected": -419.07025146484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.759972333908081, "rewards/margins": 9.102399826049805, "rewards/rejected": -10.862371444702148, "step": 2123 }, { "epoch": 0.33, "learning_rate": 1.2589506717153379e-05, "logits/chosen": -1.908007264137268, "logits/rejected": -2.0450236797332764, "logps/chosen": -205.08206176757812, "logps/rejected": -301.19891357421875, "loss": 0.7784, "rewards/accuracies": 0.5, "rewards/chosen": -3.4582297801971436, "rewards/margins": 1.7471586465835571, "rewards/rejected": -5.20538854598999, "step": 2124 }, { "epoch": 0.33, "learning_rate": 1.2588773276622233e-05, "logits/chosen": -2.774841785430908, "logits/rejected": -2.806713342666626, "logps/chosen": -162.52662658691406, "logps/rejected": -157.97702026367188, "loss": 1.196, "rewards/accuracies": 0.5, "rewards/chosen": -2.5962767601013184, "rewards/margins": 2.321772813796997, "rewards/rejected": -4.9180498123168945, "step": 2125 }, { "epoch": 0.33, "learning_rate": 1.2588039836091085e-05, "logits/chosen": -2.5255870819091797, "logits/rejected": -2.8110060691833496, "logps/chosen": -125.82955932617188, "logps/rejected": -266.90289306640625, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -2.0658950805664062, "rewards/margins": 4.670798301696777, "rewards/rejected": -6.736693382263184, "step": 2126 }, { "epoch": 0.33, "learning_rate": 1.2587306395559936e-05, "logits/chosen": -2.6772964000701904, "logits/rejected": -3.281071186065674, "logps/chosen": -265.02764892578125, "logps/rejected": -276.20977783203125, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": -2.1087727546691895, "rewards/margins": 3.179029941558838, "rewards/rejected": -5.287802696228027, "step": 2127 }, { "epoch": 0.33, "learning_rate": 1.2586572955028788e-05, "logits/chosen": -2.5225093364715576, "logits/rejected": -2.314378499984741, "logps/chosen": -132.12046813964844, "logps/rejected": -281.4680480957031, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.035691738128662, "rewards/margins": 7.1907877922058105, "rewards/rejected": -9.226479530334473, "step": 2128 }, { "epoch": 0.33, "learning_rate": 1.258583951449764e-05, "logits/chosen": -2.8415846824645996, "logits/rejected": -2.0460011959075928, "logps/chosen": -961.140869140625, "logps/rejected": -575.0323486328125, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": -2.149296760559082, "rewards/margins": 3.863703966140747, "rewards/rejected": -6.01300048828125, "step": 2129 }, { "epoch": 0.33, "learning_rate": 1.2585106073966492e-05, "logits/chosen": -2.5541388988494873, "logits/rejected": -2.9010958671569824, "logps/chosen": -214.37319946289062, "logps/rejected": -395.1005859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.9329991340637207, "rewards/margins": 6.352964401245117, "rewards/rejected": -8.285964012145996, "step": 2130 }, { "epoch": 0.33, "learning_rate": 1.2584372633435344e-05, "logits/chosen": -2.710345506668091, "logits/rejected": -3.343433141708374, "logps/chosen": -34.00614929199219, "logps/rejected": -314.4437561035156, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.4763619899749756, "rewards/margins": 4.855896472930908, "rewards/rejected": -6.332258224487305, "step": 2131 }, { "epoch": 0.33, "learning_rate": 1.2583639192904196e-05, "logits/chosen": -2.8280515670776367, "logits/rejected": -3.1293485164642334, "logps/chosen": -131.93910217285156, "logps/rejected": -450.1622619628906, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.581148386001587, "rewards/margins": 8.078275680541992, "rewards/rejected": -9.659424781799316, "step": 2132 }, { "epoch": 0.33, "learning_rate": 1.2582905752373048e-05, "logits/chosen": -2.6237587928771973, "logits/rejected": -2.8527028560638428, "logps/chosen": -103.66116333007812, "logps/rejected": -232.2854461669922, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.2596559524536133, "rewards/margins": 6.039937973022461, "rewards/rejected": -7.299593925476074, "step": 2133 }, { "epoch": 0.33, "learning_rate": 1.2582172311841901e-05, "logits/chosen": -2.063211441040039, "logits/rejected": -2.877697706222534, "logps/chosen": -173.24853515625, "logps/rejected": -289.7519226074219, "loss": 2.1396, "rewards/accuracies": 0.5, "rewards/chosen": -3.678180694580078, "rewards/margins": 1.297025203704834, "rewards/rejected": -4.975205898284912, "step": 2134 }, { "epoch": 0.33, "learning_rate": 1.2581438871310753e-05, "logits/chosen": -2.7384250164031982, "logits/rejected": -1.9617525339126587, "logps/chosen": -403.75042724609375, "logps/rejected": -325.9080810546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.587298631668091, "rewards/margins": 7.259417533874512, "rewards/rejected": -10.846715927124023, "step": 2135 }, { "epoch": 0.33, "learning_rate": 1.2580705430779605e-05, "logits/chosen": -2.738849639892578, "logits/rejected": -2.8710625171661377, "logps/chosen": -155.11053466796875, "logps/rejected": -198.90020751953125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.320380687713623, "rewards/margins": 5.02557373046875, "rewards/rejected": -6.345954418182373, "step": 2136 }, { "epoch": 0.33, "learning_rate": 1.2579971990248457e-05, "logits/chosen": -2.1491003036499023, "logits/rejected": -2.860842704772949, "logps/chosen": -55.304988861083984, "logps/rejected": -174.0052032470703, "loss": 0.1591, "rewards/accuracies": 1.0, "rewards/chosen": -2.85959529876709, "rewards/margins": 2.45015287399292, "rewards/rejected": -5.30974817276001, "step": 2137 }, { "epoch": 0.33, "learning_rate": 1.2579238549717309e-05, "logits/chosen": -2.5286004543304443, "logits/rejected": -2.8561956882476807, "logps/chosen": -90.51741027832031, "logps/rejected": -221.28399658203125, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.9668943881988525, "rewards/margins": 3.732219696044922, "rewards/rejected": -5.6991143226623535, "step": 2138 }, { "epoch": 0.33, "learning_rate": 1.257850510918616e-05, "logits/chosen": -2.362732172012329, "logits/rejected": -2.7656140327453613, "logps/chosen": -815.3809814453125, "logps/rejected": -879.8756103515625, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": -2.1470086574554443, "rewards/margins": 6.087800979614258, "rewards/rejected": -8.234809875488281, "step": 2139 }, { "epoch": 0.33, "learning_rate": 1.2577771668655013e-05, "logits/chosen": -2.014073610305786, "logits/rejected": -3.135467290878296, "logps/chosen": -192.65185546875, "logps/rejected": -333.5777893066406, "loss": 0.0764, "rewards/accuracies": 1.0, "rewards/chosen": -2.4347739219665527, "rewards/margins": 2.5709543228149414, "rewards/rejected": -5.005728244781494, "step": 2140 }, { "epoch": 0.33, "learning_rate": 1.2577038228123864e-05, "logits/chosen": -2.4789323806762695, "logits/rejected": -2.79060435295105, "logps/chosen": -437.7354736328125, "logps/rejected": -486.2554931640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1885719299316406, "rewards/margins": 7.134282112121582, "rewards/rejected": -8.322854042053223, "step": 2141 }, { "epoch": 0.33, "learning_rate": 1.2576304787592716e-05, "logits/chosen": -1.2143560647964478, "logits/rejected": -2.658775806427002, "logps/chosen": -159.48995971679688, "logps/rejected": -397.96435546875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.092926502227783, "rewards/margins": 5.281814098358154, "rewards/rejected": -7.3747406005859375, "step": 2142 }, { "epoch": 0.33, "learning_rate": 1.257557134706157e-05, "logits/chosen": -2.4292776584625244, "logits/rejected": -2.7529408931732178, "logps/chosen": -139.11172485351562, "logps/rejected": -245.75282287597656, "loss": 0.0795, "rewards/accuracies": 1.0, "rewards/chosen": -1.6597893238067627, "rewards/margins": 3.305680751800537, "rewards/rejected": -4.965470314025879, "step": 2143 }, { "epoch": 0.33, "learning_rate": 1.2574837906530422e-05, "logits/chosen": -2.7684919834136963, "logits/rejected": -1.577196478843689, "logps/chosen": -702.0205688476562, "logps/rejected": -345.199951171875, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -2.032811164855957, "rewards/margins": 4.098863124847412, "rewards/rejected": -6.131674289703369, "step": 2144 }, { "epoch": 0.33, "learning_rate": 1.2574104465999274e-05, "logits/chosen": -1.60150146484375, "logits/rejected": -2.9835045337677, "logps/chosen": -108.35501098632812, "logps/rejected": -427.0872802734375, "loss": 0.0738, "rewards/accuracies": 1.0, "rewards/chosen": -2.360548496246338, "rewards/margins": 3.909968852996826, "rewards/rejected": -6.270517349243164, "step": 2145 }, { "epoch": 0.33, "learning_rate": 1.2573371025468125e-05, "logits/chosen": -2.148897171020508, "logits/rejected": -2.7549099922180176, "logps/chosen": -152.16400146484375, "logps/rejected": -326.9621276855469, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -2.177121162414551, "rewards/margins": 5.693328857421875, "rewards/rejected": -7.870450019836426, "step": 2146 }, { "epoch": 0.33, "learning_rate": 1.2572637584936979e-05, "logits/chosen": -1.3015320301055908, "logits/rejected": -2.6895692348480225, "logps/chosen": -179.84591674804688, "logps/rejected": -351.2367248535156, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.4738717079162598, "rewards/margins": 6.561962127685547, "rewards/rejected": -8.035833358764648, "step": 2147 }, { "epoch": 0.33, "learning_rate": 1.2571904144405831e-05, "logits/chosen": -1.3889187574386597, "logits/rejected": -2.769742488861084, "logps/chosen": -192.00439453125, "logps/rejected": -516.2237548828125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.8875679969787598, "rewards/margins": 7.864691734313965, "rewards/rejected": -10.752260208129883, "step": 2148 }, { "epoch": 0.33, "learning_rate": 1.2571170703874683e-05, "logits/chosen": -2.7626826763153076, "logits/rejected": -3.273052930831909, "logps/chosen": -37.747528076171875, "logps/rejected": -208.52783203125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.4393417835235596, "rewards/margins": 6.076229572296143, "rewards/rejected": -7.515571594238281, "step": 2149 }, { "epoch": 0.33, "learning_rate": 1.2570437263343535e-05, "logits/chosen": -1.7855021953582764, "logits/rejected": -3.0802388191223145, "logps/chosen": -118.7602310180664, "logps/rejected": -393.20855712890625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.638527274131775, "rewards/margins": 5.11830997467041, "rewards/rejected": -6.756837844848633, "step": 2150 }, { "epoch": 0.33, "learning_rate": 1.2569703822812387e-05, "logits/chosen": -2.3475286960601807, "logits/rejected": -2.65096116065979, "logps/chosen": -164.41680908203125, "logps/rejected": -182.62696838378906, "loss": 1.9126, "rewards/accuracies": 0.5, "rewards/chosen": -3.8785617351531982, "rewards/margins": 0.7090948820114136, "rewards/rejected": -4.587656497955322, "step": 2151 }, { "epoch": 0.33, "learning_rate": 1.256897038228124e-05, "logits/chosen": -2.7922892570495605, "logits/rejected": -1.598654866218567, "logps/chosen": -638.6580810546875, "logps/rejected": -367.9926452636719, "loss": 1.296, "rewards/accuracies": 0.5, "rewards/chosen": -3.444488525390625, "rewards/margins": 1.434522271156311, "rewards/rejected": -4.8790106773376465, "step": 2152 }, { "epoch": 0.33, "learning_rate": 1.2568236941750092e-05, "logits/chosen": -1.356542944908142, "logits/rejected": -2.704894781112671, "logps/chosen": -234.3984375, "logps/rejected": -357.6732482910156, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -2.209298610687256, "rewards/margins": 4.025872230529785, "rewards/rejected": -6.235170841217041, "step": 2153 }, { "epoch": 0.33, "learning_rate": 1.2567503501218944e-05, "logits/chosen": -2.052412509918213, "logits/rejected": -3.2723050117492676, "logps/chosen": -89.0309829711914, "logps/rejected": -461.1630859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3260841369628906, "rewards/margins": 8.213314056396484, "rewards/rejected": -9.539398193359375, "step": 2154 }, { "epoch": 0.34, "learning_rate": 1.2566770060687796e-05, "logits/chosen": -2.5009424686431885, "logits/rejected": -2.5971200466156006, "logps/chosen": -494.6304016113281, "logps/rejected": -183.67477416992188, "loss": 4.0616, "rewards/accuracies": 0.5, "rewards/chosen": -6.1438398361206055, "rewards/margins": -1.020653247833252, "rewards/rejected": -5.1231865882873535, "step": 2155 }, { "epoch": 0.34, "learning_rate": 1.2566036620156648e-05, "logits/chosen": -2.845261812210083, "logits/rejected": -2.802746295928955, "logps/chosen": -220.21255493164062, "logps/rejected": -321.70513916015625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.8095123767852783, "rewards/margins": 6.0569353103637695, "rewards/rejected": -7.866447448730469, "step": 2156 }, { "epoch": 0.34, "learning_rate": 1.25653031796255e-05, "logits/chosen": -2.543177604675293, "logits/rejected": -2.832185983657837, "logps/chosen": -171.28402709960938, "logps/rejected": -336.6904296875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.8459373712539673, "rewards/margins": 6.533435821533203, "rewards/rejected": -8.379373550415039, "step": 2157 }, { "epoch": 0.34, "learning_rate": 1.2564569739094351e-05, "logits/chosen": -2.9983885288238525, "logits/rejected": -3.0898759365081787, "logps/chosen": -257.4879150390625, "logps/rejected": -339.3807373046875, "loss": 4.0512, "rewards/accuracies": 0.5, "rewards/chosen": -5.333485126495361, "rewards/margins": -1.4489774703979492, "rewards/rejected": -3.884507656097412, "step": 2158 }, { "epoch": 0.34, "learning_rate": 1.2563836298563203e-05, "logits/chosen": -1.1702723503112793, "logits/rejected": -1.5210444927215576, "logps/chosen": -277.48699951171875, "logps/rejected": -385.362060546875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.927647352218628, "rewards/margins": 6.410619735717773, "rewards/rejected": -8.33826732635498, "step": 2159 }, { "epoch": 0.34, "learning_rate": 1.2563102858032055e-05, "logits/chosen": -2.860492706298828, "logits/rejected": -2.977698802947998, "logps/chosen": -176.5548858642578, "logps/rejected": -217.25396728515625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.7771096229553223, "rewards/margins": 5.06672477722168, "rewards/rejected": -7.843833923339844, "step": 2160 }, { "epoch": 0.34, "learning_rate": 1.2562369417500909e-05, "logits/chosen": -1.6632038354873657, "logits/rejected": -2.532883882522583, "logps/chosen": -93.24683380126953, "logps/rejected": -340.3429870605469, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.6648426055908203, "rewards/margins": 6.949923992156982, "rewards/rejected": -9.614767074584961, "step": 2161 }, { "epoch": 0.34, "learning_rate": 1.256163597696976e-05, "logits/chosen": -2.3022615909576416, "logits/rejected": -2.879657030105591, "logps/chosen": -241.739013671875, "logps/rejected": -276.9521484375, "loss": 2.7909, "rewards/accuracies": 0.5, "rewards/chosen": -4.8273515701293945, "rewards/margins": 0.9409127235412598, "rewards/rejected": -5.768263816833496, "step": 2162 }, { "epoch": 0.34, "learning_rate": 1.2560902536438612e-05, "logits/chosen": -2.2955877780914307, "logits/rejected": -2.7657952308654785, "logps/chosen": -66.52725219726562, "logps/rejected": -315.3480224609375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.9677824974060059, "rewards/margins": 7.938229560852051, "rewards/rejected": -9.906012535095215, "step": 2163 }, { "epoch": 0.34, "learning_rate": 1.2560169095907464e-05, "logits/chosen": -1.3561043739318848, "logits/rejected": -2.972299337387085, "logps/chosen": -103.07174682617188, "logps/rejected": -364.1676025390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.057323455810547, "rewards/margins": 9.428476333618164, "rewards/rejected": -11.485799789428711, "step": 2164 }, { "epoch": 0.34, "learning_rate": 1.2559435655376316e-05, "logits/chosen": -2.3884713649749756, "logits/rejected": -2.6603896617889404, "logps/chosen": -157.5394287109375, "logps/rejected": -199.46890258789062, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.242765188217163, "rewards/margins": 4.87716007232666, "rewards/rejected": -7.119925022125244, "step": 2165 }, { "epoch": 0.34, "learning_rate": 1.2558702214845168e-05, "logits/chosen": -2.057809591293335, "logits/rejected": -2.976639747619629, "logps/chosen": -129.890869140625, "logps/rejected": -278.5553894042969, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.9000561237335205, "rewards/margins": 6.514030456542969, "rewards/rejected": -8.41408634185791, "step": 2166 }, { "epoch": 0.34, "learning_rate": 1.255796877431402e-05, "logits/chosen": -2.4198405742645264, "logits/rejected": -2.7910313606262207, "logps/chosen": -105.95323944091797, "logps/rejected": -281.0646057128906, "loss": 1.5147, "rewards/accuracies": 0.5, "rewards/chosen": -3.398550033569336, "rewards/margins": 3.909916639328003, "rewards/rejected": -7.308466911315918, "step": 2167 }, { "epoch": 0.34, "learning_rate": 1.2557235333782872e-05, "logits/chosen": -2.1260359287261963, "logits/rejected": -2.7083094120025635, "logps/chosen": -329.5784912109375, "logps/rejected": -340.1706237792969, "loss": 1.511, "rewards/accuracies": 0.5, "rewards/chosen": -4.719424247741699, "rewards/margins": 1.0285407304763794, "rewards/rejected": -5.747965335845947, "step": 2168 }, { "epoch": 0.34, "learning_rate": 1.2556501893251724e-05, "logits/chosen": -2.9149296283721924, "logits/rejected": -2.016552448272705, "logps/chosen": -271.79693603515625, "logps/rejected": -151.38560485839844, "loss": 1.0294, "rewards/accuracies": 0.5, "rewards/chosen": -2.400377035140991, "rewards/margins": 2.4899654388427734, "rewards/rejected": -4.8903422355651855, "step": 2169 }, { "epoch": 0.34, "learning_rate": 1.2555768452720577e-05, "logits/chosen": -2.340398073196411, "logits/rejected": -2.722790479660034, "logps/chosen": -161.9694061279297, "logps/rejected": -216.26683044433594, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -1.695397973060608, "rewards/margins": 4.7656097412109375, "rewards/rejected": -6.461007118225098, "step": 2170 }, { "epoch": 0.34, "learning_rate": 1.255503501218943e-05, "logits/chosen": -2.3758912086486816, "logits/rejected": -2.8332624435424805, "logps/chosen": -176.36521911621094, "logps/rejected": -289.33123779296875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.455780029296875, "rewards/margins": 6.286509990692139, "rewards/rejected": -8.742290496826172, "step": 2171 }, { "epoch": 0.34, "learning_rate": 1.2554301571658281e-05, "logits/chosen": -2.172569751739502, "logits/rejected": -2.999516248703003, "logps/chosen": -358.19732666015625, "logps/rejected": -479.5684509277344, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": -3.244556427001953, "rewards/margins": 3.6531028747558594, "rewards/rejected": -6.8976593017578125, "step": 2172 }, { "epoch": 0.34, "learning_rate": 1.2553568131127133e-05, "logits/chosen": -1.8816980123519897, "logits/rejected": -2.188568115234375, "logps/chosen": -263.74920654296875, "logps/rejected": -620.32666015625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.560946226119995, "rewards/margins": 5.901303768157959, "rewards/rejected": -8.462249755859375, "step": 2173 }, { "epoch": 0.34, "learning_rate": 1.2552834690595985e-05, "logits/chosen": -2.8827104568481445, "logits/rejected": -2.6629204750061035, "logps/chosen": -181.80953979492188, "logps/rejected": -289.5757141113281, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.7794349193572998, "rewards/margins": 6.740777492523193, "rewards/rejected": -8.520212173461914, "step": 2174 }, { "epoch": 0.34, "learning_rate": 1.2552101250064837e-05, "logits/chosen": -1.7015905380249023, "logits/rejected": -2.821038007736206, "logps/chosen": -285.3590393066406, "logps/rejected": -491.2203674316406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.625875949859619, "rewards/margins": 7.533573150634766, "rewards/rejected": -10.159448623657227, "step": 2175 }, { "epoch": 0.34, "learning_rate": 1.2551367809533689e-05, "logits/chosen": -2.128335475921631, "logits/rejected": -2.8832433223724365, "logps/chosen": -243.88351440429688, "logps/rejected": -275.90667724609375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -2.722419023513794, "rewards/margins": 4.068817138671875, "rewards/rejected": -6.79123592376709, "step": 2176 }, { "epoch": 0.34, "learning_rate": 1.255063436900254e-05, "logits/chosen": -2.2593204975128174, "logits/rejected": -2.8204479217529297, "logps/chosen": -157.63787841796875, "logps/rejected": -263.0927429199219, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4166290760040283, "rewards/margins": 4.335958957672119, "rewards/rejected": -5.752588272094727, "step": 2177 }, { "epoch": 0.34, "learning_rate": 1.2549900928471392e-05, "logits/chosen": -2.7501132488250732, "logits/rejected": -2.5744540691375732, "logps/chosen": -536.2553100585938, "logps/rejected": -543.1029052734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6235320568084717, "rewards/margins": 8.889371871948242, "rewards/rejected": -10.51290512084961, "step": 2178 }, { "epoch": 0.34, "learning_rate": 1.2549167487940246e-05, "logits/chosen": -2.722712755203247, "logits/rejected": -2.904010057449341, "logps/chosen": -359.4263916015625, "logps/rejected": -509.6830139160156, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.3082504272460938, "rewards/margins": 5.8221635818481445, "rewards/rejected": -7.130414009094238, "step": 2179 }, { "epoch": 0.34, "learning_rate": 1.2548434047409098e-05, "logits/chosen": -2.4701833724975586, "logits/rejected": -2.5576171875, "logps/chosen": -73.60404968261719, "logps/rejected": -191.76177978515625, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -1.9676761627197266, "rewards/margins": 5.786464214324951, "rewards/rejected": -7.754140377044678, "step": 2180 }, { "epoch": 0.34, "learning_rate": 1.2547700606877951e-05, "logits/chosen": -2.7189576625823975, "logits/rejected": -1.5720081329345703, "logps/chosen": -486.4772644042969, "logps/rejected": -315.209716796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.641849160194397, "rewards/margins": 7.437530517578125, "rewards/rejected": -9.07938003540039, "step": 2181 }, { "epoch": 0.34, "learning_rate": 1.2546967166346803e-05, "logits/chosen": -1.247725486755371, "logits/rejected": -2.977224588394165, "logps/chosen": -117.85505676269531, "logps/rejected": -389.08575439453125, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -1.8966708183288574, "rewards/margins": 4.069189071655273, "rewards/rejected": -5.965859889984131, "step": 2182 }, { "epoch": 0.34, "learning_rate": 1.2546233725815655e-05, "logits/chosen": -2.9481310844421387, "logits/rejected": -2.301292896270752, "logps/chosen": -327.5700378417969, "logps/rejected": -283.380859375, "loss": 3.3139, "rewards/accuracies": 0.5, "rewards/chosen": -5.7320356369018555, "rewards/margins": -0.6682937145233154, "rewards/rejected": -5.063742160797119, "step": 2183 }, { "epoch": 0.34, "learning_rate": 1.2545500285284507e-05, "logits/chosen": -2.349722385406494, "logits/rejected": -2.6895439624786377, "logps/chosen": -269.39398193359375, "logps/rejected": -296.54669189453125, "loss": 1.2027, "rewards/accuracies": 0.5, "rewards/chosen": -4.626336097717285, "rewards/margins": 3.299683094024658, "rewards/rejected": -7.926018714904785, "step": 2184 }, { "epoch": 0.34, "learning_rate": 1.2544766844753359e-05, "logits/chosen": -2.992603302001953, "logits/rejected": -2.7619547843933105, "logps/chosen": -99.36121368408203, "logps/rejected": -201.71836853027344, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9297091960906982, "rewards/margins": 6.5147294998168945, "rewards/rejected": -8.444438934326172, "step": 2185 }, { "epoch": 0.34, "learning_rate": 1.254403340422221e-05, "logits/chosen": -1.9523485898971558, "logits/rejected": -2.7602174282073975, "logps/chosen": -271.4727783203125, "logps/rejected": -310.539306640625, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -3.3436074256896973, "rewards/margins": 4.27731990814209, "rewards/rejected": -7.620926856994629, "step": 2186 }, { "epoch": 0.34, "learning_rate": 1.2543299963691063e-05, "logits/chosen": -2.792679786682129, "logits/rejected": -2.6653103828430176, "logps/chosen": -448.04925537109375, "logps/rejected": -458.8905334472656, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.0335533618927, "rewards/margins": 6.476314544677734, "rewards/rejected": -8.509868621826172, "step": 2187 }, { "epoch": 0.34, "learning_rate": 1.2542566523159916e-05, "logits/chosen": -3.0253183841705322, "logits/rejected": -2.5429506301879883, "logps/chosen": -288.5469665527344, "logps/rejected": -304.0860900878906, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -3.0557756423950195, "rewards/margins": 5.79490852355957, "rewards/rejected": -8.85068416595459, "step": 2188 }, { "epoch": 0.34, "learning_rate": 1.2541833082628768e-05, "logits/chosen": -2.4666271209716797, "logits/rejected": -2.4519550800323486, "logps/chosen": -326.07373046875, "logps/rejected": -443.8735656738281, "loss": 3.2946, "rewards/accuracies": 0.5, "rewards/chosen": -5.899214267730713, "rewards/margins": 0.9913718700408936, "rewards/rejected": -6.890585899353027, "step": 2189 }, { "epoch": 0.34, "learning_rate": 1.254109964209762e-05, "logits/chosen": -1.9748197793960571, "logits/rejected": -2.835550546646118, "logps/chosen": -161.02667236328125, "logps/rejected": -286.75628662109375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.2772674560546875, "rewards/margins": 6.296506404876709, "rewards/rejected": -8.573774337768555, "step": 2190 }, { "epoch": 0.34, "learning_rate": 1.2540366201566472e-05, "logits/chosen": -2.8599541187286377, "logits/rejected": -2.6866681575775146, "logps/chosen": -409.1099548339844, "logps/rejected": -418.57110595703125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -2.7625350952148438, "rewards/margins": 5.630793571472168, "rewards/rejected": -8.393328666687012, "step": 2191 }, { "epoch": 0.34, "learning_rate": 1.2539632761035324e-05, "logits/chosen": -2.3761866092681885, "logits/rejected": -3.1336989402770996, "logps/chosen": -110.33470916748047, "logps/rejected": -558.5596923828125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.5931057929992676, "rewards/margins": 4.86275053024292, "rewards/rejected": -7.4558563232421875, "step": 2192 }, { "epoch": 0.34, "learning_rate": 1.2538899320504176e-05, "logits/chosen": -2.6266024112701416, "logits/rejected": -2.70304536819458, "logps/chosen": -230.66526794433594, "logps/rejected": -341.58258056640625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.8855559825897217, "rewards/margins": 5.9061737060546875, "rewards/rejected": -8.791728973388672, "step": 2193 }, { "epoch": 0.34, "learning_rate": 1.2538165879973027e-05, "logits/chosen": -2.5777604579925537, "logits/rejected": -2.81215500831604, "logps/chosen": -448.8651123046875, "logps/rejected": -513.51708984375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.4573776721954346, "rewards/margins": 6.484890937805176, "rewards/rejected": -8.942268371582031, "step": 2194 }, { "epoch": 0.34, "learning_rate": 1.253743243944188e-05, "logits/chosen": -2.8014416694641113, "logits/rejected": -2.5703506469726562, "logps/chosen": -332.9901123046875, "logps/rejected": -342.372802734375, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -1.8343925476074219, "rewards/margins": 5.269899368286133, "rewards/rejected": -7.104291915893555, "step": 2195 }, { "epoch": 0.34, "learning_rate": 1.2536698998910731e-05, "logits/chosen": -0.958946168422699, "logits/rejected": -2.702291488647461, "logps/chosen": -211.99063110351562, "logps/rejected": -522.1834106445312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.3499491214752197, "rewards/margins": 8.019082069396973, "rewards/rejected": -10.369030952453613, "step": 2196 }, { "epoch": 0.34, "learning_rate": 1.2535965558379585e-05, "logits/chosen": -2.3760550022125244, "logits/rejected": -2.2566611766815186, "logps/chosen": -208.58987426757812, "logps/rejected": -196.46080017089844, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.225314736366272, "rewards/margins": 6.9962158203125, "rewards/rejected": -8.221529960632324, "step": 2197 }, { "epoch": 0.34, "learning_rate": 1.2535232117848437e-05, "logits/chosen": -2.5511927604675293, "logits/rejected": -2.0748372077941895, "logps/chosen": -303.3442077636719, "logps/rejected": -371.5540771484375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.435708999633789, "rewards/margins": 7.379970550537109, "rewards/rejected": -9.815679550170898, "step": 2198 }, { "epoch": 0.34, "learning_rate": 1.2534498677317289e-05, "logits/chosen": -3.3057379722595215, "logits/rejected": -3.1022393703460693, "logps/chosen": -171.72564697265625, "logps/rejected": -319.38775634765625, "loss": 3.1312, "rewards/accuracies": 0.5, "rewards/chosen": -4.246889114379883, "rewards/margins": 2.267354965209961, "rewards/rejected": -6.514244079589844, "step": 2199 }, { "epoch": 0.34, "learning_rate": 1.253376523678614e-05, "logits/chosen": -2.415224552154541, "logits/rejected": -2.8489792346954346, "logps/chosen": -362.34033203125, "logps/rejected": -424.38226318359375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.640704393386841, "rewards/margins": 8.074625015258789, "rewards/rejected": -10.71532917022705, "step": 2200 }, { "epoch": 0.34, "learning_rate": 1.2533031796254992e-05, "logits/chosen": -2.6386468410491943, "logits/rejected": -2.8276970386505127, "logps/chosen": -44.30043029785156, "logps/rejected": -108.04269409179688, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": -1.7177624702453613, "rewards/margins": 3.378506660461426, "rewards/rejected": -5.096269130706787, "step": 2201 }, { "epoch": 0.34, "learning_rate": 1.2532298355723844e-05, "logits/chosen": -2.953805923461914, "logits/rejected": -2.2545254230499268, "logps/chosen": -342.8008117675781, "logps/rejected": -214.8377685546875, "loss": 3.8004, "rewards/accuracies": 0.5, "rewards/chosen": -6.981744766235352, "rewards/margins": -1.2402033805847168, "rewards/rejected": -5.741541385650635, "step": 2202 }, { "epoch": 0.34, "learning_rate": 1.2531564915192696e-05, "logits/chosen": -1.1802161931991577, "logits/rejected": -2.1875739097595215, "logps/chosen": -109.72802734375, "logps/rejected": -241.9433135986328, "loss": 1.1187, "rewards/accuracies": 0.5, "rewards/chosen": -3.8010268211364746, "rewards/margins": 2.0006775856018066, "rewards/rejected": -5.801704406738281, "step": 2203 }, { "epoch": 0.34, "learning_rate": 1.2530831474661548e-05, "logits/chosen": -2.0498549938201904, "logits/rejected": -2.6885597705841064, "logps/chosen": -247.7301788330078, "logps/rejected": -356.7474670410156, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -2.8025174140930176, "rewards/margins": 4.594727993011475, "rewards/rejected": -7.397245407104492, "step": 2204 }, { "epoch": 0.34, "learning_rate": 1.25300980341304e-05, "logits/chosen": -2.1782102584838867, "logits/rejected": -2.999845266342163, "logps/chosen": -224.41390991210938, "logps/rejected": -397.753662109375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.6739773750305176, "rewards/margins": 5.478525161743164, "rewards/rejected": -8.15250301361084, "step": 2205 }, { "epoch": 0.34, "learning_rate": 1.2529364593599253e-05, "logits/chosen": -2.587411880493164, "logits/rejected": -2.393770456314087, "logps/chosen": -294.79638671875, "logps/rejected": -380.0209655761719, "loss": 2.0711, "rewards/accuracies": 0.5, "rewards/chosen": -5.386099338531494, "rewards/margins": 1.2906205654144287, "rewards/rejected": -6.676719665527344, "step": 2206 }, { "epoch": 0.34, "learning_rate": 1.2528631153068105e-05, "logits/chosen": -2.5936572551727295, "logits/rejected": -2.8208117485046387, "logps/chosen": -215.6241455078125, "logps/rejected": -322.2091064453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.4487130641937256, "rewards/margins": 7.338522911071777, "rewards/rejected": -9.787236213684082, "step": 2207 }, { "epoch": 0.34, "learning_rate": 1.2527897712536957e-05, "logits/chosen": -0.795987069606781, "logits/rejected": -2.8889002799987793, "logps/chosen": -47.83919143676758, "logps/rejected": -388.8279724121094, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.5434404611587524, "rewards/margins": 9.268054008483887, "rewards/rejected": -10.811494827270508, "step": 2208 }, { "epoch": 0.34, "learning_rate": 1.2527164272005809e-05, "logits/chosen": -1.383734941482544, "logits/rejected": -2.6603100299835205, "logps/chosen": -519.3251342773438, "logps/rejected": -659.7269287109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9661293029785156, "rewards/margins": 10.360596656799316, "rewards/rejected": -12.326726913452148, "step": 2209 }, { "epoch": 0.34, "learning_rate": 1.2526430831474661e-05, "logits/chosen": -1.9269744157791138, "logits/rejected": -2.6956825256347656, "logps/chosen": -140.18751525878906, "logps/rejected": -391.216064453125, "loss": 1.6694, "rewards/accuracies": 0.5, "rewards/chosen": -3.148806571960449, "rewards/margins": 0.07884371280670166, "rewards/rejected": -3.2276501655578613, "step": 2210 }, { "epoch": 0.34, "learning_rate": 1.2525697390943513e-05, "logits/chosen": -1.5325838327407837, "logits/rejected": -2.53310227394104, "logps/chosen": -82.86753845214844, "logps/rejected": -333.49761962890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.171955108642578, "rewards/margins": 7.52036190032959, "rewards/rejected": -9.692316055297852, "step": 2211 }, { "epoch": 0.34, "learning_rate": 1.2524963950412365e-05, "logits/chosen": -1.2818313837051392, "logits/rejected": -2.8453469276428223, "logps/chosen": -162.77743530273438, "logps/rejected": -486.5615234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.3377768993377686, "rewards/margins": 8.688227653503418, "rewards/rejected": -11.026004791259766, "step": 2212 }, { "epoch": 0.34, "learning_rate": 1.2524230509881218e-05, "logits/chosen": -1.5823886394500732, "logits/rejected": -2.6944711208343506, "logps/chosen": -124.97369384765625, "logps/rejected": -318.0306701660156, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.4233696460723877, "rewards/margins": 6.752100944519043, "rewards/rejected": -9.175470352172852, "step": 2213 }, { "epoch": 0.34, "learning_rate": 1.252349706935007e-05, "logits/chosen": -3.3107142448425293, "logits/rejected": -2.702587604522705, "logps/chosen": -356.5397644042969, "logps/rejected": -137.3244171142578, "loss": 3.5819, "rewards/accuracies": 0.5, "rewards/chosen": -5.481584072113037, "rewards/margins": -1.1596782207489014, "rewards/rejected": -4.321906089782715, "step": 2214 }, { "epoch": 0.34, "learning_rate": 1.2522763628818924e-05, "logits/chosen": -2.809054136276245, "logits/rejected": -2.681180953979492, "logps/chosen": -258.66864013671875, "logps/rejected": -280.0461120605469, "loss": 2.2962, "rewards/accuracies": 0.5, "rewards/chosen": -4.337518215179443, "rewards/margins": 0.9703443050384521, "rewards/rejected": -5.307862758636475, "step": 2215 }, { "epoch": 0.34, "learning_rate": 1.2522030188287776e-05, "logits/chosen": -2.024669885635376, "logits/rejected": -2.677154064178467, "logps/chosen": -142.13351440429688, "logps/rejected": -345.6229248046875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -2.0965843200683594, "rewards/margins": 6.140012264251709, "rewards/rejected": -8.236597061157227, "step": 2216 }, { "epoch": 0.34, "learning_rate": 1.2521296747756627e-05, "logits/chosen": -2.6736207008361816, "logits/rejected": -3.1519575119018555, "logps/chosen": -329.6849365234375, "logps/rejected": -326.11077880859375, "loss": 3.425, "rewards/accuracies": 0.5, "rewards/chosen": -4.3420090675354, "rewards/margins": 0.8589944839477539, "rewards/rejected": -5.201003074645996, "step": 2217 }, { "epoch": 0.34, "learning_rate": 1.252056330722548e-05, "logits/chosen": -2.5505332946777344, "logits/rejected": -2.2114884853363037, "logps/chosen": -439.75946044921875, "logps/rejected": -430.7539978027344, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3146908283233643, "rewards/margins": 6.2416276931762695, "rewards/rejected": -7.556318759918213, "step": 2218 }, { "epoch": 0.35, "learning_rate": 1.2519829866694331e-05, "logits/chosen": -2.022899866104126, "logits/rejected": -2.9478416442871094, "logps/chosen": -72.07849884033203, "logps/rejected": -301.6375732421875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.0943729877471924, "rewards/margins": 7.759263038635254, "rewards/rejected": -8.853635787963867, "step": 2219 }, { "epoch": 0.35, "learning_rate": 1.2519096426163183e-05, "logits/chosen": -2.581768035888672, "logits/rejected": -2.9554169178009033, "logps/chosen": -211.9912109375, "logps/rejected": -399.17620849609375, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -3.380298137664795, "rewards/margins": 4.1292524337768555, "rewards/rejected": -7.50955057144165, "step": 2220 }, { "epoch": 0.35, "learning_rate": 1.2518362985632035e-05, "logits/chosen": -2.7423324584960938, "logits/rejected": -2.817354440689087, "logps/chosen": -124.14991760253906, "logps/rejected": -319.5126953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.0918960571289062, "rewards/margins": 8.715707778930664, "rewards/rejected": -10.80760383605957, "step": 2221 }, { "epoch": 0.35, "learning_rate": 1.2517629545100887e-05, "logits/chosen": -2.0857486724853516, "logits/rejected": -2.8687548637390137, "logps/chosen": -209.10665893554688, "logps/rejected": -234.42864990234375, "loss": 2.029, "rewards/accuracies": 0.5, "rewards/chosen": -4.1664628982543945, "rewards/margins": 0.6574604511260986, "rewards/rejected": -4.823923587799072, "step": 2222 }, { "epoch": 0.35, "learning_rate": 1.251689610456974e-05, "logits/chosen": -2.5136570930480957, "logits/rejected": -2.91032338142395, "logps/chosen": -83.58021545410156, "logps/rejected": -338.4887390136719, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.7308523654937744, "rewards/margins": 10.095977783203125, "rewards/rejected": -11.82682991027832, "step": 2223 }, { "epoch": 0.35, "learning_rate": 1.2516162664038592e-05, "logits/chosen": -2.063693046569824, "logits/rejected": -0.7853249907493591, "logps/chosen": -396.79217529296875, "logps/rejected": -239.8372802734375, "loss": 3.9326, "rewards/accuracies": 0.5, "rewards/chosen": -6.6124467849731445, "rewards/margins": -1.6058621406555176, "rewards/rejected": -5.006585121154785, "step": 2224 }, { "epoch": 0.35, "learning_rate": 1.2515429223507444e-05, "logits/chosen": -2.8844025135040283, "logits/rejected": -2.2383477687835693, "logps/chosen": -950.51708984375, "logps/rejected": -573.67041015625, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -3.0531158447265625, "rewards/margins": 4.440223693847656, "rewards/rejected": -7.493339538574219, "step": 2225 }, { "epoch": 0.35, "learning_rate": 1.2514695782976296e-05, "logits/chosen": -2.4522705078125, "logits/rejected": -2.7682981491088867, "logps/chosen": -177.341552734375, "logps/rejected": -118.44910430908203, "loss": 2.9316, "rewards/accuracies": 0.5, "rewards/chosen": -4.499774932861328, "rewards/margins": -0.47160911560058594, "rewards/rejected": -4.028165817260742, "step": 2226 }, { "epoch": 0.35, "learning_rate": 1.2513962342445148e-05, "logits/chosen": -2.736126661300659, "logits/rejected": -3.017786979675293, "logps/chosen": -62.31822204589844, "logps/rejected": -252.1658477783203, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.4174776077270508, "rewards/margins": 6.788203239440918, "rewards/rejected": -8.205680847167969, "step": 2227 }, { "epoch": 0.35, "learning_rate": 1.2513228901914e-05, "logits/chosen": -1.6351536512374878, "logits/rejected": -3.0018694400787354, "logps/chosen": -277.1517333984375, "logps/rejected": -743.7939453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.9279375076293945, "rewards/margins": 10.78672981262207, "rewards/rejected": -13.714667320251465, "step": 2228 }, { "epoch": 0.35, "learning_rate": 1.2512495461382852e-05, "logits/chosen": -2.704253673553467, "logits/rejected": -2.937350273132324, "logps/chosen": -47.74774169921875, "logps/rejected": -212.27911376953125, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -2.2456750869750977, "rewards/margins": 4.441483974456787, "rewards/rejected": -6.687159061431885, "step": 2229 }, { "epoch": 0.35, "learning_rate": 1.2511762020851704e-05, "logits/chosen": -2.138533115386963, "logits/rejected": -2.9647459983825684, "logps/chosen": -466.0723876953125, "logps/rejected": -519.823486328125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.1149611473083496, "rewards/margins": 4.945960998535156, "rewards/rejected": -7.060922622680664, "step": 2230 }, { "epoch": 0.35, "learning_rate": 1.2511028580320555e-05, "logits/chosen": -2.794957160949707, "logits/rejected": -2.322631359100342, "logps/chosen": -340.2454833984375, "logps/rejected": -311.5870056152344, "loss": 3.2115, "rewards/accuracies": 0.5, "rewards/chosen": -5.785667419433594, "rewards/margins": -0.5961425304412842, "rewards/rejected": -5.189525127410889, "step": 2231 }, { "epoch": 0.35, "learning_rate": 1.2510295139789409e-05, "logits/chosen": -3.2779910564422607, "logits/rejected": -3.2375731468200684, "logps/chosen": -119.84105682373047, "logps/rejected": -150.00491333007812, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.2340433597564697, "rewards/margins": 4.855340003967285, "rewards/rejected": -6.089383125305176, "step": 2232 }, { "epoch": 0.35, "learning_rate": 1.2509561699258261e-05, "logits/chosen": -2.4019196033477783, "logits/rejected": -2.602271556854248, "logps/chosen": -689.0556030273438, "logps/rejected": -702.46533203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.902148485183716, "rewards/margins": 8.07069206237793, "rewards/rejected": -10.972841262817383, "step": 2233 }, { "epoch": 0.35, "learning_rate": 1.2508828258727113e-05, "logits/chosen": -2.230456829071045, "logits/rejected": -2.5541200637817383, "logps/chosen": -503.66802978515625, "logps/rejected": -438.9344482421875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.834326267242432, "rewards/margins": 6.137855529785156, "rewards/rejected": -10.97218132019043, "step": 2234 }, { "epoch": 0.35, "learning_rate": 1.2508094818195965e-05, "logits/chosen": -2.8694958686828613, "logits/rejected": -3.0226709842681885, "logps/chosen": -54.5806770324707, "logps/rejected": -133.81494140625, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -1.7393994331359863, "rewards/margins": 3.0839343070983887, "rewards/rejected": -4.823333740234375, "step": 2235 }, { "epoch": 0.35, "learning_rate": 1.2507361377664817e-05, "logits/chosen": -2.49833083152771, "logits/rejected": -2.934403419494629, "logps/chosen": -230.68907165527344, "logps/rejected": -331.9930419921875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.8003387451171875, "rewards/margins": 8.037676811218262, "rewards/rejected": -8.83801555633545, "step": 2236 }, { "epoch": 0.35, "learning_rate": 1.2506627937133668e-05, "logits/chosen": -1.8921483755111694, "logits/rejected": -2.957416534423828, "logps/chosen": -79.60922241210938, "logps/rejected": -243.71240234375, "loss": 0.087, "rewards/accuracies": 1.0, "rewards/chosen": -2.309573173522949, "rewards/margins": 3.7132339477539062, "rewards/rejected": -6.0228071212768555, "step": 2237 }, { "epoch": 0.35, "learning_rate": 1.250589449660252e-05, "logits/chosen": -3.2658050060272217, "logits/rejected": -2.418210029602051, "logps/chosen": -404.081298828125, "logps/rejected": -171.896484375, "loss": 5.637, "rewards/accuracies": 0.0, "rewards/chosen": -6.991002559661865, "rewards/margins": -5.631969928741455, "rewards/rejected": -1.3590326309204102, "step": 2238 }, { "epoch": 0.35, "learning_rate": 1.2505161056071372e-05, "logits/chosen": -2.6238396167755127, "logits/rejected": -2.8153951168060303, "logps/chosen": -73.04051971435547, "logps/rejected": -181.59866333007812, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -2.1180009841918945, "rewards/margins": 4.027937412261963, "rewards/rejected": -6.145938396453857, "step": 2239 }, { "epoch": 0.35, "learning_rate": 1.2504427615540224e-05, "logits/chosen": -1.8051589727401733, "logits/rejected": -2.455798625946045, "logps/chosen": -222.51919555664062, "logps/rejected": -247.74908447265625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.8230717182159424, "rewards/margins": 6.261548042297363, "rewards/rejected": -8.084619522094727, "step": 2240 }, { "epoch": 0.35, "learning_rate": 1.2503694175009078e-05, "logits/chosen": -2.576022148132324, "logits/rejected": -2.812767505645752, "logps/chosen": -271.2547607421875, "logps/rejected": -172.80636596679688, "loss": 0.2287, "rewards/accuracies": 1.0, "rewards/chosen": -2.4467196464538574, "rewards/margins": 1.6349701881408691, "rewards/rejected": -4.081689834594727, "step": 2241 }, { "epoch": 0.35, "learning_rate": 1.250296073447793e-05, "logits/chosen": -1.560270071029663, "logits/rejected": -2.904284954071045, "logps/chosen": -173.43450927734375, "logps/rejected": -274.06219482421875, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -2.4157748222351074, "rewards/margins": 6.1259589195251465, "rewards/rejected": -8.541733741760254, "step": 2242 }, { "epoch": 0.35, "learning_rate": 1.2502227293946781e-05, "logits/chosen": -2.8284382820129395, "logits/rejected": -3.1634013652801514, "logps/chosen": -83.31697845458984, "logps/rejected": -274.031982421875, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": -2.017894744873047, "rewards/margins": 3.8994522094726562, "rewards/rejected": -5.917346954345703, "step": 2243 }, { "epoch": 0.35, "learning_rate": 1.2501493853415633e-05, "logits/chosen": -2.6004507541656494, "logits/rejected": -2.8432676792144775, "logps/chosen": -152.42721557617188, "logps/rejected": -284.53436279296875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.928272008895874, "rewards/margins": 9.873164176940918, "rewards/rejected": -12.801436424255371, "step": 2244 }, { "epoch": 0.35, "learning_rate": 1.2500760412884485e-05, "logits/chosen": -2.4638423919677734, "logits/rejected": -2.6806654930114746, "logps/chosen": -376.405029296875, "logps/rejected": -332.9576110839844, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.662139892578125, "rewards/margins": 5.733366966247559, "rewards/rejected": -7.395506858825684, "step": 2245 }, { "epoch": 0.35, "learning_rate": 1.2500026972353337e-05, "logits/chosen": -2.243256092071533, "logits/rejected": -2.8356096744537354, "logps/chosen": -360.9831237792969, "logps/rejected": -369.42877197265625, "loss": 2.6686, "rewards/accuracies": 0.5, "rewards/chosen": -6.087298393249512, "rewards/margins": 0.3854644298553467, "rewards/rejected": -6.4727630615234375, "step": 2246 }, { "epoch": 0.35, "learning_rate": 1.249929353182219e-05, "logits/chosen": -2.6834728717803955, "logits/rejected": -2.680711269378662, "logps/chosen": -312.14300537109375, "logps/rejected": -510.1964111328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2895333766937256, "rewards/margins": 11.445968627929688, "rewards/rejected": -12.735501289367676, "step": 2247 }, { "epoch": 0.35, "learning_rate": 1.2498560091291042e-05, "logits/chosen": -2.5170624256134033, "logits/rejected": -3.277876615524292, "logps/chosen": -123.07451629638672, "logps/rejected": -379.224853515625, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": -0.6676467061042786, "rewards/margins": 4.006841659545898, "rewards/rejected": -4.674488544464111, "step": 2248 }, { "epoch": 0.35, "learning_rate": 1.2497826650759894e-05, "logits/chosen": -2.5777950286865234, "logits/rejected": -3.1642556190490723, "logps/chosen": -102.49906921386719, "logps/rejected": -455.5159606933594, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -2.442474126815796, "rewards/margins": 8.94882583618164, "rewards/rejected": -11.3912992477417, "step": 2249 }, { "epoch": 0.35, "learning_rate": 1.2497093210228748e-05, "logits/chosen": -2.1470139026641846, "logits/rejected": -2.784883737564087, "logps/chosen": -388.5838928222656, "logps/rejected": -517.0206298828125, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -1.3187968730926514, "rewards/margins": 4.71220588684082, "rewards/rejected": -6.031002998352051, "step": 2250 }, { "epoch": 0.35, "learning_rate": 1.24963597696976e-05, "logits/chosen": -2.8307788372039795, "logits/rejected": -3.2060019969940186, "logps/chosen": -141.31887817382812, "logps/rejected": -175.83560180664062, "loss": 3.4381, "rewards/accuracies": 0.5, "rewards/chosen": -5.088850021362305, "rewards/margins": -1.3535683155059814, "rewards/rejected": -3.735281467437744, "step": 2251 }, { "epoch": 0.35, "learning_rate": 1.2495626329166452e-05, "logits/chosen": -2.5900073051452637, "logits/rejected": -3.1395487785339355, "logps/chosen": -136.78221130371094, "logps/rejected": -271.376220703125, "loss": 2.091, "rewards/accuracies": 0.5, "rewards/chosen": -3.8037774562835693, "rewards/margins": 2.0056169033050537, "rewards/rejected": -5.809394359588623, "step": 2252 }, { "epoch": 0.35, "learning_rate": 1.2494892888635304e-05, "logits/chosen": -2.4814836978912354, "logits/rejected": -2.823221445083618, "logps/chosen": -92.31765747070312, "logps/rejected": -175.2329559326172, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -1.239087700843811, "rewards/margins": 4.048274517059326, "rewards/rejected": -5.287362575531006, "step": 2253 }, { "epoch": 0.35, "learning_rate": 1.2494159448104155e-05, "logits/chosen": -1.352428913116455, "logits/rejected": -2.602966547012329, "logps/chosen": -80.74443054199219, "logps/rejected": -369.9889221191406, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.8146376609802246, "rewards/margins": 5.918887138366699, "rewards/rejected": -8.733525276184082, "step": 2254 }, { "epoch": 0.35, "learning_rate": 1.2493426007573007e-05, "logits/chosen": -2.4064762592315674, "logits/rejected": -2.884829521179199, "logps/chosen": -200.78781127929688, "logps/rejected": -534.2943725585938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7168502807617188, "rewards/margins": 9.162114143371582, "rewards/rejected": -10.8789644241333, "step": 2255 }, { "epoch": 0.35, "learning_rate": 1.249269256704186e-05, "logits/chosen": -1.3758713006973267, "logits/rejected": -2.5986437797546387, "logps/chosen": -96.81846618652344, "logps/rejected": -387.3177490234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.449336051940918, "rewards/margins": 7.458563327789307, "rewards/rejected": -9.907898902893066, "step": 2256 }, { "epoch": 0.35, "learning_rate": 1.2491959126510711e-05, "logits/chosen": -2.9586942195892334, "logits/rejected": -1.9457693099975586, "logps/chosen": -512.265869140625, "logps/rejected": -250.5595703125, "loss": 0.7105, "rewards/accuracies": 0.5, "rewards/chosen": -3.789328098297119, "rewards/margins": 3.1995770931243896, "rewards/rejected": -6.98890495300293, "step": 2257 }, { "epoch": 0.35, "learning_rate": 1.2491225685979563e-05, "logits/chosen": -3.3084278106689453, "logits/rejected": -3.111523389816284, "logps/chosen": -333.22760009765625, "logps/rejected": -186.07196044921875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.3340539932250977, "rewards/margins": 4.906558990478516, "rewards/rejected": -7.240612983703613, "step": 2258 }, { "epoch": 0.35, "learning_rate": 1.2490492245448417e-05, "logits/chosen": -2.6748952865600586, "logits/rejected": -1.2280693054199219, "logps/chosen": -264.73675537109375, "logps/rejected": -153.4793243408203, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -2.1848723888397217, "rewards/margins": 4.9225897789001465, "rewards/rejected": -7.107461929321289, "step": 2259 }, { "epoch": 0.35, "learning_rate": 1.2489758804917268e-05, "logits/chosen": -1.172792911529541, "logits/rejected": -2.8586559295654297, "logps/chosen": -47.87937927246094, "logps/rejected": -440.6668701171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.8217648267745972, "rewards/margins": 10.421504974365234, "rewards/rejected": -12.243268966674805, "step": 2260 }, { "epoch": 0.35, "learning_rate": 1.248902536438612e-05, "logits/chosen": -2.7851364612579346, "logits/rejected": -2.3367207050323486, "logps/chosen": -240.90768432617188, "logps/rejected": -317.76123046875, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -3.0236873626708984, "rewards/margins": 4.048277854919434, "rewards/rejected": -7.071965217590332, "step": 2261 }, { "epoch": 0.35, "learning_rate": 1.2488291923854972e-05, "logits/chosen": -2.6054975986480713, "logits/rejected": -3.1220524311065674, "logps/chosen": -293.1630859375, "logps/rejected": -486.31561279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0372931957244873, "rewards/margins": 9.83694076538086, "rewards/rejected": -11.874234199523926, "step": 2262 }, { "epoch": 0.35, "learning_rate": 1.2487558483323824e-05, "logits/chosen": -2.8242833614349365, "logits/rejected": -2.4454638957977295, "logps/chosen": -310.6705017089844, "logps/rejected": -242.09280395507812, "loss": 0.6748, "rewards/accuracies": 0.5, "rewards/chosen": -3.4890003204345703, "rewards/margins": 0.6910066604614258, "rewards/rejected": -4.180006980895996, "step": 2263 }, { "epoch": 0.35, "learning_rate": 1.2486825042792676e-05, "logits/chosen": -2.0134363174438477, "logits/rejected": -2.813408613204956, "logps/chosen": -83.01448059082031, "logps/rejected": -370.752685546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.548750638961792, "rewards/margins": 9.763219833374023, "rewards/rejected": -11.311970710754395, "step": 2264 }, { "epoch": 0.35, "learning_rate": 1.2486091602261528e-05, "logits/chosen": -2.2238564491271973, "logits/rejected": -2.6797869205474854, "logps/chosen": -127.93278503417969, "logps/rejected": -372.2897644042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0200047492980957, "rewards/margins": 10.843673706054688, "rewards/rejected": -12.863678932189941, "step": 2265 }, { "epoch": 0.35, "learning_rate": 1.248535816173038e-05, "logits/chosen": -1.2613285779953003, "logits/rejected": -2.634644031524658, "logps/chosen": -109.41958618164062, "logps/rejected": -458.07574462890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.6870741844177246, "rewards/margins": 8.771434783935547, "rewards/rejected": -11.458508491516113, "step": 2266 }, { "epoch": 0.35, "learning_rate": 1.2484624721199232e-05, "logits/chosen": -2.5360395908355713, "logits/rejected": -2.8460967540740967, "logps/chosen": -226.904541015625, "logps/rejected": -295.32440185546875, "loss": 1.752, "rewards/accuracies": 0.5, "rewards/chosen": -4.66717004776001, "rewards/margins": 1.841719388961792, "rewards/rejected": -6.508889198303223, "step": 2267 }, { "epoch": 0.35, "learning_rate": 1.2483891280668085e-05, "logits/chosen": -2.7935688495635986, "logits/rejected": -2.669849395751953, "logps/chosen": -186.2435302734375, "logps/rejected": -245.7740020751953, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.928126573562622, "rewards/margins": 6.599737644195557, "rewards/rejected": -8.527864456176758, "step": 2268 }, { "epoch": 0.35, "learning_rate": 1.2483157840136937e-05, "logits/chosen": -2.9029998779296875, "logits/rejected": -2.601296901702881, "logps/chosen": -403.2633361816406, "logps/rejected": -391.59881591796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9169028997421265, "rewards/margins": 9.280389785766602, "rewards/rejected": -11.197293281555176, "step": 2269 }, { "epoch": 0.35, "learning_rate": 1.2482424399605789e-05, "logits/chosen": -2.5975990295410156, "logits/rejected": -2.0636696815490723, "logps/chosen": -278.4592590332031, "logps/rejected": -214.23129272460938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.253929615020752, "rewards/margins": 7.014163017272949, "rewards/rejected": -9.26809310913086, "step": 2270 }, { "epoch": 0.35, "learning_rate": 1.248169095907464e-05, "logits/chosen": -2.776998281478882, "logits/rejected": -2.5804059505462646, "logps/chosen": -84.57785034179688, "logps/rejected": -156.21044921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4952094554901123, "rewards/margins": 7.01995849609375, "rewards/rejected": -8.515167236328125, "step": 2271 }, { "epoch": 0.35, "learning_rate": 1.2480957518543493e-05, "logits/chosen": -2.5699713230133057, "logits/rejected": -3.080963611602783, "logps/chosen": -78.2861328125, "logps/rejected": -450.60333251953125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.9191220998764038, "rewards/margins": 9.816941261291504, "rewards/rejected": -11.736063003540039, "step": 2272 }, { "epoch": 0.35, "learning_rate": 1.2480224078012345e-05, "logits/chosen": -3.2409324645996094, "logits/rejected": -3.2343406677246094, "logps/chosen": -86.71308898925781, "logps/rejected": -137.1149444580078, "loss": 0.1509, "rewards/accuracies": 1.0, "rewards/chosen": -4.7662739753723145, "rewards/margins": 1.9193248748779297, "rewards/rejected": -6.685598850250244, "step": 2273 }, { "epoch": 0.35, "learning_rate": 1.2479490637481196e-05, "logits/chosen": -0.4709379971027374, "logits/rejected": -2.5948634147644043, "logps/chosen": -33.805885314941406, "logps/rejected": -328.23541259765625, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -1.969465732574463, "rewards/margins": 4.4051923751831055, "rewards/rejected": -6.374658584594727, "step": 2274 }, { "epoch": 0.35, "learning_rate": 1.2478757196950048e-05, "logits/chosen": -2.791771173477173, "logits/rejected": -1.6987841129302979, "logps/chosen": -442.58892822265625, "logps/rejected": -298.19232177734375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.719830274581909, "rewards/margins": 4.383676528930664, "rewards/rejected": -7.103507041931152, "step": 2275 }, { "epoch": 0.35, "learning_rate": 1.24780237564189e-05, "logits/chosen": -3.1075806617736816, "logits/rejected": -2.5171282291412354, "logps/chosen": -193.68905639648438, "logps/rejected": -207.30393981933594, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -2.2726688385009766, "rewards/margins": 3.9502315521240234, "rewards/rejected": -6.222900390625, "step": 2276 }, { "epoch": 0.35, "learning_rate": 1.2477290315887754e-05, "logits/chosen": -1.0759752988815308, "logits/rejected": -1.5168699026107788, "logps/chosen": -31.405132293701172, "logps/rejected": -174.74888610839844, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.457413673400879, "rewards/margins": 5.689571857452393, "rewards/rejected": -7.1469855308532715, "step": 2277 }, { "epoch": 0.35, "learning_rate": 1.2476556875356606e-05, "logits/chosen": -2.8556430339813232, "logits/rejected": -3.142153024673462, "logps/chosen": -293.77679443359375, "logps/rejected": -316.87420654296875, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -2.266667366027832, "rewards/margins": 4.768941402435303, "rewards/rejected": -7.035609245300293, "step": 2278 }, { "epoch": 0.35, "learning_rate": 1.2475823434825457e-05, "logits/chosen": -2.7745559215545654, "logits/rejected": -2.6968002319335938, "logps/chosen": -247.3695831298828, "logps/rejected": -329.0826416015625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -3.506488084793091, "rewards/margins": 4.590277671813965, "rewards/rejected": -8.096765518188477, "step": 2279 }, { "epoch": 0.35, "learning_rate": 1.247508999429431e-05, "logits/chosen": -2.794311761856079, "logits/rejected": -2.8335983753204346, "logps/chosen": -637.928955078125, "logps/rejected": -204.2421875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -3.488654613494873, "rewards/margins": 4.705019950866699, "rewards/rejected": -8.193674087524414, "step": 2280 }, { "epoch": 0.35, "learning_rate": 1.2474356553763161e-05, "logits/chosen": -2.132676601409912, "logits/rejected": -2.978384017944336, "logps/chosen": -131.0524139404297, "logps/rejected": -332.14947509765625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.2481048107147217, "rewards/margins": 4.6260986328125, "rewards/rejected": -6.874203681945801, "step": 2281 }, { "epoch": 0.35, "learning_rate": 1.2473623113232015e-05, "logits/chosen": -3.1067287921905518, "logits/rejected": -3.1488306522369385, "logps/chosen": -224.47564697265625, "logps/rejected": -309.7368469238281, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.1353070735931396, "rewards/margins": 6.065122604370117, "rewards/rejected": -8.200429916381836, "step": 2282 }, { "epoch": 0.36, "learning_rate": 1.2472889672700867e-05, "logits/chosen": -3.06960391998291, "logits/rejected": -2.3839404582977295, "logps/chosen": -745.5437622070312, "logps/rejected": -473.5934143066406, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -2.862056255340576, "rewards/margins": 5.0848236083984375, "rewards/rejected": -7.946879863739014, "step": 2283 }, { "epoch": 0.36, "learning_rate": 1.2472156232169719e-05, "logits/chosen": -0.8296168446540833, "logits/rejected": -1.8826937675476074, "logps/chosen": -186.66070556640625, "logps/rejected": -493.188232421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9679884910583496, "rewards/margins": 10.63899040222168, "rewards/rejected": -13.606979370117188, "step": 2284 }, { "epoch": 0.36, "learning_rate": 1.247142279163857e-05, "logits/chosen": -2.133118152618408, "logits/rejected": -2.6595311164855957, "logps/chosen": -108.1887435913086, "logps/rejected": -227.63197326660156, "loss": 0.1891, "rewards/accuracies": 1.0, "rewards/chosen": -4.754628658294678, "rewards/margins": 3.1397416591644287, "rewards/rejected": -7.894370079040527, "step": 2285 }, { "epoch": 0.36, "learning_rate": 1.2470689351107424e-05, "logits/chosen": -1.4988642930984497, "logits/rejected": -2.485166072845459, "logps/chosen": -152.85494995117188, "logps/rejected": -576.8624267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.422945976257324, "rewards/margins": 13.652250289916992, "rewards/rejected": -16.0751953125, "step": 2286 }, { "epoch": 0.36, "learning_rate": 1.2469955910576276e-05, "logits/chosen": -3.0589520931243896, "logits/rejected": -1.9968233108520508, "logps/chosen": -418.57757568359375, "logps/rejected": -299.659423828125, "loss": 3.9105, "rewards/accuracies": 0.0, "rewards/chosen": -7.822299480438232, "rewards/margins": -3.884521961212158, "rewards/rejected": -3.937777519226074, "step": 2287 }, { "epoch": 0.36, "learning_rate": 1.2469222470045128e-05, "logits/chosen": -2.648852586746216, "logits/rejected": -3.026301145553589, "logps/chosen": -29.714027404785156, "logps/rejected": -241.00030517578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3643444776535034, "rewards/margins": 8.55030632019043, "rewards/rejected": -9.914649963378906, "step": 2288 }, { "epoch": 0.36, "learning_rate": 1.246848902951398e-05, "logits/chosen": -2.277104139328003, "logits/rejected": -2.593417167663574, "logps/chosen": -175.089599609375, "logps/rejected": -486.3211669921875, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -1.8565682172775269, "rewards/margins": 9.050346374511719, "rewards/rejected": -10.906914710998535, "step": 2289 }, { "epoch": 0.36, "learning_rate": 1.2467755588982832e-05, "logits/chosen": -2.405148983001709, "logits/rejected": -2.8661372661590576, "logps/chosen": -53.03694152832031, "logps/rejected": -218.72581481933594, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -2.2953031063079834, "rewards/margins": 4.992961406707764, "rewards/rejected": -7.288264274597168, "step": 2290 }, { "epoch": 0.36, "learning_rate": 1.2467022148451683e-05, "logits/chosen": -2.228235960006714, "logits/rejected": -2.888089179992676, "logps/chosen": -363.72088623046875, "logps/rejected": -346.57666015625, "loss": 3.8526, "rewards/accuracies": 0.5, "rewards/chosen": -6.882190704345703, "rewards/margins": -0.4580817222595215, "rewards/rejected": -6.424108982086182, "step": 2291 }, { "epoch": 0.36, "learning_rate": 1.2466288707920535e-05, "logits/chosen": -2.7241973876953125, "logits/rejected": -3.2517764568328857, "logps/chosen": -81.4399642944336, "logps/rejected": -272.3717346191406, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -1.312528371810913, "rewards/margins": 4.91350793838501, "rewards/rejected": -6.226036071777344, "step": 2292 }, { "epoch": 0.36, "learning_rate": 1.2465555267389387e-05, "logits/chosen": -3.080395460128784, "logits/rejected": -2.501357316970825, "logps/chosen": -352.7489013671875, "logps/rejected": -304.34698486328125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -2.4541175365448, "rewards/margins": 6.305427551269531, "rewards/rejected": -8.75954532623291, "step": 2293 }, { "epoch": 0.36, "learning_rate": 1.2464821826858239e-05, "logits/chosen": -1.8035104274749756, "logits/rejected": -2.622594118118286, "logps/chosen": -44.84225845336914, "logps/rejected": -354.7348937988281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7180631160736084, "rewards/margins": 9.673535346984863, "rewards/rejected": -11.39159870147705, "step": 2294 }, { "epoch": 0.36, "learning_rate": 1.2464088386327093e-05, "logits/chosen": -3.126877546310425, "logits/rejected": -3.1799256801605225, "logps/chosen": -108.9927749633789, "logps/rejected": -198.2611083984375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.7258338928222656, "rewards/margins": 5.679358959197998, "rewards/rejected": -8.405193328857422, "step": 2295 }, { "epoch": 0.36, "learning_rate": 1.2463354945795944e-05, "logits/chosen": -2.378281354904175, "logits/rejected": -2.9639861583709717, "logps/chosen": -157.45907592773438, "logps/rejected": -433.10272216796875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4560606479644775, "rewards/margins": 6.479527473449707, "rewards/rejected": -8.935587882995605, "step": 2296 }, { "epoch": 0.36, "learning_rate": 1.2462621505264796e-05, "logits/chosen": -1.1993138790130615, "logits/rejected": -1.6827774047851562, "logps/chosen": -149.38194274902344, "logps/rejected": -395.3562316894531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.4994921684265137, "rewards/margins": 8.238380432128906, "rewards/rejected": -10.737873077392578, "step": 2297 }, { "epoch": 0.36, "learning_rate": 1.2461888064733648e-05, "logits/chosen": -3.0225069522857666, "logits/rejected": -3.0241262912750244, "logps/chosen": -362.5572814941406, "logps/rejected": -418.89495849609375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.9819564819335938, "rewards/margins": 6.646846294403076, "rewards/rejected": -9.628803253173828, "step": 2298 }, { "epoch": 0.36, "learning_rate": 1.24611546242025e-05, "logits/chosen": -2.776045560836792, "logits/rejected": -3.017598867416382, "logps/chosen": -97.73966979980469, "logps/rejected": -188.6905517578125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.5145702362060547, "rewards/margins": 4.890346050262451, "rewards/rejected": -7.404916763305664, "step": 2299 }, { "epoch": 0.36, "learning_rate": 1.2460421183671352e-05, "logits/chosen": -2.35514497756958, "logits/rejected": -2.8931291103363037, "logps/chosen": -369.5454406738281, "logps/rejected": -431.32220458984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5032958984375, "rewards/margins": 8.443243026733398, "rewards/rejected": -10.946538925170898, "step": 2300 }, { "epoch": 0.36, "learning_rate": 1.2459687743140204e-05, "logits/chosen": -1.4117798805236816, "logits/rejected": -2.7625253200531006, "logps/chosen": -128.261474609375, "logps/rejected": -423.5765380859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.888253927230835, "rewards/margins": 10.395939826965332, "rewards/rejected": -13.284193992614746, "step": 2301 }, { "epoch": 0.36, "learning_rate": 1.2458954302609056e-05, "logits/chosen": -2.7002687454223633, "logits/rejected": -2.9579060077667236, "logps/chosen": -202.0983123779297, "logps/rejected": -214.76254272460938, "loss": 3.631, "rewards/accuracies": 0.5, "rewards/chosen": -7.470002174377441, "rewards/margins": 0.6054081916809082, "rewards/rejected": -8.075409889221191, "step": 2302 }, { "epoch": 0.36, "learning_rate": 1.245822086207791e-05, "logits/chosen": -2.24242901802063, "logits/rejected": -2.8163750171661377, "logps/chosen": -68.89825439453125, "logps/rejected": -458.7456359863281, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.8133275508880615, "rewards/margins": 8.449915885925293, "rewards/rejected": -10.263242721557617, "step": 2303 }, { "epoch": 0.36, "learning_rate": 1.2457487421546761e-05, "logits/chosen": -2.7920539379119873, "logits/rejected": -2.3682851791381836, "logps/chosen": -342.1562805175781, "logps/rejected": -178.77110290527344, "loss": 9.0637, "rewards/accuracies": 0.0, "rewards/chosen": -12.125061988830566, "rewards/margins": -9.06352710723877, "rewards/rejected": -3.0615346431732178, "step": 2304 }, { "epoch": 0.36, "learning_rate": 1.2456753981015613e-05, "logits/chosen": -1.6599782705307007, "logits/rejected": -2.803560972213745, "logps/chosen": -103.9867935180664, "logps/rejected": -432.9834289550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5636374950408936, "rewards/margins": 10.255922317504883, "rewards/rejected": -11.819561004638672, "step": 2305 }, { "epoch": 0.36, "learning_rate": 1.2456020540484465e-05, "logits/chosen": -2.760016918182373, "logits/rejected": -2.7603256702423096, "logps/chosen": -250.5330810546875, "logps/rejected": -311.4820251464844, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.8389320373535156, "rewards/margins": 5.207720756530762, "rewards/rejected": -8.046653747558594, "step": 2306 }, { "epoch": 0.36, "learning_rate": 1.2455287099953317e-05, "logits/chosen": -3.0487465858459473, "logits/rejected": -2.302398681640625, "logps/chosen": -692.7494506835938, "logps/rejected": -529.8844604492188, "loss": 7.3506, "rewards/accuracies": 0.0, "rewards/chosen": -9.73754596710205, "rewards/margins": -7.348814010620117, "rewards/rejected": -2.3887321949005127, "step": 2307 }, { "epoch": 0.36, "learning_rate": 1.2454553659422169e-05, "logits/chosen": -2.1668097972869873, "logits/rejected": -2.983959674835205, "logps/chosen": -94.21692657470703, "logps/rejected": -358.10272216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.208569049835205, "rewards/margins": 10.086666107177734, "rewards/rejected": -12.295235633850098, "step": 2308 }, { "epoch": 0.36, "learning_rate": 1.245382021889102e-05, "logits/chosen": -2.585294008255005, "logits/rejected": -2.9350621700286865, "logps/chosen": -113.36455535888672, "logps/rejected": -183.36181640625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.2971322536468506, "rewards/margins": 5.272923469543457, "rewards/rejected": -7.570055961608887, "step": 2309 }, { "epoch": 0.36, "learning_rate": 1.2453086778359872e-05, "logits/chosen": -1.467890739440918, "logits/rejected": -2.507701873779297, "logps/chosen": -186.8366241455078, "logps/rejected": -421.6246643066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.029938220977783, "rewards/margins": 10.303287506103516, "rewards/rejected": -13.333226203918457, "step": 2310 }, { "epoch": 0.36, "learning_rate": 1.2452353337828724e-05, "logits/chosen": -2.644484519958496, "logits/rejected": -2.3494369983673096, "logps/chosen": -479.44134521484375, "logps/rejected": -561.15380859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.0399861335754395, "rewards/margins": 8.559017181396484, "rewards/rejected": -11.599003791809082, "step": 2311 }, { "epoch": 0.36, "learning_rate": 1.2451619897297578e-05, "logits/chosen": -2.6140975952148438, "logits/rejected": -2.9180567264556885, "logps/chosen": -71.57513427734375, "logps/rejected": -209.23394775390625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.7925715446472168, "rewards/margins": 6.617532253265381, "rewards/rejected": -8.410103797912598, "step": 2312 }, { "epoch": 0.36, "learning_rate": 1.245088645676643e-05, "logits/chosen": -0.8744166493415833, "logits/rejected": -2.7296383380889893, "logps/chosen": -33.00034713745117, "logps/rejected": -393.04400634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3999741077423096, "rewards/margins": 9.47846794128418, "rewards/rejected": -10.878442764282227, "step": 2313 }, { "epoch": 0.36, "learning_rate": 1.2450153016235282e-05, "logits/chosen": -2.5652410984039307, "logits/rejected": -2.675978899002075, "logps/chosen": -406.1219177246094, "logps/rejected": -427.6738586425781, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -5.37910270690918, "rewards/margins": 4.316471099853516, "rewards/rejected": -9.695573806762695, "step": 2314 }, { "epoch": 0.36, "learning_rate": 1.2449419575704134e-05, "logits/chosen": -2.676043748855591, "logits/rejected": -1.1829404830932617, "logps/chosen": -252.41897583007812, "logps/rejected": -84.31171417236328, "loss": 5.1708, "rewards/accuracies": 0.5, "rewards/chosen": -6.570727348327637, "rewards/margins": -1.8763959407806396, "rewards/rejected": -4.694331645965576, "step": 2315 }, { "epoch": 0.36, "learning_rate": 1.2448686135172987e-05, "logits/chosen": -2.7506861686706543, "logits/rejected": -2.3859691619873047, "logps/chosen": -171.45040893554688, "logps/rejected": -273.64739990234375, "loss": 4.4462, "rewards/accuracies": 0.5, "rewards/chosen": -6.436844348907471, "rewards/margins": 1.2631464004516602, "rewards/rejected": -7.699990749359131, "step": 2316 }, { "epoch": 0.36, "learning_rate": 1.2447952694641839e-05, "logits/chosen": -2.65586256980896, "logits/rejected": -2.5598690509796143, "logps/chosen": -108.75973510742188, "logps/rejected": -243.01995849609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.7804532051086426, "rewards/margins": 6.877081394195557, "rewards/rejected": -9.6575345993042, "step": 2317 }, { "epoch": 0.36, "learning_rate": 1.2447219254110691e-05, "logits/chosen": -2.142777442932129, "logits/rejected": -2.780574321746826, "logps/chosen": -132.05113220214844, "logps/rejected": -344.7126159667969, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.977898120880127, "rewards/margins": 6.519728183746338, "rewards/rejected": -8.497626304626465, "step": 2318 }, { "epoch": 0.36, "learning_rate": 1.2446485813579543e-05, "logits/chosen": -2.9476842880249023, "logits/rejected": -3.4022891521453857, "logps/chosen": -61.32402801513672, "logps/rejected": -241.25306701660156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1131919622421265, "rewards/margins": 7.345332145690918, "rewards/rejected": -8.458523750305176, "step": 2319 }, { "epoch": 0.36, "learning_rate": 1.2445752373048395e-05, "logits/chosen": -2.3429415225982666, "logits/rejected": -2.5687360763549805, "logps/chosen": -124.18922424316406, "logps/rejected": -105.49942779541016, "loss": 1.6258, "rewards/accuracies": 0.5, "rewards/chosen": -3.5546932220458984, "rewards/margins": 0.430819034576416, "rewards/rejected": -3.9855122566223145, "step": 2320 }, { "epoch": 0.36, "learning_rate": 1.2445018932517248e-05, "logits/chosen": -2.500544548034668, "logits/rejected": -3.2813806533813477, "logps/chosen": -232.64329528808594, "logps/rejected": -328.9151611328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.6244003772735596, "rewards/margins": 6.7089128494262695, "rewards/rejected": -10.33331298828125, "step": 2321 }, { "epoch": 0.36, "learning_rate": 1.24442854919861e-05, "logits/chosen": -2.662257194519043, "logits/rejected": -2.845588445663452, "logps/chosen": -303.0872497558594, "logps/rejected": -230.25714111328125, "loss": 4.8744, "rewards/accuracies": 0.5, "rewards/chosen": -8.022842407226562, "rewards/margins": -0.708686351776123, "rewards/rejected": -7.314155578613281, "step": 2322 }, { "epoch": 0.36, "learning_rate": 1.2443552051454952e-05, "logits/chosen": -2.802700996398926, "logits/rejected": -2.5083489418029785, "logps/chosen": -485.0997619628906, "logps/rejected": -334.14630126953125, "loss": 2.6641, "rewards/accuracies": 0.5, "rewards/chosen": -6.293499946594238, "rewards/margins": -0.6123712062835693, "rewards/rejected": -5.68112850189209, "step": 2323 }, { "epoch": 0.36, "learning_rate": 1.2442818610923804e-05, "logits/chosen": -2.2023894786834717, "logits/rejected": -2.243260383605957, "logps/chosen": -230.7947998046875, "logps/rejected": -346.0452575683594, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.3779702186584473, "rewards/margins": 7.4825334548950195, "rewards/rejected": -9.860504150390625, "step": 2324 }, { "epoch": 0.36, "learning_rate": 1.2442085170392656e-05, "logits/chosen": -1.8386049270629883, "logits/rejected": -2.819124698638916, "logps/chosen": -176.1399383544922, "logps/rejected": -244.34371948242188, "loss": 2.4547, "rewards/accuracies": 0.5, "rewards/chosen": -5.013383388519287, "rewards/margins": 0.4407663345336914, "rewards/rejected": -5.454150199890137, "step": 2325 }, { "epoch": 0.36, "learning_rate": 1.2441351729861508e-05, "logits/chosen": -1.7601420879364014, "logits/rejected": -2.8912272453308105, "logps/chosen": -136.17578125, "logps/rejected": -413.309326171875, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -1.787032127380371, "rewards/margins": 4.8953776359558105, "rewards/rejected": -6.682409763336182, "step": 2326 }, { "epoch": 0.36, "learning_rate": 1.244061828933036e-05, "logits/chosen": -2.727928638458252, "logits/rejected": -2.766937017440796, "logps/chosen": -90.38218688964844, "logps/rejected": -291.9676513671875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.1718521118164062, "rewards/margins": 8.99856948852539, "rewards/rejected": -10.170421600341797, "step": 2327 }, { "epoch": 0.36, "learning_rate": 1.2439884848799211e-05, "logits/chosen": -2.706455707550049, "logits/rejected": -2.0120482444763184, "logps/chosen": -829.725341796875, "logps/rejected": -586.2886962890625, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": -3.2541699409484863, "rewards/margins": 8.324668884277344, "rewards/rejected": -11.578839302062988, "step": 2328 }, { "epoch": 0.36, "learning_rate": 1.2439151408268063e-05, "logits/chosen": -2.8134522438049316, "logits/rejected": -2.8858559131622314, "logps/chosen": -33.49679946899414, "logps/rejected": -184.17605590820312, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.6702243089675903, "rewards/margins": 6.256921768188477, "rewards/rejected": -7.927145957946777, "step": 2329 }, { "epoch": 0.36, "learning_rate": 1.2438417967736917e-05, "logits/chosen": -3.0684568881988525, "logits/rejected": -2.9443869590759277, "logps/chosen": -169.72451782226562, "logps/rejected": -152.3543701171875, "loss": 2.3051, "rewards/accuracies": 0.5, "rewards/chosen": -3.649933099746704, "rewards/margins": 1.456956148147583, "rewards/rejected": -5.106889247894287, "step": 2330 }, { "epoch": 0.36, "learning_rate": 1.2437684527205769e-05, "logits/chosen": -2.9190609455108643, "logits/rejected": -2.5185649394989014, "logps/chosen": -245.14517211914062, "logps/rejected": -190.25511169433594, "loss": 3.5086, "rewards/accuracies": 0.5, "rewards/chosen": -6.470372200012207, "rewards/margins": -1.325019121170044, "rewards/rejected": -5.145353317260742, "step": 2331 }, { "epoch": 0.36, "learning_rate": 1.243695108667462e-05, "logits/chosen": -3.044238328933716, "logits/rejected": -1.838846206665039, "logps/chosen": -384.81402587890625, "logps/rejected": -342.40887451171875, "loss": 1.4526, "rewards/accuracies": 0.5, "rewards/chosen": -4.0670905113220215, "rewards/margins": 3.9376955032348633, "rewards/rejected": -8.004786491394043, "step": 2332 }, { "epoch": 0.36, "learning_rate": 1.2436217646143472e-05, "logits/chosen": -2.934220552444458, "logits/rejected": -2.527552604675293, "logps/chosen": -263.5335388183594, "logps/rejected": -244.06976318359375, "loss": 0.3847, "rewards/accuracies": 0.5, "rewards/chosen": -0.8948585987091064, "rewards/margins": 4.671643257141113, "rewards/rejected": -5.566502094268799, "step": 2333 }, { "epoch": 0.36, "learning_rate": 1.2435484205612324e-05, "logits/chosen": -2.8675694465637207, "logits/rejected": -1.5700653791427612, "logps/chosen": -505.32183837890625, "logps/rejected": -177.8487091064453, "loss": 2.2798, "rewards/accuracies": 0.5, "rewards/chosen": -4.244805335998535, "rewards/margins": 0.2774229049682617, "rewards/rejected": -4.522228240966797, "step": 2334 }, { "epoch": 0.36, "learning_rate": 1.2434750765081176e-05, "logits/chosen": -2.223006248474121, "logits/rejected": -2.911867618560791, "logps/chosen": -218.36524963378906, "logps/rejected": -300.605224609375, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -3.545015335083008, "rewards/margins": 4.781466484069824, "rewards/rejected": -8.326481819152832, "step": 2335 }, { "epoch": 0.36, "learning_rate": 1.2434017324550028e-05, "logits/chosen": -2.6929078102111816, "logits/rejected": -3.139674663543701, "logps/chosen": -30.768081665039062, "logps/rejected": -277.3795471191406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.4167060852050781, "rewards/margins": 7.6752142906188965, "rewards/rejected": -9.091920852661133, "step": 2336 }, { "epoch": 0.36, "learning_rate": 1.243328388401888e-05, "logits/chosen": -2.058259963989258, "logits/rejected": -2.995835781097412, "logps/chosen": -96.96534729003906, "logps/rejected": -333.6275634765625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -2.6716182231903076, "rewards/margins": 4.894352436065674, "rewards/rejected": -7.565970420837402, "step": 2337 }, { "epoch": 0.36, "learning_rate": 1.2432550443487732e-05, "logits/chosen": -3.046449899673462, "logits/rejected": -3.0015742778778076, "logps/chosen": -113.71726989746094, "logps/rejected": -252.40371704101562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7828320264816284, "rewards/margins": 7.321156024932861, "rewards/rejected": -9.103988647460938, "step": 2338 }, { "epoch": 0.36, "learning_rate": 1.2431817002956585e-05, "logits/chosen": -2.707414388656616, "logits/rejected": -1.5444324016571045, "logps/chosen": -312.5592041015625, "logps/rejected": -200.789306640625, "loss": 2.534, "rewards/accuracies": 0.5, "rewards/chosen": -4.7324323654174805, "rewards/margins": -0.1728367805480957, "rewards/rejected": -4.559595584869385, "step": 2339 }, { "epoch": 0.36, "learning_rate": 1.2431083562425437e-05, "logits/chosen": -2.7816147804260254, "logits/rejected": -1.843668818473816, "logps/chosen": -307.79180908203125, "logps/rejected": -277.5382080078125, "loss": 0.9889, "rewards/accuracies": 0.5, "rewards/chosen": -4.089129447937012, "rewards/margins": 2.862642288208008, "rewards/rejected": -6.9517717361450195, "step": 2340 }, { "epoch": 0.36, "learning_rate": 1.243035012189429e-05, "logits/chosen": -2.856154680252075, "logits/rejected": -2.558385133743286, "logps/chosen": -435.6400146484375, "logps/rejected": -355.7995300292969, "loss": 0.0517, "rewards/accuracies": 1.0, "rewards/chosen": -3.1134352684020996, "rewards/margins": 4.2436418533325195, "rewards/rejected": -7.357077121734619, "step": 2341 }, { "epoch": 0.36, "learning_rate": 1.2429616681363141e-05, "logits/chosen": -0.8029887676239014, "logits/rejected": -2.3519740104675293, "logps/chosen": -108.90177154541016, "logps/rejected": -404.8136291503906, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.849387168884277, "rewards/margins": 7.639957904815674, "rewards/rejected": -12.48934555053711, "step": 2342 }, { "epoch": 0.36, "learning_rate": 1.2428883240831993e-05, "logits/chosen": -1.9711663722991943, "logits/rejected": -2.932786464691162, "logps/chosen": -264.70281982421875, "logps/rejected": -580.7679443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3367607593536377, "rewards/margins": 9.346166610717773, "rewards/rejected": -12.682927131652832, "step": 2343 }, { "epoch": 0.36, "learning_rate": 1.2428149800300845e-05, "logits/chosen": -2.0569756031036377, "logits/rejected": -2.993896007537842, "logps/chosen": -34.37640380859375, "logps/rejected": -406.4631042480469, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -1.5467607975006104, "rewards/margins": 4.7917985916137695, "rewards/rejected": -6.338559150695801, "step": 2344 }, { "epoch": 0.36, "learning_rate": 1.2427416359769697e-05, "logits/chosen": -2.949733018875122, "logits/rejected": -2.5004515647888184, "logps/chosen": -102.03252410888672, "logps/rejected": -218.1058349609375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.7242844104766846, "rewards/margins": 6.215595245361328, "rewards/rejected": -7.939879417419434, "step": 2345 }, { "epoch": 0.36, "learning_rate": 1.2426682919238549e-05, "logits/chosen": -1.1914232969284058, "logits/rejected": -2.766848564147949, "logps/chosen": -175.76382446289062, "logps/rejected": -353.355224609375, "loss": 2.0051, "rewards/accuracies": 0.5, "rewards/chosen": -4.801902770996094, "rewards/margins": -0.6225345134735107, "rewards/rejected": -4.179368019104004, "step": 2346 }, { "epoch": 0.37, "learning_rate": 1.24259494787074e-05, "logits/chosen": -1.7061902284622192, "logits/rejected": -2.6922194957733154, "logps/chosen": -226.2718505859375, "logps/rejected": -457.13531494140625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.454010009765625, "rewards/margins": 7.297492980957031, "rewards/rejected": -9.751502990722656, "step": 2347 }, { "epoch": 0.37, "learning_rate": 1.2425216038176254e-05, "logits/chosen": -1.56748628616333, "logits/rejected": -2.809922933578491, "logps/chosen": -148.76956176757812, "logps/rejected": -356.0230712890625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.046628713607788, "rewards/margins": 5.5677361488342285, "rewards/rejected": -7.6143646240234375, "step": 2348 }, { "epoch": 0.37, "learning_rate": 1.2424482597645106e-05, "logits/chosen": -2.807253122329712, "logits/rejected": -2.6996569633483887, "logps/chosen": -202.18484497070312, "logps/rejected": -218.15948486328125, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -1.806128740310669, "rewards/margins": 3.5134029388427734, "rewards/rejected": -5.3195319175720215, "step": 2349 }, { "epoch": 0.37, "learning_rate": 1.242374915711396e-05, "logits/chosen": -3.124612331390381, "logits/rejected": -3.40561842918396, "logps/chosen": -37.86686325073242, "logps/rejected": -187.27371215820312, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.9431560039520264, "rewards/margins": 5.232296943664551, "rewards/rejected": -7.175453186035156, "step": 2350 }, { "epoch": 0.37, "learning_rate": 1.2423015716582811e-05, "logits/chosen": -1.8626936674118042, "logits/rejected": -2.637096881866455, "logps/chosen": -168.1197052001953, "logps/rejected": -378.18182373046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.066661834716797, "rewards/margins": 8.631416320800781, "rewards/rejected": -10.698078155517578, "step": 2351 }, { "epoch": 0.37, "learning_rate": 1.2422282276051663e-05, "logits/chosen": -2.2040467262268066, "logits/rejected": -3.0575153827667236, "logps/chosen": -184.3306427001953, "logps/rejected": -342.2982177734375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.954408884048462, "rewards/margins": 4.408304691314697, "rewards/rejected": -6.362713813781738, "step": 2352 }, { "epoch": 0.37, "learning_rate": 1.2421548835520515e-05, "logits/chosen": -2.450303554534912, "logits/rejected": -2.931154489517212, "logps/chosen": -82.552978515625, "logps/rejected": -325.94415283203125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.9508123397827148, "rewards/margins": 7.801145076751709, "rewards/rejected": -9.751956939697266, "step": 2353 }, { "epoch": 0.37, "learning_rate": 1.2420815394989367e-05, "logits/chosen": -3.1322410106658936, "logits/rejected": -2.6591107845306396, "logps/chosen": -165.9018096923828, "logps/rejected": -173.7353515625, "loss": 0.5361, "rewards/accuracies": 0.5, "rewards/chosen": -3.3351869583129883, "rewards/margins": 2.142547607421875, "rewards/rejected": -5.477734565734863, "step": 2354 }, { "epoch": 0.37, "learning_rate": 1.2420081954458219e-05, "logits/chosen": -2.1871542930603027, "logits/rejected": -2.8488311767578125, "logps/chosen": -43.346275329589844, "logps/rejected": -176.6440887451172, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -1.167121171951294, "rewards/margins": 4.254133224487305, "rewards/rejected": -5.4212541580200195, "step": 2355 }, { "epoch": 0.37, "learning_rate": 1.241934851392707e-05, "logits/chosen": -1.8104355335235596, "logits/rejected": -2.536858081817627, "logps/chosen": -134.21319580078125, "logps/rejected": -329.99761962890625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -3.4560790061950684, "rewards/margins": 5.283369541168213, "rewards/rejected": -8.739448547363281, "step": 2356 }, { "epoch": 0.37, "learning_rate": 1.2418615073395924e-05, "logits/chosen": -2.695884943008423, "logits/rejected": -2.8442347049713135, "logps/chosen": -577.0980224609375, "logps/rejected": -502.7503356933594, "loss": 1.7261, "rewards/accuracies": 0.5, "rewards/chosen": -4.030054092407227, "rewards/margins": 0.4751434326171875, "rewards/rejected": -4.505197525024414, "step": 2357 }, { "epoch": 0.37, "learning_rate": 1.2417881632864776e-05, "logits/chosen": -1.6970763206481934, "logits/rejected": -2.5137882232666016, "logps/chosen": -106.59493255615234, "logps/rejected": -366.1064453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2571697235107422, "rewards/margins": 8.343106269836426, "rewards/rejected": -9.600275993347168, "step": 2358 }, { "epoch": 0.37, "learning_rate": 1.2417148192333628e-05, "logits/chosen": -2.8396143913269043, "logits/rejected": -1.9654505252838135, "logps/chosen": -361.0316162109375, "logps/rejected": -331.3131103515625, "loss": 1.215, "rewards/accuracies": 0.5, "rewards/chosen": -2.6939454078674316, "rewards/margins": 1.3045647144317627, "rewards/rejected": -3.9985098838806152, "step": 2359 }, { "epoch": 0.37, "learning_rate": 1.241641475180248e-05, "logits/chosen": -2.8415145874023438, "logits/rejected": -1.7835205793380737, "logps/chosen": -327.82476806640625, "logps/rejected": -148.2316131591797, "loss": 5.623, "rewards/accuracies": 0.0, "rewards/chosen": -8.013897895812988, "rewards/margins": -5.616191387176514, "rewards/rejected": -2.3977065086364746, "step": 2360 }, { "epoch": 0.37, "learning_rate": 1.2415681311271332e-05, "logits/chosen": -2.8360965251922607, "logits/rejected": -2.4742298126220703, "logps/chosen": -154.4273681640625, "logps/rejected": -220.4499053955078, "loss": 3.0282, "rewards/accuracies": 0.5, "rewards/chosen": -4.373071670532227, "rewards/margins": 0.9897871017456055, "rewards/rejected": -5.362858772277832, "step": 2361 }, { "epoch": 0.37, "learning_rate": 1.2414947870740184e-05, "logits/chosen": -2.9438695907592773, "logits/rejected": -2.878986120223999, "logps/chosen": -115.6012191772461, "logps/rejected": -144.12646484375, "loss": 1.8376, "rewards/accuracies": 0.5, "rewards/chosen": -2.647270679473877, "rewards/margins": 1.135079264640808, "rewards/rejected": -3.7823498249053955, "step": 2362 }, { "epoch": 0.37, "learning_rate": 1.2414214430209036e-05, "logits/chosen": -2.674989700317383, "logits/rejected": -2.762104034423828, "logps/chosen": -475.4700927734375, "logps/rejected": -287.4470520019531, "loss": 0.6506, "rewards/accuracies": 0.5, "rewards/chosen": -2.8423728942871094, "rewards/margins": 2.404747724533081, "rewards/rejected": -5.2471208572387695, "step": 2363 }, { "epoch": 0.37, "learning_rate": 1.2413480989677887e-05, "logits/chosen": -2.8487043380737305, "logits/rejected": -3.0974714756011963, "logps/chosen": -119.8013916015625, "logps/rejected": -356.9579162597656, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.1830650269985199, "rewards/margins": 6.545754432678223, "rewards/rejected": -6.728819370269775, "step": 2364 }, { "epoch": 0.37, "learning_rate": 1.241274754914674e-05, "logits/chosen": -2.3714804649353027, "logits/rejected": -2.802995443344116, "logps/chosen": -349.6417541503906, "logps/rejected": -372.6842041015625, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -1.109636664390564, "rewards/margins": 3.4823389053344727, "rewards/rejected": -4.591975688934326, "step": 2365 }, { "epoch": 0.37, "learning_rate": 1.2412014108615593e-05, "logits/chosen": -2.5421464443206787, "logits/rejected": -3.2293214797973633, "logps/chosen": -295.27032470703125, "logps/rejected": -394.07183837890625, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.4349669218063354, "rewards/margins": 6.008786678314209, "rewards/rejected": -7.443753242492676, "step": 2366 }, { "epoch": 0.37, "learning_rate": 1.2411280668084445e-05, "logits/chosen": -3.0266590118408203, "logits/rejected": -2.6762990951538086, "logps/chosen": -1076.93701171875, "logps/rejected": -820.9356079101562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.139556884765625, "rewards/margins": 8.426122665405273, "rewards/rejected": -10.565679550170898, "step": 2367 }, { "epoch": 0.37, "learning_rate": 1.2410547227553297e-05, "logits/chosen": -2.5957272052764893, "logits/rejected": -3.113901376724243, "logps/chosen": -204.4210968017578, "logps/rejected": -284.22314453125, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -3.4993066787719727, "rewards/margins": 4.106678009033203, "rewards/rejected": -7.605984687805176, "step": 2368 }, { "epoch": 0.37, "learning_rate": 1.2409813787022149e-05, "logits/chosen": -2.683424949645996, "logits/rejected": -2.43617844581604, "logps/chosen": -566.3106079101562, "logps/rejected": -440.87835693359375, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": -2.173837423324585, "rewards/margins": 4.919950008392334, "rewards/rejected": -7.09378719329834, "step": 2369 }, { "epoch": 0.37, "learning_rate": 1.2409080346491e-05, "logits/chosen": -2.724780559539795, "logits/rejected": -2.3122458457946777, "logps/chosen": -1040.3681640625, "logps/rejected": -541.45654296875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.7524750232696533, "rewards/margins": 5.142657279968262, "rewards/rejected": -6.895132541656494, "step": 2370 }, { "epoch": 0.37, "learning_rate": 1.2408346905959852e-05, "logits/chosen": -1.760222315788269, "logits/rejected": -2.945828914642334, "logps/chosen": -435.377685546875, "logps/rejected": -560.9024047851562, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.274219036102295, "rewards/margins": 4.223384857177734, "rewards/rejected": -5.497603416442871, "step": 2371 }, { "epoch": 0.37, "learning_rate": 1.2407613465428704e-05, "logits/chosen": -2.967200517654419, "logits/rejected": -2.230045795440674, "logps/chosen": -213.32252502441406, "logps/rejected": -128.73587036132812, "loss": 1.0189, "rewards/accuracies": 0.5, "rewards/chosen": -2.615408420562744, "rewards/margins": 1.4491369724273682, "rewards/rejected": -4.064545631408691, "step": 2372 }, { "epoch": 0.37, "learning_rate": 1.2406880024897556e-05, "logits/chosen": -2.5523834228515625, "logits/rejected": -3.0777223110198975, "logps/chosen": -317.2428283691406, "logps/rejected": -500.30810546875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.0817084312438965, "rewards/margins": 6.385178565979004, "rewards/rejected": -9.466887474060059, "step": 2373 }, { "epoch": 0.37, "learning_rate": 1.2406146584366408e-05, "logits/chosen": -2.7823472023010254, "logits/rejected": -1.018048882484436, "logps/chosen": -170.35842895507812, "logps/rejected": -123.64662170410156, "loss": 2.4512, "rewards/accuracies": 0.5, "rewards/chosen": -4.771303176879883, "rewards/margins": -1.2084019184112549, "rewards/rejected": -3.562901496887207, "step": 2374 }, { "epoch": 0.37, "learning_rate": 1.2405413143835261e-05, "logits/chosen": -2.4371724128723145, "logits/rejected": -3.376277446746826, "logps/chosen": -79.46348571777344, "logps/rejected": -276.18743896484375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.781651258468628, "rewards/margins": 5.431732177734375, "rewards/rejected": -7.213383674621582, "step": 2375 }, { "epoch": 0.37, "learning_rate": 1.2404679703304113e-05, "logits/chosen": -2.3399558067321777, "logits/rejected": -2.361433506011963, "logps/chosen": -558.5504760742188, "logps/rejected": -629.4110107421875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.073148488998413, "rewards/margins": 5.960712432861328, "rewards/rejected": -8.03386116027832, "step": 2376 }, { "epoch": 0.37, "learning_rate": 1.2403946262772965e-05, "logits/chosen": -2.506530284881592, "logits/rejected": -2.9207799434661865, "logps/chosen": -103.79043579101562, "logps/rejected": -259.47308349609375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.5490469932556152, "rewards/margins": 5.433099746704102, "rewards/rejected": -7.982146739959717, "step": 2377 }, { "epoch": 0.37, "learning_rate": 1.2403212822241817e-05, "logits/chosen": -3.058459997177124, "logits/rejected": -2.443676710128784, "logps/chosen": -319.4231262207031, "logps/rejected": -261.03521728515625, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.2204586267471313, "rewards/margins": 4.194439888000488, "rewards/rejected": -5.414898872375488, "step": 2378 }, { "epoch": 0.37, "learning_rate": 1.2402479381710669e-05, "logits/chosen": -2.5494236946105957, "logits/rejected": -2.9678075313568115, "logps/chosen": -67.6201400756836, "logps/rejected": -197.79757690429688, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.847856879234314, "rewards/margins": 3.762666702270508, "rewards/rejected": -5.610523223876953, "step": 2379 }, { "epoch": 0.37, "learning_rate": 1.2401745941179521e-05, "logits/chosen": -1.9367091655731201, "logits/rejected": -2.4950525760650635, "logps/chosen": -231.63064575195312, "logps/rejected": -368.95989990234375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -3.356053352355957, "rewards/margins": 4.440607070922852, "rewards/rejected": -7.796660423278809, "step": 2380 }, { "epoch": 0.37, "learning_rate": 1.2401012500648373e-05, "logits/chosen": -3.081698417663574, "logits/rejected": -2.291304349899292, "logps/chosen": -276.89984130859375, "logps/rejected": -187.243408203125, "loss": 3.2313, "rewards/accuracies": 0.0, "rewards/chosen": -5.036536693572998, "rewards/margins": -3.1908679008483887, "rewards/rejected": -1.8456687927246094, "step": 2381 }, { "epoch": 0.37, "learning_rate": 1.2400279060117226e-05, "logits/chosen": -1.5974475145339966, "logits/rejected": -2.7112181186676025, "logps/chosen": -125.24442291259766, "logps/rejected": -382.82965087890625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.655224084854126, "rewards/margins": 4.906659126281738, "rewards/rejected": -6.561882972717285, "step": 2382 }, { "epoch": 0.37, "learning_rate": 1.2399545619586078e-05, "logits/chosen": -2.97365403175354, "logits/rejected": -3.0129599571228027, "logps/chosen": -138.14321899414062, "logps/rejected": -265.0374755859375, "loss": 0.4466, "rewards/accuracies": 0.5, "rewards/chosen": -1.445913553237915, "rewards/margins": 4.023033142089844, "rewards/rejected": -5.468946933746338, "step": 2383 }, { "epoch": 0.37, "learning_rate": 1.2398812179054932e-05, "logits/chosen": -1.7973532676696777, "logits/rejected": -2.9519851207733154, "logps/chosen": -249.73085021972656, "logps/rejected": -466.94195556640625, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -5.0190324783325195, "rewards/margins": 4.425432205200195, "rewards/rejected": -9.444463729858398, "step": 2384 }, { "epoch": 0.37, "learning_rate": 1.2398078738523784e-05, "logits/chosen": -2.7254998683929443, "logits/rejected": -3.3151557445526123, "logps/chosen": -276.01220703125, "logps/rejected": -417.1043701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8222832679748535, "rewards/margins": 9.652586936950684, "rewards/rejected": -12.474870681762695, "step": 2385 }, { "epoch": 0.37, "learning_rate": 1.2397345297992636e-05, "logits/chosen": -3.1495983600616455, "logits/rejected": -3.053250312805176, "logps/chosen": -455.6953125, "logps/rejected": -338.83917236328125, "loss": 1.5985, "rewards/accuracies": 0.5, "rewards/chosen": -3.6980886459350586, "rewards/margins": 2.3579039573669434, "rewards/rejected": -6.055992603302002, "step": 2386 }, { "epoch": 0.37, "learning_rate": 1.2396611857461487e-05, "logits/chosen": -2.8883605003356934, "logits/rejected": -3.0647470951080322, "logps/chosen": -147.23631286621094, "logps/rejected": -208.6302490234375, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -3.8819212913513184, "rewards/margins": 3.349663734436035, "rewards/rejected": -7.2315850257873535, "step": 2387 }, { "epoch": 0.37, "learning_rate": 1.239587841693034e-05, "logits/chosen": -2.5457208156585693, "logits/rejected": -3.2157087326049805, "logps/chosen": -44.842063903808594, "logps/rejected": -239.63034057617188, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.6288292407989502, "rewards/margins": 4.63252067565918, "rewards/rejected": -6.261349678039551, "step": 2388 }, { "epoch": 0.37, "learning_rate": 1.2395144976399191e-05, "logits/chosen": -2.340594530105591, "logits/rejected": -2.5378715991973877, "logps/chosen": -526.093505859375, "logps/rejected": -338.22723388671875, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -1.8308234214782715, "rewards/margins": 3.7023088932037354, "rewards/rejected": -5.533132553100586, "step": 2389 }, { "epoch": 0.37, "learning_rate": 1.2394411535868043e-05, "logits/chosen": -2.5587472915649414, "logits/rejected": -3.2866785526275635, "logps/chosen": -103.5731201171875, "logps/rejected": -241.52947998046875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -3.092454433441162, "rewards/margins": 4.475969314575195, "rewards/rejected": -7.568423748016357, "step": 2390 }, { "epoch": 0.37, "learning_rate": 1.2393678095336895e-05, "logits/chosen": -1.2206703424453735, "logits/rejected": -2.7070045471191406, "logps/chosen": -193.26866149902344, "logps/rejected": -512.36279296875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.4195423126220703, "rewards/margins": 6.452906131744385, "rewards/rejected": -8.872447967529297, "step": 2391 }, { "epoch": 0.37, "learning_rate": 1.2392944654805747e-05, "logits/chosen": -2.7718143463134766, "logits/rejected": -1.9691672325134277, "logps/chosen": -485.4169921875, "logps/rejected": -542.587646484375, "loss": 2.6135, "rewards/accuracies": 0.5, "rewards/chosen": -6.834951400756836, "rewards/margins": -0.6042134761810303, "rewards/rejected": -6.230737686157227, "step": 2392 }, { "epoch": 0.37, "learning_rate": 1.23922112142746e-05, "logits/chosen": -2.298952579498291, "logits/rejected": -3.203247308731079, "logps/chosen": -184.04690551757812, "logps/rejected": -362.3731994628906, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.258306860923767, "rewards/margins": 6.624571800231934, "rewards/rejected": -7.882878303527832, "step": 2393 }, { "epoch": 0.37, "learning_rate": 1.2391477773743452e-05, "logits/chosen": -2.36541748046875, "logits/rejected": -3.11716890335083, "logps/chosen": -693.4533081054688, "logps/rejected": -838.1787109375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2470123767852783, "rewards/margins": 7.040860176086426, "rewards/rejected": -9.287872314453125, "step": 2394 }, { "epoch": 0.37, "learning_rate": 1.2390744333212304e-05, "logits/chosen": -2.8258285522460938, "logits/rejected": -2.568998098373413, "logps/chosen": -269.34552001953125, "logps/rejected": -304.75970458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.6573312282562256, "rewards/margins": 9.779983520507812, "rewards/rejected": -12.437314987182617, "step": 2395 }, { "epoch": 0.37, "learning_rate": 1.2390010892681156e-05, "logits/chosen": -2.807162284851074, "logits/rejected": -2.178879737854004, "logps/chosen": -431.14697265625, "logps/rejected": -302.07208251953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5409088134765625, "rewards/margins": 8.631006240844727, "rewards/rejected": -11.171915054321289, "step": 2396 }, { "epoch": 0.37, "learning_rate": 1.2389277452150008e-05, "logits/chosen": -2.6091060638427734, "logits/rejected": -2.8878540992736816, "logps/chosen": -210.33375549316406, "logps/rejected": -143.24502563476562, "loss": 1.9219, "rewards/accuracies": 0.5, "rewards/chosen": -4.537969589233398, "rewards/margins": 2.148439407348633, "rewards/rejected": -6.686408996582031, "step": 2397 }, { "epoch": 0.37, "learning_rate": 1.238854401161886e-05, "logits/chosen": -2.900527000427246, "logits/rejected": -3.085151433944702, "logps/chosen": -198.798583984375, "logps/rejected": -372.4727783203125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.332448959350586, "rewards/margins": 5.058774471282959, "rewards/rejected": -6.391223430633545, "step": 2398 }, { "epoch": 0.37, "learning_rate": 1.2387810571087712e-05, "logits/chosen": -2.59000301361084, "logits/rejected": -2.7778854370117188, "logps/chosen": -49.86654281616211, "logps/rejected": -245.3887939453125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.5284714698791504, "rewards/margins": 6.720303058624268, "rewards/rejected": -8.248774528503418, "step": 2399 }, { "epoch": 0.37, "learning_rate": 1.2387077130556564e-05, "logits/chosen": -2.727435827255249, "logits/rejected": -2.60782527923584, "logps/chosen": -382.8839111328125, "logps/rejected": -368.37969970703125, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -4.578519821166992, "rewards/margins": 4.230988502502441, "rewards/rejected": -8.809508323669434, "step": 2400 }, { "epoch": 0.37, "learning_rate": 1.2386343690025417e-05, "logits/chosen": -1.7089447975158691, "logits/rejected": -2.8701858520507812, "logps/chosen": -242.5642547607422, "logps/rejected": -518.4515380859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.8864620923995972, "rewards/margins": 9.630911827087402, "rewards/rejected": -11.517374038696289, "step": 2401 }, { "epoch": 0.37, "learning_rate": 1.2385610249494269e-05, "logits/chosen": -2.504188299179077, "logits/rejected": -3.0978310108184814, "logps/chosen": -227.63011169433594, "logps/rejected": -397.30950927734375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.3724700212478638, "rewards/margins": 7.419349670410156, "rewards/rejected": -8.791820526123047, "step": 2402 }, { "epoch": 0.37, "learning_rate": 1.2384876808963121e-05, "logits/chosen": -2.181617498397827, "logits/rejected": -2.7151153087615967, "logps/chosen": -380.4087829589844, "logps/rejected": -562.5379638671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.101097583770752, "rewards/margins": 7.181608200073242, "rewards/rejected": -9.282705307006836, "step": 2403 }, { "epoch": 0.37, "learning_rate": 1.2384143368431973e-05, "logits/chosen": -2.750457763671875, "logits/rejected": -2.5485496520996094, "logps/chosen": -212.35789489746094, "logps/rejected": -246.89666748046875, "loss": 3.3657, "rewards/accuracies": 0.5, "rewards/chosen": -5.302671432495117, "rewards/margins": -2.3541314601898193, "rewards/rejected": -2.9485397338867188, "step": 2404 }, { "epoch": 0.37, "learning_rate": 1.2383409927900825e-05, "logits/chosen": -1.9011633396148682, "logits/rejected": -2.9386038780212402, "logps/chosen": -193.25338745117188, "logps/rejected": -307.28662109375, "loss": 4.3644, "rewards/accuracies": 0.5, "rewards/chosen": -5.615885257720947, "rewards/margins": -2.3442649841308594, "rewards/rejected": -3.271620273590088, "step": 2405 }, { "epoch": 0.37, "learning_rate": 1.2382676487369677e-05, "logits/chosen": -2.0350005626678467, "logits/rejected": -2.704343557357788, "logps/chosen": -203.2784881591797, "logps/rejected": -220.49415588378906, "loss": 1.1643, "rewards/accuracies": 0.5, "rewards/chosen": -3.767807960510254, "rewards/margins": 2.875607967376709, "rewards/rejected": -6.643415927886963, "step": 2406 }, { "epoch": 0.37, "learning_rate": 1.2381943046838528e-05, "logits/chosen": -2.049769639968872, "logits/rejected": -3.10688853263855, "logps/chosen": -84.89904022216797, "logps/rejected": -502.8233337402344, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -2.0316996574401855, "rewards/margins": 6.550586700439453, "rewards/rejected": -8.58228588104248, "step": 2407 }, { "epoch": 0.37, "learning_rate": 1.238120960630738e-05, "logits/chosen": -2.821207046508789, "logits/rejected": -2.716264009475708, "logps/chosen": -607.0799560546875, "logps/rejected": -491.92828369140625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.907231092453003, "rewards/margins": 6.04470157623291, "rewards/rejected": -7.951932907104492, "step": 2408 }, { "epoch": 0.37, "learning_rate": 1.2380476165776232e-05, "logits/chosen": -2.9944233894348145, "logits/rejected": -2.5006351470947266, "logps/chosen": -935.7868041992188, "logps/rejected": -703.03857421875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.479788303375244, "rewards/margins": 6.248034954071045, "rewards/rejected": -8.727823257446289, "step": 2409 }, { "epoch": 0.37, "learning_rate": 1.2379742725245086e-05, "logits/chosen": -2.759416341781616, "logits/rejected": -2.6583690643310547, "logps/chosen": -264.9725341796875, "logps/rejected": -303.2733154296875, "loss": 4.0005, "rewards/accuracies": 0.5, "rewards/chosen": -5.672241687774658, "rewards/margins": -0.8816184997558594, "rewards/rejected": -4.790623188018799, "step": 2410 }, { "epoch": 0.37, "learning_rate": 1.2379009284713938e-05, "logits/chosen": -2.9155824184417725, "logits/rejected": -2.809002161026001, "logps/chosen": -256.22064208984375, "logps/rejected": -345.74346923828125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.148531436920166, "rewards/margins": 6.074101448059082, "rewards/rejected": -7.22263240814209, "step": 2411 }, { "epoch": 0.38, "learning_rate": 1.237827584418279e-05, "logits/chosen": -2.6287128925323486, "logits/rejected": -3.053978443145752, "logps/chosen": -57.08852005004883, "logps/rejected": -261.74188232421875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.413782835006714, "rewards/margins": 7.957921981811523, "rewards/rejected": -10.371705055236816, "step": 2412 }, { "epoch": 0.38, "learning_rate": 1.2377542403651641e-05, "logits/chosen": -2.7635817527770996, "logits/rejected": -2.7452776432037354, "logps/chosen": -191.67123413085938, "logps/rejected": -139.75399780273438, "loss": 1.169, "rewards/accuracies": 0.5, "rewards/chosen": -4.295152187347412, "rewards/margins": 2.1912682056427, "rewards/rejected": -6.486420631408691, "step": 2413 }, { "epoch": 0.38, "learning_rate": 1.2376808963120493e-05, "logits/chosen": -1.9575766324996948, "logits/rejected": -2.914067506790161, "logps/chosen": -381.7893371582031, "logps/rejected": -489.5132141113281, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.443566918373108, "rewards/margins": 5.739900588989258, "rewards/rejected": -7.183467388153076, "step": 2414 }, { "epoch": 0.38, "learning_rate": 1.2376075522589345e-05, "logits/chosen": -1.4377193450927734, "logits/rejected": -2.8885040283203125, "logps/chosen": -199.98243713378906, "logps/rejected": -476.4290771484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.281820774078369, "rewards/margins": 6.625936985015869, "rewards/rejected": -8.907757759094238, "step": 2415 }, { "epoch": 0.38, "learning_rate": 1.2375342082058199e-05, "logits/chosen": -2.7211599349975586, "logits/rejected": -2.756324291229248, "logps/chosen": -333.75994873046875, "logps/rejected": -607.14453125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.330104112625122, "rewards/margins": 9.887935638427734, "rewards/rejected": -12.218039512634277, "step": 2416 }, { "epoch": 0.38, "learning_rate": 1.237460864152705e-05, "logits/chosen": -2.0963940620422363, "logits/rejected": -2.356001615524292, "logps/chosen": -161.2621307373047, "logps/rejected": -315.17364501953125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.362886428833008, "rewards/margins": 6.427711486816406, "rewards/rejected": -9.790597915649414, "step": 2417 }, { "epoch": 0.38, "learning_rate": 1.2373875200995902e-05, "logits/chosen": -2.7998578548431396, "logits/rejected": -2.4117629528045654, "logps/chosen": -490.1125793457031, "logps/rejected": -400.7251892089844, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.3140029907226562, "rewards/margins": 6.127490043640137, "rewards/rejected": -9.441493034362793, "step": 2418 }, { "epoch": 0.38, "learning_rate": 1.2373141760464756e-05, "logits/chosen": -1.3894226551055908, "logits/rejected": -2.6173558235168457, "logps/chosen": -210.448974609375, "logps/rejected": -336.2926025390625, "loss": 2.2181, "rewards/accuracies": 0.5, "rewards/chosen": -4.5411529541015625, "rewards/margins": 1.3435509204864502, "rewards/rejected": -5.884703636169434, "step": 2419 }, { "epoch": 0.38, "learning_rate": 1.2372408319933608e-05, "logits/chosen": -1.935328722000122, "logits/rejected": -3.06999135017395, "logps/chosen": -173.00845336914062, "logps/rejected": -476.28350830078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.673281192779541, "rewards/margins": 7.251697540283203, "rewards/rejected": -10.924979209899902, "step": 2420 }, { "epoch": 0.38, "learning_rate": 1.237167487940246e-05, "logits/chosen": -2.701841115951538, "logits/rejected": -1.627087950706482, "logps/chosen": -613.3336791992188, "logps/rejected": -434.1495056152344, "loss": 0.0897, "rewards/accuracies": 1.0, "rewards/chosen": -4.823941230773926, "rewards/margins": 6.449200630187988, "rewards/rejected": -11.273141860961914, "step": 2421 }, { "epoch": 0.38, "learning_rate": 1.2370941438871312e-05, "logits/chosen": -3.0265560150146484, "logits/rejected": -3.0288896560668945, "logps/chosen": -388.4695739746094, "logps/rejected": -367.4715576171875, "loss": 1.3749, "rewards/accuracies": 0.5, "rewards/chosen": -2.682797431945801, "rewards/margins": 1.8291430473327637, "rewards/rejected": -4.5119404792785645, "step": 2422 }, { "epoch": 0.38, "learning_rate": 1.2370207998340164e-05, "logits/chosen": -2.833568572998047, "logits/rejected": -3.288261651992798, "logps/chosen": -110.35875701904297, "logps/rejected": -250.1282196044922, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -2.3458752632141113, "rewards/margins": 3.8730716705322266, "rewards/rejected": -6.218946933746338, "step": 2423 }, { "epoch": 0.38, "learning_rate": 1.2369474557809015e-05, "logits/chosen": -3.214181423187256, "logits/rejected": -3.0414702892303467, "logps/chosen": -155.75352478027344, "logps/rejected": -139.5012664794922, "loss": 3.0517, "rewards/accuracies": 0.5, "rewards/chosen": -4.933573246002197, "rewards/margins": -0.4514482021331787, "rewards/rejected": -4.4821248054504395, "step": 2424 }, { "epoch": 0.38, "learning_rate": 1.2368741117277867e-05, "logits/chosen": -1.7929412126541138, "logits/rejected": -1.9949517250061035, "logps/chosen": -252.26666259765625, "logps/rejected": -226.47476196289062, "loss": 2.4176, "rewards/accuracies": 0.5, "rewards/chosen": -5.221640110015869, "rewards/margins": -0.02339792251586914, "rewards/rejected": -5.1982421875, "step": 2425 }, { "epoch": 0.38, "learning_rate": 1.2368007676746719e-05, "logits/chosen": -1.6990385055541992, "logits/rejected": -2.572065830230713, "logps/chosen": -161.90907287597656, "logps/rejected": -461.3828430175781, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.9005956649780273, "rewards/margins": 8.162548065185547, "rewards/rejected": -11.06314468383789, "step": 2426 }, { "epoch": 0.38, "learning_rate": 1.2367274236215571e-05, "logits/chosen": -2.4576518535614014, "logits/rejected": -3.0005102157592773, "logps/chosen": -122.47236633300781, "logps/rejected": -266.8895263671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0621638298034668, "rewards/margins": 7.119538307189941, "rewards/rejected": -8.18170166015625, "step": 2427 }, { "epoch": 0.38, "learning_rate": 1.2366540795684425e-05, "logits/chosen": -2.495558023452759, "logits/rejected": -2.7546722888946533, "logps/chosen": -126.68180084228516, "logps/rejected": -260.2491149902344, "loss": 0.1536, "rewards/accuracies": 1.0, "rewards/chosen": -2.342299222946167, "rewards/margins": 3.168017864227295, "rewards/rejected": -5.510316848754883, "step": 2428 }, { "epoch": 0.38, "learning_rate": 1.2365807355153276e-05, "logits/chosen": -2.658473014831543, "logits/rejected": -2.477341413497925, "logps/chosen": -750.814453125, "logps/rejected": -545.6597900390625, "loss": 1.8258, "rewards/accuracies": 0.5, "rewards/chosen": -5.718123435974121, "rewards/margins": 0.8399595022201538, "rewards/rejected": -6.558082580566406, "step": 2429 }, { "epoch": 0.38, "learning_rate": 1.2365073914622128e-05, "logits/chosen": -2.526947021484375, "logits/rejected": -2.930210828781128, "logps/chosen": -264.7341613769531, "logps/rejected": -347.2688903808594, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -1.5018516778945923, "rewards/margins": 3.9937219619750977, "rewards/rejected": -5.4955735206604, "step": 2430 }, { "epoch": 0.38, "learning_rate": 1.236434047409098e-05, "logits/chosen": -2.797508716583252, "logits/rejected": -2.261956214904785, "logps/chosen": -288.2773132324219, "logps/rejected": -219.2031707763672, "loss": 0.053, "rewards/accuracies": 1.0, "rewards/chosen": -3.3431732654571533, "rewards/margins": 4.295182228088379, "rewards/rejected": -7.638355255126953, "step": 2431 }, { "epoch": 0.38, "learning_rate": 1.2363607033559832e-05, "logits/chosen": -2.9527499675750732, "logits/rejected": -3.0490734577178955, "logps/chosen": -113.43268585205078, "logps/rejected": -153.962890625, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -3.1183996200561523, "rewards/margins": 4.509448051452637, "rewards/rejected": -7.627847671508789, "step": 2432 }, { "epoch": 0.38, "learning_rate": 1.2362873593028684e-05, "logits/chosen": -2.8580589294433594, "logits/rejected": -1.3917264938354492, "logps/chosen": -215.39028930664062, "logps/rejected": -138.9238739013672, "loss": 0.3764, "rewards/accuracies": 0.5, "rewards/chosen": -2.8063154220581055, "rewards/margins": 3.8952765464782715, "rewards/rejected": -6.701591968536377, "step": 2433 }, { "epoch": 0.38, "learning_rate": 1.2362140152497536e-05, "logits/chosen": -2.2540392875671387, "logits/rejected": -2.0386736392974854, "logps/chosen": -575.3684692382812, "logps/rejected": -436.80450439453125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -2.346357822418213, "rewards/margins": 6.898083686828613, "rewards/rejected": -9.244441032409668, "step": 2434 }, { "epoch": 0.38, "learning_rate": 1.2361406711966388e-05, "logits/chosen": -2.8128528594970703, "logits/rejected": -3.0570499897003174, "logps/chosen": -107.22506713867188, "logps/rejected": -172.63491821289062, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": -2.1482608318328857, "rewards/margins": 3.6405739784240723, "rewards/rejected": -5.788834571838379, "step": 2435 }, { "epoch": 0.38, "learning_rate": 1.236067327143524e-05, "logits/chosen": -2.732750654220581, "logits/rejected": -1.977631688117981, "logps/chosen": -240.77072143554688, "logps/rejected": -242.2498321533203, "loss": 1.911, "rewards/accuracies": 0.5, "rewards/chosen": -3.3233776092529297, "rewards/margins": 1.9636460542678833, "rewards/rejected": -5.287024021148682, "step": 2436 }, { "epoch": 0.38, "learning_rate": 1.2359939830904093e-05, "logits/chosen": -2.0588467121124268, "logits/rejected": -2.895813465118408, "logps/chosen": -133.01040649414062, "logps/rejected": -285.1654357910156, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -2.3024964332580566, "rewards/margins": 4.684837341308594, "rewards/rejected": -6.987334251403809, "step": 2437 }, { "epoch": 0.38, "learning_rate": 1.2359206390372945e-05, "logits/chosen": -2.8269495964050293, "logits/rejected": -2.784291982650757, "logps/chosen": -137.61544799804688, "logps/rejected": -189.6474609375, "loss": 2.1189, "rewards/accuracies": 0.5, "rewards/chosen": -4.442296028137207, "rewards/margins": -0.06030988693237305, "rewards/rejected": -4.381986141204834, "step": 2438 }, { "epoch": 0.38, "learning_rate": 1.2358472949841797e-05, "logits/chosen": -2.917595386505127, "logits/rejected": -2.4440622329711914, "logps/chosen": -252.60995483398438, "logps/rejected": -246.35276794433594, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -3.1374573707580566, "rewards/margins": 3.7944257259368896, "rewards/rejected": -6.931883335113525, "step": 2439 }, { "epoch": 0.38, "learning_rate": 1.2357739509310649e-05, "logits/chosen": -2.833514451980591, "logits/rejected": -2.925759792327881, "logps/chosen": -191.21957397460938, "logps/rejected": -229.02725219726562, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": -2.4065945148468018, "rewards/margins": 2.8305516242980957, "rewards/rejected": -5.237146377563477, "step": 2440 }, { "epoch": 0.38, "learning_rate": 1.23570060687795e-05, "logits/chosen": -2.6700499057769775, "logits/rejected": -2.7386419773101807, "logps/chosen": -349.06439208984375, "logps/rejected": -446.8115234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1742477416992188, "rewards/margins": 8.126957893371582, "rewards/rejected": -9.3012056350708, "step": 2441 }, { "epoch": 0.38, "learning_rate": 1.2356272628248353e-05, "logits/chosen": -1.6370785236358643, "logits/rejected": -3.1412363052368164, "logps/chosen": -139.1959686279297, "logps/rejected": -545.8635864257812, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -5.075705051422119, "rewards/margins": 5.9138031005859375, "rewards/rejected": -10.989507675170898, "step": 2442 }, { "epoch": 0.38, "learning_rate": 1.2355539187717204e-05, "logits/chosen": -2.5870256423950195, "logits/rejected": -3.0266945362091064, "logps/chosen": -186.8486785888672, "logps/rejected": -172.93531799316406, "loss": 1.0688, "rewards/accuracies": 0.5, "rewards/chosen": -4.265573501586914, "rewards/margins": 0.9777445793151855, "rewards/rejected": -5.2433180809021, "step": 2443 }, { "epoch": 0.38, "learning_rate": 1.2354805747186056e-05, "logits/chosen": -3.3458433151245117, "logits/rejected": -3.046273946762085, "logps/chosen": -306.0798034667969, "logps/rejected": -260.4588623046875, "loss": 2.7486, "rewards/accuracies": 0.5, "rewards/chosen": -4.462179660797119, "rewards/margins": -0.6930117607116699, "rewards/rejected": -3.769167900085449, "step": 2444 }, { "epoch": 0.38, "learning_rate": 1.2354072306654908e-05, "logits/chosen": -2.7073686122894287, "logits/rejected": -2.940561056137085, "logps/chosen": -228.86195373535156, "logps/rejected": -336.30694580078125, "loss": 4.0703, "rewards/accuracies": 0.5, "rewards/chosen": -6.181782245635986, "rewards/margins": 1.0321450233459473, "rewards/rejected": -7.213927268981934, "step": 2445 }, { "epoch": 0.38, "learning_rate": 1.2353338866123762e-05, "logits/chosen": -2.908045530319214, "logits/rejected": -3.1386642456054688, "logps/chosen": -464.3140563964844, "logps/rejected": -453.87652587890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.6049516201019287, "rewards/margins": 8.845489501953125, "rewards/rejected": -11.450441360473633, "step": 2446 }, { "epoch": 0.38, "learning_rate": 1.2352605425592614e-05, "logits/chosen": -1.6625361442565918, "logits/rejected": -3.156970262527466, "logps/chosen": -221.95664978027344, "logps/rejected": -504.72760009765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.8126540184020996, "rewards/margins": 6.318425178527832, "rewards/rejected": -9.131078720092773, "step": 2447 }, { "epoch": 0.38, "learning_rate": 1.2351871985061466e-05, "logits/chosen": -3.0559024810791016, "logits/rejected": -2.3172309398651123, "logps/chosen": -354.1484375, "logps/rejected": -298.76983642578125, "loss": 1.3536, "rewards/accuracies": 0.5, "rewards/chosen": -3.7488389015197754, "rewards/margins": 4.655474662780762, "rewards/rejected": -8.404314041137695, "step": 2448 }, { "epoch": 0.38, "learning_rate": 1.2351138544530317e-05, "logits/chosen": -2.7177748680114746, "logits/rejected": -2.5767576694488525, "logps/chosen": -99.15867614746094, "logps/rejected": -191.19976806640625, "loss": 1.8093, "rewards/accuracies": 0.5, "rewards/chosen": -4.088193416595459, "rewards/margins": 0.8794790506362915, "rewards/rejected": -4.967672824859619, "step": 2449 }, { "epoch": 0.38, "learning_rate": 1.2350405103999171e-05, "logits/chosen": -1.664303183555603, "logits/rejected": -2.5999083518981934, "logps/chosen": -117.2762451171875, "logps/rejected": -349.97198486328125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.2729809284210205, "rewards/margins": 6.853672504425049, "rewards/rejected": -9.126653671264648, "step": 2450 }, { "epoch": 0.38, "learning_rate": 1.2349671663468023e-05, "logits/chosen": -3.0160367488861084, "logits/rejected": -2.6457467079162598, "logps/chosen": -527.5205078125, "logps/rejected": -522.8493041992188, "loss": 3.4764, "rewards/accuracies": 0.5, "rewards/chosen": -6.906503677368164, "rewards/margins": 0.6032273769378662, "rewards/rejected": -7.509730815887451, "step": 2451 }, { "epoch": 0.38, "learning_rate": 1.2348938222936875e-05, "logits/chosen": -2.245565176010132, "logits/rejected": -2.2430717945098877, "logps/chosen": -178.60130310058594, "logps/rejected": -246.73814392089844, "loss": 2.5172, "rewards/accuracies": 0.5, "rewards/chosen": -5.221537113189697, "rewards/margins": 1.3181829452514648, "rewards/rejected": -6.53972053527832, "step": 2452 }, { "epoch": 0.38, "learning_rate": 1.2348204782405727e-05, "logits/chosen": -2.9321212768554688, "logits/rejected": -2.167574167251587, "logps/chosen": -660.498291015625, "logps/rejected": -366.6029052734375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.63710355758667, "rewards/margins": 6.108944892883301, "rewards/rejected": -9.746047973632812, "step": 2453 }, { "epoch": 0.38, "learning_rate": 1.2347471341874579e-05, "logits/chosen": -1.9070793390274048, "logits/rejected": -2.8934004306793213, "logps/chosen": -150.26881408691406, "logps/rejected": -323.40093994140625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.3776021003723145, "rewards/margins": 4.769529342651367, "rewards/rejected": -7.147131443023682, "step": 2454 }, { "epoch": 0.38, "learning_rate": 1.2346737901343432e-05, "logits/chosen": -1.981328010559082, "logits/rejected": -2.9587624073028564, "logps/chosen": -116.57183837890625, "logps/rejected": -393.3139953613281, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.631056547164917, "rewards/margins": 7.058509349822998, "rewards/rejected": -8.689565658569336, "step": 2455 }, { "epoch": 0.38, "learning_rate": 1.2346004460812284e-05, "logits/chosen": -2.9103171825408936, "logits/rejected": -1.7029714584350586, "logps/chosen": -635.9049072265625, "logps/rejected": -500.630126953125, "loss": 3.2446, "rewards/accuracies": 0.5, "rewards/chosen": -5.322656154632568, "rewards/margins": 0.2524843215942383, "rewards/rejected": -5.575140476226807, "step": 2456 }, { "epoch": 0.38, "learning_rate": 1.2345271020281136e-05, "logits/chosen": -1.8533084392547607, "logits/rejected": -2.9777133464813232, "logps/chosen": -63.02593994140625, "logps/rejected": -263.27728271484375, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -2.0032241344451904, "rewards/margins": 4.311139106750488, "rewards/rejected": -6.3143630027771, "step": 2457 }, { "epoch": 0.38, "learning_rate": 1.2344537579749988e-05, "logits/chosen": -2.975339889526367, "logits/rejected": -3.0035512447357178, "logps/chosen": -429.419189453125, "logps/rejected": -466.39410400390625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.2651572227478027, "rewards/margins": 6.444051742553711, "rewards/rejected": -8.709208488464355, "step": 2458 }, { "epoch": 0.38, "learning_rate": 1.234380413921884e-05, "logits/chosen": -2.398512840270996, "logits/rejected": -2.8420746326446533, "logps/chosen": -123.063232421875, "logps/rejected": -275.7484436035156, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.007094621658325, "rewards/margins": 6.283715724945068, "rewards/rejected": -8.290810585021973, "step": 2459 }, { "epoch": 0.38, "learning_rate": 1.2343070698687691e-05, "logits/chosen": -2.93204665184021, "logits/rejected": -3.102260112762451, "logps/chosen": -224.58718872070312, "logps/rejected": -412.61798095703125, "loss": 1.2811, "rewards/accuracies": 0.5, "rewards/chosen": -3.6158454418182373, "rewards/margins": 1.2297197580337524, "rewards/rejected": -4.845565319061279, "step": 2460 }, { "epoch": 0.38, "learning_rate": 1.2342337258156543e-05, "logits/chosen": -1.6035022735595703, "logits/rejected": -2.783930540084839, "logps/chosen": -68.44530487060547, "logps/rejected": -265.6310729980469, "loss": 0.1111, "rewards/accuracies": 1.0, "rewards/chosen": -2.852843761444092, "rewards/margins": 4.633358478546143, "rewards/rejected": -7.486202239990234, "step": 2461 }, { "epoch": 0.38, "learning_rate": 1.2341603817625395e-05, "logits/chosen": -2.525723934173584, "logits/rejected": -2.915750503540039, "logps/chosen": -96.65933227539062, "logps/rejected": -248.80743408203125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -3.3387985229492188, "rewards/margins": 5.685300827026367, "rewards/rejected": -9.024099349975586, "step": 2462 }, { "epoch": 0.38, "learning_rate": 1.2340870377094247e-05, "logits/chosen": -2.956258535385132, "logits/rejected": -2.791254758834839, "logps/chosen": -330.3538818359375, "logps/rejected": -359.4058837890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.901092529296875, "rewards/margins": 6.733038425445557, "rewards/rejected": -9.634130477905273, "step": 2463 }, { "epoch": 0.38, "learning_rate": 1.23401369365631e-05, "logits/chosen": -2.2861275672912598, "logits/rejected": -2.783863067626953, "logps/chosen": -68.50881958007812, "logps/rejected": -273.0718994140625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.7005527019500732, "rewards/margins": 7.458313941955566, "rewards/rejected": -10.158866882324219, "step": 2464 }, { "epoch": 0.38, "learning_rate": 1.2339403496031953e-05, "logits/chosen": -1.6302660703659058, "logits/rejected": -2.836515188217163, "logps/chosen": -137.21063232421875, "logps/rejected": -336.90472412109375, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -5.011777877807617, "rewards/margins": 3.4056010246276855, "rewards/rejected": -8.417379379272461, "step": 2465 }, { "epoch": 0.38, "learning_rate": 1.2338670055500804e-05, "logits/chosen": -2.6987109184265137, "logits/rejected": -2.7892251014709473, "logps/chosen": -109.8629379272461, "logps/rejected": -205.95135498046875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.0008034706115723, "rewards/margins": 7.578611850738525, "rewards/rejected": -9.579415321350098, "step": 2466 }, { "epoch": 0.38, "learning_rate": 1.2337936614969656e-05, "logits/chosen": -2.721933364868164, "logits/rejected": -2.956557035446167, "logps/chosen": -188.7289581298828, "logps/rejected": -320.2718505859375, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -3.463451862335205, "rewards/margins": 5.579300880432129, "rewards/rejected": -9.042753219604492, "step": 2467 }, { "epoch": 0.38, "learning_rate": 1.2337203174438508e-05, "logits/chosen": -1.9888769388198853, "logits/rejected": -2.699965715408325, "logps/chosen": -330.6829528808594, "logps/rejected": -613.6593017578125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -4.546329021453857, "rewards/margins": 7.727118015289307, "rewards/rejected": -12.273447036743164, "step": 2468 }, { "epoch": 0.38, "learning_rate": 1.233646973390736e-05, "logits/chosen": -2.6140499114990234, "logits/rejected": -2.8307008743286133, "logps/chosen": -113.27082824707031, "logps/rejected": -150.10055541992188, "loss": 1.7487, "rewards/accuracies": 0.5, "rewards/chosen": -3.7036643028259277, "rewards/margins": -0.5129499435424805, "rewards/rejected": -3.1907143592834473, "step": 2469 }, { "epoch": 0.38, "learning_rate": 1.2335736293376212e-05, "logits/chosen": -2.2933623790740967, "logits/rejected": -2.7733139991760254, "logps/chosen": -671.0095825195312, "logps/rejected": -603.7669677734375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.077300548553467, "rewards/margins": 7.334836959838867, "rewards/rejected": -10.412137985229492, "step": 2470 }, { "epoch": 0.38, "learning_rate": 1.2335002852845064e-05, "logits/chosen": -3.3381245136260986, "logits/rejected": -3.1866257190704346, "logps/chosen": -203.52597045898438, "logps/rejected": -147.58258056640625, "loss": 2.8048, "rewards/accuracies": 0.5, "rewards/chosen": -4.25872802734375, "rewards/margins": -1.7589890956878662, "rewards/rejected": -2.4997386932373047, "step": 2471 }, { "epoch": 0.38, "learning_rate": 1.2334269412313916e-05, "logits/chosen": -2.7967259883880615, "logits/rejected": -2.6271114349365234, "logps/chosen": -193.97158813476562, "logps/rejected": -198.25450134277344, "loss": 2.6511, "rewards/accuracies": 0.5, "rewards/chosen": -5.663119316101074, "rewards/margins": -0.04198789596557617, "rewards/rejected": -5.621131420135498, "step": 2472 }, { "epoch": 0.38, "learning_rate": 1.233353597178277e-05, "logits/chosen": -2.928999900817871, "logits/rejected": -2.321657180786133, "logps/chosen": -301.2892150878906, "logps/rejected": -307.04193115234375, "loss": 3.5316, "rewards/accuracies": 0.5, "rewards/chosen": -5.718822002410889, "rewards/margins": 0.8960888385772705, "rewards/rejected": -6.614911079406738, "step": 2473 }, { "epoch": 0.38, "learning_rate": 1.2332802531251621e-05, "logits/chosen": -2.4918160438537598, "logits/rejected": -3.0498926639556885, "logps/chosen": -286.1204833984375, "logps/rejected": -361.04498291015625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.0980403423309326, "rewards/margins": 6.004199981689453, "rewards/rejected": -9.102240562438965, "step": 2474 }, { "epoch": 0.38, "learning_rate": 1.2332069090720473e-05, "logits/chosen": -0.8839108943939209, "logits/rejected": -2.774301528930664, "logps/chosen": -90.28569030761719, "logps/rejected": -598.8626098632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.97841215133667, "rewards/margins": 10.925451278686523, "rewards/rejected": -13.903863906860352, "step": 2475 }, { "epoch": 0.39, "learning_rate": 1.2331335650189325e-05, "logits/chosen": -2.848255157470703, "logits/rejected": -2.928452968597412, "logps/chosen": -172.60838317871094, "logps/rejected": -194.08663940429688, "loss": 2.1373, "rewards/accuracies": 0.5, "rewards/chosen": -4.6662750244140625, "rewards/margins": 0.3147282600402832, "rewards/rejected": -4.9810028076171875, "step": 2476 }, { "epoch": 0.39, "learning_rate": 1.2330602209658177e-05, "logits/chosen": -2.6134324073791504, "logits/rejected": -3.0219340324401855, "logps/chosen": -121.18226623535156, "logps/rejected": -323.5585632324219, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.473595142364502, "rewards/margins": 6.970179557800293, "rewards/rejected": -11.443775177001953, "step": 2477 }, { "epoch": 0.39, "learning_rate": 1.2329868769127029e-05, "logits/chosen": -2.268432378768921, "logits/rejected": -2.9956483840942383, "logps/chosen": -134.0056610107422, "logps/rejected": -322.79010009765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.8184423446655273, "rewards/margins": 7.621972560882568, "rewards/rejected": -10.440415382385254, "step": 2478 }, { "epoch": 0.39, "learning_rate": 1.232913532859588e-05, "logits/chosen": -2.657867193222046, "logits/rejected": -3.073981285095215, "logps/chosen": -114.38602447509766, "logps/rejected": -264.54296875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -2.577263355255127, "rewards/margins": 5.323375225067139, "rewards/rejected": -7.900638580322266, "step": 2479 }, { "epoch": 0.39, "learning_rate": 1.2328401888064732e-05, "logits/chosen": -2.6711225509643555, "logits/rejected": -1.4267771244049072, "logps/chosen": -296.36090087890625, "logps/rejected": -112.63004302978516, "loss": 2.1456, "rewards/accuracies": 0.5, "rewards/chosen": -4.471258163452148, "rewards/margins": -1.705127477645874, "rewards/rejected": -2.7661304473876953, "step": 2480 }, { "epoch": 0.39, "learning_rate": 1.2327668447533584e-05, "logits/chosen": -1.0688459873199463, "logits/rejected": -2.0102880001068115, "logps/chosen": -537.385498046875, "logps/rejected": -763.7685546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.70343017578125, "rewards/margins": 10.841412544250488, "rewards/rejected": -13.544842720031738, "step": 2481 }, { "epoch": 0.39, "learning_rate": 1.2326935007002438e-05, "logits/chosen": -2.4297006130218506, "logits/rejected": -2.9743905067443848, "logps/chosen": -117.09375762939453, "logps/rejected": -304.703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.671083450317383, "rewards/margins": 6.488945960998535, "rewards/rejected": -9.160029411315918, "step": 2482 }, { "epoch": 0.39, "learning_rate": 1.232620156647129e-05, "logits/chosen": -1.8042218685150146, "logits/rejected": -1.8274351358413696, "logps/chosen": -230.28150939941406, "logps/rejected": -263.423583984375, "loss": 3.7958, "rewards/accuracies": 0.5, "rewards/chosen": -5.933268070220947, "rewards/margins": -3.108994722366333, "rewards/rejected": -2.8242735862731934, "step": 2483 }, { "epoch": 0.39, "learning_rate": 1.2325468125940143e-05, "logits/chosen": -2.866535186767578, "logits/rejected": -2.8010573387145996, "logps/chosen": -612.9996948242188, "logps/rejected": -587.334716796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.232818603515625, "rewards/margins": 8.81242561340332, "rewards/rejected": -11.045244216918945, "step": 2484 }, { "epoch": 0.39, "learning_rate": 1.2324734685408995e-05, "logits/chosen": -3.15869140625, "logits/rejected": -2.4664998054504395, "logps/chosen": -519.12109375, "logps/rejected": -336.1854248046875, "loss": 1.6649, "rewards/accuracies": 0.5, "rewards/chosen": -5.038843154907227, "rewards/margins": 4.45525598526001, "rewards/rejected": -9.494098663330078, "step": 2485 }, { "epoch": 0.39, "learning_rate": 1.2324001244877847e-05, "logits/chosen": -2.8159313201904297, "logits/rejected": -2.8271780014038086, "logps/chosen": -421.31103515625, "logps/rejected": -220.51724243164062, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -3.2929489612579346, "rewards/margins": 3.9128074645996094, "rewards/rejected": -7.205756664276123, "step": 2486 }, { "epoch": 0.39, "learning_rate": 1.2323267804346699e-05, "logits/chosen": -2.961486339569092, "logits/rejected": -2.778961181640625, "logps/chosen": -191.75079345703125, "logps/rejected": -263.337890625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.1869325637817383, "rewards/margins": 5.64024543762207, "rewards/rejected": -8.827178001403809, "step": 2487 }, { "epoch": 0.39, "learning_rate": 1.2322534363815551e-05, "logits/chosen": -2.7218403816223145, "logits/rejected": -3.1198153495788574, "logps/chosen": -118.85127258300781, "logps/rejected": -268.0555725097656, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.7256687879562378, "rewards/margins": 6.092597007751465, "rewards/rejected": -7.818265914916992, "step": 2488 }, { "epoch": 0.39, "learning_rate": 1.2321800923284403e-05, "logits/chosen": -2.7836756706237793, "logits/rejected": -2.947171211242676, "logps/chosen": -166.4822998046875, "logps/rejected": -238.71456909179688, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -3.0876224040985107, "rewards/margins": 4.445363521575928, "rewards/rejected": -7.532985687255859, "step": 2489 }, { "epoch": 0.39, "learning_rate": 1.2321067482753255e-05, "logits/chosen": -2.342003583908081, "logits/rejected": -2.854358673095703, "logps/chosen": -120.87186431884766, "logps/rejected": -197.16397094726562, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.6601905822753906, "rewards/margins": 6.227658271789551, "rewards/rejected": -7.887848854064941, "step": 2490 }, { "epoch": 0.39, "learning_rate": 1.2320334042222108e-05, "logits/chosen": -2.133004903793335, "logits/rejected": -3.1732537746429443, "logps/chosen": -101.83462524414062, "logps/rejected": -504.85406494140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5958715677261353, "rewards/margins": 8.334383010864258, "rewards/rejected": -9.930253982543945, "step": 2491 }, { "epoch": 0.39, "learning_rate": 1.231960060169096e-05, "logits/chosen": -2.8251729011535645, "logits/rejected": -3.19353985786438, "logps/chosen": -130.92698669433594, "logps/rejected": -258.8355712890625, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": -2.564291477203369, "rewards/margins": 3.9684629440307617, "rewards/rejected": -6.532754421234131, "step": 2492 }, { "epoch": 0.39, "learning_rate": 1.2318867161159812e-05, "logits/chosen": -2.2162885665893555, "logits/rejected": -3.017136812210083, "logps/chosen": -64.1697769165039, "logps/rejected": -338.73370361328125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.0709757804870605, "rewards/margins": 6.361135005950928, "rewards/rejected": -9.432110786437988, "step": 2493 }, { "epoch": 0.39, "learning_rate": 1.2318133720628664e-05, "logits/chosen": -1.580634593963623, "logits/rejected": -2.81187105178833, "logps/chosen": -101.69033813476562, "logps/rejected": -318.539794921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.5988879203796387, "rewards/margins": 7.1132097244262695, "rewards/rejected": -10.71209716796875, "step": 2494 }, { "epoch": 0.39, "learning_rate": 1.2317400280097516e-05, "logits/chosen": -2.40547513961792, "logits/rejected": -2.7210562229156494, "logps/chosen": -386.61492919921875, "logps/rejected": -540.720458984375, "loss": 4.1838, "rewards/accuracies": 0.5, "rewards/chosen": -7.560034275054932, "rewards/margins": -2.3568968772888184, "rewards/rejected": -5.203137397766113, "step": 2495 }, { "epoch": 0.39, "learning_rate": 1.2316666839566368e-05, "logits/chosen": -2.719148635864258, "logits/rejected": -2.206430196762085, "logps/chosen": -228.30267333984375, "logps/rejected": -192.3574676513672, "loss": 2.7179, "rewards/accuracies": 0.5, "rewards/chosen": -4.108963966369629, "rewards/margins": 0.7181463241577148, "rewards/rejected": -4.8271098136901855, "step": 2496 }, { "epoch": 0.39, "learning_rate": 1.231593339903522e-05, "logits/chosen": -3.065678358078003, "logits/rejected": -3.0782737731933594, "logps/chosen": -378.5195007324219, "logps/rejected": -376.99334716796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.331671237945557, "rewards/margins": 7.707355499267578, "rewards/rejected": -14.039026260375977, "step": 2497 }, { "epoch": 0.39, "learning_rate": 1.2315199958504071e-05, "logits/chosen": -2.714812994003296, "logits/rejected": -3.0674211978912354, "logps/chosen": -122.79389953613281, "logps/rejected": -222.8602294921875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.0658938884735107, "rewards/margins": 4.582029342651367, "rewards/rejected": -6.647923469543457, "step": 2498 }, { "epoch": 0.39, "learning_rate": 1.2314466517972925e-05, "logits/chosen": -2.930973768234253, "logits/rejected": -2.9506313800811768, "logps/chosen": -126.17877960205078, "logps/rejected": -303.40118408203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.3991031646728516, "rewards/margins": 8.109467506408691, "rewards/rejected": -9.508569717407227, "step": 2499 }, { "epoch": 0.39, "learning_rate": 1.2313733077441777e-05, "logits/chosen": -1.662233591079712, "logits/rejected": -2.995274782180786, "logps/chosen": -112.79798889160156, "logps/rejected": -499.4424133300781, "loss": 0.0663, "rewards/accuracies": 1.0, "rewards/chosen": -2.319023847579956, "rewards/margins": 2.9860873222351074, "rewards/rejected": -5.305110931396484, "step": 2500 }, { "epoch": 0.39, "learning_rate": 1.2312999636910629e-05, "logits/chosen": -2.826451063156128, "logits/rejected": -2.5832014083862305, "logps/chosen": -455.74114990234375, "logps/rejected": -471.4956970214844, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.239368438720703, "rewards/margins": 6.822915077209473, "rewards/rejected": -10.062283515930176, "step": 2501 }, { "epoch": 0.39, "learning_rate": 1.231226619637948e-05, "logits/chosen": -2.6409103870391846, "logits/rejected": -2.4324498176574707, "logps/chosen": -194.34536743164062, "logps/rejected": -225.41026306152344, "loss": 2.3477, "rewards/accuracies": 0.5, "rewards/chosen": -4.575217247009277, "rewards/margins": 1.315772533416748, "rewards/rejected": -5.890989780426025, "step": 2502 }, { "epoch": 0.39, "learning_rate": 1.2311532755848332e-05, "logits/chosen": -2.570068836212158, "logits/rejected": -3.1057982444763184, "logps/chosen": -171.963134765625, "logps/rejected": -337.6609802246094, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -2.364356517791748, "rewards/margins": 5.204090595245361, "rewards/rejected": -7.568447113037109, "step": 2503 }, { "epoch": 0.39, "learning_rate": 1.2310799315317184e-05, "logits/chosen": -0.9633077383041382, "logits/rejected": -3.16249680519104, "logps/chosen": -112.25360107421875, "logps/rejected": -444.34942626953125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.051896572113037, "rewards/margins": 5.690508842468262, "rewards/rejected": -7.742405891418457, "step": 2504 }, { "epoch": 0.39, "learning_rate": 1.2310065874786036e-05, "logits/chosen": -2.8475141525268555, "logits/rejected": -0.9842345118522644, "logps/chosen": -634.8826904296875, "logps/rejected": -263.4400634765625, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -3.082047462463379, "rewards/margins": 4.632701396942139, "rewards/rejected": -7.714749336242676, "step": 2505 }, { "epoch": 0.39, "learning_rate": 1.2309332434254888e-05, "logits/chosen": -2.862919569015503, "logits/rejected": -2.7099084854125977, "logps/chosen": -173.34207153320312, "logps/rejected": -123.08574676513672, "loss": 0.3067, "rewards/accuracies": 1.0, "rewards/chosen": -3.60799503326416, "rewards/margins": 2.7955641746520996, "rewards/rejected": -6.40355920791626, "step": 2506 }, { "epoch": 0.39, "learning_rate": 1.230859899372374e-05, "logits/chosen": -3.0060770511627197, "logits/rejected": -2.6463751792907715, "logps/chosen": -134.66522216796875, "logps/rejected": -192.7664337158203, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -2.4548749923706055, "rewards/margins": 4.181184768676758, "rewards/rejected": -6.636059761047363, "step": 2507 }, { "epoch": 0.39, "learning_rate": 1.2307865553192593e-05, "logits/chosen": -2.744189977645874, "logits/rejected": -2.7496776580810547, "logps/chosen": -127.27434539794922, "logps/rejected": -213.84564208984375, "loss": 0.3279, "rewards/accuracies": 1.0, "rewards/chosen": -3.9245986938476562, "rewards/margins": 2.805772304534912, "rewards/rejected": -6.730371475219727, "step": 2508 }, { "epoch": 0.39, "learning_rate": 1.2307132112661445e-05, "logits/chosen": -2.87654447555542, "logits/rejected": -1.2032605409622192, "logps/chosen": -589.9638671875, "logps/rejected": -284.6369934082031, "loss": 1.2341, "rewards/accuracies": 0.5, "rewards/chosen": -5.310218811035156, "rewards/margins": 2.1078999042510986, "rewards/rejected": -7.418118476867676, "step": 2509 }, { "epoch": 0.39, "learning_rate": 1.2306398672130297e-05, "logits/chosen": -3.3361294269561768, "logits/rejected": -2.9846925735473633, "logps/chosen": -453.5613708496094, "logps/rejected": -241.38543701171875, "loss": 6.8315, "rewards/accuracies": 0.0, "rewards/chosen": -8.553812980651855, "rewards/margins": -6.830011367797852, "rewards/rejected": -1.7238022089004517, "step": 2510 }, { "epoch": 0.39, "learning_rate": 1.2305665231599149e-05, "logits/chosen": -3.1304233074188232, "logits/rejected": -2.6334474086761475, "logps/chosen": -274.3729248046875, "logps/rejected": -179.80787658691406, "loss": 0.9888, "rewards/accuracies": 0.5, "rewards/chosen": -4.914769172668457, "rewards/margins": 3.5862951278686523, "rewards/rejected": -8.50106430053711, "step": 2511 }, { "epoch": 0.39, "learning_rate": 1.2304931791068001e-05, "logits/chosen": -2.148951530456543, "logits/rejected": -2.8755180835723877, "logps/chosen": -231.5941162109375, "logps/rejected": -411.6230773925781, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.80961012840271, "rewards/margins": 7.5638108253479, "rewards/rejected": -9.373420715332031, "step": 2512 }, { "epoch": 0.39, "learning_rate": 1.2304198350536853e-05, "logits/chosen": -2.6291470527648926, "logits/rejected": -2.450406789779663, "logps/chosen": -150.1585235595703, "logps/rejected": -285.4278869628906, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -2.8287882804870605, "rewards/margins": 4.3417582511901855, "rewards/rejected": -7.170546531677246, "step": 2513 }, { "epoch": 0.39, "learning_rate": 1.2303464910005705e-05, "logits/chosen": -1.6814290285110474, "logits/rejected": -3.20220685005188, "logps/chosen": -64.70071411132812, "logps/rejected": -208.52651977539062, "loss": 0.0465, "rewards/accuracies": 1.0, "rewards/chosen": -4.483493804931641, "rewards/margins": 3.346996784210205, "rewards/rejected": -7.830490589141846, "step": 2514 }, { "epoch": 0.39, "learning_rate": 1.2302731469474557e-05, "logits/chosen": -2.6834261417388916, "logits/rejected": -2.733769178390503, "logps/chosen": -161.5296173095703, "logps/rejected": -174.31529235839844, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/chosen": -3.6296586990356445, "rewards/margins": 2.9427199363708496, "rewards/rejected": -6.572378635406494, "step": 2515 }, { "epoch": 0.39, "learning_rate": 1.230199802894341e-05, "logits/chosen": -1.6965587139129639, "logits/rejected": -2.581031560897827, "logps/chosen": -380.8690185546875, "logps/rejected": -448.1921691894531, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -3.719618320465088, "rewards/margins": 4.940587043762207, "rewards/rejected": -8.660204887390137, "step": 2516 }, { "epoch": 0.39, "learning_rate": 1.2301264588412262e-05, "logits/chosen": -2.3079609870910645, "logits/rejected": -3.0076212882995605, "logps/chosen": -266.977783203125, "logps/rejected": -396.2750244140625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.314688205718994, "rewards/margins": 5.798027038574219, "rewards/rejected": -8.112714767456055, "step": 2517 }, { "epoch": 0.39, "learning_rate": 1.2300531147881116e-05, "logits/chosen": -0.9893330931663513, "logits/rejected": -2.6743085384368896, "logps/chosen": -71.6707534790039, "logps/rejected": -203.58535766601562, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -4.830260753631592, "rewards/margins": 3.540501117706299, "rewards/rejected": -8.37076187133789, "step": 2518 }, { "epoch": 0.39, "learning_rate": 1.2299797707349968e-05, "logits/chosen": -2.86418080329895, "logits/rejected": -1.5995457172393799, "logps/chosen": -236.32362365722656, "logps/rejected": -176.77964782714844, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -3.7960128784179688, "rewards/margins": 3.852614402770996, "rewards/rejected": -7.648627281188965, "step": 2519 }, { "epoch": 0.39, "learning_rate": 1.229906426681882e-05, "logits/chosen": -2.719935178756714, "logits/rejected": -3.0849406719207764, "logps/chosen": -301.578125, "logps/rejected": -425.61065673828125, "loss": 4.8448, "rewards/accuracies": 0.5, "rewards/chosen": -7.03176212310791, "rewards/margins": -2.060243844985962, "rewards/rejected": -4.971518039703369, "step": 2520 }, { "epoch": 0.39, "learning_rate": 1.2298330826287671e-05, "logits/chosen": -2.2439112663269043, "logits/rejected": -3.088697671890259, "logps/chosen": -398.21484375, "logps/rejected": -383.45233154296875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.365687608718872, "rewards/margins": 4.989490032196045, "rewards/rejected": -7.355177879333496, "step": 2521 }, { "epoch": 0.39, "learning_rate": 1.2297597385756523e-05, "logits/chosen": -1.7614314556121826, "logits/rejected": -2.9097070693969727, "logps/chosen": -141.66864013671875, "logps/rejected": -266.63836669921875, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": -4.430296897888184, "rewards/margins": 2.755065679550171, "rewards/rejected": -7.185362339019775, "step": 2522 }, { "epoch": 0.39, "learning_rate": 1.2296863945225375e-05, "logits/chosen": -3.011647939682007, "logits/rejected": -2.3382396697998047, "logps/chosen": -334.1783447265625, "logps/rejected": -236.08334350585938, "loss": 1.087, "rewards/accuracies": 0.5, "rewards/chosen": -6.823494911193848, "rewards/margins": 2.2336068153381348, "rewards/rejected": -9.05710220336914, "step": 2523 }, { "epoch": 0.39, "learning_rate": 1.2296130504694227e-05, "logits/chosen": -2.310716152191162, "logits/rejected": -2.9499266147613525, "logps/chosen": -180.2716064453125, "logps/rejected": -310.24560546875, "loss": 0.1738, "rewards/accuracies": 1.0, "rewards/chosen": -3.3443188667297363, "rewards/margins": 6.0039496421813965, "rewards/rejected": -9.348268508911133, "step": 2524 }, { "epoch": 0.39, "learning_rate": 1.2295397064163079e-05, "logits/chosen": -3.0919809341430664, "logits/rejected": -2.7972218990325928, "logps/chosen": -369.39129638671875, "logps/rejected": -380.9642333984375, "loss": 0.1015, "rewards/accuracies": 1.0, "rewards/chosen": -2.248574733734131, "rewards/margins": 3.7232775688171387, "rewards/rejected": -5.9718523025512695, "step": 2525 }, { "epoch": 0.39, "learning_rate": 1.2294663623631932e-05, "logits/chosen": -2.944255828857422, "logits/rejected": -3.2488558292388916, "logps/chosen": -143.86668395996094, "logps/rejected": -255.52560424804688, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.606104612350464, "rewards/margins": 6.347796440124512, "rewards/rejected": -8.953901290893555, "step": 2526 }, { "epoch": 0.39, "learning_rate": 1.2293930183100784e-05, "logits/chosen": -3.03385591506958, "logits/rejected": -2.4076433181762695, "logps/chosen": -742.8206787109375, "logps/rejected": -540.541015625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.378518581390381, "rewards/margins": 7.2513580322265625, "rewards/rejected": -10.629877090454102, "step": 2527 }, { "epoch": 0.39, "learning_rate": 1.2293196742569636e-05, "logits/chosen": -2.919046401977539, "logits/rejected": -2.0836381912231445, "logps/chosen": -228.1074981689453, "logps/rejected": -122.93560028076172, "loss": 0.0789, "rewards/accuracies": 1.0, "rewards/chosen": -4.010442733764648, "rewards/margins": 2.934004545211792, "rewards/rejected": -6.9444475173950195, "step": 2528 }, { "epoch": 0.39, "learning_rate": 1.2292463302038488e-05, "logits/chosen": -2.496227264404297, "logits/rejected": -2.665497303009033, "logps/chosen": -153.74632263183594, "logps/rejected": -181.36257934570312, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -3.966567039489746, "rewards/margins": 5.595586776733398, "rewards/rejected": -9.562154769897461, "step": 2529 }, { "epoch": 0.39, "learning_rate": 1.229172986150734e-05, "logits/chosen": -2.191446542739868, "logits/rejected": -2.6911213397979736, "logps/chosen": -233.64793395996094, "logps/rejected": -358.10040283203125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -3.0712010860443115, "rewards/margins": 5.174127578735352, "rewards/rejected": -8.245328903198242, "step": 2530 }, { "epoch": 0.39, "learning_rate": 1.2290996420976192e-05, "logits/chosen": -2.4162466526031494, "logits/rejected": -3.01851487159729, "logps/chosen": -117.19570922851562, "logps/rejected": -367.19384765625, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -2.9727699756622314, "rewards/margins": 3.820402145385742, "rewards/rejected": -6.7931718826293945, "step": 2531 }, { "epoch": 0.39, "learning_rate": 1.2290262980445044e-05, "logits/chosen": -2.6519932746887207, "logits/rejected": -3.0416553020477295, "logps/chosen": -87.68997192382812, "logps/rejected": -180.83050537109375, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -3.0948195457458496, "rewards/margins": 5.095659255981445, "rewards/rejected": -8.190479278564453, "step": 2532 }, { "epoch": 0.39, "learning_rate": 1.2289529539913896e-05, "logits/chosen": -1.8500674962997437, "logits/rejected": -2.750800371170044, "logps/chosen": -195.2843780517578, "logps/rejected": -335.6427307128906, "loss": 2.5252, "rewards/accuracies": 0.5, "rewards/chosen": -6.434111595153809, "rewards/margins": -0.44304823875427246, "rewards/rejected": -5.991063594818115, "step": 2533 }, { "epoch": 0.39, "learning_rate": 1.2288796099382747e-05, "logits/chosen": -1.7543232440948486, "logits/rejected": -3.0058043003082275, "logps/chosen": -111.71565246582031, "logps/rejected": -428.31256103515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.195922374725342, "rewards/margins": 9.161840438842773, "rewards/rejected": -13.357762336730957, "step": 2534 }, { "epoch": 0.39, "learning_rate": 1.2288062658851601e-05, "logits/chosen": -1.7364299297332764, "logits/rejected": -2.8446712493896484, "logps/chosen": -79.841552734375, "logps/rejected": -286.99365234375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -4.891841888427734, "rewards/margins": 5.450092315673828, "rewards/rejected": -10.341934204101562, "step": 2535 }, { "epoch": 0.39, "learning_rate": 1.2287329218320453e-05, "logits/chosen": -2.469283103942871, "logits/rejected": -2.9196975231170654, "logps/chosen": -104.11338806152344, "logps/rejected": -235.3345947265625, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -4.1073899269104, "rewards/margins": 4.064244270324707, "rewards/rejected": -8.171634674072266, "step": 2536 }, { "epoch": 0.39, "learning_rate": 1.2286595777789305e-05, "logits/chosen": -2.746107339859009, "logits/rejected": -2.4764959812164307, "logps/chosen": -220.94908142089844, "logps/rejected": -305.14898681640625, "loss": 0.1229, "rewards/accuracies": 1.0, "rewards/chosen": -4.029735565185547, "rewards/margins": 3.777521848678589, "rewards/rejected": -7.807257175445557, "step": 2537 }, { "epoch": 0.39, "learning_rate": 1.2285862337258157e-05, "logits/chosen": -1.7421003580093384, "logits/rejected": -2.670318126678467, "logps/chosen": -224.07012939453125, "logps/rejected": -431.1833801269531, "loss": 0.082, "rewards/accuracies": 1.0, "rewards/chosen": -5.660612106323242, "rewards/margins": 3.426079511642456, "rewards/rejected": -9.086690902709961, "step": 2538 }, { "epoch": 0.39, "learning_rate": 1.2285128896727009e-05, "logits/chosen": -1.6688060760498047, "logits/rejected": -2.811906576156616, "logps/chosen": -244.29754638671875, "logps/rejected": -231.1650390625, "loss": 1.4266, "rewards/accuracies": 0.5, "rewards/chosen": -6.636660575866699, "rewards/margins": 1.989898681640625, "rewards/rejected": -8.626559257507324, "step": 2539 }, { "epoch": 0.4, "learning_rate": 1.228439545619586e-05, "logits/chosen": -2.990051031112671, "logits/rejected": -2.6632866859436035, "logps/chosen": -143.82533264160156, "logps/rejected": -188.56546020507812, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -4.7247633934021, "rewards/margins": 4.458059787750244, "rewards/rejected": -9.182823181152344, "step": 2540 }, { "epoch": 0.4, "learning_rate": 1.2283662015664712e-05, "logits/chosen": -1.4257971048355103, "logits/rejected": -2.5358309745788574, "logps/chosen": -175.92486572265625, "logps/rejected": -358.94287109375, "loss": 0.3368, "rewards/accuracies": 1.0, "rewards/chosen": -3.580742359161377, "rewards/margins": 4.10258150100708, "rewards/rejected": -7.683323860168457, "step": 2541 }, { "epoch": 0.4, "learning_rate": 1.2282928575133564e-05, "logits/chosen": -1.873030424118042, "logits/rejected": -2.819180727005005, "logps/chosen": -188.91714477539062, "logps/rejected": -331.6988525390625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -3.935807228088379, "rewards/margins": 4.536797523498535, "rewards/rejected": -8.472604751586914, "step": 2542 }, { "epoch": 0.4, "learning_rate": 1.2282195134602416e-05, "logits/chosen": -3.1826577186584473, "logits/rejected": -2.953482151031494, "logps/chosen": -275.19805908203125, "logps/rejected": -262.7688293457031, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": -3.0886306762695312, "rewards/margins": 5.074667930603027, "rewards/rejected": -8.163298606872559, "step": 2543 }, { "epoch": 0.4, "learning_rate": 1.228146169407127e-05, "logits/chosen": -2.634888172149658, "logits/rejected": -3.031909942626953, "logps/chosen": -62.342567443847656, "logps/rejected": -220.97634887695312, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -4.415035247802734, "rewards/margins": 6.7098236083984375, "rewards/rejected": -11.124857902526855, "step": 2544 }, { "epoch": 0.4, "learning_rate": 1.2280728253540121e-05, "logits/chosen": -2.604090452194214, "logits/rejected": -2.316469669342041, "logps/chosen": -176.28733825683594, "logps/rejected": -153.55712890625, "loss": 3.8146, "rewards/accuracies": 0.0, "rewards/chosen": -8.10165023803711, "rewards/margins": -3.7531468868255615, "rewards/rejected": -4.348503589630127, "step": 2545 }, { "epoch": 0.4, "learning_rate": 1.2279994813008973e-05, "logits/chosen": -3.013399600982666, "logits/rejected": -2.2828683853149414, "logps/chosen": -250.8392333984375, "logps/rejected": -173.31622314453125, "loss": 4.003, "rewards/accuracies": 0.5, "rewards/chosen": -7.617398262023926, "rewards/margins": 0.04878807067871094, "rewards/rejected": -7.666186332702637, "step": 2546 }, { "epoch": 0.4, "learning_rate": 1.2279261372477825e-05, "logits/chosen": -2.5098750591278076, "logits/rejected": -3.12481689453125, "logps/chosen": -219.14920043945312, "logps/rejected": -350.5589904785156, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.8916192054748535, "rewards/margins": 6.440214157104492, "rewards/rejected": -9.331832885742188, "step": 2547 }, { "epoch": 0.4, "learning_rate": 1.2278527931946677e-05, "logits/chosen": -1.9738129377365112, "logits/rejected": -3.2418551445007324, "logps/chosen": -87.90460205078125, "logps/rejected": -514.6546630859375, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -3.936232566833496, "rewards/margins": 5.170948505401611, "rewards/rejected": -9.107181549072266, "step": 2548 }, { "epoch": 0.4, "learning_rate": 1.2277794491415529e-05, "logits/chosen": -2.243731737136841, "logits/rejected": -2.9233059883117676, "logps/chosen": -187.6360321044922, "logps/rejected": -352.4969177246094, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.376143455505371, "rewards/margins": 7.476022720336914, "rewards/rejected": -9.852166175842285, "step": 2549 }, { "epoch": 0.4, "learning_rate": 1.2277061050884383e-05, "logits/chosen": -2.934695243835449, "logits/rejected": -2.9706320762634277, "logps/chosen": -213.23577880859375, "logps/rejected": -366.32891845703125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.682025194168091, "rewards/margins": 6.480469703674316, "rewards/rejected": -10.162494659423828, "step": 2550 }, { "epoch": 0.4, "learning_rate": 1.2276327610353234e-05, "logits/chosen": -1.5421489477157593, "logits/rejected": -2.7682743072509766, "logps/chosen": -142.4834747314453, "logps/rejected": -388.84637451171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.145125389099121, "rewards/margins": 8.192642211914062, "rewards/rejected": -12.337766647338867, "step": 2551 }, { "epoch": 0.4, "learning_rate": 1.2275594169822086e-05, "logits/chosen": -2.5833356380462646, "logits/rejected": -3.036637783050537, "logps/chosen": -249.97076416015625, "logps/rejected": -378.6918640136719, "loss": 4.9096, "rewards/accuracies": 0.5, "rewards/chosen": -9.375995635986328, "rewards/margins": -3.2937002182006836, "rewards/rejected": -6.082295894622803, "step": 2552 }, { "epoch": 0.4, "learning_rate": 1.227486072929094e-05, "logits/chosen": -2.8307783603668213, "logits/rejected": -1.5050233602523804, "logps/chosen": -623.8966674804688, "logps/rejected": -317.4357604980469, "loss": 0.1156, "rewards/accuracies": 1.0, "rewards/chosen": -4.5459794998168945, "rewards/margins": 3.6250596046447754, "rewards/rejected": -8.171039581298828, "step": 2553 }, { "epoch": 0.4, "learning_rate": 1.2274127288759792e-05, "logits/chosen": -2.8992555141448975, "logits/rejected": -3.0131661891937256, "logps/chosen": -357.1246337890625, "logps/rejected": -472.4712219238281, "loss": 3.2618, "rewards/accuracies": 0.5, "rewards/chosen": -8.53157901763916, "rewards/margins": -2.5542867183685303, "rewards/rejected": -5.977292060852051, "step": 2554 }, { "epoch": 0.4, "learning_rate": 1.2273393848228644e-05, "logits/chosen": -2.7625350952148438, "logits/rejected": -3.154881715774536, "logps/chosen": -151.34597778320312, "logps/rejected": -239.81175231933594, "loss": 2.0957, "rewards/accuracies": 0.5, "rewards/chosen": -5.090324878692627, "rewards/margins": 0.3483595848083496, "rewards/rejected": -5.438683986663818, "step": 2555 }, { "epoch": 0.4, "learning_rate": 1.2272660407697496e-05, "logits/chosen": -2.571474075317383, "logits/rejected": -2.994161367416382, "logps/chosen": -156.09010314941406, "logps/rejected": -323.3167724609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.098994731903076, "rewards/margins": 7.276273250579834, "rewards/rejected": -11.37526798248291, "step": 2556 }, { "epoch": 0.4, "learning_rate": 1.2271926967166347e-05, "logits/chosen": -2.7219808101654053, "logits/rejected": -1.781612753868103, "logps/chosen": -502.54638671875, "logps/rejected": -1128.75146484375, "loss": 5.2799, "rewards/accuracies": 0.5, "rewards/chosen": -10.570713996887207, "rewards/margins": -2.5704360008239746, "rewards/rejected": -8.00027847290039, "step": 2557 }, { "epoch": 0.4, "learning_rate": 1.22711935266352e-05, "logits/chosen": -2.1199634075164795, "logits/rejected": -3.062506914138794, "logps/chosen": -104.22080993652344, "logps/rejected": -437.6639099121094, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.9349184036254883, "rewards/margins": 8.452099800109863, "rewards/rejected": -11.387018203735352, "step": 2558 }, { "epoch": 0.4, "learning_rate": 1.2270460086104051e-05, "logits/chosen": -1.6233326196670532, "logits/rejected": -2.614163875579834, "logps/chosen": -162.254150390625, "logps/rejected": -392.1000061035156, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -5.315866470336914, "rewards/margins": 7.0280046463012695, "rewards/rejected": -12.343871116638184, "step": 2559 }, { "epoch": 0.4, "learning_rate": 1.2269726645572903e-05, "logits/chosen": -2.453721761703491, "logits/rejected": -2.7589380741119385, "logps/chosen": -268.4395751953125, "logps/rejected": -215.4581298828125, "loss": 3.7542, "rewards/accuracies": 0.5, "rewards/chosen": -7.60986328125, "rewards/margins": -0.9902396202087402, "rewards/rejected": -6.61962366104126, "step": 2560 }, { "epoch": 0.4, "learning_rate": 1.2268993205041755e-05, "logits/chosen": -3.11663818359375, "logits/rejected": -2.8294479846954346, "logps/chosen": -358.9443359375, "logps/rejected": -349.1342468261719, "loss": 0.2233, "rewards/accuracies": 1.0, "rewards/chosen": -5.795490264892578, "rewards/margins": 1.50223708152771, "rewards/rejected": -7.297727584838867, "step": 2561 }, { "epoch": 0.4, "learning_rate": 1.2268259764510608e-05, "logits/chosen": -2.940225124359131, "logits/rejected": -2.6312198638916016, "logps/chosen": -195.45355224609375, "logps/rejected": -242.750732421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.3316216468811035, "rewards/margins": 6.986569404602051, "rewards/rejected": -9.318190574645996, "step": 2562 }, { "epoch": 0.4, "learning_rate": 1.226752632397946e-05, "logits/chosen": -2.37821626663208, "logits/rejected": -3.067432403564453, "logps/chosen": -180.5700225830078, "logps/rejected": -468.6003723144531, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": -5.70640754699707, "rewards/margins": 7.390695095062256, "rewards/rejected": -13.097103118896484, "step": 2563 }, { "epoch": 0.4, "learning_rate": 1.2266792883448312e-05, "logits/chosen": -2.327723503112793, "logits/rejected": -2.8253345489501953, "logps/chosen": -72.23439025878906, "logps/rejected": -216.82864379882812, "loss": 0.265, "rewards/accuracies": 1.0, "rewards/chosen": -3.0273094177246094, "rewards/margins": 2.2539353370666504, "rewards/rejected": -5.28124475479126, "step": 2564 }, { "epoch": 0.4, "learning_rate": 1.2266059442917164e-05, "logits/chosen": -2.4821319580078125, "logits/rejected": -2.7666547298431396, "logps/chosen": -146.88543701171875, "logps/rejected": -427.0440979003906, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.5191755294799805, "rewards/margins": 10.882362365722656, "rewards/rejected": -14.401538848876953, "step": 2565 }, { "epoch": 0.4, "learning_rate": 1.2265326002386016e-05, "logits/chosen": -2.234299421310425, "logits/rejected": -2.9552597999572754, "logps/chosen": -191.39697265625, "logps/rejected": -315.26641845703125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.7573418617248535, "rewards/margins": 6.146491527557373, "rewards/rejected": -8.903833389282227, "step": 2566 }, { "epoch": 0.4, "learning_rate": 1.2264592561854868e-05, "logits/chosen": -1.9383108615875244, "logits/rejected": -2.808433771133423, "logps/chosen": -264.16693115234375, "logps/rejected": -252.01226806640625, "loss": 0.119, "rewards/accuracies": 1.0, "rewards/chosen": -3.806126117706299, "rewards/margins": 2.8871965408325195, "rewards/rejected": -6.693322658538818, "step": 2567 }, { "epoch": 0.4, "learning_rate": 1.226385912132372e-05, "logits/chosen": -2.058938980102539, "logits/rejected": -2.8459842205047607, "logps/chosen": -178.5052032470703, "logps/rejected": -616.615966796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.3124165534973145, "rewards/margins": 10.570682525634766, "rewards/rejected": -16.883098602294922, "step": 2568 }, { "epoch": 0.4, "learning_rate": 1.2263125680792572e-05, "logits/chosen": -2.1853175163269043, "logits/rejected": -2.5861542224884033, "logps/chosen": -215.55197143554688, "logps/rejected": -220.4384307861328, "loss": 2.1699, "rewards/accuracies": 0.5, "rewards/chosen": -6.557437419891357, "rewards/margins": 2.011234998703003, "rewards/rejected": -8.568672180175781, "step": 2569 }, { "epoch": 0.4, "learning_rate": 1.2262392240261424e-05, "logits/chosen": -2.576265573501587, "logits/rejected": -2.981877565383911, "logps/chosen": -84.28793334960938, "logps/rejected": -366.0198059082031, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.787907361984253, "rewards/margins": 9.095321655273438, "rewards/rejected": -11.883228302001953, "step": 2570 }, { "epoch": 0.4, "learning_rate": 1.2261658799730277e-05, "logits/chosen": -2.091088056564331, "logits/rejected": -2.651423931121826, "logps/chosen": -251.76841735839844, "logps/rejected": -275.0350341796875, "loss": 4.2045, "rewards/accuracies": 0.5, "rewards/chosen": -7.27788782119751, "rewards/margins": -1.5651156902313232, "rewards/rejected": -5.712772369384766, "step": 2571 }, { "epoch": 0.4, "learning_rate": 1.2260925359199129e-05, "logits/chosen": -2.691790819168091, "logits/rejected": -2.9523167610168457, "logps/chosen": -247.96734619140625, "logps/rejected": -232.60498046875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.1822638511657715, "rewards/margins": 5.312018394470215, "rewards/rejected": -8.494281768798828, "step": 2572 }, { "epoch": 0.4, "learning_rate": 1.226019191866798e-05, "logits/chosen": -2.5266504287719727, "logits/rejected": -2.8871235847473145, "logps/chosen": -250.12274169921875, "logps/rejected": -411.9954833984375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.80625319480896, "rewards/margins": 7.320220947265625, "rewards/rejected": -10.126474380493164, "step": 2573 }, { "epoch": 0.4, "learning_rate": 1.2259458478136833e-05, "logits/chosen": -2.7652177810668945, "logits/rejected": -2.945605516433716, "logps/chosen": -60.45610046386719, "logps/rejected": -211.90626525878906, "loss": 0.2491, "rewards/accuracies": 1.0, "rewards/chosen": -3.5816195011138916, "rewards/margins": 2.27404522895813, "rewards/rejected": -5.8556647300720215, "step": 2574 }, { "epoch": 0.4, "learning_rate": 1.2258725037605685e-05, "logits/chosen": -2.8157849311828613, "logits/rejected": -2.145456314086914, "logps/chosen": -317.0751647949219, "logps/rejected": -320.7189025878906, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.688145637512207, "rewards/margins": 7.926024436950684, "rewards/rejected": -11.61417007446289, "step": 2575 }, { "epoch": 0.4, "learning_rate": 1.2257991597074536e-05, "logits/chosen": -2.885991096496582, "logits/rejected": -3.2952558994293213, "logps/chosen": -380.7322692871094, "logps/rejected": -477.1689147949219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4817490577697754, "rewards/margins": 8.385295867919922, "rewards/rejected": -10.867044448852539, "step": 2576 }, { "epoch": 0.4, "learning_rate": 1.2257258156543388e-05, "logits/chosen": -1.7878165245056152, "logits/rejected": -2.9894611835479736, "logps/chosen": -85.22865295410156, "logps/rejected": -331.1916809082031, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -4.090169906616211, "rewards/margins": 5.76878547668457, "rewards/rejected": -9.858955383300781, "step": 2577 }, { "epoch": 0.4, "learning_rate": 1.225652471601224e-05, "logits/chosen": -2.2949440479278564, "logits/rejected": -2.8400678634643555, "logps/chosen": -369.65386962890625, "logps/rejected": -457.19683837890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.234400987625122, "rewards/margins": 7.126436710357666, "rewards/rejected": -9.360837936401367, "step": 2578 }, { "epoch": 0.4, "learning_rate": 1.2255791275481092e-05, "logits/chosen": -1.6033436059951782, "logits/rejected": -2.840303421020508, "logps/chosen": -191.7945098876953, "logps/rejected": -343.49053955078125, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -3.7768397331237793, "rewards/margins": 4.631860733032227, "rewards/rejected": -8.408699989318848, "step": 2579 }, { "epoch": 0.4, "learning_rate": 1.2255057834949946e-05, "logits/chosen": -2.154191017150879, "logits/rejected": -3.001030683517456, "logps/chosen": -140.83120727539062, "logps/rejected": -489.18084716796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.5944881439208984, "rewards/margins": 9.455341339111328, "rewards/rejected": -13.049829483032227, "step": 2580 }, { "epoch": 0.4, "learning_rate": 1.2254324394418798e-05, "logits/chosen": -2.7478153705596924, "logits/rejected": -3.2991161346435547, "logps/chosen": -199.36279296875, "logps/rejected": -255.96170043945312, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -3.5448708534240723, "rewards/margins": 5.715025901794434, "rewards/rejected": -9.259897232055664, "step": 2581 }, { "epoch": 0.4, "learning_rate": 1.225359095388765e-05, "logits/chosen": -2.7992117404937744, "logits/rejected": -1.8894637823104858, "logps/chosen": -202.4573974609375, "logps/rejected": -196.95692443847656, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -2.7971582412719727, "rewards/margins": 3.988739490509033, "rewards/rejected": -6.785897731781006, "step": 2582 }, { "epoch": 0.4, "learning_rate": 1.2252857513356501e-05, "logits/chosen": -1.3367887735366821, "logits/rejected": -2.774674415588379, "logps/chosen": -124.72836303710938, "logps/rejected": -348.7649841308594, "loss": 1.9505, "rewards/accuracies": 0.5, "rewards/chosen": -6.29903507232666, "rewards/margins": 3.418921947479248, "rewards/rejected": -9.71795654296875, "step": 2583 }, { "epoch": 0.4, "learning_rate": 1.2252124072825355e-05, "logits/chosen": -2.7339861392974854, "logits/rejected": -3.091679334640503, "logps/chosen": -204.32679748535156, "logps/rejected": -478.10791015625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.036203145980835, "rewards/margins": 6.553309440612793, "rewards/rejected": -8.589512825012207, "step": 2584 }, { "epoch": 0.4, "learning_rate": 1.2251390632294207e-05, "logits/chosen": -2.6653079986572266, "logits/rejected": -2.6087799072265625, "logps/chosen": -269.00726318359375, "logps/rejected": -224.31805419921875, "loss": 1.445, "rewards/accuracies": 0.5, "rewards/chosen": -5.1787285804748535, "rewards/margins": 1.0488368272781372, "rewards/rejected": -6.227565288543701, "step": 2585 }, { "epoch": 0.4, "learning_rate": 1.2250657191763059e-05, "logits/chosen": -3.2860631942749023, "logits/rejected": -2.814912796020508, "logps/chosen": -616.9114990234375, "logps/rejected": -497.373291015625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.650465488433838, "rewards/margins": 5.93272590637207, "rewards/rejected": -10.58319091796875, "step": 2586 }, { "epoch": 0.4, "learning_rate": 1.224992375123191e-05, "logits/chosen": -3.004981756210327, "logits/rejected": -3.4343156814575195, "logps/chosen": -44.98119354248047, "logps/rejected": -222.55548095703125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.8033082485198975, "rewards/margins": 4.727243423461914, "rewards/rejected": -7.530551910400391, "step": 2587 }, { "epoch": 0.4, "learning_rate": 1.2249190310700764e-05, "logits/chosen": -3.0785350799560547, "logits/rejected": -3.16727614402771, "logps/chosen": -214.64309692382812, "logps/rejected": -279.08367919921875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.5113022327423096, "rewards/margins": 5.001223087310791, "rewards/rejected": -7.51252555847168, "step": 2588 }, { "epoch": 0.4, "learning_rate": 1.2248456870169616e-05, "logits/chosen": -3.304934501647949, "logits/rejected": -3.4337501525878906, "logps/chosen": -81.36831665039062, "logps/rejected": -150.74038696289062, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": -3.2513279914855957, "rewards/margins": 3.1971631050109863, "rewards/rejected": -6.448491096496582, "step": 2589 }, { "epoch": 0.4, "learning_rate": 1.2247723429638468e-05, "logits/chosen": -2.484830856323242, "logits/rejected": -3.025066375732422, "logps/chosen": -125.17767333984375, "logps/rejected": -154.73196411132812, "loss": 2.0779, "rewards/accuracies": 0.5, "rewards/chosen": -5.674643039703369, "rewards/margins": 1.287947177886963, "rewards/rejected": -6.962590217590332, "step": 2590 }, { "epoch": 0.4, "learning_rate": 1.224698998910732e-05, "logits/chosen": -2.579777240753174, "logits/rejected": -2.9321393966674805, "logps/chosen": -132.7530975341797, "logps/rejected": -159.83192443847656, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": -2.890571117401123, "rewards/margins": 4.389768600463867, "rewards/rejected": -7.28033971786499, "step": 2591 }, { "epoch": 0.4, "learning_rate": 1.2246256548576172e-05, "logits/chosen": -2.889159917831421, "logits/rejected": -2.9124233722686768, "logps/chosen": -548.0023193359375, "logps/rejected": -541.2169799804688, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.529132843017578, "rewards/margins": 5.516181468963623, "rewards/rejected": -9.04531478881836, "step": 2592 }, { "epoch": 0.4, "learning_rate": 1.2245523108045023e-05, "logits/chosen": -2.761599063873291, "logits/rejected": -2.773296594619751, "logps/chosen": -416.89892578125, "logps/rejected": -291.8734436035156, "loss": 2.1539, "rewards/accuracies": 0.5, "rewards/chosen": -4.905634880065918, "rewards/margins": 1.8881521224975586, "rewards/rejected": -6.793787479400635, "step": 2593 }, { "epoch": 0.4, "learning_rate": 1.2244789667513875e-05, "logits/chosen": -1.9247888326644897, "logits/rejected": -2.7130579948425293, "logps/chosen": -139.00628662109375, "logps/rejected": -459.8509521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4822964668273926, "rewards/margins": 10.286073684692383, "rewards/rejected": -13.768369674682617, "step": 2594 }, { "epoch": 0.4, "learning_rate": 1.2244056226982727e-05, "logits/chosen": -3.3424885272979736, "logits/rejected": -3.0276072025299072, "logps/chosen": -331.291259765625, "logps/rejected": -229.24383544921875, "loss": 3.6301, "rewards/accuracies": 0.5, "rewards/chosen": -6.056475639343262, "rewards/margins": 0.4940969944000244, "rewards/rejected": -6.550572395324707, "step": 2595 }, { "epoch": 0.4, "learning_rate": 1.2243322786451579e-05, "logits/chosen": -2.4228408336639404, "logits/rejected": -2.9890024662017822, "logps/chosen": -104.69535064697266, "logps/rejected": -324.1778869628906, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9503867626190186, "rewards/margins": 5.90650749206543, "rewards/rejected": -9.856894493103027, "step": 2596 }, { "epoch": 0.4, "learning_rate": 1.2242589345920433e-05, "logits/chosen": -2.6087920665740967, "logits/rejected": -2.500500202178955, "logps/chosen": -399.5428771972656, "logps/rejected": -414.0458984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.9917144775390625, "rewards/margins": 6.86207914352417, "rewards/rejected": -10.85379409790039, "step": 2597 }, { "epoch": 0.4, "learning_rate": 1.2241855905389285e-05, "logits/chosen": -2.9943089485168457, "logits/rejected": -2.868830442428589, "logps/chosen": -477.4556579589844, "logps/rejected": -310.14874267578125, "loss": 0.1123, "rewards/accuracies": 1.0, "rewards/chosen": -4.638450622558594, "rewards/margins": 2.96724534034729, "rewards/rejected": -7.605695724487305, "step": 2598 }, { "epoch": 0.4, "learning_rate": 1.2241122464858136e-05, "logits/chosen": -1.8353936672210693, "logits/rejected": -2.7317748069763184, "logps/chosen": -119.11062622070312, "logps/rejected": -432.6590576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8471477031707764, "rewards/margins": 11.27725601196289, "rewards/rejected": -15.12440299987793, "step": 2599 }, { "epoch": 0.4, "learning_rate": 1.2240389024326988e-05, "logits/chosen": -2.431864023208618, "logits/rejected": -2.9104256629943848, "logps/chosen": -146.11587524414062, "logps/rejected": -360.7823181152344, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.44901967048645, "rewards/margins": 10.192776679992676, "rewards/rejected": -13.641796112060547, "step": 2600 }, { "epoch": 0.4, "learning_rate": 1.223965558379584e-05, "logits/chosen": -1.569157361984253, "logits/rejected": -2.884162187576294, "logps/chosen": -135.08120727539062, "logps/rejected": -403.8299560546875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.055004596710205, "rewards/margins": 8.697746276855469, "rewards/rejected": -11.752751350402832, "step": 2601 }, { "epoch": 0.4, "learning_rate": 1.2238922143264692e-05, "logits/chosen": -2.7728796005249023, "logits/rejected": -3.13034987449646, "logps/chosen": -205.25762939453125, "logps/rejected": -432.2597961425781, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -4.573904991149902, "rewards/margins": 7.2989912033081055, "rewards/rejected": -11.872896194458008, "step": 2602 }, { "epoch": 0.4, "learning_rate": 1.2238188702733544e-05, "logits/chosen": -2.9951179027557373, "logits/rejected": -2.985074996948242, "logps/chosen": -217.8022003173828, "logps/rejected": -198.6361846923828, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -4.5588178634643555, "rewards/margins": 4.777523994445801, "rewards/rejected": -9.336341857910156, "step": 2603 }, { "epoch": 0.4, "learning_rate": 1.2237455262202396e-05, "logits/chosen": -2.709503650665283, "logits/rejected": -2.9912078380584717, "logps/chosen": -186.89862060546875, "logps/rejected": -146.039794921875, "loss": 4.0144, "rewards/accuracies": 0.0, "rewards/chosen": -5.994855880737305, "rewards/margins": -3.953800678253174, "rewards/rejected": -2.0410549640655518, "step": 2604 }, { "epoch": 0.41, "learning_rate": 1.2236721821671248e-05, "logits/chosen": -2.618460178375244, "logits/rejected": -2.5996005535125732, "logps/chosen": -190.94583129882812, "logps/rejected": -290.7159118652344, "loss": 2.2672, "rewards/accuracies": 0.5, "rewards/chosen": -4.616943359375, "rewards/margins": 2.0201642513275146, "rewards/rejected": -6.6371073722839355, "step": 2605 }, { "epoch": 0.41, "learning_rate": 1.2235988381140101e-05, "logits/chosen": -1.5264596939086914, "logits/rejected": -2.5734972953796387, "logps/chosen": -111.31088256835938, "logps/rejected": -328.44940185546875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -3.9040117263793945, "rewards/margins": 5.546674728393555, "rewards/rejected": -9.450687408447266, "step": 2606 }, { "epoch": 0.41, "learning_rate": 1.2235254940608953e-05, "logits/chosen": -1.26730477809906, "logits/rejected": -2.7996740341186523, "logps/chosen": -252.28604125976562, "logps/rejected": -455.09039306640625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.205052375793457, "rewards/margins": 5.9377899169921875, "rewards/rejected": -9.142842292785645, "step": 2607 }, { "epoch": 0.41, "learning_rate": 1.2234521500077805e-05, "logits/chosen": -2.377012252807617, "logits/rejected": -2.951876163482666, "logps/chosen": -123.55162048339844, "logps/rejected": -210.08912658691406, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -7.62451171875, "rewards/margins": 4.6005682945251465, "rewards/rejected": -12.225080490112305, "step": 2608 }, { "epoch": 0.41, "learning_rate": 1.2233788059546657e-05, "logits/chosen": -2.9728171825408936, "logits/rejected": -3.230135917663574, "logps/chosen": -135.1090087890625, "logps/rejected": -231.26560974121094, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -2.4658761024475098, "rewards/margins": 4.772271633148193, "rewards/rejected": -7.238147735595703, "step": 2609 }, { "epoch": 0.41, "learning_rate": 1.2233054619015509e-05, "logits/chosen": -2.591210126876831, "logits/rejected": -2.6779725551605225, "logps/chosen": -181.52597045898438, "logps/rejected": -183.1748809814453, "loss": 0.4072, "rewards/accuracies": 0.5, "rewards/chosen": -4.9891462326049805, "rewards/margins": 2.02360200881958, "rewards/rejected": -7.012747764587402, "step": 2610 }, { "epoch": 0.41, "learning_rate": 1.223232117848436e-05, "logits/chosen": -2.642545700073242, "logits/rejected": -2.3500802516937256, "logps/chosen": -566.9266357421875, "logps/rejected": -561.1882934570312, "loss": 4.6378, "rewards/accuracies": 0.5, "rewards/chosen": -8.320511817932129, "rewards/margins": -2.7399659156799316, "rewards/rejected": -5.580545902252197, "step": 2611 }, { "epoch": 0.41, "learning_rate": 1.2231587737953213e-05, "logits/chosen": -2.529836654663086, "logits/rejected": -2.8856236934661865, "logps/chosen": -87.8770751953125, "logps/rejected": -278.60919189453125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.967904567718506, "rewards/margins": 6.846051216125488, "rewards/rejected": -11.813955307006836, "step": 2612 }, { "epoch": 0.41, "learning_rate": 1.2230854297422064e-05, "logits/chosen": -2.835279941558838, "logits/rejected": -2.6496527194976807, "logps/chosen": -465.01690673828125, "logps/rejected": -622.5398559570312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.545154571533203, "rewards/margins": 12.351566314697266, "rewards/rejected": -15.896720886230469, "step": 2613 }, { "epoch": 0.41, "learning_rate": 1.2230120856890916e-05, "logits/chosen": -2.4858362674713135, "logits/rejected": -2.812617301940918, "logps/chosen": -270.2530212402344, "logps/rejected": -334.6280212402344, "loss": 2.3216, "rewards/accuracies": 0.5, "rewards/chosen": -6.676807403564453, "rewards/margins": 1.4241797924041748, "rewards/rejected": -8.100987434387207, "step": 2614 }, { "epoch": 0.41, "learning_rate": 1.222938741635977e-05, "logits/chosen": -2.5103278160095215, "logits/rejected": -3.0871429443359375, "logps/chosen": -202.03744506835938, "logps/rejected": -385.9554443359375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -4.197817325592041, "rewards/margins": 6.14793062210083, "rewards/rejected": -10.345747947692871, "step": 2615 }, { "epoch": 0.41, "learning_rate": 1.2228653975828622e-05, "logits/chosen": -1.7587860822677612, "logits/rejected": -3.2201218605041504, "logps/chosen": -207.95135498046875, "logps/rejected": -523.0947875976562, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.731151819229126, "rewards/margins": 7.184117317199707, "rewards/rejected": -10.915268898010254, "step": 2616 }, { "epoch": 0.41, "learning_rate": 1.2227920535297474e-05, "logits/chosen": -3.1626431941986084, "logits/rejected": -3.046078681945801, "logps/chosen": -108.48057556152344, "logps/rejected": -176.2584686279297, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.15262508392334, "rewards/margins": 7.0209503173828125, "rewards/rejected": -11.173576354980469, "step": 2617 }, { "epoch": 0.41, "learning_rate": 1.2227187094766327e-05, "logits/chosen": -1.166561245918274, "logits/rejected": -2.9095239639282227, "logps/chosen": -128.49176025390625, "logps/rejected": -454.80218505859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.290596008300781, "rewards/margins": 7.858000755310059, "rewards/rejected": -12.148595809936523, "step": 2618 }, { "epoch": 0.41, "learning_rate": 1.2226453654235179e-05, "logits/chosen": -2.587451219558716, "logits/rejected": -2.6161575317382812, "logps/chosen": -96.77427673339844, "logps/rejected": -238.5029754638672, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -6.248614311218262, "rewards/margins": 5.879453659057617, "rewards/rejected": -12.128067970275879, "step": 2619 }, { "epoch": 0.41, "learning_rate": 1.2225720213704031e-05, "logits/chosen": -2.292658567428589, "logits/rejected": -2.6641077995300293, "logps/chosen": -119.40728759765625, "logps/rejected": -291.58416748046875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.2355198860168457, "rewards/margins": 6.287085056304932, "rewards/rejected": -9.522604942321777, "step": 2620 }, { "epoch": 0.41, "learning_rate": 1.2224986773172883e-05, "logits/chosen": -2.9390766620635986, "logits/rejected": -3.150660276412964, "logps/chosen": -327.2685852050781, "logps/rejected": -605.748291015625, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -2.838749885559082, "rewards/margins": 5.685915946960449, "rewards/rejected": -8.524665832519531, "step": 2621 }, { "epoch": 0.41, "learning_rate": 1.2224253332641735e-05, "logits/chosen": -3.023010492324829, "logits/rejected": -2.3977906703948975, "logps/chosen": -295.0085144042969, "logps/rejected": -314.47760009765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.5821118354797363, "rewards/margins": 9.573081016540527, "rewards/rejected": -13.155193328857422, "step": 2622 }, { "epoch": 0.41, "learning_rate": 1.2223519892110587e-05, "logits/chosen": -2.1035044193267822, "logits/rejected": -2.781003713607788, "logps/chosen": -79.68968200683594, "logps/rejected": -173.13534545898438, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -4.502445220947266, "rewards/margins": 4.6290483474731445, "rewards/rejected": -9.13149356842041, "step": 2623 }, { "epoch": 0.41, "learning_rate": 1.222278645157944e-05, "logits/chosen": -2.764012575149536, "logits/rejected": -2.8454413414001465, "logps/chosen": -190.214599609375, "logps/rejected": -143.53396606445312, "loss": 0.1043, "rewards/accuracies": 1.0, "rewards/chosen": -4.750239372253418, "rewards/margins": 4.118236541748047, "rewards/rejected": -8.868476867675781, "step": 2624 }, { "epoch": 0.41, "learning_rate": 1.2222053011048292e-05, "logits/chosen": -2.2525267601013184, "logits/rejected": -3.135343074798584, "logps/chosen": -172.23992919921875, "logps/rejected": -448.248779296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.498227119445801, "rewards/margins": 8.244746208190918, "rewards/rejected": -11.742973327636719, "step": 2625 }, { "epoch": 0.41, "learning_rate": 1.2221319570517144e-05, "logits/chosen": -2.9113426208496094, "logits/rejected": -3.086055278778076, "logps/chosen": -369.1086120605469, "logps/rejected": -492.16839599609375, "loss": 4.6877, "rewards/accuracies": 0.5, "rewards/chosen": -8.191825866699219, "rewards/margins": 0.3607058525085449, "rewards/rejected": -8.552532196044922, "step": 2626 }, { "epoch": 0.41, "learning_rate": 1.2220586129985996e-05, "logits/chosen": -3.177809238433838, "logits/rejected": -3.255279302597046, "logps/chosen": -60.99046325683594, "logps/rejected": -215.6549072265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.9444546699523926, "rewards/margins": 8.554676055908203, "rewards/rejected": -11.499130249023438, "step": 2627 }, { "epoch": 0.41, "learning_rate": 1.2219852689454848e-05, "logits/chosen": -2.601299524307251, "logits/rejected": -3.0203187465667725, "logps/chosen": -486.3715515136719, "logps/rejected": -434.87554931640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.6210670471191406, "rewards/margins": 6.653626918792725, "rewards/rejected": -10.274694442749023, "step": 2628 }, { "epoch": 0.41, "learning_rate": 1.22191192489237e-05, "logits/chosen": -3.2028067111968994, "logits/rejected": -2.997123956680298, "logps/chosen": -609.376708984375, "logps/rejected": -508.6495666503906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5938706398010254, "rewards/margins": 7.258969306945801, "rewards/rejected": -9.852840423583984, "step": 2629 }, { "epoch": 0.41, "learning_rate": 1.2218385808392551e-05, "logits/chosen": -2.1985466480255127, "logits/rejected": -2.919536828994751, "logps/chosen": -57.990997314453125, "logps/rejected": -351.9556579589844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.6680283546447754, "rewards/margins": 8.85454273223877, "rewards/rejected": -12.522571563720703, "step": 2630 }, { "epoch": 0.41, "learning_rate": 1.2217652367861403e-05, "logits/chosen": -2.53523850440979, "logits/rejected": -3.00762939453125, "logps/chosen": -112.73393249511719, "logps/rejected": -190.8528594970703, "loss": 2.0925, "rewards/accuracies": 0.5, "rewards/chosen": -6.6392059326171875, "rewards/margins": -0.05402779579162598, "rewards/rejected": -6.585178375244141, "step": 2631 }, { "epoch": 0.41, "learning_rate": 1.2216918927330255e-05, "logits/chosen": -1.8544610738754272, "logits/rejected": -2.8442299365997314, "logps/chosen": -79.96056365966797, "logps/rejected": -272.2068176269531, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -3.664010763168335, "rewards/margins": 4.99009370803833, "rewards/rejected": -8.654104232788086, "step": 2632 }, { "epoch": 0.41, "learning_rate": 1.2216185486799109e-05, "logits/chosen": -3.1672744750976562, "logits/rejected": -2.77632737159729, "logps/chosen": -139.492431640625, "logps/rejected": -138.06390380859375, "loss": 0.4397, "rewards/accuracies": 0.5, "rewards/chosen": -4.328661918640137, "rewards/margins": 3.1896207332611084, "rewards/rejected": -7.518282890319824, "step": 2633 }, { "epoch": 0.41, "learning_rate": 1.221545204626796e-05, "logits/chosen": -2.3007586002349854, "logits/rejected": -2.4689126014709473, "logps/chosen": -212.45016479492188, "logps/rejected": -478.85784912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2603416442871094, "rewards/margins": 12.62813949584961, "rewards/rejected": -14.888481140136719, "step": 2634 }, { "epoch": 0.41, "learning_rate": 1.2214718605736813e-05, "logits/chosen": -3.446014404296875, "logits/rejected": -3.1296536922454834, "logps/chosen": -179.4443359375, "logps/rejected": -73.30169677734375, "loss": 2.5837, "rewards/accuracies": 0.5, "rewards/chosen": -7.081088066101074, "rewards/margins": -1.7714757919311523, "rewards/rejected": -5.309611797332764, "step": 2635 }, { "epoch": 0.41, "learning_rate": 1.2213985165205664e-05, "logits/chosen": -2.054715394973755, "logits/rejected": -2.857792854309082, "logps/chosen": -141.59228515625, "logps/rejected": -246.44212341308594, "loss": 1.6906, "rewards/accuracies": 0.5, "rewards/chosen": -4.928574562072754, "rewards/margins": 2.068930149078369, "rewards/rejected": -6.997505187988281, "step": 2636 }, { "epoch": 0.41, "learning_rate": 1.2213251724674516e-05, "logits/chosen": -3.0697638988494873, "logits/rejected": -2.824678659439087, "logps/chosen": -437.964111328125, "logps/rejected": -236.77450561523438, "loss": 3.5315, "rewards/accuracies": 0.5, "rewards/chosen": -8.087089538574219, "rewards/margins": -1.1965250968933105, "rewards/rejected": -6.890564441680908, "step": 2637 }, { "epoch": 0.41, "learning_rate": 1.2212518284143368e-05, "logits/chosen": -2.606738805770874, "logits/rejected": -3.3790342807769775, "logps/chosen": -67.61656188964844, "logps/rejected": -370.5246887207031, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -5.307954788208008, "rewards/margins": 3.945902109146118, "rewards/rejected": -9.253856658935547, "step": 2638 }, { "epoch": 0.41, "learning_rate": 1.221178484361222e-05, "logits/chosen": -2.655252695083618, "logits/rejected": -2.870520830154419, "logps/chosen": -153.47775268554688, "logps/rejected": -170.0371856689453, "loss": 0.6704, "rewards/accuracies": 0.5, "rewards/chosen": -4.433693885803223, "rewards/margins": 2.2534215450286865, "rewards/rejected": -6.687115669250488, "step": 2639 }, { "epoch": 0.41, "learning_rate": 1.2211051403081072e-05, "logits/chosen": -2.704617738723755, "logits/rejected": -2.7011938095092773, "logps/chosen": -253.3329620361328, "logps/rejected": -244.83636474609375, "loss": 3.0819, "rewards/accuracies": 0.5, "rewards/chosen": -6.126246929168701, "rewards/margins": 0.7939794063568115, "rewards/rejected": -6.920226097106934, "step": 2640 }, { "epoch": 0.41, "learning_rate": 1.2210317962549924e-05, "logits/chosen": -2.729628324508667, "logits/rejected": -2.1957077980041504, "logps/chosen": -628.7552490234375, "logps/rejected": -530.7460327148438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.277276039123535, "rewards/margins": 8.421929359436035, "rewards/rejected": -10.69920539855957, "step": 2641 }, { "epoch": 0.41, "learning_rate": 1.2209584522018777e-05, "logits/chosen": -2.8513593673706055, "logits/rejected": -2.942892551422119, "logps/chosen": -120.39234924316406, "logps/rejected": -276.50946044921875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -2.482558488845825, "rewards/margins": 4.798471450805664, "rewards/rejected": -7.281030178070068, "step": 2642 }, { "epoch": 0.41, "learning_rate": 1.220885108148763e-05, "logits/chosen": -3.2541909217834473, "logits/rejected": -3.0600192546844482, "logps/chosen": -625.802734375, "logps/rejected": -463.98370361328125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.2112488746643066, "rewards/margins": 5.38063907623291, "rewards/rejected": -6.591887950897217, "step": 2643 }, { "epoch": 0.41, "learning_rate": 1.2208117640956481e-05, "logits/chosen": -2.7194676399230957, "logits/rejected": -2.5443546772003174, "logps/chosen": -210.9439697265625, "logps/rejected": -258.50994873046875, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -2.083085060119629, "rewards/margins": 5.670755863189697, "rewards/rejected": -7.753840446472168, "step": 2644 }, { "epoch": 0.41, "learning_rate": 1.2207384200425333e-05, "logits/chosen": -2.417296886444092, "logits/rejected": -2.906926393508911, "logps/chosen": -791.394287109375, "logps/rejected": -342.95745849609375, "loss": 0.6265, "rewards/accuracies": 0.5, "rewards/chosen": -3.7200989723205566, "rewards/margins": 0.7830561399459839, "rewards/rejected": -4.503154754638672, "step": 2645 }, { "epoch": 0.41, "learning_rate": 1.2206650759894185e-05, "logits/chosen": -2.8302254676818848, "logits/rejected": -1.8007047176361084, "logps/chosen": -538.59033203125, "logps/rejected": -376.8414611816406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.5292725563049316, "rewards/margins": 6.846322536468506, "rewards/rejected": -9.375595092773438, "step": 2646 }, { "epoch": 0.41, "learning_rate": 1.2205917319363037e-05, "logits/chosen": -2.5314464569091797, "logits/rejected": -1.285527229309082, "logps/chosen": -636.8488159179688, "logps/rejected": -418.21484375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.0868382453918457, "rewards/margins": 6.676690578460693, "rewards/rejected": -9.763528823852539, "step": 2647 }, { "epoch": 0.41, "learning_rate": 1.2205183878831889e-05, "logits/chosen": -2.5205750465393066, "logits/rejected": -2.96722412109375, "logps/chosen": -282.06256103515625, "logps/rejected": -215.83963012695312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5309104323387146, "rewards/margins": 7.338507652282715, "rewards/rejected": -7.869418144226074, "step": 2648 }, { "epoch": 0.41, "learning_rate": 1.220445043830074e-05, "logits/chosen": -2.6859500408172607, "logits/rejected": -2.5736193656921387, "logps/chosen": -504.7781982421875, "logps/rejected": -604.4822998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7413688898086548, "rewards/margins": 12.286502838134766, "rewards/rejected": -14.027872085571289, "step": 2649 }, { "epoch": 0.41, "learning_rate": 1.2203716997769592e-05, "logits/chosen": -3.1940836906433105, "logits/rejected": -3.2135491371154785, "logps/chosen": -50.80751037597656, "logps/rejected": -116.0397720336914, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -3.1348185539245605, "rewards/margins": 4.0250349044799805, "rewards/rejected": -7.159853935241699, "step": 2650 }, { "epoch": 0.41, "learning_rate": 1.2202983557238446e-05, "logits/chosen": -2.576427698135376, "logits/rejected": -3.116486072540283, "logps/chosen": -191.06991577148438, "logps/rejected": -412.4794921875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.127478837966919, "rewards/margins": 5.201015472412109, "rewards/rejected": -6.328494071960449, "step": 2651 }, { "epoch": 0.41, "learning_rate": 1.22022501167073e-05, "logits/chosen": -3.132476806640625, "logits/rejected": -2.690803050994873, "logps/chosen": -669.5108642578125, "logps/rejected": -677.058349609375, "loss": 4.5008, "rewards/accuracies": 0.5, "rewards/chosen": -5.781518936157227, "rewards/margins": -0.8766968250274658, "rewards/rejected": -4.904821872711182, "step": 2652 }, { "epoch": 0.41, "learning_rate": 1.2201516676176151e-05, "logits/chosen": -2.3167126178741455, "logits/rejected": -3.0675406455993652, "logps/chosen": -68.6990737915039, "logps/rejected": -199.26712036132812, "loss": 0.0926, "rewards/accuracies": 1.0, "rewards/chosen": -2.785616159439087, "rewards/margins": 3.981144905090332, "rewards/rejected": -6.76676082611084, "step": 2653 }, { "epoch": 0.41, "learning_rate": 1.2200783235645003e-05, "logits/chosen": -1.945747971534729, "logits/rejected": -3.2269163131713867, "logps/chosen": -235.64422607421875, "logps/rejected": -483.0064392089844, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -1.3215820789337158, "rewards/margins": 4.137995719909668, "rewards/rejected": -5.459578037261963, "step": 2654 }, { "epoch": 0.41, "learning_rate": 1.2200049795113855e-05, "logits/chosen": -2.7130722999572754, "logits/rejected": -2.9151086807250977, "logps/chosen": -70.61553955078125, "logps/rejected": -201.1082000732422, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": -2.5555410385131836, "rewards/margins": 5.767857551574707, "rewards/rejected": -8.32339859008789, "step": 2655 }, { "epoch": 0.41, "learning_rate": 1.2199316354582707e-05, "logits/chosen": -0.9765435457229614, "logits/rejected": -2.738334894180298, "logps/chosen": -69.75137329101562, "logps/rejected": -292.94342041015625, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -3.996000289916992, "rewards/margins": 3.7649919986724854, "rewards/rejected": -7.760992050170898, "step": 2656 }, { "epoch": 0.41, "learning_rate": 1.2198582914051559e-05, "logits/chosen": -3.1695961952209473, "logits/rejected": -2.2092537879943848, "logps/chosen": -421.81170654296875, "logps/rejected": -256.4371643066406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.6058807373046875, "rewards/margins": 7.8009490966796875, "rewards/rejected": -7.195068359375, "step": 2657 }, { "epoch": 0.41, "learning_rate": 1.219784947352041e-05, "logits/chosen": -3.1038808822631836, "logits/rejected": -2.0435433387756348, "logps/chosen": -740.8736572265625, "logps/rejected": -355.08355712890625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 0.29906314611434937, "rewards/margins": 6.1843719482421875, "rewards/rejected": -5.885308742523193, "step": 2658 }, { "epoch": 0.41, "learning_rate": 1.2197116032989263e-05, "logits/chosen": -3.027040481567383, "logits/rejected": -3.1465721130371094, "logps/chosen": -181.84571838378906, "logps/rejected": -357.51251220703125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.2957439422607422, "rewards/margins": 7.052637100219727, "rewards/rejected": -8.348381042480469, "step": 2659 }, { "epoch": 0.41, "learning_rate": 1.2196382592458116e-05, "logits/chosen": -1.031157374382019, "logits/rejected": -3.05615234375, "logps/chosen": -79.89263153076172, "logps/rejected": -526.9276123046875, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -2.914342164993286, "rewards/margins": 4.570443153381348, "rewards/rejected": -7.484785079956055, "step": 2660 }, { "epoch": 0.41, "learning_rate": 1.2195649151926968e-05, "logits/chosen": -2.9107506275177, "logits/rejected": -2.924894094467163, "logps/chosen": -187.29641723632812, "logps/rejected": -353.61248779296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1375133991241455, "rewards/margins": 10.332853317260742, "rewards/rejected": -12.470367431640625, "step": 2661 }, { "epoch": 0.41, "learning_rate": 1.219491571139582e-05, "logits/chosen": -2.9767954349517822, "logits/rejected": -2.5566020011901855, "logps/chosen": -274.23956298828125, "logps/rejected": -139.3391571044922, "loss": 2.3363, "rewards/accuracies": 0.5, "rewards/chosen": -6.642531394958496, "rewards/margins": 1.6930081844329834, "rewards/rejected": -8.335539817810059, "step": 2662 }, { "epoch": 0.41, "learning_rate": 1.2194182270864672e-05, "logits/chosen": -1.80025315284729, "logits/rejected": -2.8365533351898193, "logps/chosen": -92.83778381347656, "logps/rejected": -253.5655517578125, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -3.048898220062256, "rewards/margins": 5.460933208465576, "rewards/rejected": -8.509831428527832, "step": 2663 }, { "epoch": 0.41, "learning_rate": 1.2193448830333524e-05, "logits/chosen": -2.648801803588867, "logits/rejected": -2.890287160873413, "logps/chosen": -60.98063278198242, "logps/rejected": -183.51104736328125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.3843870162963867, "rewards/margins": 5.329710006713867, "rewards/rejected": -6.714097023010254, "step": 2664 }, { "epoch": 0.41, "learning_rate": 1.2192715389802376e-05, "logits/chosen": -3.072740316390991, "logits/rejected": -3.297635316848755, "logps/chosen": -128.41761779785156, "logps/rejected": -278.3699951171875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.405341863632202, "rewards/margins": 5.916376113891602, "rewards/rejected": -9.321718215942383, "step": 2665 }, { "epoch": 0.41, "learning_rate": 1.2191981949271228e-05, "logits/chosen": -2.9810526371002197, "logits/rejected": -2.8821887969970703, "logps/chosen": -605.9902954101562, "logps/rejected": -289.7707214355469, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.520275890827179, "rewards/margins": 6.475728988647461, "rewards/rejected": -5.955452919006348, "step": 2666 }, { "epoch": 0.41, "learning_rate": 1.219124850874008e-05, "logits/chosen": -2.824488639831543, "logits/rejected": -2.3350183963775635, "logps/chosen": -628.2615356445312, "logps/rejected": -452.5506591796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.00399172306060791, "rewards/margins": 9.550329208374023, "rewards/rejected": -9.546337127685547, "step": 2667 }, { "epoch": 0.41, "learning_rate": 1.2190515068208931e-05, "logits/chosen": -2.7126429080963135, "logits/rejected": -2.8937652111053467, "logps/chosen": -304.8131408691406, "logps/rejected": -299.2312927246094, "loss": 0.7433, "rewards/accuracies": 0.5, "rewards/chosen": -3.1341986656188965, "rewards/margins": 1.7543954849243164, "rewards/rejected": -4.888594150543213, "step": 2668 }, { "epoch": 0.42, "learning_rate": 1.2189781627677785e-05, "logits/chosen": -3.1660568714141846, "logits/rejected": -2.0017755031585693, "logps/chosen": -726.1002197265625, "logps/rejected": -318.46075439453125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.3474762439727783, "rewards/margins": 7.250596046447754, "rewards/rejected": -9.598072052001953, "step": 2669 }, { "epoch": 0.42, "learning_rate": 1.2189048187146637e-05, "logits/chosen": -2.861015558242798, "logits/rejected": -3.0727763175964355, "logps/chosen": -127.07911682128906, "logps/rejected": -219.86248779296875, "loss": 0.1897, "rewards/accuracies": 1.0, "rewards/chosen": -4.7432708740234375, "rewards/margins": 1.9723855257034302, "rewards/rejected": -6.715656280517578, "step": 2670 }, { "epoch": 0.42, "learning_rate": 1.2188314746615489e-05, "logits/chosen": -2.042562246322632, "logits/rejected": -3.188131332397461, "logps/chosen": -72.49903869628906, "logps/rejected": -331.0045471191406, "loss": 0.2414, "rewards/accuracies": 1.0, "rewards/chosen": -2.946877956390381, "rewards/margins": 2.964608907699585, "rewards/rejected": -5.911487102508545, "step": 2671 }, { "epoch": 0.42, "learning_rate": 1.218758130608434e-05, "logits/chosen": -2.894418239593506, "logits/rejected": -3.051149606704712, "logps/chosen": -606.5943603515625, "logps/rejected": -560.4771728515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5196456909179688, "rewards/margins": 7.211997032165527, "rewards/rejected": -7.731642723083496, "step": 2672 }, { "epoch": 0.42, "learning_rate": 1.2186847865553192e-05, "logits/chosen": -3.1522247791290283, "logits/rejected": -2.5338668823242188, "logps/chosen": -414.36492919921875, "logps/rejected": -202.45791625976562, "loss": 1.683, "rewards/accuracies": 0.5, "rewards/chosen": -3.880978584289551, "rewards/margins": 1.561036229133606, "rewards/rejected": -5.442014694213867, "step": 2673 }, { "epoch": 0.42, "learning_rate": 1.2186114425022044e-05, "logits/chosen": -1.5551868677139282, "logits/rejected": -3.0795912742614746, "logps/chosen": -143.27711486816406, "logps/rejected": -373.47698974609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3306198120117188, "rewards/margins": 6.5544328689575195, "rewards/rejected": -9.885052680969238, "step": 2674 }, { "epoch": 0.42, "learning_rate": 1.2185380984490896e-05, "logits/chosen": -2.266508102416992, "logits/rejected": -3.100128412246704, "logps/chosen": -109.92986297607422, "logps/rejected": -314.38165283203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.294193744659424, "rewards/margins": 8.265117645263672, "rewards/rejected": -10.559311866760254, "step": 2675 }, { "epoch": 0.42, "learning_rate": 1.2184647543959748e-05, "logits/chosen": -1.3257380723953247, "logits/rejected": -2.4120240211486816, "logps/chosen": -152.35630798339844, "logps/rejected": -453.16522216796875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -4.732018947601318, "rewards/margins": 5.552102565765381, "rewards/rejected": -10.2841215133667, "step": 2676 }, { "epoch": 0.42, "learning_rate": 1.21839141034286e-05, "logits/chosen": -2.147402763366699, "logits/rejected": -2.7159438133239746, "logps/chosen": -245.19534301757812, "logps/rejected": -278.9271240234375, "loss": 0.735, "rewards/accuracies": 0.5, "rewards/chosen": -5.094385147094727, "rewards/margins": 1.9698240756988525, "rewards/rejected": -7.064208984375, "step": 2677 }, { "epoch": 0.42, "learning_rate": 1.2183180662897453e-05, "logits/chosen": -2.6954495906829834, "logits/rejected": -3.163815498352051, "logps/chosen": -93.42198181152344, "logps/rejected": -299.352294921875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -5.191020488739014, "rewards/margins": 5.397597312927246, "rewards/rejected": -10.588617324829102, "step": 2678 }, { "epoch": 0.42, "learning_rate": 1.2182447222366305e-05, "logits/chosen": -1.592209815979004, "logits/rejected": -2.66322922706604, "logps/chosen": -72.51947021484375, "logps/rejected": -207.902099609375, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.6654472351074219, "rewards/margins": 4.243782043457031, "rewards/rejected": -5.909229278564453, "step": 2679 }, { "epoch": 0.42, "learning_rate": 1.2181713781835157e-05, "logits/chosen": -1.9642237424850464, "logits/rejected": -2.7327721118927, "logps/chosen": -103.9967269897461, "logps/rejected": -341.1080627441406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.340686798095703, "rewards/margins": 12.631951332092285, "rewards/rejected": -15.972638130187988, "step": 2680 }, { "epoch": 0.42, "learning_rate": 1.2180980341304009e-05, "logits/chosen": -3.016533374786377, "logits/rejected": -3.0682010650634766, "logps/chosen": -122.0040283203125, "logps/rejected": -111.80802917480469, "loss": 3.9332, "rewards/accuracies": 0.5, "rewards/chosen": -7.432399272918701, "rewards/margins": -0.9919304847717285, "rewards/rejected": -6.440468788146973, "step": 2681 }, { "epoch": 0.42, "learning_rate": 1.2180246900772861e-05, "logits/chosen": -2.879871129989624, "logits/rejected": -2.276918411254883, "logps/chosen": -152.53787231445312, "logps/rejected": -242.8330535888672, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -3.1139309406280518, "rewards/margins": 6.119904041290283, "rewards/rejected": -9.233835220336914, "step": 2682 }, { "epoch": 0.42, "learning_rate": 1.2179513460241713e-05, "logits/chosen": -1.1255810260772705, "logits/rejected": -2.8749797344207764, "logps/chosen": -82.15401458740234, "logps/rejected": -434.2965087890625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.21653151512146, "rewards/margins": 8.41377067565918, "rewards/rejected": -11.630302429199219, "step": 2683 }, { "epoch": 0.42, "learning_rate": 1.2178780019710565e-05, "logits/chosen": -2.642775058746338, "logits/rejected": -3.2192165851593018, "logps/chosen": -190.13282775878906, "logps/rejected": -497.501708984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2906906604766846, "rewards/margins": 8.717719078063965, "rewards/rejected": -10.00840950012207, "step": 2684 }, { "epoch": 0.42, "learning_rate": 1.2178046579179418e-05, "logits/chosen": -1.6253998279571533, "logits/rejected": -3.1920361518859863, "logps/chosen": -163.4475860595703, "logps/rejected": -583.3216552734375, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -3.1794486045837402, "rewards/margins": 5.005845069885254, "rewards/rejected": -8.185293197631836, "step": 2685 }, { "epoch": 0.42, "learning_rate": 1.217731313864827e-05, "logits/chosen": -2.9435837268829346, "logits/rejected": -1.5102075338363647, "logps/chosen": -374.5639953613281, "logps/rejected": -226.53497314453125, "loss": 3.4006, "rewards/accuracies": 0.5, "rewards/chosen": -5.601587772369385, "rewards/margins": -0.2507898807525635, "rewards/rejected": -5.350797653198242, "step": 2686 }, { "epoch": 0.42, "learning_rate": 1.2176579698117124e-05, "logits/chosen": -1.0191069841384888, "logits/rejected": -2.6376540660858154, "logps/chosen": -109.02996063232422, "logps/rejected": -476.4991455078125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -4.543425559997559, "rewards/margins": 6.433823585510254, "rewards/rejected": -10.977249145507812, "step": 2687 }, { "epoch": 0.42, "learning_rate": 1.2175846257585976e-05, "logits/chosen": -1.8933687210083008, "logits/rejected": -3.2753186225891113, "logps/chosen": -61.8766975402832, "logps/rejected": -264.4149169921875, "loss": 0.5382, "rewards/accuracies": 0.5, "rewards/chosen": -4.667095184326172, "rewards/margins": 1.3281095027923584, "rewards/rejected": -5.995204925537109, "step": 2688 }, { "epoch": 0.42, "learning_rate": 1.2175112817054828e-05, "logits/chosen": -2.6271812915802, "logits/rejected": -2.5179078578948975, "logps/chosen": -152.79660034179688, "logps/rejected": -177.91494750976562, "loss": 0.4077, "rewards/accuracies": 0.5, "rewards/chosen": -2.254978656768799, "rewards/margins": 3.984797477722168, "rewards/rejected": -6.239776134490967, "step": 2689 }, { "epoch": 0.42, "learning_rate": 1.217437937652368e-05, "logits/chosen": -2.9041354656219482, "logits/rejected": -3.3896822929382324, "logps/chosen": -106.79853820800781, "logps/rejected": -244.71925354003906, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -3.5850725173950195, "rewards/margins": 6.635937690734863, "rewards/rejected": -10.221010208129883, "step": 2690 }, { "epoch": 0.42, "learning_rate": 1.2173645935992531e-05, "logits/chosen": -2.221454381942749, "logits/rejected": -3.1213669776916504, "logps/chosen": -284.5507507324219, "logps/rejected": -450.55389404296875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.907438039779663, "rewards/margins": 5.990829944610596, "rewards/rejected": -7.898268222808838, "step": 2691 }, { "epoch": 0.42, "learning_rate": 1.2172912495461383e-05, "logits/chosen": -2.8037211894989014, "logits/rejected": -2.898390769958496, "logps/chosen": -68.3048095703125, "logps/rejected": -190.04669189453125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.3512115478515625, "rewards/margins": 6.591393947601318, "rewards/rejected": -8.942605018615723, "step": 2692 }, { "epoch": 0.42, "learning_rate": 1.2172179054930235e-05, "logits/chosen": -2.6245787143707275, "logits/rejected": -1.479284644126892, "logps/chosen": -408.19744873046875, "logps/rejected": -320.39239501953125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -4.018601417541504, "rewards/margins": 7.036365509033203, "rewards/rejected": -11.054966926574707, "step": 2693 }, { "epoch": 0.42, "learning_rate": 1.2171445614399087e-05, "logits/chosen": -2.2542436122894287, "logits/rejected": -2.7829668521881104, "logps/chosen": -243.72726440429688, "logps/rejected": -258.12750244140625, "loss": 2.3447, "rewards/accuracies": 0.5, "rewards/chosen": -4.4902825355529785, "rewards/margins": 2.08895206451416, "rewards/rejected": -6.579234600067139, "step": 2694 }, { "epoch": 0.42, "learning_rate": 1.217071217386794e-05, "logits/chosen": -2.809203624725342, "logits/rejected": -3.191671848297119, "logps/chosen": -380.6535339355469, "logps/rejected": -486.8531799316406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8088035583496094, "rewards/margins": 9.614032745361328, "rewards/rejected": -12.422836303710938, "step": 2695 }, { "epoch": 0.42, "learning_rate": 1.2169978733336792e-05, "logits/chosen": -2.710663080215454, "logits/rejected": -2.929097890853882, "logps/chosen": -97.03466033935547, "logps/rejected": -175.9790496826172, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.387502908706665, "rewards/margins": 7.237940311431885, "rewards/rejected": -9.625443458557129, "step": 2696 }, { "epoch": 0.42, "learning_rate": 1.2169245292805644e-05, "logits/chosen": -2.5328261852264404, "logits/rejected": -3.171370506286621, "logps/chosen": -79.69416809082031, "logps/rejected": -256.9227294921875, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -4.2046051025390625, "rewards/margins": 3.145002603530884, "rewards/rejected": -7.349607944488525, "step": 2697 }, { "epoch": 0.42, "learning_rate": 1.2168511852274496e-05, "logits/chosen": -2.7260749340057373, "logits/rejected": -1.997404932975769, "logps/chosen": -294.7216796875, "logps/rejected": -248.93080139160156, "loss": 0.7278, "rewards/accuracies": 0.5, "rewards/chosen": -6.812751770019531, "rewards/margins": 1.9757647514343262, "rewards/rejected": -8.788516998291016, "step": 2698 }, { "epoch": 0.42, "learning_rate": 1.2167778411743348e-05, "logits/chosen": -2.7650372982025146, "logits/rejected": -2.8684651851654053, "logps/chosen": -77.42800903320312, "logps/rejected": -363.93402099609375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.370133638381958, "rewards/margins": 7.9397478103637695, "rewards/rejected": -10.309881210327148, "step": 2699 }, { "epoch": 0.42, "learning_rate": 1.21670449712122e-05, "logits/chosen": -2.8738081455230713, "logits/rejected": -0.9704745411872864, "logps/chosen": -617.9849853515625, "logps/rejected": -273.34588623046875, "loss": 2.0201, "rewards/accuracies": 0.5, "rewards/chosen": -6.065779209136963, "rewards/margins": 2.544456720352173, "rewards/rejected": -8.610236167907715, "step": 2700 }, { "epoch": 0.42, "learning_rate": 1.2166311530681052e-05, "logits/chosen": -2.6629133224487305, "logits/rejected": -3.170245409011841, "logps/chosen": -398.7867431640625, "logps/rejected": -490.3526611328125, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/chosen": -1.5732009410858154, "rewards/margins": 5.711797714233398, "rewards/rejected": -7.284998893737793, "step": 2701 }, { "epoch": 0.42, "learning_rate": 1.2165578090149904e-05, "logits/chosen": -1.5687400102615356, "logits/rejected": -3.3746044635772705, "logps/chosen": -160.82241821289062, "logps/rejected": -675.9244384765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.260043144226074, "rewards/margins": 9.886356353759766, "rewards/rejected": -13.146398544311523, "step": 2702 }, { "epoch": 0.42, "learning_rate": 1.2164844649618756e-05, "logits/chosen": -3.0365474224090576, "logits/rejected": -3.342839002609253, "logps/chosen": -34.53821563720703, "logps/rejected": -204.15878295898438, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.704657793045044, "rewards/margins": 6.695021152496338, "rewards/rejected": -8.399679183959961, "step": 2703 }, { "epoch": 0.42, "learning_rate": 1.2164111209087609e-05, "logits/chosen": -2.858558416366577, "logits/rejected": -2.750392198562622, "logps/chosen": -125.64189147949219, "logps/rejected": -227.3099822998047, "loss": 3.6082, "rewards/accuracies": 0.5, "rewards/chosen": -5.275355339050293, "rewards/margins": 0.24531173706054688, "rewards/rejected": -5.52066707611084, "step": 2704 }, { "epoch": 0.42, "learning_rate": 1.2163377768556461e-05, "logits/chosen": -2.856318950653076, "logits/rejected": -2.224017381668091, "logps/chosen": -331.42767333984375, "logps/rejected": -326.373046875, "loss": 6.3629, "rewards/accuracies": 0.5, "rewards/chosen": -8.525925636291504, "rewards/margins": -1.986189365386963, "rewards/rejected": -6.539736270904541, "step": 2705 }, { "epoch": 0.42, "learning_rate": 1.2162644328025313e-05, "logits/chosen": -1.6537343263626099, "logits/rejected": -2.674794912338257, "logps/chosen": -149.3280029296875, "logps/rejected": -325.17559814453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.568493366241455, "rewards/margins": 7.023281574249268, "rewards/rejected": -10.591774940490723, "step": 2706 }, { "epoch": 0.42, "learning_rate": 1.2161910887494165e-05, "logits/chosen": -2.2049219608306885, "logits/rejected": -2.808690071105957, "logps/chosen": -113.62533569335938, "logps/rejected": -305.7816162109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.0730791091918945, "rewards/margins": 8.494796752929688, "rewards/rejected": -12.567875862121582, "step": 2707 }, { "epoch": 0.42, "learning_rate": 1.2161177446963017e-05, "logits/chosen": -2.7878241539001465, "logits/rejected": -2.4964754581451416, "logps/chosen": -326.6793518066406, "logps/rejected": -342.4954833984375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -2.9079043865203857, "rewards/margins": 4.840900897979736, "rewards/rejected": -7.748805046081543, "step": 2708 }, { "epoch": 0.42, "learning_rate": 1.2160444006431868e-05, "logits/chosen": -2.400315761566162, "logits/rejected": -2.8623626232147217, "logps/chosen": -70.88816833496094, "logps/rejected": -185.05487060546875, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -3.2739169597625732, "rewards/margins": 4.7023162841796875, "rewards/rejected": -7.976233005523682, "step": 2709 }, { "epoch": 0.42, "learning_rate": 1.215971056590072e-05, "logits/chosen": -2.7795495986938477, "logits/rejected": -2.233902931213379, "logps/chosen": -290.954345703125, "logps/rejected": -276.6889953613281, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -5.055878639221191, "rewards/margins": 5.583154678344727, "rewards/rejected": -10.639033317565918, "step": 2710 }, { "epoch": 0.42, "learning_rate": 1.2158977125369572e-05, "logits/chosen": -2.6379764080047607, "logits/rejected": -1.922495722770691, "logps/chosen": -334.7731628417969, "logps/rejected": -303.2547607421875, "loss": 3.4967, "rewards/accuracies": 0.5, "rewards/chosen": -6.746583461761475, "rewards/margins": 0.02663421630859375, "rewards/rejected": -6.773217678070068, "step": 2711 }, { "epoch": 0.42, "learning_rate": 1.2158243684838424e-05, "logits/chosen": -2.513483762741089, "logits/rejected": -2.9247028827667236, "logps/chosen": -217.29644775390625, "logps/rejected": -211.771728515625, "loss": 0.2529, "rewards/accuracies": 1.0, "rewards/chosen": -2.9411425590515137, "rewards/margins": 3.7946107387542725, "rewards/rejected": -6.735753059387207, "step": 2712 }, { "epoch": 0.42, "learning_rate": 1.2157510244307278e-05, "logits/chosen": -2.603813648223877, "logits/rejected": -1.6276260614395142, "logps/chosen": -273.5386962890625, "logps/rejected": -145.7327423095703, "loss": 5.5569, "rewards/accuracies": 0.5, "rewards/chosen": -8.84625244140625, "rewards/margins": -2.541910171508789, "rewards/rejected": -6.304341793060303, "step": 2713 }, { "epoch": 0.42, "learning_rate": 1.215677680377613e-05, "logits/chosen": -2.5878989696502686, "logits/rejected": -2.775536298751831, "logps/chosen": -559.97900390625, "logps/rejected": -444.56988525390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.669964551925659, "rewards/margins": 8.711543083190918, "rewards/rejected": -11.381507873535156, "step": 2714 }, { "epoch": 0.42, "learning_rate": 1.2156043363244981e-05, "logits/chosen": -2.483485698699951, "logits/rejected": -3.1972105503082275, "logps/chosen": -71.10872650146484, "logps/rejected": -269.59454345703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.925267219543457, "rewards/margins": 9.399301528930664, "rewards/rejected": -10.324567794799805, "step": 2715 }, { "epoch": 0.42, "learning_rate": 1.2155309922713833e-05, "logits/chosen": -2.598390817642212, "logits/rejected": -2.829645872116089, "logps/chosen": -141.44480895996094, "logps/rejected": -275.264404296875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.5127034187316895, "rewards/margins": 5.698844909667969, "rewards/rejected": -8.211548805236816, "step": 2716 }, { "epoch": 0.42, "learning_rate": 1.2154576482182685e-05, "logits/chosen": -2.2514243125915527, "logits/rejected": -2.5458872318267822, "logps/chosen": -150.482421875, "logps/rejected": -125.0416259765625, "loss": 1.9867, "rewards/accuracies": 0.5, "rewards/chosen": -6.683705806732178, "rewards/margins": 1.2205044031143188, "rewards/rejected": -7.904210090637207, "step": 2717 }, { "epoch": 0.42, "learning_rate": 1.2153843041651537e-05, "logits/chosen": -1.7893874645233154, "logits/rejected": -2.494291305541992, "logps/chosen": -205.3066864013672, "logps/rejected": -348.00567626953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.610032320022583, "rewards/margins": 7.392374515533447, "rewards/rejected": -11.00240707397461, "step": 2718 }, { "epoch": 0.42, "learning_rate": 1.215310960112039e-05, "logits/chosen": -2.931298017501831, "logits/rejected": -2.9753355979919434, "logps/chosen": -1001.5202026367188, "logps/rejected": -452.7117919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.08923184871673584, "rewards/margins": 9.673818588256836, "rewards/rejected": -9.763050079345703, "step": 2719 }, { "epoch": 0.42, "learning_rate": 1.2152376160589243e-05, "logits/chosen": -2.3574068546295166, "logits/rejected": -3.021143913269043, "logps/chosen": -217.54183959960938, "logps/rejected": -472.4781494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5630067586898804, "rewards/margins": 9.326982498168945, "rewards/rejected": -10.889988899230957, "step": 2720 }, { "epoch": 0.42, "learning_rate": 1.2151642720058094e-05, "logits/chosen": -1.7816694974899292, "logits/rejected": -2.6587436199188232, "logps/chosen": -433.54736328125, "logps/rejected": -508.12628173828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.1464569568634033, "rewards/margins": 7.607149124145508, "rewards/rejected": -9.753605842590332, "step": 2721 }, { "epoch": 0.42, "learning_rate": 1.2150909279526948e-05, "logits/chosen": -2.6914408206939697, "logits/rejected": -2.4825596809387207, "logps/chosen": -390.78607177734375, "logps/rejected": -399.21075439453125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.9452767372131348, "rewards/margins": 7.464114189147949, "rewards/rejected": -10.409390449523926, "step": 2722 }, { "epoch": 0.42, "learning_rate": 1.21501758389958e-05, "logits/chosen": -2.091187000274658, "logits/rejected": -2.447948932647705, "logps/chosen": -488.9129638671875, "logps/rejected": -571.7788696289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.72292423248291, "rewards/margins": 12.747220993041992, "rewards/rejected": -16.470144271850586, "step": 2723 }, { "epoch": 0.42, "learning_rate": 1.2149442398464652e-05, "logits/chosen": -2.686574697494507, "logits/rejected": -2.4368999004364014, "logps/chosen": -314.781982421875, "logps/rejected": -369.2707214355469, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.66011905670166, "rewards/margins": 6.746044158935547, "rewards/rejected": -9.40616226196289, "step": 2724 }, { "epoch": 0.42, "learning_rate": 1.2148708957933504e-05, "logits/chosen": -0.7922748923301697, "logits/rejected": -2.712841272354126, "logps/chosen": -146.71627807617188, "logps/rejected": -606.0380249023438, "loss": 3.9183, "rewards/accuracies": 0.5, "rewards/chosen": -5.840792655944824, "rewards/margins": -1.6753482818603516, "rewards/rejected": -4.165444374084473, "step": 2725 }, { "epoch": 0.42, "learning_rate": 1.2147975517402355e-05, "logits/chosen": -3.098756790161133, "logits/rejected": -3.1498239040374756, "logps/chosen": -199.28121948242188, "logps/rejected": -229.46771240234375, "loss": 1.9505, "rewards/accuracies": 0.5, "rewards/chosen": -4.676710605621338, "rewards/margins": 0.6107430458068848, "rewards/rejected": -5.287453651428223, "step": 2726 }, { "epoch": 0.42, "learning_rate": 1.2147242076871207e-05, "logits/chosen": -2.2894909381866455, "logits/rejected": -3.1916279792785645, "logps/chosen": -231.9716339111328, "logps/rejected": -379.1566467285156, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -3.1242356300354004, "rewards/margins": 4.496487617492676, "rewards/rejected": -7.620723247528076, "step": 2727 }, { "epoch": 0.42, "learning_rate": 1.214650863634006e-05, "logits/chosen": -2.6206328868865967, "logits/rejected": -2.57504940032959, "logps/chosen": -323.1911926269531, "logps/rejected": -181.3517608642578, "loss": 0.6822, "rewards/accuracies": 0.5, "rewards/chosen": -3.434201240539551, "rewards/margins": 1.9319229125976562, "rewards/rejected": -5.366124153137207, "step": 2728 }, { "epoch": 0.42, "learning_rate": 1.2145775195808911e-05, "logits/chosen": -3.048349142074585, "logits/rejected": -3.417130470275879, "logps/chosen": -84.230712890625, "logps/rejected": -200.03289794921875, "loss": 0.1039, "rewards/accuracies": 1.0, "rewards/chosen": -2.0708088874816895, "rewards/margins": 4.452875137329102, "rewards/rejected": -6.523684024810791, "step": 2729 }, { "epoch": 0.42, "learning_rate": 1.2145041755277763e-05, "logits/chosen": -2.8549318313598633, "logits/rejected": -2.4933254718780518, "logps/chosen": -433.2620544433594, "logps/rejected": -490.88214111328125, "loss": 1.9785, "rewards/accuracies": 0.5, "rewards/chosen": -3.8935484886169434, "rewards/margins": 0.5906198024749756, "rewards/rejected": -4.484168529510498, "step": 2730 }, { "epoch": 0.42, "learning_rate": 1.2144308314746617e-05, "logits/chosen": -1.9769114255905151, "logits/rejected": -2.9876935482025146, "logps/chosen": -91.89864349365234, "logps/rejected": -393.3162841796875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -4.188204765319824, "rewards/margins": 10.016434669494629, "rewards/rejected": -14.204639434814453, "step": 2731 }, { "epoch": 0.42, "learning_rate": 1.2143574874215468e-05, "logits/chosen": -1.6367175579071045, "logits/rejected": -2.5338423252105713, "logps/chosen": -184.5345458984375, "logps/rejected": -295.09552001953125, "loss": 0.6177, "rewards/accuracies": 0.5, "rewards/chosen": -4.413882732391357, "rewards/margins": 4.504716873168945, "rewards/rejected": -8.918599128723145, "step": 2732 }, { "epoch": 0.43, "learning_rate": 1.214284143368432e-05, "logits/chosen": -0.9544469118118286, "logits/rejected": -2.906482696533203, "logps/chosen": -70.99552917480469, "logps/rejected": -443.62786865234375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.934314727783203, "rewards/margins": 7.863471031188965, "rewards/rejected": -10.797784805297852, "step": 2733 }, { "epoch": 0.43, "learning_rate": 1.2142107993153172e-05, "logits/chosen": -2.5088510513305664, "logits/rejected": -2.9823930263519287, "logps/chosen": -143.07266235351562, "logps/rejected": -219.79359436035156, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.884028673171997, "rewards/margins": 5.689702987670898, "rewards/rejected": -7.573731422424316, "step": 2734 }, { "epoch": 0.43, "learning_rate": 1.2141374552622024e-05, "logits/chosen": -1.8598016500473022, "logits/rejected": -2.8341047763824463, "logps/chosen": -115.09365844726562, "logps/rejected": -352.9522399902344, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -3.10357666015625, "rewards/margins": 8.076116561889648, "rewards/rejected": -11.179693222045898, "step": 2735 }, { "epoch": 0.43, "learning_rate": 1.2140641112090876e-05, "logits/chosen": -1.2464303970336914, "logits/rejected": -2.7345783710479736, "logps/chosen": -118.63107299804688, "logps/rejected": -269.62347412109375, "loss": 1.7155, "rewards/accuracies": 0.0, "rewards/chosen": -4.4914445877075195, "rewards/margins": -1.3702458143234253, "rewards/rejected": -3.121198892593384, "step": 2736 }, { "epoch": 0.43, "learning_rate": 1.2139907671559728e-05, "logits/chosen": -2.8269081115722656, "logits/rejected": -2.94442081451416, "logps/chosen": -182.84066772460938, "logps/rejected": -265.55560302734375, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -2.679168224334717, "rewards/margins": 6.447179317474365, "rewards/rejected": -9.126347541809082, "step": 2737 }, { "epoch": 0.43, "learning_rate": 1.213917423102858e-05, "logits/chosen": -2.8673999309539795, "logits/rejected": -2.496243476867676, "logps/chosen": -353.2395324707031, "logps/rejected": -218.94992065429688, "loss": 3.923, "rewards/accuracies": 0.0, "rewards/chosen": -7.451866149902344, "rewards/margins": -3.8987557888031006, "rewards/rejected": -3.5531105995178223, "step": 2738 }, { "epoch": 0.43, "learning_rate": 1.2138440790497432e-05, "logits/chosen": -1.908888578414917, "logits/rejected": -3.1806411743164062, "logps/chosen": -50.15779495239258, "logps/rejected": -422.7055969238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.279937505722046, "rewards/margins": 10.278847694396973, "rewards/rejected": -12.558784484863281, "step": 2739 }, { "epoch": 0.43, "learning_rate": 1.2137707349966285e-05, "logits/chosen": -2.476728677749634, "logits/rejected": -3.127155303955078, "logps/chosen": -40.59510803222656, "logps/rejected": -257.65264892578125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.452636241912842, "rewards/margins": 7.910545349121094, "rewards/rejected": -10.363181114196777, "step": 2740 }, { "epoch": 0.43, "learning_rate": 1.2136973909435137e-05, "logits/chosen": -2.5283992290496826, "logits/rejected": -2.810795545578003, "logps/chosen": -171.7869110107422, "logps/rejected": -266.67889404296875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.4791779518127441, "rewards/margins": 6.281505584716797, "rewards/rejected": -7.760683536529541, "step": 2741 }, { "epoch": 0.43, "learning_rate": 1.2136240468903989e-05, "logits/chosen": -2.7403979301452637, "logits/rejected": -2.999666929244995, "logps/chosen": -73.62704467773438, "logps/rejected": -240.656005859375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.799452066421509, "rewards/margins": 5.9361090660095215, "rewards/rejected": -8.73556137084961, "step": 2742 }, { "epoch": 0.43, "learning_rate": 1.213550702837284e-05, "logits/chosen": -2.0304195880889893, "logits/rejected": -2.996791124343872, "logps/chosen": -62.802894592285156, "logps/rejected": -319.9814453125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.8423409461975098, "rewards/margins": 7.064712047576904, "rewards/rejected": -9.907052993774414, "step": 2743 }, { "epoch": 0.43, "learning_rate": 1.2134773587841693e-05, "logits/chosen": -2.6676273345947266, "logits/rejected": -1.3418376445770264, "logps/chosen": -186.00921630859375, "logps/rejected": -97.08429718017578, "loss": 2.2792, "rewards/accuracies": 0.0, "rewards/chosen": -6.670318603515625, "rewards/margins": -2.1288156509399414, "rewards/rejected": -4.541502952575684, "step": 2744 }, { "epoch": 0.43, "learning_rate": 1.2134040147310545e-05, "logits/chosen": -2.344677686691284, "logits/rejected": -3.048008918762207, "logps/chosen": -64.0754165649414, "logps/rejected": -258.57183837890625, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -4.528714179992676, "rewards/margins": 3.889476776123047, "rewards/rejected": -8.418190956115723, "step": 2745 }, { "epoch": 0.43, "learning_rate": 1.2133306706779396e-05, "logits/chosen": -2.453341484069824, "logits/rejected": -2.9824957847595215, "logps/chosen": -77.54309844970703, "logps/rejected": -280.2054138183594, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.2768959999084473, "rewards/margins": 6.331258773803711, "rewards/rejected": -8.608155250549316, "step": 2746 }, { "epoch": 0.43, "learning_rate": 1.2132573266248248e-05, "logits/chosen": -2.853595495223999, "logits/rejected": -2.6743431091308594, "logps/chosen": -484.70843505859375, "logps/rejected": -494.75872802734375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.8782334327697754, "rewards/margins": 5.911287307739258, "rewards/rejected": -8.789520263671875, "step": 2747 }, { "epoch": 0.43, "learning_rate": 1.21318398257171e-05, "logits/chosen": -2.6308603286743164, "logits/rejected": -2.8008124828338623, "logps/chosen": -350.638916015625, "logps/rejected": -349.0047302246094, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -3.948381185531616, "rewards/margins": 5.283624649047852, "rewards/rejected": -9.232006072998047, "step": 2748 }, { "epoch": 0.43, "learning_rate": 1.2131106385185954e-05, "logits/chosen": -2.901923656463623, "logits/rejected": -3.3089916706085205, "logps/chosen": -91.01283264160156, "logps/rejected": -282.575927734375, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": -1.194909930229187, "rewards/margins": 4.130478382110596, "rewards/rejected": -5.325387954711914, "step": 2749 }, { "epoch": 0.43, "learning_rate": 1.2130372944654806e-05, "logits/chosen": -2.8653204441070557, "logits/rejected": -2.8113677501678467, "logps/chosen": -228.89971923828125, "logps/rejected": -314.92327880859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.704935073852539, "rewards/margins": 6.246582984924316, "rewards/rejected": -8.951518058776855, "step": 2750 }, { "epoch": 0.43, "learning_rate": 1.2129639504123658e-05, "logits/chosen": -2.8249900341033936, "logits/rejected": -2.7917442321777344, "logps/chosen": -117.93629455566406, "logps/rejected": -272.893310546875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.3250229358673096, "rewards/margins": 7.005915641784668, "rewards/rejected": -10.330938339233398, "step": 2751 }, { "epoch": 0.43, "learning_rate": 1.212890606359251e-05, "logits/chosen": -2.186737060546875, "logits/rejected": -2.936105966567993, "logps/chosen": -65.33671569824219, "logps/rejected": -296.3030090332031, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.228915214538574, "rewards/margins": 5.480196952819824, "rewards/rejected": -7.709112167358398, "step": 2752 }, { "epoch": 0.43, "learning_rate": 1.2128172623061363e-05, "logits/chosen": -2.800006866455078, "logits/rejected": -3.1940741539001465, "logps/chosen": -197.49147033691406, "logps/rejected": -318.3362121582031, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.3497724533081055, "rewards/margins": 7.759505748748779, "rewards/rejected": -10.109277725219727, "step": 2753 }, { "epoch": 0.43, "learning_rate": 1.2127439182530215e-05, "logits/chosen": -2.746692180633545, "logits/rejected": -2.9726414680480957, "logps/chosen": -44.38622283935547, "logps/rejected": -216.43519592285156, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.5517876148223877, "rewards/margins": 6.328991889953613, "rewards/rejected": -7.880780220031738, "step": 2754 }, { "epoch": 0.43, "learning_rate": 1.2126705741999067e-05, "logits/chosen": -2.359769821166992, "logits/rejected": -3.0571787357330322, "logps/chosen": -44.31109619140625, "logps/rejected": -289.37347412109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.0231008529663086, "rewards/margins": 8.501168251037598, "rewards/rejected": -10.524269104003906, "step": 2755 }, { "epoch": 0.43, "learning_rate": 1.2125972301467919e-05, "logits/chosen": -2.9468507766723633, "logits/rejected": -2.5580668449401855, "logps/chosen": -386.0318603515625, "logps/rejected": -291.9984130859375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.773437023162842, "rewards/margins": 7.368407249450684, "rewards/rejected": -10.141843795776367, "step": 2756 }, { "epoch": 0.43, "learning_rate": 1.212523886093677e-05, "logits/chosen": -1.8751537799835205, "logits/rejected": -2.779151678085327, "logps/chosen": -213.77178955078125, "logps/rejected": -481.9635009765625, "loss": 4.4875, "rewards/accuracies": 0.5, "rewards/chosen": -6.447725772857666, "rewards/margins": 2.5389504432678223, "rewards/rejected": -8.986676216125488, "step": 2757 }, { "epoch": 0.43, "learning_rate": 1.2124505420405624e-05, "logits/chosen": -2.57059645652771, "logits/rejected": -1.5604865550994873, "logps/chosen": -210.181640625, "logps/rejected": -235.71347045898438, "loss": 1.0674, "rewards/accuracies": 0.5, "rewards/chosen": -5.020826816558838, "rewards/margins": 3.247295618057251, "rewards/rejected": -8.268122673034668, "step": 2758 }, { "epoch": 0.43, "learning_rate": 1.2123771979874476e-05, "logits/chosen": -2.4749205112457275, "logits/rejected": -2.985097646713257, "logps/chosen": -113.05734252929688, "logps/rejected": -303.7060852050781, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -2.3585572242736816, "rewards/margins": 6.818076133728027, "rewards/rejected": -9.176633834838867, "step": 2759 }, { "epoch": 0.43, "learning_rate": 1.2123038539343328e-05, "logits/chosen": -2.6552927494049072, "logits/rejected": -2.945314645767212, "logps/chosen": -144.6465301513672, "logps/rejected": -278.35845947265625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -2.2883944511413574, "rewards/margins": 7.093935489654541, "rewards/rejected": -9.382329940795898, "step": 2760 }, { "epoch": 0.43, "learning_rate": 1.212230509881218e-05, "logits/chosen": -2.95310640335083, "logits/rejected": -2.744417905807495, "logps/chosen": -267.56170654296875, "logps/rejected": -305.09161376953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.900965690612793, "rewards/margins": 6.827563285827637, "rewards/rejected": -12.72852897644043, "step": 2761 }, { "epoch": 0.43, "learning_rate": 1.2121571658281032e-05, "logits/chosen": -2.8920726776123047, "logits/rejected": -2.830859899520874, "logps/chosen": -252.58749389648438, "logps/rejected": -241.4688720703125, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -1.4503059387207031, "rewards/margins": 6.15138053894043, "rewards/rejected": -7.601686477661133, "step": 2762 }, { "epoch": 0.43, "learning_rate": 1.2120838217749883e-05, "logits/chosen": -1.4370498657226562, "logits/rejected": -2.727764129638672, "logps/chosen": -94.64753723144531, "logps/rejected": -271.0795593261719, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.7908244132995605, "rewards/margins": 4.992006301879883, "rewards/rejected": -7.782831192016602, "step": 2763 }, { "epoch": 0.43, "learning_rate": 1.2120104777218735e-05, "logits/chosen": -2.689177989959717, "logits/rejected": -2.685427665710449, "logps/chosen": -238.95211791992188, "logps/rejected": -136.43824768066406, "loss": 5.3579, "rewards/accuracies": 0.5, "rewards/chosen": -7.3315653800964355, "rewards/margins": -1.280932903289795, "rewards/rejected": -6.050632476806641, "step": 2764 }, { "epoch": 0.43, "learning_rate": 1.2119371336687587e-05, "logits/chosen": -2.763035774230957, "logits/rejected": -2.0313854217529297, "logps/chosen": -480.43536376953125, "logps/rejected": -387.9032287597656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8082168102264404, "rewards/margins": 8.468238830566406, "rewards/rejected": -10.276455879211426, "step": 2765 }, { "epoch": 0.43, "learning_rate": 1.2118637896156439e-05, "logits/chosen": -1.577298879623413, "logits/rejected": -2.4295122623443604, "logps/chosen": -293.35943603515625, "logps/rejected": -625.2515869140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3644347190856934, "rewards/margins": 9.185306549072266, "rewards/rejected": -12.5497407913208, "step": 2766 }, { "epoch": 0.43, "learning_rate": 1.2117904455625293e-05, "logits/chosen": -2.2216994762420654, "logits/rejected": -2.843580484390259, "logps/chosen": -545.2109985351562, "logps/rejected": -647.2261962890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.2039809226989746, "rewards/margins": 6.9977216720581055, "rewards/rejected": -10.201702117919922, "step": 2767 }, { "epoch": 0.43, "learning_rate": 1.2117171015094145e-05, "logits/chosen": -1.9521209001541138, "logits/rejected": -2.7026925086975098, "logps/chosen": -244.72964477539062, "logps/rejected": -394.9700927734375, "loss": 3.7421, "rewards/accuracies": 0.5, "rewards/chosen": -6.233564376831055, "rewards/margins": 2.8915205001831055, "rewards/rejected": -9.12508487701416, "step": 2768 }, { "epoch": 0.43, "learning_rate": 1.2116437574562996e-05, "logits/chosen": -2.7008378505706787, "logits/rejected": -2.8617215156555176, "logps/chosen": -286.80419921875, "logps/rejected": -255.5485076904297, "loss": 3.2472, "rewards/accuracies": 0.5, "rewards/chosen": -7.493052005767822, "rewards/margins": -0.8158628940582275, "rewards/rejected": -6.677188873291016, "step": 2769 }, { "epoch": 0.43, "learning_rate": 1.2115704134031848e-05, "logits/chosen": -2.229541778564453, "logits/rejected": -3.0038599967956543, "logps/chosen": -302.24298095703125, "logps/rejected": -378.793212890625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.4225120544433594, "rewards/margins": 7.286960601806641, "rewards/rejected": -9.70947265625, "step": 2770 }, { "epoch": 0.43, "learning_rate": 1.21149706935007e-05, "logits/chosen": -2.711883544921875, "logits/rejected": -2.5236573219299316, "logps/chosen": -507.15460205078125, "logps/rejected": -471.8441162109375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.2703728675842285, "rewards/margins": 7.46842098236084, "rewards/rejected": -10.73879337310791, "step": 2771 }, { "epoch": 0.43, "learning_rate": 1.2114237252969552e-05, "logits/chosen": -2.793940305709839, "logits/rejected": -3.026648998260498, "logps/chosen": -93.59265899658203, "logps/rejected": -186.595458984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.356476068496704, "rewards/margins": 6.374900817871094, "rewards/rejected": -8.731377601623535, "step": 2772 }, { "epoch": 0.43, "learning_rate": 1.2113503812438404e-05, "logits/chosen": -0.8075950741767883, "logits/rejected": -2.7747151851654053, "logps/chosen": -76.61573791503906, "logps/rejected": -484.9649963378906, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.208643913269043, "rewards/margins": 7.648698806762695, "rewards/rejected": -10.857342720031738, "step": 2773 }, { "epoch": 0.43, "learning_rate": 1.2112770371907256e-05, "logits/chosen": -2.748019218444824, "logits/rejected": -3.2255802154541016, "logps/chosen": -406.8502197265625, "logps/rejected": -449.68914794921875, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -2.1622047424316406, "rewards/margins": 6.425529479980469, "rewards/rejected": -8.58773422241211, "step": 2774 }, { "epoch": 0.43, "learning_rate": 1.211203693137611e-05, "logits/chosen": -2.150416851043701, "logits/rejected": -2.6902763843536377, "logps/chosen": -206.26004028320312, "logps/rejected": -307.4852600097656, "loss": 3.267, "rewards/accuracies": 0.5, "rewards/chosen": -6.678541660308838, "rewards/margins": 2.258955240249634, "rewards/rejected": -8.93749713897705, "step": 2775 }, { "epoch": 0.43, "learning_rate": 1.2111303490844961e-05, "logits/chosen": -2.4322047233581543, "logits/rejected": -2.761507272720337, "logps/chosen": -410.60076904296875, "logps/rejected": -417.7040710449219, "loss": 5.0906, "rewards/accuracies": 0.5, "rewards/chosen": -8.428049087524414, "rewards/margins": -3.006134033203125, "rewards/rejected": -5.421915054321289, "step": 2776 }, { "epoch": 0.43, "learning_rate": 1.2110570050313813e-05, "logits/chosen": -1.121031641960144, "logits/rejected": -2.2724781036376953, "logps/chosen": -198.48704528808594, "logps/rejected": -493.82415771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0808472633361816, "rewards/margins": 9.694849014282227, "rewards/rejected": -11.77569580078125, "step": 2777 }, { "epoch": 0.43, "learning_rate": 1.2109836609782665e-05, "logits/chosen": -2.723668336868286, "logits/rejected": -2.7997562885284424, "logps/chosen": -501.90863037109375, "logps/rejected": -574.5836181640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.715467929840088, "rewards/margins": 7.492732524871826, "rewards/rejected": -10.208200454711914, "step": 2778 }, { "epoch": 0.43, "learning_rate": 1.2109103169251517e-05, "logits/chosen": -2.8761467933654785, "logits/rejected": -2.7958736419677734, "logps/chosen": -90.85026550292969, "logps/rejected": -188.21556091308594, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.6306970119476318, "rewards/margins": 7.48699426651001, "rewards/rejected": -9.117691040039062, "step": 2779 }, { "epoch": 0.43, "learning_rate": 1.2108369728720369e-05, "logits/chosen": -2.4278907775878906, "logits/rejected": -3.1641929149627686, "logps/chosen": -193.09786987304688, "logps/rejected": -332.03143310546875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.2190818786621094, "rewards/margins": 6.77596378326416, "rewards/rejected": -8.99504566192627, "step": 2780 }, { "epoch": 0.43, "learning_rate": 1.210763628818922e-05, "logits/chosen": -2.2934985160827637, "logits/rejected": -2.6597390174865723, "logps/chosen": -96.87300109863281, "logps/rejected": -264.6046447753906, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4848039150238037, "rewards/margins": 6.98858642578125, "rewards/rejected": -8.473390579223633, "step": 2781 }, { "epoch": 0.43, "learning_rate": 1.2106902847658073e-05, "logits/chosen": -3.199160099029541, "logits/rejected": -2.901952028274536, "logps/chosen": -571.36181640625, "logps/rejected": -385.748046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.213724136352539, "rewards/margins": 7.253766059875488, "rewards/rejected": -8.467490196228027, "step": 2782 }, { "epoch": 0.43, "learning_rate": 1.2106169407126924e-05, "logits/chosen": -2.0848021507263184, "logits/rejected": -2.6779298782348633, "logps/chosen": -113.71288299560547, "logps/rejected": -353.91497802734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.9874937534332275, "rewards/margins": 8.593584060668945, "rewards/rejected": -11.58107852935791, "step": 2783 }, { "epoch": 0.43, "learning_rate": 1.2105435966595778e-05, "logits/chosen": -3.0094869136810303, "logits/rejected": -3.1632306575775146, "logps/chosen": -253.4370574951172, "logps/rejected": -269.9003601074219, "loss": 3.0061, "rewards/accuracies": 0.5, "rewards/chosen": -6.088736057281494, "rewards/margins": -0.3688535690307617, "rewards/rejected": -5.719882488250732, "step": 2784 }, { "epoch": 0.43, "learning_rate": 1.210470252606463e-05, "logits/chosen": -1.9506665468215942, "logits/rejected": -2.9145636558532715, "logps/chosen": -58.29116439819336, "logps/rejected": -170.86831665039062, "loss": 0.0978, "rewards/accuracies": 1.0, "rewards/chosen": -3.0867271423339844, "rewards/margins": 2.284731864929199, "rewards/rejected": -5.371459007263184, "step": 2785 }, { "epoch": 0.43, "learning_rate": 1.2103969085533482e-05, "logits/chosen": -2.789856433868408, "logits/rejected": -2.7120449542999268, "logps/chosen": -258.53857421875, "logps/rejected": -121.46551513671875, "loss": 3.5334, "rewards/accuracies": 0.5, "rewards/chosen": -5.990719318389893, "rewards/margins": -1.9758065938949585, "rewards/rejected": -4.0149126052856445, "step": 2786 }, { "epoch": 0.43, "learning_rate": 1.2103235645002335e-05, "logits/chosen": -1.2961403131484985, "logits/rejected": -2.782076835632324, "logps/chosen": -175.99884033203125, "logps/rejected": -357.867919921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.0712594985961914, "rewards/margins": 7.336066246032715, "rewards/rejected": -10.407325744628906, "step": 2787 }, { "epoch": 0.43, "learning_rate": 1.2102502204471187e-05, "logits/chosen": -2.476435661315918, "logits/rejected": -2.6130118370056152, "logps/chosen": -344.2153015136719, "logps/rejected": -449.8225402832031, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -2.1409268379211426, "rewards/margins": 4.564389228820801, "rewards/rejected": -6.705316543579102, "step": 2788 }, { "epoch": 0.43, "learning_rate": 1.2101768763940039e-05, "logits/chosen": -3.046677350997925, "logits/rejected": -3.213442325592041, "logps/chosen": -164.22479248046875, "logps/rejected": -334.9478759765625, "loss": 1.2757, "rewards/accuracies": 0.5, "rewards/chosen": -2.658759355545044, "rewards/margins": 3.1874732971191406, "rewards/rejected": -5.8462324142456055, "step": 2789 }, { "epoch": 0.43, "learning_rate": 1.2101035323408891e-05, "logits/chosen": -2.332730531692505, "logits/rejected": -2.7196898460388184, "logps/chosen": -218.62142944335938, "logps/rejected": -216.12315368652344, "loss": 3.6271, "rewards/accuracies": 0.5, "rewards/chosen": -6.1347880363464355, "rewards/margins": -0.6981191635131836, "rewards/rejected": -5.436668395996094, "step": 2790 }, { "epoch": 0.43, "learning_rate": 1.2100301882877743e-05, "logits/chosen": -2.3667707443237305, "logits/rejected": -3.127681016921997, "logps/chosen": -234.22100830078125, "logps/rejected": -595.1749267578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.264936923980713, "rewards/margins": 8.196249008178711, "rewards/rejected": -10.461186408996582, "step": 2791 }, { "epoch": 0.43, "learning_rate": 1.2099568442346595e-05, "logits/chosen": -3.0464608669281006, "logits/rejected": -2.941371440887451, "logps/chosen": -159.3048095703125, "logps/rejected": -171.85067749023438, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.053563117980957, "rewards/margins": 5.719661712646484, "rewards/rejected": -6.773224830627441, "step": 2792 }, { "epoch": 0.43, "learning_rate": 1.2098835001815448e-05, "logits/chosen": -2.3689136505126953, "logits/rejected": -2.924639940261841, "logps/chosen": -353.0408630371094, "logps/rejected": -401.8106994628906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4894180297851562, "rewards/margins": 8.695221900939941, "rewards/rejected": -11.184640884399414, "step": 2793 }, { "epoch": 0.43, "learning_rate": 1.20981015612843e-05, "logits/chosen": -2.785811185836792, "logits/rejected": -2.9112327098846436, "logps/chosen": -123.33186340332031, "logps/rejected": -245.20135498046875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.9140892028808594, "rewards/margins": 6.147251129150391, "rewards/rejected": -8.06134033203125, "step": 2794 }, { "epoch": 0.43, "learning_rate": 1.2097368120753152e-05, "logits/chosen": -2.7446882724761963, "logits/rejected": -3.220231056213379, "logps/chosen": -267.51318359375, "logps/rejected": -433.46875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.4068238735198975, "rewards/margins": 7.736903190612793, "rewards/rejected": -10.14372730255127, "step": 2795 }, { "epoch": 0.43, "learning_rate": 1.2096634680222004e-05, "logits/chosen": -2.6851656436920166, "logits/rejected": -3.319019317626953, "logps/chosen": -140.08816528320312, "logps/rejected": -282.9931640625, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": -1.8255739212036133, "rewards/margins": 5.9023518562316895, "rewards/rejected": -7.727926254272461, "step": 2796 }, { "epoch": 0.43, "learning_rate": 1.2095901239690856e-05, "logits/chosen": -3.0263137817382812, "logits/rejected": -2.4613611698150635, "logps/chosen": -638.7962646484375, "logps/rejected": -476.66973876953125, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -3.0882186889648438, "rewards/margins": 4.292271614074707, "rewards/rejected": -7.380490303039551, "step": 2797 }, { "epoch": 0.44, "learning_rate": 1.2095167799159708e-05, "logits/chosen": -2.848177433013916, "logits/rejected": -2.781984806060791, "logps/chosen": -113.2372055053711, "logps/rejected": -201.22434997558594, "loss": 3.6599, "rewards/accuracies": 0.5, "rewards/chosen": -5.860787868499756, "rewards/margins": -0.9124588966369629, "rewards/rejected": -4.948328971862793, "step": 2798 }, { "epoch": 0.44, "learning_rate": 1.209443435862856e-05, "logits/chosen": -2.5402958393096924, "logits/rejected": -2.5673916339874268, "logps/chosen": -144.90106201171875, "logps/rejected": -178.8046875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -3.310073137283325, "rewards/margins": 4.460788249969482, "rewards/rejected": -7.770861625671387, "step": 2799 }, { "epoch": 0.44, "learning_rate": 1.2093700918097411e-05, "logits/chosen": -2.4497251510620117, "logits/rejected": -2.6997792720794678, "logps/chosen": -598.6126098632812, "logps/rejected": -505.23004150390625, "loss": 0.2122, "rewards/accuracies": 1.0, "rewards/chosen": -2.6723098754882812, "rewards/margins": 2.7656495571136475, "rewards/rejected": -5.437959671020508, "step": 2800 }, { "epoch": 0.44, "learning_rate": 1.2092967477566263e-05, "logits/chosen": -3.2140214443206787, "logits/rejected": -2.6246285438537598, "logps/chosen": -636.643310546875, "logps/rejected": -390.71527099609375, "loss": 0.2245, "rewards/accuracies": 1.0, "rewards/chosen": -1.1750915050506592, "rewards/margins": 4.731316566467285, "rewards/rejected": -5.906407833099365, "step": 2801 }, { "epoch": 0.44, "learning_rate": 1.2092234037035117e-05, "logits/chosen": -2.6414012908935547, "logits/rejected": -3.2261409759521484, "logps/chosen": -424.0295715332031, "logps/rejected": -651.7260131835938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.099914073944092, "rewards/margins": 9.468002319335938, "rewards/rejected": -12.567916870117188, "step": 2802 }, { "epoch": 0.44, "learning_rate": 1.2091500596503969e-05, "logits/chosen": -2.559274911880493, "logits/rejected": -2.9923410415649414, "logps/chosen": -374.49285888671875, "logps/rejected": -707.9228515625, "loss": 3.8734, "rewards/accuracies": 0.5, "rewards/chosen": -6.461509704589844, "rewards/margins": -0.796687126159668, "rewards/rejected": -5.664822578430176, "step": 2803 }, { "epoch": 0.44, "learning_rate": 1.209076715597282e-05, "logits/chosen": -2.5092928409576416, "logits/rejected": -2.9469282627105713, "logps/chosen": -177.69439697265625, "logps/rejected": -390.69183349609375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.7751874923706055, "rewards/margins": 5.265810012817383, "rewards/rejected": -8.040997505187988, "step": 2804 }, { "epoch": 0.44, "learning_rate": 1.2090033715441673e-05, "logits/chosen": -2.4949142932891846, "logits/rejected": -3.1098337173461914, "logps/chosen": -125.17119598388672, "logps/rejected": -312.6651611328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.310141086578369, "rewards/margins": 6.6434645652771, "rewards/rejected": -8.953605651855469, "step": 2805 }, { "epoch": 0.44, "learning_rate": 1.2089300274910524e-05, "logits/chosen": -1.3723652362823486, "logits/rejected": -2.819729804992676, "logps/chosen": -110.06322479248047, "logps/rejected": -371.1934814453125, "loss": 0.3279, "rewards/accuracies": 1.0, "rewards/chosen": -3.809814453125, "rewards/margins": 2.607646942138672, "rewards/rejected": -6.417461395263672, "step": 2806 }, { "epoch": 0.44, "learning_rate": 1.2088566834379376e-05, "logits/chosen": -2.6787400245666504, "logits/rejected": -2.739156484603882, "logps/chosen": -133.65757751464844, "logps/rejected": -225.72006225585938, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.028078556060791, "rewards/margins": 6.437800884246826, "rewards/rejected": -8.465879440307617, "step": 2807 }, { "epoch": 0.44, "learning_rate": 1.2087833393848228e-05, "logits/chosen": -2.725663900375366, "logits/rejected": -2.0729639530181885, "logps/chosen": -129.1647491455078, "logps/rejected": -160.78102111816406, "loss": 2.0257, "rewards/accuracies": 0.5, "rewards/chosen": -4.866320610046387, "rewards/margins": 0.5800683498382568, "rewards/rejected": -5.4463887214660645, "step": 2808 }, { "epoch": 0.44, "learning_rate": 1.208709995331708e-05, "logits/chosen": -2.3404505252838135, "logits/rejected": -2.9666028022766113, "logps/chosen": -104.00054168701172, "logps/rejected": -253.11129760742188, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.973541021347046, "rewards/margins": 5.704037666320801, "rewards/rejected": -7.677578926086426, "step": 2809 }, { "epoch": 0.44, "learning_rate": 1.2086366512785932e-05, "logits/chosen": -2.234762668609619, "logits/rejected": -3.0624592304229736, "logps/chosen": -192.18203735351562, "logps/rejected": -313.81610107421875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.98500919342041, "rewards/margins": 6.330592155456543, "rewards/rejected": -9.315601348876953, "step": 2810 }, { "epoch": 0.44, "learning_rate": 1.2085633072254785e-05, "logits/chosen": -3.0825679302215576, "logits/rejected": -3.1974661350250244, "logps/chosen": -105.25988006591797, "logps/rejected": -249.76377868652344, "loss": 1.6432, "rewards/accuracies": 0.5, "rewards/chosen": -4.299286842346191, "rewards/margins": 2.7056593894958496, "rewards/rejected": -7.004946231842041, "step": 2811 }, { "epoch": 0.44, "learning_rate": 1.2084899631723637e-05, "logits/chosen": -2.842344045639038, "logits/rejected": -2.4196360111236572, "logps/chosen": -477.9305114746094, "logps/rejected": -390.768798828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.049325466156006, "rewards/margins": 8.11581039428711, "rewards/rejected": -10.165136337280273, "step": 2812 }, { "epoch": 0.44, "learning_rate": 1.208416619119249e-05, "logits/chosen": -2.6177408695220947, "logits/rejected": -1.548215389251709, "logps/chosen": -328.628662109375, "logps/rejected": -252.61642456054688, "loss": 2.9193, "rewards/accuracies": 0.5, "rewards/chosen": -5.688050270080566, "rewards/margins": 0.1726360321044922, "rewards/rejected": -5.860686779022217, "step": 2813 }, { "epoch": 0.44, "learning_rate": 1.2083432750661341e-05, "logits/chosen": -3.189887523651123, "logits/rejected": -3.0437495708465576, "logps/chosen": -388.5549011230469, "logps/rejected": -291.6547546386719, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -4.107759952545166, "rewards/margins": 4.945113182067871, "rewards/rejected": -9.052873611450195, "step": 2814 }, { "epoch": 0.44, "learning_rate": 1.2082699310130193e-05, "logits/chosen": -2.5863547325134277, "logits/rejected": -2.933126211166382, "logps/chosen": -180.007568359375, "logps/rejected": -323.9005126953125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -2.456667184829712, "rewards/margins": 4.726511478424072, "rewards/rejected": -7.183178901672363, "step": 2815 }, { "epoch": 0.44, "learning_rate": 1.2081965869599045e-05, "logits/chosen": -2.9909911155700684, "logits/rejected": -3.077176094055176, "logps/chosen": -92.29103088378906, "logps/rejected": -260.58441162109375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.329564332962036, "rewards/margins": 5.835671424865723, "rewards/rejected": -9.16523551940918, "step": 2816 }, { "epoch": 0.44, "learning_rate": 1.2081232429067897e-05, "logits/chosen": -2.8487203121185303, "logits/rejected": -2.6781768798828125, "logps/chosen": -338.68463134765625, "logps/rejected": -226.1136474609375, "loss": 1.3372, "rewards/accuracies": 0.5, "rewards/chosen": -4.839915752410889, "rewards/margins": 2.5789005756378174, "rewards/rejected": -7.418816566467285, "step": 2817 }, { "epoch": 0.44, "learning_rate": 1.2080498988536749e-05, "logits/chosen": -1.773315668106079, "logits/rejected": -2.5746169090270996, "logps/chosen": -189.7546844482422, "logps/rejected": -311.575927734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.151453733444214, "rewards/margins": 7.613711357116699, "rewards/rejected": -10.765165328979492, "step": 2818 }, { "epoch": 0.44, "learning_rate": 1.2079765548005602e-05, "logits/chosen": -2.454458713531494, "logits/rejected": -2.907137155532837, "logps/chosen": -112.90106964111328, "logps/rejected": -304.9295654296875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.7187440395355225, "rewards/margins": 7.049261093139648, "rewards/rejected": -8.76800537109375, "step": 2819 }, { "epoch": 0.44, "learning_rate": 1.2079032107474454e-05, "logits/chosen": -3.1812524795532227, "logits/rejected": -3.0467188358306885, "logps/chosen": -174.0564727783203, "logps/rejected": -169.4090576171875, "loss": 2.6127, "rewards/accuracies": 0.5, "rewards/chosen": -5.231691837310791, "rewards/margins": 1.1755638122558594, "rewards/rejected": -6.40725564956665, "step": 2820 }, { "epoch": 0.44, "learning_rate": 1.2078298666943308e-05, "logits/chosen": -2.7820074558258057, "logits/rejected": -2.9277398586273193, "logps/chosen": -56.124534606933594, "logps/rejected": -169.23770141601562, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.3481791019439697, "rewards/margins": 5.887788772583008, "rewards/rejected": -8.235967636108398, "step": 2821 }, { "epoch": 0.44, "learning_rate": 1.207756522641216e-05, "logits/chosen": -2.8515207767486572, "logits/rejected": -3.082357883453369, "logps/chosen": -385.8558349609375, "logps/rejected": -400.08551025390625, "loss": 0.3587, "rewards/accuracies": 0.5, "rewards/chosen": -3.6525893211364746, "rewards/margins": 4.097320556640625, "rewards/rejected": -7.749909400939941, "step": 2822 }, { "epoch": 0.44, "learning_rate": 1.2076831785881011e-05, "logits/chosen": -1.9254348278045654, "logits/rejected": -2.8896610736846924, "logps/chosen": -220.51174926757812, "logps/rejected": -338.0869140625, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": -4.70328950881958, "rewards/margins": 2.935072898864746, "rewards/rejected": -7.638362407684326, "step": 2823 }, { "epoch": 0.44, "learning_rate": 1.2076098345349863e-05, "logits/chosen": -2.952812671661377, "logits/rejected": -2.4615652561187744, "logps/chosen": -158.29559326171875, "logps/rejected": -150.88323974609375, "loss": 0.5393, "rewards/accuracies": 0.5, "rewards/chosen": -3.4760942459106445, "rewards/margins": 2.3984150886535645, "rewards/rejected": -5.874508857727051, "step": 2824 }, { "epoch": 0.44, "learning_rate": 1.2075364904818715e-05, "logits/chosen": -1.9360466003417969, "logits/rejected": -3.1504340171813965, "logps/chosen": -76.00508117675781, "logps/rejected": -417.1669921875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.8844267129898071, "rewards/margins": 5.726433753967285, "rewards/rejected": -7.610860824584961, "step": 2825 }, { "epoch": 0.44, "learning_rate": 1.2074631464287567e-05, "logits/chosen": -2.6065268516540527, "logits/rejected": -2.9408833980560303, "logps/chosen": -183.63156127929688, "logps/rejected": -507.04302978515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.93892765045166, "rewards/margins": 7.55739688873291, "rewards/rejected": -10.49632453918457, "step": 2826 }, { "epoch": 0.44, "learning_rate": 1.2073898023756419e-05, "logits/chosen": -2.786184310913086, "logits/rejected": -1.99099600315094, "logps/chosen": -228.47610473632812, "logps/rejected": -297.1288146972656, "loss": 2.5211, "rewards/accuracies": 0.5, "rewards/chosen": -5.896071434020996, "rewards/margins": 0.7667732238769531, "rewards/rejected": -6.662844657897949, "step": 2827 }, { "epoch": 0.44, "learning_rate": 1.207316458322527e-05, "logits/chosen": -2.856734037399292, "logits/rejected": -2.4057958126068115, "logps/chosen": -184.57904052734375, "logps/rejected": -174.11952209472656, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -3.999955177307129, "rewards/margins": 4.390270233154297, "rewards/rejected": -8.390225410461426, "step": 2828 }, { "epoch": 0.44, "learning_rate": 1.2072431142694124e-05, "logits/chosen": -1.8319683074951172, "logits/rejected": -3.205768346786499, "logps/chosen": -147.32191467285156, "logps/rejected": -412.53900146484375, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -2.639613151550293, "rewards/margins": 3.95133113861084, "rewards/rejected": -6.590944290161133, "step": 2829 }, { "epoch": 0.44, "learning_rate": 1.2071697702162976e-05, "logits/chosen": -2.9559273719787598, "logits/rejected": -3.1028683185577393, "logps/chosen": -355.6938171386719, "logps/rejected": -355.6484375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.467642307281494, "rewards/margins": 7.144187927246094, "rewards/rejected": -9.61182975769043, "step": 2830 }, { "epoch": 0.44, "learning_rate": 1.2070964261631828e-05, "logits/chosen": -1.2678102254867554, "logits/rejected": -2.777993679046631, "logps/chosen": -84.13836669921875, "logps/rejected": -220.18435668945312, "loss": 0.0954, "rewards/accuracies": 1.0, "rewards/chosen": -3.7015938758850098, "rewards/margins": 3.7200703620910645, "rewards/rejected": -7.421664237976074, "step": 2831 }, { "epoch": 0.44, "learning_rate": 1.207023082110068e-05, "logits/chosen": -2.154296636581421, "logits/rejected": -3.1245901584625244, "logps/chosen": -248.5325469970703, "logps/rejected": -375.66717529296875, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -1.3021973371505737, "rewards/margins": 4.613137722015381, "rewards/rejected": -5.915335178375244, "step": 2832 }, { "epoch": 0.44, "learning_rate": 1.2069497380569532e-05, "logits/chosen": -2.7424848079681396, "logits/rejected": -3.012016534805298, "logps/chosen": -178.03146362304688, "logps/rejected": -182.01431274414062, "loss": 0.1824, "rewards/accuracies": 1.0, "rewards/chosen": -2.267279863357544, "rewards/margins": 3.1851959228515625, "rewards/rejected": -5.452475547790527, "step": 2833 }, { "epoch": 0.44, "learning_rate": 1.2068763940038384e-05, "logits/chosen": -2.2853429317474365, "logits/rejected": -2.9111127853393555, "logps/chosen": -122.91082763671875, "logps/rejected": -371.6368713378906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.908006191253662, "rewards/margins": 8.169885635375977, "rewards/rejected": -11.077892303466797, "step": 2834 }, { "epoch": 0.44, "learning_rate": 1.2068030499507236e-05, "logits/chosen": -2.862366199493408, "logits/rejected": -2.8979129791259766, "logps/chosen": -46.41572952270508, "logps/rejected": -118.83375549316406, "loss": 0.1705, "rewards/accuracies": 1.0, "rewards/chosen": -2.1159698963165283, "rewards/margins": 3.2660107612609863, "rewards/rejected": -5.381980895996094, "step": 2835 }, { "epoch": 0.44, "learning_rate": 1.2067297058976088e-05, "logits/chosen": -1.5348705053329468, "logits/rejected": -2.8339762687683105, "logps/chosen": -100.90127563476562, "logps/rejected": -302.0413513183594, "loss": 0.897, "rewards/accuracies": 0.5, "rewards/chosen": -5.236872673034668, "rewards/margins": 3.840771436691284, "rewards/rejected": -9.077644348144531, "step": 2836 }, { "epoch": 0.44, "learning_rate": 1.206656361844494e-05, "logits/chosen": -2.9200215339660645, "logits/rejected": -2.873055934906006, "logps/chosen": -70.33172607421875, "logps/rejected": -281.85711669921875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.3594565391540527, "rewards/margins": 7.281232833862305, "rewards/rejected": -9.640689849853516, "step": 2837 }, { "epoch": 0.44, "learning_rate": 1.2065830177913793e-05, "logits/chosen": -2.8466544151306152, "logits/rejected": -2.7163901329040527, "logps/chosen": -127.20684051513672, "logps/rejected": -203.96697998046875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -3.802135467529297, "rewards/margins": 4.988712310791016, "rewards/rejected": -8.790847778320312, "step": 2838 }, { "epoch": 0.44, "learning_rate": 1.2065096737382645e-05, "logits/chosen": -2.816129207611084, "logits/rejected": -2.375527858734131, "logps/chosen": -261.1827087402344, "logps/rejected": -241.42010498046875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.040715217590332, "rewards/margins": 7.572091102600098, "rewards/rejected": -10.61280632019043, "step": 2839 }, { "epoch": 0.44, "learning_rate": 1.2064363296851497e-05, "logits/chosen": -3.185525417327881, "logits/rejected": -2.0278258323669434, "logps/chosen": -661.2339477539062, "logps/rejected": -367.7038269042969, "loss": 2.1827, "rewards/accuracies": 0.5, "rewards/chosen": -5.282603740692139, "rewards/margins": 0.11641550064086914, "rewards/rejected": -5.399019241333008, "step": 2840 }, { "epoch": 0.44, "learning_rate": 1.2063629856320349e-05, "logits/chosen": -2.7546534538269043, "logits/rejected": -2.9600203037261963, "logps/chosen": -156.27023315429688, "logps/rejected": -245.134033203125, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": -2.941699743270874, "rewards/margins": 4.190155982971191, "rewards/rejected": -7.1318559646606445, "step": 2841 }, { "epoch": 0.44, "learning_rate": 1.20628964157892e-05, "logits/chosen": -3.305499315261841, "logits/rejected": -3.4243288040161133, "logps/chosen": -185.07984924316406, "logps/rejected": -186.29087829589844, "loss": 2.8236, "rewards/accuracies": 0.5, "rewards/chosen": -6.310075759887695, "rewards/margins": -0.6976912021636963, "rewards/rejected": -5.612384796142578, "step": 2842 }, { "epoch": 0.44, "learning_rate": 1.2062162975258052e-05, "logits/chosen": -2.3535218238830566, "logits/rejected": -2.6291842460632324, "logps/chosen": -141.22091674804688, "logps/rejected": -309.8729248046875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.095302104949951, "rewards/margins": 5.898180961608887, "rewards/rejected": -8.99348258972168, "step": 2843 }, { "epoch": 0.44, "learning_rate": 1.2061429534726904e-05, "logits/chosen": -3.0052008628845215, "logits/rejected": -2.3751397132873535, "logps/chosen": -196.3391876220703, "logps/rejected": -72.79544067382812, "loss": 3.9224, "rewards/accuracies": 0.0, "rewards/chosen": -7.604733467102051, "rewards/margins": -3.804847240447998, "rewards/rejected": -3.7998859882354736, "step": 2844 }, { "epoch": 0.44, "learning_rate": 1.2060696094195756e-05, "logits/chosen": -2.2724528312683105, "logits/rejected": -2.880497455596924, "logps/chosen": -70.34105682373047, "logps/rejected": -197.1350555419922, "loss": 0.2143, "rewards/accuracies": 1.0, "rewards/chosen": -4.153667449951172, "rewards/margins": 2.3336386680603027, "rewards/rejected": -6.487305641174316, "step": 2845 }, { "epoch": 0.44, "learning_rate": 1.2059962653664608e-05, "logits/chosen": -2.418177604675293, "logits/rejected": -3.152700662612915, "logps/chosen": -96.41958618164062, "logps/rejected": -300.5474548339844, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.5140244960784912, "rewards/margins": 7.475501537322998, "rewards/rejected": -8.98952579498291, "step": 2846 }, { "epoch": 0.44, "learning_rate": 1.2059229213133462e-05, "logits/chosen": -2.7690062522888184, "logits/rejected": -3.3120319843292236, "logps/chosen": -245.4939422607422, "logps/rejected": -437.8321228027344, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.1187217235565186, "rewards/margins": 7.420438766479492, "rewards/rejected": -10.53916072845459, "step": 2847 }, { "epoch": 0.44, "learning_rate": 1.2058495772602313e-05, "logits/chosen": -1.921734094619751, "logits/rejected": -2.8420004844665527, "logps/chosen": -67.484619140625, "logps/rejected": -225.30020141601562, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -2.4621245861053467, "rewards/margins": 5.546308994293213, "rewards/rejected": -8.00843334197998, "step": 2848 }, { "epoch": 0.44, "learning_rate": 1.2057762332071165e-05, "logits/chosen": -2.7165067195892334, "logits/rejected": -2.7217345237731934, "logps/chosen": -216.51573181152344, "logps/rejected": -400.61956787109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.355055809020996, "rewards/margins": 6.65543270111084, "rewards/rejected": -11.010488510131836, "step": 2849 }, { "epoch": 0.44, "learning_rate": 1.2057028891540017e-05, "logits/chosen": -0.7742887735366821, "logits/rejected": -2.5067825317382812, "logps/chosen": -111.86973571777344, "logps/rejected": -403.48828125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.57126784324646, "rewards/margins": 6.02325963973999, "rewards/rejected": -8.594528198242188, "step": 2850 }, { "epoch": 0.44, "learning_rate": 1.2056295451008869e-05, "logits/chosen": -2.5597424507141113, "logits/rejected": -3.0776212215423584, "logps/chosen": -248.59170532226562, "logps/rejected": -368.095703125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.3246335983276367, "rewards/margins": 5.553318500518799, "rewards/rejected": -8.877952575683594, "step": 2851 }, { "epoch": 0.44, "learning_rate": 1.2055562010477721e-05, "logits/chosen": -3.1679532527923584, "logits/rejected": -2.102128744125366, "logps/chosen": -424.6427001953125, "logps/rejected": -337.46343994140625, "loss": 3.0784, "rewards/accuracies": 0.5, "rewards/chosen": -4.8700714111328125, "rewards/margins": 1.7229394912719727, "rewards/rejected": -6.593010902404785, "step": 2852 }, { "epoch": 0.44, "learning_rate": 1.2054828569946575e-05, "logits/chosen": -2.764958620071411, "logits/rejected": -2.5513079166412354, "logps/chosen": -96.39823150634766, "logps/rejected": -192.3792724609375, "loss": 0.2771, "rewards/accuracies": 1.0, "rewards/chosen": -3.300480365753174, "rewards/margins": 3.4411113262176514, "rewards/rejected": -6.741591453552246, "step": 2853 }, { "epoch": 0.44, "learning_rate": 1.2054095129415426e-05, "logits/chosen": -2.9085500240325928, "logits/rejected": -2.328735828399658, "logps/chosen": -147.47596740722656, "logps/rejected": -106.92205047607422, "loss": 1.0652, "rewards/accuracies": 0.5, "rewards/chosen": -3.1932482719421387, "rewards/margins": 2.287414789199829, "rewards/rejected": -5.480662822723389, "step": 2854 }, { "epoch": 0.44, "learning_rate": 1.2053361688884278e-05, "logits/chosen": -2.8722684383392334, "logits/rejected": -3.276212692260742, "logps/chosen": -127.47242736816406, "logps/rejected": -167.41366577148438, "loss": 1.0005, "rewards/accuracies": 0.5, "rewards/chosen": -3.6051876544952393, "rewards/margins": 1.963168978691101, "rewards/rejected": -5.568356513977051, "step": 2855 }, { "epoch": 0.44, "learning_rate": 1.2052628248353132e-05, "logits/chosen": -2.3679397106170654, "logits/rejected": -3.148444175720215, "logps/chosen": -212.1334228515625, "logps/rejected": -392.42364501953125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.2546334266662598, "rewards/margins": 6.555972099304199, "rewards/rejected": -7.810605049133301, "step": 2856 }, { "epoch": 0.44, "learning_rate": 1.2051894807821984e-05, "logits/chosen": -2.6920106410980225, "logits/rejected": -3.1813788414001465, "logps/chosen": -169.20831298828125, "logps/rejected": -357.0753479003906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.294405460357666, "rewards/margins": 9.893993377685547, "rewards/rejected": -13.188398361206055, "step": 2857 }, { "epoch": 0.44, "learning_rate": 1.2051161367290836e-05, "logits/chosen": -2.872124433517456, "logits/rejected": -2.743276357650757, "logps/chosen": -174.69638061523438, "logps/rejected": -85.8466796875, "loss": 2.254, "rewards/accuracies": 0.5, "rewards/chosen": -5.93656063079834, "rewards/margins": -0.8981776237487793, "rewards/rejected": -5.038382530212402, "step": 2858 }, { "epoch": 0.44, "learning_rate": 1.2050427926759687e-05, "logits/chosen": -3.393507480621338, "logits/rejected": -2.9855167865753174, "logps/chosen": -204.51333618164062, "logps/rejected": -67.53600311279297, "loss": 4.6675, "rewards/accuracies": 0.0, "rewards/chosen": -6.95809268951416, "rewards/margins": -4.655527114868164, "rewards/rejected": -2.302565574645996, "step": 2859 }, { "epoch": 0.44, "learning_rate": 1.204969448622854e-05, "logits/chosen": -2.341296434402466, "logits/rejected": -2.7324905395507812, "logps/chosen": -123.73369598388672, "logps/rejected": -331.929443359375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.835300922393799, "rewards/margins": 6.943619728088379, "rewards/rejected": -9.778921127319336, "step": 2860 }, { "epoch": 0.44, "learning_rate": 1.2048961045697391e-05, "logits/chosen": -2.639928102493286, "logits/rejected": -3.0227913856506348, "logps/chosen": -648.3255615234375, "logps/rejected": -642.0790405273438, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.2262816429138184, "rewards/margins": 5.831486225128174, "rewards/rejected": -9.057767868041992, "step": 2861 }, { "epoch": 0.45, "learning_rate": 1.2048227605166243e-05, "logits/chosen": -1.8650932312011719, "logits/rejected": -3.2181437015533447, "logps/chosen": -356.368408203125, "logps/rejected": -642.404296875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.4107468128204346, "rewards/margins": 5.588014125823975, "rewards/rejected": -8.998761177062988, "step": 2862 }, { "epoch": 0.45, "learning_rate": 1.2047494164635095e-05, "logits/chosen": -2.7000298500061035, "logits/rejected": -2.9505510330200195, "logps/chosen": -359.6434020996094, "logps/rejected": -244.53475952148438, "loss": 3.4833, "rewards/accuracies": 0.5, "rewards/chosen": -6.451069355010986, "rewards/margins": -0.5624780654907227, "rewards/rejected": -5.8885908126831055, "step": 2863 }, { "epoch": 0.45, "learning_rate": 1.2046760724103947e-05, "logits/chosen": -2.589658260345459, "logits/rejected": -3.152092456817627, "logps/chosen": -172.3988494873047, "logps/rejected": -303.1007080078125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.56868052482605, "rewards/margins": 5.211573123931885, "rewards/rejected": -7.7802534103393555, "step": 2864 }, { "epoch": 0.45, "learning_rate": 1.20460272835728e-05, "logits/chosen": -2.9817235469818115, "logits/rejected": -2.4093868732452393, "logps/chosen": -236.74502563476562, "logps/rejected": -136.37657165527344, "loss": 3.6046, "rewards/accuracies": 0.5, "rewards/chosen": -6.499057769775391, "rewards/margins": -1.6871079206466675, "rewards/rejected": -4.811950206756592, "step": 2865 }, { "epoch": 0.45, "learning_rate": 1.2045293843041652e-05, "logits/chosen": -1.8914835453033447, "logits/rejected": -2.870682954788208, "logps/chosen": -302.7065124511719, "logps/rejected": -523.8895874023438, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4253106117248535, "rewards/margins": 7.091004371643066, "rewards/rejected": -9.516315460205078, "step": 2866 }, { "epoch": 0.45, "learning_rate": 1.2044560402510504e-05, "logits/chosen": -1.9035841226577759, "logits/rejected": -3.101597785949707, "logps/chosen": -229.9564971923828, "logps/rejected": -359.1461486816406, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -2.9194703102111816, "rewards/margins": 4.106995105743408, "rewards/rejected": -7.02646541595459, "step": 2867 }, { "epoch": 0.45, "learning_rate": 1.2043826961979356e-05, "logits/chosen": -2.433598041534424, "logits/rejected": -2.7868659496307373, "logps/chosen": -299.07196044921875, "logps/rejected": -487.28436279296875, "loss": 3.4692, "rewards/accuracies": 0.5, "rewards/chosen": -5.911072731018066, "rewards/margins": -0.544130802154541, "rewards/rejected": -5.366941928863525, "step": 2868 }, { "epoch": 0.45, "learning_rate": 1.2043093521448208e-05, "logits/chosen": -1.29375159740448, "logits/rejected": -2.961639165878296, "logps/chosen": -59.49091339111328, "logps/rejected": -289.146240234375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.7939867973327637, "rewards/margins": 5.400313377380371, "rewards/rejected": -9.194299697875977, "step": 2869 }, { "epoch": 0.45, "learning_rate": 1.204236008091706e-05, "logits/chosen": -2.9751906394958496, "logits/rejected": -2.9641976356506348, "logps/chosen": -191.3042449951172, "logps/rejected": -302.0980224609375, "loss": 0.0526, "rewards/accuracies": 1.0, "rewards/chosen": -3.843604803085327, "rewards/margins": 5.478021621704102, "rewards/rejected": -9.321626663208008, "step": 2870 }, { "epoch": 0.45, "learning_rate": 1.2041626640385912e-05, "logits/chosen": -1.9717646837234497, "logits/rejected": -2.799586057662964, "logps/chosen": -320.790771484375, "logps/rejected": -458.8542785644531, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.116992473602295, "rewards/margins": 6.2371721267700195, "rewards/rejected": -8.354164123535156, "step": 2871 }, { "epoch": 0.45, "learning_rate": 1.2040893199854764e-05, "logits/chosen": -2.753366231918335, "logits/rejected": -2.673001766204834, "logps/chosen": -521.6299438476562, "logps/rejected": -476.04339599609375, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -3.357269763946533, "rewards/margins": 4.807755470275879, "rewards/rejected": -8.16502571105957, "step": 2872 }, { "epoch": 0.45, "learning_rate": 1.2040159759323617e-05, "logits/chosen": -3.113595485687256, "logits/rejected": -3.0985870361328125, "logps/chosen": -178.3005828857422, "logps/rejected": -138.00827026367188, "loss": 2.04, "rewards/accuracies": 0.5, "rewards/chosen": -5.9617509841918945, "rewards/margins": -0.1973104476928711, "rewards/rejected": -5.764440536499023, "step": 2873 }, { "epoch": 0.45, "learning_rate": 1.2039426318792469e-05, "logits/chosen": -3.225649118423462, "logits/rejected": -2.8128912448883057, "logps/chosen": -198.17019653320312, "logps/rejected": -161.29014587402344, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -3.4467391967773438, "rewards/margins": 4.05711030960083, "rewards/rejected": -7.503849506378174, "step": 2874 }, { "epoch": 0.45, "learning_rate": 1.2038692878261321e-05, "logits/chosen": -2.6725900173187256, "logits/rejected": -3.083224296569824, "logps/chosen": -150.5555877685547, "logps/rejected": -176.1679229736328, "loss": 2.044, "rewards/accuracies": 0.5, "rewards/chosen": -5.309981822967529, "rewards/margins": 0.9896924495697021, "rewards/rejected": -6.299674034118652, "step": 2875 }, { "epoch": 0.45, "learning_rate": 1.2037959437730173e-05, "logits/chosen": -1.4466696977615356, "logits/rejected": -2.5785229206085205, "logps/chosen": -348.97930908203125, "logps/rejected": -486.272216796875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.3580079078674316, "rewards/margins": 5.766446113586426, "rewards/rejected": -9.1244535446167, "step": 2876 }, { "epoch": 0.45, "learning_rate": 1.2037225997199025e-05, "logits/chosen": -2.974179267883301, "logits/rejected": -2.8338212966918945, "logps/chosen": -597.8617553710938, "logps/rejected": -585.1326904296875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -4.0873517990112305, "rewards/margins": 5.313359260559082, "rewards/rejected": -9.400711059570312, "step": 2877 }, { "epoch": 0.45, "learning_rate": 1.2036492556667877e-05, "logits/chosen": -2.2476589679718018, "logits/rejected": -3.1108381748199463, "logps/chosen": -372.19561767578125, "logps/rejected": -453.5029296875, "loss": 3.1728, "rewards/accuracies": 0.5, "rewards/chosen": -5.407447338104248, "rewards/margins": -0.6619834899902344, "rewards/rejected": -4.7454633712768555, "step": 2878 }, { "epoch": 0.45, "learning_rate": 1.2035759116136728e-05, "logits/chosen": -2.437828540802002, "logits/rejected": -2.903275966644287, "logps/chosen": -110.82412719726562, "logps/rejected": -285.747802734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2629318237304688, "rewards/margins": 6.219949245452881, "rewards/rejected": -8.482881546020508, "step": 2879 }, { "epoch": 0.45, "learning_rate": 1.203502567560558e-05, "logits/chosen": -1.8714609146118164, "logits/rejected": -3.080458402633667, "logps/chosen": -57.84710693359375, "logps/rejected": -364.1072998046875, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": -3.0035667419433594, "rewards/margins": 4.51947021484375, "rewards/rejected": -7.523036956787109, "step": 2880 }, { "epoch": 0.45, "learning_rate": 1.2034292235074432e-05, "logits/chosen": -1.8621370792388916, "logits/rejected": -2.68790602684021, "logps/chosen": -48.11376190185547, "logps/rejected": -144.4368133544922, "loss": 0.0722, "rewards/accuracies": 1.0, "rewards/chosen": -3.216874837875366, "rewards/margins": 2.79046368598938, "rewards/rejected": -6.007338523864746, "step": 2881 }, { "epoch": 0.45, "learning_rate": 1.2033558794543286e-05, "logits/chosen": -3.095327377319336, "logits/rejected": -2.5753352642059326, "logps/chosen": -263.65814208984375, "logps/rejected": -252.8031768798828, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.9716293811798096, "rewards/margins": 4.672219276428223, "rewards/rejected": -7.643848419189453, "step": 2882 }, { "epoch": 0.45, "learning_rate": 1.2032825354012138e-05, "logits/chosen": -2.8656790256500244, "logits/rejected": -2.9267776012420654, "logps/chosen": -257.62591552734375, "logps/rejected": -239.49734497070312, "loss": 0.1031, "rewards/accuracies": 1.0, "rewards/chosen": -1.5716204643249512, "rewards/margins": 2.7600178718566895, "rewards/rejected": -4.331638336181641, "step": 2883 }, { "epoch": 0.45, "learning_rate": 1.203209191348099e-05, "logits/chosen": -2.82289719581604, "logits/rejected": -2.0532689094543457, "logps/chosen": -361.44415283203125, "logps/rejected": -309.7518310546875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.838681697845459, "rewards/margins": 5.467391490936279, "rewards/rejected": -8.306073188781738, "step": 2884 }, { "epoch": 0.45, "learning_rate": 1.2031358472949841e-05, "logits/chosen": -2.5197389125823975, "logits/rejected": -2.9018940925598145, "logps/chosen": -210.50668334960938, "logps/rejected": -219.81639099121094, "loss": 0.3576, "rewards/accuracies": 0.5, "rewards/chosen": -4.938056945800781, "rewards/margins": 2.09605073928833, "rewards/rejected": -7.034107685089111, "step": 2885 }, { "epoch": 0.45, "learning_rate": 1.2030625032418693e-05, "logits/chosen": -1.0691020488739014, "logits/rejected": -2.748321771621704, "logps/chosen": -113.40115356445312, "logps/rejected": -368.58734130859375, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -2.9373364448547363, "rewards/margins": 4.1559953689575195, "rewards/rejected": -7.093332290649414, "step": 2886 }, { "epoch": 0.45, "learning_rate": 1.2029891591887547e-05, "logits/chosen": -2.7029025554656982, "logits/rejected": -1.8821675777435303, "logps/chosen": -319.5679931640625, "logps/rejected": -244.4335174560547, "loss": 1.3457, "rewards/accuracies": 0.5, "rewards/chosen": -4.485369682312012, "rewards/margins": 2.3910727500915527, "rewards/rejected": -6.876442909240723, "step": 2887 }, { "epoch": 0.45, "learning_rate": 1.2029158151356399e-05, "logits/chosen": -2.641590118408203, "logits/rejected": -3.084252119064331, "logps/chosen": -145.93145751953125, "logps/rejected": -235.00872802734375, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -2.675734281539917, "rewards/margins": 3.651093006134033, "rewards/rejected": -6.326827049255371, "step": 2888 }, { "epoch": 0.45, "learning_rate": 1.202842471082525e-05, "logits/chosen": -2.3477628231048584, "logits/rejected": -2.709947347640991, "logps/chosen": -577.26025390625, "logps/rejected": -840.1533813476562, "loss": 2.7227, "rewards/accuracies": 0.5, "rewards/chosen": -4.250261306762695, "rewards/margins": 0.7460281848907471, "rewards/rejected": -4.996289253234863, "step": 2889 }, { "epoch": 0.45, "learning_rate": 1.2027691270294102e-05, "logits/chosen": -2.9900412559509277, "logits/rejected": -2.9451825618743896, "logps/chosen": -228.01608276367188, "logps/rejected": -177.8077392578125, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -3.171482801437378, "rewards/margins": 3.196401834487915, "rewards/rejected": -6.367884635925293, "step": 2890 }, { "epoch": 0.45, "learning_rate": 1.2026957829762956e-05, "logits/chosen": -3.0353927612304688, "logits/rejected": -3.100398063659668, "logps/chosen": -412.8609924316406, "logps/rejected": -470.2311706542969, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.665707588195801, "rewards/margins": 5.896930694580078, "rewards/rejected": -8.562637329101562, "step": 2891 }, { "epoch": 0.45, "learning_rate": 1.2026224389231808e-05, "logits/chosen": -1.1266586780548096, "logits/rejected": -2.90285062789917, "logps/chosen": -132.4795684814453, "logps/rejected": -491.9449157714844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7454214096069336, "rewards/margins": 7.465032577514648, "rewards/rejected": -10.210453987121582, "step": 2892 }, { "epoch": 0.45, "learning_rate": 1.202549094870066e-05, "logits/chosen": -2.4655792713165283, "logits/rejected": -2.6999645233154297, "logps/chosen": -264.9372253417969, "logps/rejected": -207.23141479492188, "loss": 1.4417, "rewards/accuracies": 0.5, "rewards/chosen": -3.9744248390197754, "rewards/margins": 0.2734527587890625, "rewards/rejected": -4.247877597808838, "step": 2893 }, { "epoch": 0.45, "learning_rate": 1.2024757508169512e-05, "logits/chosen": -2.7628748416900635, "logits/rejected": -3.115506410598755, "logps/chosen": -60.17936706542969, "logps/rejected": -222.05442810058594, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -3.062624454498291, "rewards/margins": 3.7509865760803223, "rewards/rejected": -6.813611030578613, "step": 2894 }, { "epoch": 0.45, "learning_rate": 1.2024024067638364e-05, "logits/chosen": -1.509395718574524, "logits/rejected": -3.159095525741577, "logps/chosen": -106.98980712890625, "logps/rejected": -360.56622314453125, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/chosen": -2.745443105697632, "rewards/margins": 4.280438423156738, "rewards/rejected": -7.025881290435791, "step": 2895 }, { "epoch": 0.45, "learning_rate": 1.2023290627107215e-05, "logits/chosen": -2.489799737930298, "logits/rejected": -2.9920778274536133, "logps/chosen": -104.32243347167969, "logps/rejected": -207.50991821289062, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -2.069638729095459, "rewards/margins": 4.303687572479248, "rewards/rejected": -6.373326301574707, "step": 2896 }, { "epoch": 0.45, "learning_rate": 1.2022557186576067e-05, "logits/chosen": -2.9813976287841797, "logits/rejected": -3.159054756164551, "logps/chosen": -130.80218505859375, "logps/rejected": -273.9773864746094, "loss": 0.1907, "rewards/accuracies": 1.0, "rewards/chosen": -2.781886577606201, "rewards/margins": 1.636961817741394, "rewards/rejected": -4.418848514556885, "step": 2897 }, { "epoch": 0.45, "learning_rate": 1.202182374604492e-05, "logits/chosen": -1.0026798248291016, "logits/rejected": -2.4374608993530273, "logps/chosen": -144.6723175048828, "logps/rejected": -527.1898193359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.358325481414795, "rewards/margins": 8.277307510375977, "rewards/rejected": -11.635632514953613, "step": 2898 }, { "epoch": 0.45, "learning_rate": 1.2021090305513771e-05, "logits/chosen": -2.8473634719848633, "logits/rejected": -2.927873373031616, "logps/chosen": -249.52505493164062, "logps/rejected": -211.76043701171875, "loss": 1.0412, "rewards/accuracies": 0.5, "rewards/chosen": -3.428624153137207, "rewards/margins": 1.2630833387374878, "rewards/rejected": -4.691707134246826, "step": 2899 }, { "epoch": 0.45, "learning_rate": 1.2020356864982625e-05, "logits/chosen": -2.5571682453155518, "logits/rejected": -2.7681198120117188, "logps/chosen": -772.6136474609375, "logps/rejected": -635.2205200195312, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.3835649490356445, "rewards/margins": 5.384937286376953, "rewards/rejected": -7.768502235412598, "step": 2900 }, { "epoch": 0.45, "learning_rate": 1.2019623424451477e-05, "logits/chosen": -2.496502161026001, "logits/rejected": -3.2715024948120117, "logps/chosen": -57.98155212402344, "logps/rejected": -288.2352294921875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.222686767578125, "rewards/margins": 5.360174179077148, "rewards/rejected": -7.582860946655273, "step": 2901 }, { "epoch": 0.45, "learning_rate": 1.2018889983920328e-05, "logits/chosen": -2.9224367141723633, "logits/rejected": -2.4487695693969727, "logps/chosen": -386.40667724609375, "logps/rejected": -324.93402099609375, "loss": 3.2338, "rewards/accuracies": 0.5, "rewards/chosen": -6.15201473236084, "rewards/margins": -1.095503568649292, "rewards/rejected": -5.056510925292969, "step": 2902 }, { "epoch": 0.45, "learning_rate": 1.201815654338918e-05, "logits/chosen": -2.253063201904297, "logits/rejected": -2.8940000534057617, "logps/chosen": -299.9693908691406, "logps/rejected": -393.0670166015625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -2.3652539253234863, "rewards/margins": 5.140669822692871, "rewards/rejected": -7.505923748016357, "step": 2903 }, { "epoch": 0.45, "learning_rate": 1.2017423102858032e-05, "logits/chosen": -1.7353624105453491, "logits/rejected": -2.673631429672241, "logps/chosen": -279.4570007324219, "logps/rejected": -552.3658447265625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -2.4890146255493164, "rewards/margins": 6.381214618682861, "rewards/rejected": -8.870229721069336, "step": 2904 }, { "epoch": 0.45, "learning_rate": 1.2016689662326884e-05, "logits/chosen": -2.0687143802642822, "logits/rejected": -2.921060085296631, "logps/chosen": -54.50000762939453, "logps/rejected": -167.99844360351562, "loss": 0.7643, "rewards/accuracies": 0.5, "rewards/chosen": -3.839527130126953, "rewards/margins": 1.1877294778823853, "rewards/rejected": -5.027256965637207, "step": 2905 }, { "epoch": 0.45, "learning_rate": 1.2015956221795736e-05, "logits/chosen": -1.5470060110092163, "logits/rejected": -2.8647186756134033, "logps/chosen": -230.7364044189453, "logps/rejected": -566.0989990234375, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": -3.1464805603027344, "rewards/margins": 3.4380900859832764, "rewards/rejected": -6.584570407867432, "step": 2906 }, { "epoch": 0.45, "learning_rate": 1.2015222781264588e-05, "logits/chosen": -2.2953174114227295, "logits/rejected": -3.139808177947998, "logps/chosen": -144.55886840820312, "logps/rejected": -332.0767517089844, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -2.8768959045410156, "rewards/margins": 4.817572116851807, "rewards/rejected": -7.694468021392822, "step": 2907 }, { "epoch": 0.45, "learning_rate": 1.201448934073344e-05, "logits/chosen": -2.8553497791290283, "logits/rejected": -2.6634669303894043, "logps/chosen": -96.69065856933594, "logps/rejected": -211.62548828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.084587574005127, "rewards/margins": 6.769005298614502, "rewards/rejected": -8.853592872619629, "step": 2908 }, { "epoch": 0.45, "learning_rate": 1.2013755900202293e-05, "logits/chosen": -2.7853548526763916, "logits/rejected": -2.1776554584503174, "logps/chosen": -167.8957061767578, "logps/rejected": -156.66458129882812, "loss": 0.1507, "rewards/accuracies": 1.0, "rewards/chosen": -4.11130428314209, "rewards/margins": 2.3534131050109863, "rewards/rejected": -6.464717864990234, "step": 2909 }, { "epoch": 0.45, "learning_rate": 1.2013022459671145e-05, "logits/chosen": -2.4971635341644287, "logits/rejected": -2.8629212379455566, "logps/chosen": -554.524658203125, "logps/rejected": -859.6448974609375, "loss": 2.8411, "rewards/accuracies": 0.5, "rewards/chosen": -4.894712448120117, "rewards/margins": -0.20487046241760254, "rewards/rejected": -4.689842224121094, "step": 2910 }, { "epoch": 0.45, "learning_rate": 1.2012289019139997e-05, "logits/chosen": -3.043062448501587, "logits/rejected": -2.9849045276641846, "logps/chosen": -230.45037841796875, "logps/rejected": -349.3889465332031, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -3.065812587738037, "rewards/margins": 5.927792549133301, "rewards/rejected": -8.99360466003418, "step": 2911 }, { "epoch": 0.45, "learning_rate": 1.2011555578608849e-05, "logits/chosen": -2.774627447128296, "logits/rejected": -2.93381929397583, "logps/chosen": -290.5362548828125, "logps/rejected": -370.5498046875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.225935459136963, "rewards/margins": 5.34281587600708, "rewards/rejected": -7.568751335144043, "step": 2912 }, { "epoch": 0.45, "learning_rate": 1.20108221380777e-05, "logits/chosen": -1.7716476917266846, "logits/rejected": -2.9964029788970947, "logps/chosen": -198.8301544189453, "logps/rejected": -557.4258422851562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.441767930984497, "rewards/margins": 6.846510887145996, "rewards/rejected": -10.288278579711914, "step": 2913 }, { "epoch": 0.45, "learning_rate": 1.2010088697546553e-05, "logits/chosen": -0.9048786759376526, "logits/rejected": -2.8310422897338867, "logps/chosen": -64.81419372558594, "logps/rejected": -361.8766784667969, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -2.9643261432647705, "rewards/margins": 5.525989532470703, "rewards/rejected": -8.490315437316895, "step": 2914 }, { "epoch": 0.45, "learning_rate": 1.2009355257015405e-05, "logits/chosen": -1.4483891725540161, "logits/rejected": -3.205864667892456, "logps/chosen": -107.0063705444336, "logps/rejected": -421.9191589355469, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.5548057556152344, "rewards/margins": 7.326865196228027, "rewards/rejected": -9.881671905517578, "step": 2915 }, { "epoch": 0.45, "learning_rate": 1.2008621816484256e-05, "logits/chosen": -2.6918137073516846, "logits/rejected": -3.0238196849823, "logps/chosen": -101.38798522949219, "logps/rejected": -270.94036865234375, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.596602439880371, "rewards/margins": 4.743078231811523, "rewards/rejected": -7.3396806716918945, "step": 2916 }, { "epoch": 0.45, "learning_rate": 1.2007888375953108e-05, "logits/chosen": -2.307145833969116, "logits/rejected": -2.950294017791748, "logps/chosen": -151.74887084960938, "logps/rejected": -220.591552734375, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -2.327728271484375, "rewards/margins": 5.240170478820801, "rewards/rejected": -7.567898750305176, "step": 2917 }, { "epoch": 0.45, "learning_rate": 1.2007154935421962e-05, "logits/chosen": -2.865046739578247, "logits/rejected": -2.248039960861206, "logps/chosen": -346.146484375, "logps/rejected": -255.78176879882812, "loss": 3.483, "rewards/accuracies": 0.5, "rewards/chosen": -5.2977094650268555, "rewards/margins": 0.651003360748291, "rewards/rejected": -5.9487128257751465, "step": 2918 }, { "epoch": 0.45, "learning_rate": 1.2006421494890814e-05, "logits/chosen": -2.6284666061401367, "logits/rejected": -3.292041063308716, "logps/chosen": -232.71043395996094, "logps/rejected": -479.5042724609375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.875466823577881, "rewards/margins": 6.227362632751465, "rewards/rejected": -10.102828979492188, "step": 2919 }, { "epoch": 0.45, "learning_rate": 1.2005688054359666e-05, "logits/chosen": -3.242530345916748, "logits/rejected": -2.627568483352661, "logps/chosen": -483.96685791015625, "logps/rejected": -275.6379699707031, "loss": 2.9705, "rewards/accuracies": 0.5, "rewards/chosen": -4.0969743728637695, "rewards/margins": 0.44710588455200195, "rewards/rejected": -4.5440802574157715, "step": 2920 }, { "epoch": 0.45, "learning_rate": 1.200495461382852e-05, "logits/chosen": -3.102738380432129, "logits/rejected": -2.69124174118042, "logps/chosen": -369.0860290527344, "logps/rejected": -202.951171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.6980819702148438, "rewards/margins": 7.761544704437256, "rewards/rejected": -8.459627151489258, "step": 2921 }, { "epoch": 0.45, "learning_rate": 1.2004221173297371e-05, "logits/chosen": -2.828822374343872, "logits/rejected": -2.6751368045806885, "logps/chosen": -519.41162109375, "logps/rejected": -382.10595703125, "loss": 3.6883, "rewards/accuracies": 0.5, "rewards/chosen": -7.466439723968506, "rewards/margins": 1.9938726425170898, "rewards/rejected": -9.460312843322754, "step": 2922 }, { "epoch": 0.45, "learning_rate": 1.2003487732766223e-05, "logits/chosen": -2.8694493770599365, "logits/rejected": -1.9579015970230103, "logps/chosen": -135.37991333007812, "logps/rejected": -142.6029510498047, "loss": 0.0707, "rewards/accuracies": 1.0, "rewards/chosen": -2.6495003700256348, "rewards/margins": 3.7731258869171143, "rewards/rejected": -6.422626495361328, "step": 2923 }, { "epoch": 0.45, "learning_rate": 1.2002754292235075e-05, "logits/chosen": -2.71065092086792, "logits/rejected": -3.2632720470428467, "logps/chosen": -70.41032409667969, "logps/rejected": -255.49249267578125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.2925047874450684, "rewards/margins": 6.509662628173828, "rewards/rejected": -8.802167892456055, "step": 2924 }, { "epoch": 0.45, "learning_rate": 1.2002020851703927e-05, "logits/chosen": -2.697922945022583, "logits/rejected": -2.202252149581909, "logps/chosen": -881.5291748046875, "logps/rejected": -792.99658203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.8540070056915283, "rewards/margins": 7.388885498046875, "rewards/rejected": -9.24289321899414, "step": 2925 }, { "epoch": 0.46, "learning_rate": 1.2001287411172779e-05, "logits/chosen": -2.265357732772827, "logits/rejected": -3.168900489807129, "logps/chosen": -39.359092712402344, "logps/rejected": -249.68914794921875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.7206804752349854, "rewards/margins": 6.213035583496094, "rewards/rejected": -7.9337158203125, "step": 2926 }, { "epoch": 0.46, "learning_rate": 1.2000553970641632e-05, "logits/chosen": -2.886920213699341, "logits/rejected": -3.2851293087005615, "logps/chosen": -226.88568115234375, "logps/rejected": -273.14141845703125, "loss": 1.7416, "rewards/accuracies": 0.5, "rewards/chosen": -3.811328172683716, "rewards/margins": 1.752752661705017, "rewards/rejected": -5.564080715179443, "step": 2927 }, { "epoch": 0.46, "learning_rate": 1.1999820530110484e-05, "logits/chosen": -2.372868299484253, "logits/rejected": -2.9270119667053223, "logps/chosen": -385.9766845703125, "logps/rejected": -426.87969970703125, "loss": 3.4767, "rewards/accuracies": 0.5, "rewards/chosen": -5.818493843078613, "rewards/margins": -0.16626214981079102, "rewards/rejected": -5.652231693267822, "step": 2928 }, { "epoch": 0.46, "learning_rate": 1.1999087089579336e-05, "logits/chosen": -2.663766860961914, "logits/rejected": -3.0434305667877197, "logps/chosen": -374.1122741699219, "logps/rejected": -323.7296447753906, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.3657283782958984, "rewards/margins": 4.718419075012207, "rewards/rejected": -7.0841474533081055, "step": 2929 }, { "epoch": 0.46, "learning_rate": 1.1998353649048188e-05, "logits/chosen": -2.6506197452545166, "logits/rejected": -2.7674338817596436, "logps/chosen": -397.3290710449219, "logps/rejected": -530.6063842773438, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.241835832595825, "rewards/margins": 8.548996925354004, "rewards/rejected": -10.79083251953125, "step": 2930 }, { "epoch": 0.46, "learning_rate": 1.199762020851704e-05, "logits/chosen": -2.2870054244995117, "logits/rejected": -0.9822856187820435, "logps/chosen": -357.6361999511719, "logps/rejected": -179.6273193359375, "loss": 4.7458, "rewards/accuracies": 0.5, "rewards/chosen": -8.740211486816406, "rewards/margins": -1.4319837093353271, "rewards/rejected": -7.308228015899658, "step": 2931 }, { "epoch": 0.46, "learning_rate": 1.1996886767985892e-05, "logits/chosen": -2.749887228012085, "logits/rejected": -3.3555328845977783, "logps/chosen": -905.9569091796875, "logps/rejected": -785.0157470703125, "loss": 0.5875, "rewards/accuracies": 0.5, "rewards/chosen": -0.7514358758926392, "rewards/margins": 4.2939133644104, "rewards/rejected": -5.045349597930908, "step": 2932 }, { "epoch": 0.46, "learning_rate": 1.1996153327454743e-05, "logits/chosen": -2.652359962463379, "logits/rejected": -3.1565420627593994, "logps/chosen": -55.67625045776367, "logps/rejected": -199.55728149414062, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -2.5878798961639404, "rewards/margins": 3.0549614429473877, "rewards/rejected": -5.642841339111328, "step": 2933 }, { "epoch": 0.46, "learning_rate": 1.1995419886923595e-05, "logits/chosen": -2.468470335006714, "logits/rejected": -3.174593448638916, "logps/chosen": -90.54168701171875, "logps/rejected": -295.2239074707031, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9247117042541504, "rewards/margins": 7.682395935058594, "rewards/rejected": -9.607107162475586, "step": 2934 }, { "epoch": 0.46, "learning_rate": 1.1994686446392447e-05, "logits/chosen": -1.9743187427520752, "logits/rejected": -3.1047980785369873, "logps/chosen": -288.8785095214844, "logps/rejected": -458.2989196777344, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.914513349533081, "rewards/margins": 5.830054759979248, "rewards/rejected": -7.74456787109375, "step": 2935 }, { "epoch": 0.46, "learning_rate": 1.19939530058613e-05, "logits/chosen": -2.6231963634490967, "logits/rejected": -2.559300422668457, "logps/chosen": -130.78858947753906, "logps/rejected": -234.4376678466797, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.81851863861084, "rewards/margins": 6.872812271118164, "rewards/rejected": -9.691330909729004, "step": 2936 }, { "epoch": 0.46, "learning_rate": 1.1993219565330153e-05, "logits/chosen": -2.4438695907592773, "logits/rejected": -2.8938024044036865, "logps/chosen": -82.64845275878906, "logps/rejected": -268.32977294921875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.0277609825134277, "rewards/margins": 5.746459007263184, "rewards/rejected": -8.774219512939453, "step": 2937 }, { "epoch": 0.46, "learning_rate": 1.1992486124799005e-05, "logits/chosen": -2.9676549434661865, "logits/rejected": -2.8532207012176514, "logps/chosen": -183.6979217529297, "logps/rejected": -84.72288513183594, "loss": 1.3581, "rewards/accuracies": 0.5, "rewards/chosen": -3.6384878158569336, "rewards/margins": 1.4848202466964722, "rewards/rejected": -5.123307704925537, "step": 2938 }, { "epoch": 0.46, "learning_rate": 1.1991752684267856e-05, "logits/chosen": -2.4924004077911377, "logits/rejected": -3.3133304119110107, "logps/chosen": -378.4837341308594, "logps/rejected": -685.2225952148438, "loss": 3.2149, "rewards/accuracies": 0.5, "rewards/chosen": -4.717432975769043, "rewards/margins": 0.085235595703125, "rewards/rejected": -4.802669048309326, "step": 2939 }, { "epoch": 0.46, "learning_rate": 1.1991019243736708e-05, "logits/chosen": -2.421773910522461, "logits/rejected": -3.0782346725463867, "logps/chosen": -39.51737594604492, "logps/rejected": -205.73963928222656, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.786899209022522, "rewards/margins": 4.823031425476074, "rewards/rejected": -6.609930515289307, "step": 2940 }, { "epoch": 0.46, "learning_rate": 1.199028580320556e-05, "logits/chosen": -3.0186383724212646, "logits/rejected": -2.9234507083892822, "logps/chosen": -304.0327453613281, "logps/rejected": -122.22343444824219, "loss": 2.3072, "rewards/accuracies": 0.5, "rewards/chosen": -2.6799304485321045, "rewards/margins": -0.8426557779312134, "rewards/rejected": -1.8372745513916016, "step": 2941 }, { "epoch": 0.46, "learning_rate": 1.1989552362674412e-05, "logits/chosen": -2.331441640853882, "logits/rejected": -3.045828342437744, "logps/chosen": -100.98052215576172, "logps/rejected": -339.7479248046875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.887953281402588, "rewards/margins": 6.05428409576416, "rewards/rejected": -7.94223690032959, "step": 2942 }, { "epoch": 0.46, "learning_rate": 1.1988818922143264e-05, "logits/chosen": -2.9100847244262695, "logits/rejected": -3.239375352859497, "logps/chosen": -83.74498748779297, "logps/rejected": -180.12161254882812, "loss": 0.0705, "rewards/accuracies": 1.0, "rewards/chosen": -3.2915220260620117, "rewards/margins": 2.7105154991149902, "rewards/rejected": -6.002037048339844, "step": 2943 }, { "epoch": 0.46, "learning_rate": 1.1988085481612116e-05, "logits/chosen": -2.7459356784820557, "logits/rejected": -3.1879069805145264, "logps/chosen": -174.01980590820312, "logps/rejected": -288.0732116699219, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.6066882610321045, "rewards/margins": 5.651487350463867, "rewards/rejected": -8.25817584991455, "step": 2944 }, { "epoch": 0.46, "learning_rate": 1.198735204108097e-05, "logits/chosen": -3.156862735748291, "logits/rejected": -3.0639898777008057, "logps/chosen": -176.43975830078125, "logps/rejected": -160.54165649414062, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.684133291244507, "rewards/margins": 5.144302845001221, "rewards/rejected": -7.828436374664307, "step": 2945 }, { "epoch": 0.46, "learning_rate": 1.1986618600549821e-05, "logits/chosen": -2.4069600105285645, "logits/rejected": -3.033302068710327, "logps/chosen": -59.598487854003906, "logps/rejected": -304.5492858886719, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.3586955070495605, "rewards/margins": 4.7576093673706055, "rewards/rejected": -6.116305351257324, "step": 2946 }, { "epoch": 0.46, "learning_rate": 1.1985885160018673e-05, "logits/chosen": -2.291748285293579, "logits/rejected": -3.046518087387085, "logps/chosen": -104.09126281738281, "logps/rejected": -264.70770263671875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.5257481336593628, "rewards/margins": 6.28303861618042, "rewards/rejected": -7.808786869049072, "step": 2947 }, { "epoch": 0.46, "learning_rate": 1.1985151719487525e-05, "logits/chosen": -2.6552813053131104, "logits/rejected": -3.1503961086273193, "logps/chosen": -150.82521057128906, "logps/rejected": -337.01116943359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2148716449737549, "rewards/margins": 7.07990837097168, "rewards/rejected": -8.294780731201172, "step": 2948 }, { "epoch": 0.46, "learning_rate": 1.1984418278956377e-05, "logits/chosen": -2.7248854637145996, "logits/rejected": -2.972918748855591, "logps/chosen": -192.275146484375, "logps/rejected": -387.7535095214844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.1650726795196533, "rewards/margins": 6.362881660461426, "rewards/rejected": -7.5279541015625, "step": 2949 }, { "epoch": 0.46, "learning_rate": 1.1983684838425229e-05, "logits/chosen": -2.753741502761841, "logits/rejected": -3.0975496768951416, "logps/chosen": -190.40216064453125, "logps/rejected": -384.67498779296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5202136039733887, "rewards/margins": 9.03310775756836, "rewards/rejected": -10.553321838378906, "step": 2950 }, { "epoch": 0.46, "learning_rate": 1.198295139789408e-05, "logits/chosen": -1.9734059572219849, "logits/rejected": -2.8115837574005127, "logps/chosen": -55.04657745361328, "logps/rejected": -214.01821899414062, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.3417179584503174, "rewards/margins": 5.908780574798584, "rewards/rejected": -8.25049877166748, "step": 2951 }, { "epoch": 0.46, "learning_rate": 1.1982217957362932e-05, "logits/chosen": -2.320631504058838, "logits/rejected": -3.2326412200927734, "logps/chosen": -282.806640625, "logps/rejected": -530.5104370117188, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.5651650428771973, "rewards/margins": 6.53973913192749, "rewards/rejected": -9.104904174804688, "step": 2952 }, { "epoch": 0.46, "learning_rate": 1.1981484516831786e-05, "logits/chosen": -2.1886940002441406, "logits/rejected": -3.0372307300567627, "logps/chosen": -152.1079559326172, "logps/rejected": -264.69970703125, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -1.6058661937713623, "rewards/margins": 4.631133079528809, "rewards/rejected": -6.23699951171875, "step": 2953 }, { "epoch": 0.46, "learning_rate": 1.1980751076300638e-05, "logits/chosen": -1.6337207555770874, "logits/rejected": -2.827317953109741, "logps/chosen": -77.4747314453125, "logps/rejected": -304.32220458984375, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -2.7126269340515137, "rewards/margins": 5.39231014251709, "rewards/rejected": -8.104936599731445, "step": 2954 }, { "epoch": 0.46, "learning_rate": 1.1980017635769492e-05, "logits/chosen": -2.7581353187561035, "logits/rejected": -3.4420647621154785, "logps/chosen": -71.44718933105469, "logps/rejected": -253.16116333007812, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.5899138450622559, "rewards/margins": 5.031553745269775, "rewards/rejected": -6.621467590332031, "step": 2955 }, { "epoch": 0.46, "learning_rate": 1.1979284195238343e-05, "logits/chosen": -3.1187543869018555, "logits/rejected": -2.7521984577178955, "logps/chosen": -372.4378662109375, "logps/rejected": -286.4568176269531, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -3.0537431240081787, "rewards/margins": 4.528769016265869, "rewards/rejected": -7.582511901855469, "step": 2956 }, { "epoch": 0.46, "learning_rate": 1.1978550754707195e-05, "logits/chosen": -2.5776097774505615, "logits/rejected": -2.2847166061401367, "logps/chosen": -499.265869140625, "logps/rejected": -419.3766174316406, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.358386278152466, "rewards/margins": 5.471062660217285, "rewards/rejected": -7.829448699951172, "step": 2957 }, { "epoch": 0.46, "learning_rate": 1.1977817314176047e-05, "logits/chosen": -0.8826078176498413, "logits/rejected": -1.8441269397735596, "logps/chosen": -314.6557312011719, "logps/rejected": -468.6910400390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.5610779523849487, "rewards/margins": 7.627711296081543, "rewards/rejected": -9.188789367675781, "step": 2958 }, { "epoch": 0.46, "learning_rate": 1.1977083873644899e-05, "logits/chosen": -2.6752846240997314, "logits/rejected": -2.728396415710449, "logps/chosen": -102.19136810302734, "logps/rejected": -216.70431518554688, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.5584888458251953, "rewards/margins": 6.277459621429443, "rewards/rejected": -7.835948467254639, "step": 2959 }, { "epoch": 0.46, "learning_rate": 1.1976350433113751e-05, "logits/chosen": -3.014070987701416, "logits/rejected": -3.0269808769226074, "logps/chosen": -151.71923828125, "logps/rejected": -158.33277893066406, "loss": 2.0058, "rewards/accuracies": 0.5, "rewards/chosen": -4.063963890075684, "rewards/margins": -0.787371039390564, "rewards/rejected": -3.27659273147583, "step": 2960 }, { "epoch": 0.46, "learning_rate": 1.1975616992582603e-05, "logits/chosen": -1.5728408098220825, "logits/rejected": -3.0332541465759277, "logps/chosen": -95.55818176269531, "logps/rejected": -503.5947265625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.5120937824249268, "rewards/margins": 5.868768692016602, "rewards/rejected": -8.38086223602295, "step": 2961 }, { "epoch": 0.46, "learning_rate": 1.1974883552051456e-05, "logits/chosen": -2.7407851219177246, "logits/rejected": -3.12680721282959, "logps/chosen": -421.1763916015625, "logps/rejected": -589.8155517578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.8628559112548828, "rewards/margins": 7.242684841156006, "rewards/rejected": -9.105541229248047, "step": 2962 }, { "epoch": 0.46, "learning_rate": 1.1974150111520308e-05, "logits/chosen": -2.581822633743286, "logits/rejected": -3.034115791320801, "logps/chosen": -38.410884857177734, "logps/rejected": -250.74041748046875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.5460516214370728, "rewards/margins": 6.2273664474487305, "rewards/rejected": -7.773417949676514, "step": 2963 }, { "epoch": 0.46, "learning_rate": 1.197341667098916e-05, "logits/chosen": -2.9592578411102295, "logits/rejected": -3.1377716064453125, "logps/chosen": -485.63970947265625, "logps/rejected": -589.4164428710938, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -1.7037239074707031, "rewards/margins": 3.9570322036743164, "rewards/rejected": -5.6607561111450195, "step": 2964 }, { "epoch": 0.46, "learning_rate": 1.1972683230458012e-05, "logits/chosen": -2.96865177154541, "logits/rejected": -3.2774033546447754, "logps/chosen": -90.55038452148438, "logps/rejected": -224.06585693359375, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.3656978607177734, "rewards/margins": 5.554006576538086, "rewards/rejected": -6.919704437255859, "step": 2965 }, { "epoch": 0.46, "learning_rate": 1.1971949789926864e-05, "logits/chosen": -2.7947895526885986, "logits/rejected": -2.6569712162017822, "logps/chosen": -845.7306518554688, "logps/rejected": -641.9236450195312, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.3673043251037598, "rewards/margins": 6.185218811035156, "rewards/rejected": -8.552522659301758, "step": 2966 }, { "epoch": 0.46, "learning_rate": 1.1971216349395716e-05, "logits/chosen": -2.870262384414673, "logits/rejected": -2.576948404312134, "logps/chosen": -236.0787811279297, "logps/rejected": -323.5926513671875, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -2.868713855743408, "rewards/margins": 4.472970962524414, "rewards/rejected": -7.341684818267822, "step": 2967 }, { "epoch": 0.46, "learning_rate": 1.1970482908864568e-05, "logits/chosen": -2.6473093032836914, "logits/rejected": -2.629014492034912, "logps/chosen": -234.23068237304688, "logps/rejected": -287.37542724609375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.7041794061660767, "rewards/margins": 6.198336601257324, "rewards/rejected": -6.9025163650512695, "step": 2968 }, { "epoch": 0.46, "learning_rate": 1.196974946833342e-05, "logits/chosen": -2.71795392036438, "logits/rejected": -2.996554136276245, "logps/chosen": -160.3495635986328, "logps/rejected": -191.34799194335938, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.1210663318634033, "rewards/margins": 5.388945579528809, "rewards/rejected": -7.510012149810791, "step": 2969 }, { "epoch": 0.46, "learning_rate": 1.1969016027802271e-05, "logits/chosen": -2.1603121757507324, "logits/rejected": -2.6833107471466064, "logps/chosen": -159.2720947265625, "logps/rejected": -261.6063537597656, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.8740508556365967, "rewards/margins": 7.38028621673584, "rewards/rejected": -9.254337310791016, "step": 2970 }, { "epoch": 0.46, "learning_rate": 1.1968282587271125e-05, "logits/chosen": -3.0283455848693848, "logits/rejected": -2.901710271835327, "logps/chosen": -197.18075561523438, "logps/rejected": -122.69833374023438, "loss": 0.8134, "rewards/accuracies": 0.5, "rewards/chosen": -2.777024984359741, "rewards/margins": 1.9884346723556519, "rewards/rejected": -4.7654595375061035, "step": 2971 }, { "epoch": 0.46, "learning_rate": 1.1967549146739977e-05, "logits/chosen": -2.458913803100586, "logits/rejected": -3.2060720920562744, "logps/chosen": -142.71456909179688, "logps/rejected": -384.7902526855469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2774665355682373, "rewards/margins": 10.123167037963867, "rewards/rejected": -11.400632858276367, "step": 2972 }, { "epoch": 0.46, "learning_rate": 1.1966815706208829e-05, "logits/chosen": -2.9761619567871094, "logits/rejected": -3.085698366165161, "logps/chosen": -481.58453369140625, "logps/rejected": -596.0360717773438, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7080711722373962, "rewards/margins": 9.08539867401123, "rewards/rejected": -9.793469429016113, "step": 2973 }, { "epoch": 0.46, "learning_rate": 1.196608226567768e-05, "logits/chosen": -2.092724323272705, "logits/rejected": -2.1438302993774414, "logps/chosen": -570.7830810546875, "logps/rejected": -384.0810546875, "loss": 1.2424, "rewards/accuracies": 0.5, "rewards/chosen": -4.8134260177612305, "rewards/margins": 0.47327864170074463, "rewards/rejected": -5.2867045402526855, "step": 2974 }, { "epoch": 0.46, "learning_rate": 1.1965348825146532e-05, "logits/chosen": -2.891007423400879, "logits/rejected": -2.495819568634033, "logps/chosen": -501.4319763183594, "logps/rejected": -516.27001953125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.9509624242782593, "rewards/margins": 6.601193428039551, "rewards/rejected": -8.552156448364258, "step": 2975 }, { "epoch": 0.46, "learning_rate": 1.1964615384615384e-05, "logits/chosen": -2.532660722732544, "logits/rejected": -2.894573211669922, "logps/chosen": -234.97784423828125, "logps/rejected": -237.1381378173828, "loss": 2.2556, "rewards/accuracies": 0.5, "rewards/chosen": -4.500627517700195, "rewards/margins": 2.107233762741089, "rewards/rejected": -6.607861518859863, "step": 2976 }, { "epoch": 0.46, "learning_rate": 1.1963881944084236e-05, "logits/chosen": -2.8100461959838867, "logits/rejected": -1.5767055749893188, "logps/chosen": -323.2411804199219, "logps/rejected": -190.8896026611328, "loss": 2.4967, "rewards/accuracies": 0.5, "rewards/chosen": -4.695801734924316, "rewards/margins": 0.41168785095214844, "rewards/rejected": -5.107489585876465, "step": 2977 }, { "epoch": 0.46, "learning_rate": 1.1963148503553088e-05, "logits/chosen": -1.9493091106414795, "logits/rejected": -2.713637351989746, "logps/chosen": -187.4253692626953, "logps/rejected": -485.0837707519531, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.292885422706604, "rewards/margins": 7.515673637390137, "rewards/rejected": -8.80855941772461, "step": 2978 }, { "epoch": 0.46, "learning_rate": 1.196241506302194e-05, "logits/chosen": -2.7860093116760254, "logits/rejected": -2.7281038761138916, "logps/chosen": -127.99423217773438, "logps/rejected": -119.18618774414062, "loss": 3.0043, "rewards/accuracies": 0.5, "rewards/chosen": -3.957096576690674, "rewards/margins": 0.1676943302154541, "rewards/rejected": -4.124790668487549, "step": 2979 }, { "epoch": 0.46, "learning_rate": 1.1961681622490794e-05, "logits/chosen": -1.9301774501800537, "logits/rejected": -2.874159336090088, "logps/chosen": -150.5899200439453, "logps/rejected": -367.8493957519531, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -2.0406367778778076, "rewards/margins": 8.928096771240234, "rewards/rejected": -10.968732833862305, "step": 2980 }, { "epoch": 0.46, "learning_rate": 1.1960948181959645e-05, "logits/chosen": -3.0585060119628906, "logits/rejected": -2.5795271396636963, "logps/chosen": -1008.052001953125, "logps/rejected": -894.7228393554688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.288848876953125, "rewards/margins": 6.302334785461426, "rewards/rejected": -8.59118366241455, "step": 2981 }, { "epoch": 0.46, "learning_rate": 1.1960214741428497e-05, "logits/chosen": -2.531149387359619, "logits/rejected": -3.2285139560699463, "logps/chosen": -53.89363098144531, "logps/rejected": -369.72625732421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.463922142982483, "rewards/margins": 7.99286413192749, "rewards/rejected": -9.456786155700684, "step": 2982 }, { "epoch": 0.46, "learning_rate": 1.195948130089735e-05, "logits/chosen": -3.08355450630188, "logits/rejected": -3.022148609161377, "logps/chosen": -224.86085510253906, "logps/rejected": -459.35198974609375, "loss": 2.9448, "rewards/accuracies": 0.5, "rewards/chosen": -5.363649368286133, "rewards/margins": 3.702687978744507, "rewards/rejected": -9.066337585449219, "step": 2983 }, { "epoch": 0.46, "learning_rate": 1.1958747860366201e-05, "logits/chosen": -2.6766536235809326, "logits/rejected": -2.4492311477661133, "logps/chosen": -271.7526550292969, "logps/rejected": -320.5556640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.667269468307495, "rewards/margins": 7.148448944091797, "rewards/rejected": -9.815718650817871, "step": 2984 }, { "epoch": 0.46, "learning_rate": 1.1958014419835053e-05, "logits/chosen": -2.154024839401245, "logits/rejected": -2.871473550796509, "logps/chosen": -87.31646728515625, "logps/rejected": -309.52069091796875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.471022605895996, "rewards/margins": 6.610101699829102, "rewards/rejected": -8.081124305725098, "step": 2985 }, { "epoch": 0.46, "learning_rate": 1.1957280979303905e-05, "logits/chosen": -3.2170639038085938, "logits/rejected": -2.9752471446990967, "logps/chosen": -117.28598022460938, "logps/rejected": -57.35026550292969, "loss": 3.7413, "rewards/accuracies": 0.5, "rewards/chosen": -4.789796352386475, "rewards/margins": -2.069566011428833, "rewards/rejected": -2.7202301025390625, "step": 2986 }, { "epoch": 0.46, "learning_rate": 1.1956547538772758e-05, "logits/chosen": -2.6923904418945312, "logits/rejected": -3.0716474056243896, "logps/chosen": -212.8114471435547, "logps/rejected": -245.94337463378906, "loss": 3.4608, "rewards/accuracies": 0.5, "rewards/chosen": -5.216974258422852, "rewards/margins": 0.6208310127258301, "rewards/rejected": -5.837805271148682, "step": 2987 }, { "epoch": 0.46, "learning_rate": 1.195581409824161e-05, "logits/chosen": -2.928384304046631, "logits/rejected": -2.895040988922119, "logps/chosen": -513.8338623046875, "logps/rejected": -543.8659057617188, "loss": 3.7374, "rewards/accuracies": 0.5, "rewards/chosen": -5.628116130828857, "rewards/margins": 0.9393672943115234, "rewards/rejected": -6.567483425140381, "step": 2988 }, { "epoch": 0.46, "learning_rate": 1.1955080657710464e-05, "logits/chosen": -3.0223779678344727, "logits/rejected": -3.0627243518829346, "logps/chosen": -196.97901916503906, "logps/rejected": -281.5994873046875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.918707013130188, "rewards/margins": 7.526364326477051, "rewards/rejected": -8.44507122039795, "step": 2989 }, { "epoch": 0.47, "learning_rate": 1.1954347217179316e-05, "logits/chosen": -3.1796183586120605, "logits/rejected": -2.9209790229797363, "logps/chosen": -148.8883056640625, "logps/rejected": -172.4364471435547, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4081470966339111, "rewards/margins": 7.535914897918701, "rewards/rejected": -8.944062232971191, "step": 2990 }, { "epoch": 0.47, "learning_rate": 1.1953613776648168e-05, "logits/chosen": -3.4259166717529297, "logits/rejected": -3.1869757175445557, "logps/chosen": -165.89723205566406, "logps/rejected": -56.50548553466797, "loss": 3.24, "rewards/accuracies": 0.0, "rewards/chosen": -4.571375846862793, "rewards/margins": -3.1933212280273438, "rewards/rejected": -1.3780544996261597, "step": 2991 }, { "epoch": 0.47, "learning_rate": 1.195288033611702e-05, "logits/chosen": -2.219282627105713, "logits/rejected": -2.710094928741455, "logps/chosen": -225.7338104248047, "logps/rejected": -198.98455810546875, "loss": 1.2128, "rewards/accuracies": 0.5, "rewards/chosen": -4.687167167663574, "rewards/margins": 0.8548598289489746, "rewards/rejected": -5.542027473449707, "step": 2992 }, { "epoch": 0.47, "learning_rate": 1.1952146895585871e-05, "logits/chosen": -2.894205331802368, "logits/rejected": -2.45546555519104, "logps/chosen": -275.3834228515625, "logps/rejected": -102.94190979003906, "loss": 2.8722, "rewards/accuracies": 0.5, "rewards/chosen": -4.078814506530762, "rewards/margins": -2.382746934890747, "rewards/rejected": -1.6960678100585938, "step": 2993 }, { "epoch": 0.47, "learning_rate": 1.1951413455054723e-05, "logits/chosen": -2.688692569732666, "logits/rejected": -2.719426155090332, "logps/chosen": -132.51950073242188, "logps/rejected": -303.17108154296875, "loss": 3.4761, "rewards/accuracies": 0.5, "rewards/chosen": -6.365525245666504, "rewards/margins": 0.06736993789672852, "rewards/rejected": -6.432894706726074, "step": 2994 }, { "epoch": 0.47, "learning_rate": 1.1950680014523575e-05, "logits/chosen": -1.9769700765609741, "logits/rejected": -3.219508647918701, "logps/chosen": -298.2013244628906, "logps/rejected": -610.275634765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.7372956275939941, "rewards/margins": 7.159100532531738, "rewards/rejected": -8.89639663696289, "step": 2995 }, { "epoch": 0.47, "learning_rate": 1.1949946573992427e-05, "logits/chosen": -2.831580400466919, "logits/rejected": -2.7613253593444824, "logps/chosen": -259.0971984863281, "logps/rejected": -242.69985961914062, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8652147650718689, "rewards/margins": 8.082237243652344, "rewards/rejected": -8.947452545166016, "step": 2996 }, { "epoch": 0.47, "learning_rate": 1.1949213133461279e-05, "logits/chosen": -2.658534049987793, "logits/rejected": -2.9291183948516846, "logps/chosen": -290.6852722167969, "logps/rejected": -356.94085693359375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.305570363998413, "rewards/margins": 7.530494689941406, "rewards/rejected": -9.836065292358398, "step": 2997 }, { "epoch": 0.47, "learning_rate": 1.1948479692930132e-05, "logits/chosen": -2.4850738048553467, "logits/rejected": -3.262742757797241, "logps/chosen": -247.89718627929688, "logps/rejected": -346.649658203125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.035082221031189, "rewards/margins": 7.368449687957764, "rewards/rejected": -8.403532028198242, "step": 2998 }, { "epoch": 0.47, "learning_rate": 1.1947746252398984e-05, "logits/chosen": -2.7243616580963135, "logits/rejected": -2.8295791149139404, "logps/chosen": -66.43296813964844, "logps/rejected": -224.31045532226562, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -2.2252798080444336, "rewards/margins": 6.636724472045898, "rewards/rejected": -8.862004280090332, "step": 2999 }, { "epoch": 0.47, "learning_rate": 1.1947012811867836e-05, "logits/chosen": -1.7890756130218506, "logits/rejected": -3.0023906230926514, "logps/chosen": -186.4691162109375, "logps/rejected": -173.12037658691406, "loss": 2.9737, "rewards/accuracies": 0.5, "rewards/chosen": -5.663613319396973, "rewards/margins": -1.0336722135543823, "rewards/rejected": -4.629940986633301, "step": 3000 }, { "epoch": 0.47, "learning_rate": 1.1946279371336688e-05, "logits/chosen": -1.2179759740829468, "logits/rejected": -3.256679058074951, "logps/chosen": -144.79214477539062, "logps/rejected": -422.603271484375, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -3.041095018386841, "rewards/margins": 3.9670333862304688, "rewards/rejected": -7.008128643035889, "step": 3001 }, { "epoch": 0.47, "learning_rate": 1.194554593080554e-05, "logits/chosen": -2.4158666133880615, "logits/rejected": -2.9956328868865967, "logps/chosen": -81.74763488769531, "logps/rejected": -290.0328369140625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.2811784744262695, "rewards/margins": 6.506106376647949, "rewards/rejected": -8.787284851074219, "step": 3002 }, { "epoch": 0.47, "learning_rate": 1.1944812490274392e-05, "logits/chosen": -3.2162744998931885, "logits/rejected": -2.4550507068634033, "logps/chosen": -586.9081420898438, "logps/rejected": -207.31524658203125, "loss": 1.8059, "rewards/accuracies": 0.5, "rewards/chosen": -3.6833925247192383, "rewards/margins": 1.2460891008377075, "rewards/rejected": -4.929481506347656, "step": 3003 }, { "epoch": 0.47, "learning_rate": 1.1944079049743244e-05, "logits/chosen": -2.5818159580230713, "logits/rejected": -3.150787591934204, "logps/chosen": -139.23060607910156, "logps/rejected": -272.30267333984375, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": -2.7570834159851074, "rewards/margins": 3.5991199016571045, "rewards/rejected": -6.356203556060791, "step": 3004 }, { "epoch": 0.47, "learning_rate": 1.1943345609212096e-05, "logits/chosen": -2.7962331771850586, "logits/rejected": -3.1580770015716553, "logps/chosen": -343.368408203125, "logps/rejected": -520.35205078125, "loss": 3.7195, "rewards/accuracies": 0.5, "rewards/chosen": -5.262292385101318, "rewards/margins": 0.05707383155822754, "rewards/rejected": -5.319366455078125, "step": 3005 }, { "epoch": 0.47, "learning_rate": 1.1942612168680947e-05, "logits/chosen": -3.172518491744995, "logits/rejected": -2.8379101753234863, "logps/chosen": -149.70260620117188, "logps/rejected": -88.74092864990234, "loss": 0.5394, "rewards/accuracies": 0.5, "rewards/chosen": -3.851313591003418, "rewards/margins": 1.4362329244613647, "rewards/rejected": -5.287546634674072, "step": 3006 }, { "epoch": 0.47, "learning_rate": 1.1941878728149801e-05, "logits/chosen": -2.5213735103607178, "logits/rejected": -2.9686660766601562, "logps/chosen": -50.13656234741211, "logps/rejected": -168.369140625, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -2.1089634895324707, "rewards/margins": 3.113250255584717, "rewards/rejected": -5.2222137451171875, "step": 3007 }, { "epoch": 0.47, "learning_rate": 1.1941145287618653e-05, "logits/chosen": -1.8544310331344604, "logits/rejected": -2.5347061157226562, "logps/chosen": -299.5360107421875, "logps/rejected": -356.94525146484375, "loss": 2.5711, "rewards/accuracies": 0.5, "rewards/chosen": -3.6191115379333496, "rewards/margins": 0.8540101051330566, "rewards/rejected": -4.473121643066406, "step": 3008 }, { "epoch": 0.47, "learning_rate": 1.1940411847087505e-05, "logits/chosen": -2.8133034706115723, "logits/rejected": -3.0513975620269775, "logps/chosen": -99.45417022705078, "logps/rejected": -186.15382385253906, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -2.1233010292053223, "rewards/margins": 4.417965412139893, "rewards/rejected": -6.541266441345215, "step": 3009 }, { "epoch": 0.47, "learning_rate": 1.1939678406556357e-05, "logits/chosen": -2.210156202316284, "logits/rejected": -2.6505258083343506, "logps/chosen": -240.96693420410156, "logps/rejected": -318.1405029296875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.3433258533477783, "rewards/margins": 5.024514675140381, "rewards/rejected": -6.367840766906738, "step": 3010 }, { "epoch": 0.47, "learning_rate": 1.1938944966025209e-05, "logits/chosen": -2.6221542358398438, "logits/rejected": -2.2153713703155518, "logps/chosen": -73.53650665283203, "logps/rejected": -164.15902709960938, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.492994785308838, "rewards/margins": 4.939638137817383, "rewards/rejected": -7.4326324462890625, "step": 3011 }, { "epoch": 0.47, "learning_rate": 1.193821152549406e-05, "logits/chosen": -0.8089055418968201, "logits/rejected": -2.6772022247314453, "logps/chosen": -112.56985473632812, "logps/rejected": -384.7762451171875, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -2.237180709838867, "rewards/margins": 4.814918518066406, "rewards/rejected": -7.052099227905273, "step": 3012 }, { "epoch": 0.47, "learning_rate": 1.1937478084962912e-05, "logits/chosen": -2.955047130584717, "logits/rejected": -3.1289846897125244, "logps/chosen": -440.01910400390625, "logps/rejected": -565.154052734375, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.9121811389923096, "rewards/margins": 4.380801200866699, "rewards/rejected": -6.292982578277588, "step": 3013 }, { "epoch": 0.47, "learning_rate": 1.1936744644431764e-05, "logits/chosen": -2.0797412395477295, "logits/rejected": -3.0492653846740723, "logps/chosen": -77.30096435546875, "logps/rejected": -253.26406860351562, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": -2.9849588871002197, "rewards/margins": 2.530641794204712, "rewards/rejected": -5.515600681304932, "step": 3014 }, { "epoch": 0.47, "learning_rate": 1.1936011203900616e-05, "logits/chosen": -2.9978034496307373, "logits/rejected": -3.337459087371826, "logps/chosen": -392.236083984375, "logps/rejected": -561.41455078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8181552886962891, "rewards/margins": 8.136622428894043, "rewards/rejected": -8.954777717590332, "step": 3015 }, { "epoch": 0.47, "learning_rate": 1.193527776336947e-05, "logits/chosen": -2.539804697036743, "logits/rejected": -2.7660789489746094, "logps/chosen": -498.3468322753906, "logps/rejected": -544.1180419921875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7058937549591064, "rewards/margins": 6.563267230987549, "rewards/rejected": -7.269160747528076, "step": 3016 }, { "epoch": 0.47, "learning_rate": 1.1934544322838322e-05, "logits/chosen": -1.2065662145614624, "logits/rejected": -3.0274782180786133, "logps/chosen": -32.901283264160156, "logps/rejected": -500.0479736328125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.6635633707046509, "rewards/margins": 4.947884559631348, "rewards/rejected": -6.611448287963867, "step": 3017 }, { "epoch": 0.47, "learning_rate": 1.1933810882307173e-05, "logits/chosen": -2.4038822650909424, "logits/rejected": -3.216775894165039, "logps/chosen": -81.62339782714844, "logps/rejected": -250.66705322265625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.6985225677490234, "rewards/margins": 5.520172119140625, "rewards/rejected": -7.218694686889648, "step": 3018 }, { "epoch": 0.47, "learning_rate": 1.1933077441776025e-05, "logits/chosen": -2.7302675247192383, "logits/rejected": -2.2401418685913086, "logps/chosen": -157.7742156982422, "logps/rejected": -173.1005401611328, "loss": 2.4135, "rewards/accuracies": 0.5, "rewards/chosen": -5.165182113647461, "rewards/margins": -1.4390901327133179, "rewards/rejected": -3.7260918617248535, "step": 3019 }, { "epoch": 0.47, "learning_rate": 1.1932344001244877e-05, "logits/chosen": -3.0322439670562744, "logits/rejected": -1.9184157848358154, "logps/chosen": -250.0389404296875, "logps/rejected": -230.53411865234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.655652642250061, "rewards/margins": 7.7476606369018555, "rewards/rejected": -8.403313636779785, "step": 3020 }, { "epoch": 0.47, "learning_rate": 1.193161056071373e-05, "logits/chosen": -3.1837244033813477, "logits/rejected": -1.4601811170578003, "logps/chosen": -450.3053894042969, "logps/rejected": -184.78176879882812, "loss": 2.6729, "rewards/accuracies": 0.5, "rewards/chosen": -3.9557695388793945, "rewards/margins": 0.2837707996368408, "rewards/rejected": -4.239540100097656, "step": 3021 }, { "epoch": 0.47, "learning_rate": 1.1930877120182583e-05, "logits/chosen": -2.9762930870056152, "logits/rejected": -2.658958911895752, "logps/chosen": -355.71734619140625, "logps/rejected": -261.8717346191406, "loss": 1.3618, "rewards/accuracies": 0.5, "rewards/chosen": -3.831028699874878, "rewards/margins": 0.5021754503250122, "rewards/rejected": -4.33320426940918, "step": 3022 }, { "epoch": 0.47, "learning_rate": 1.1930143679651434e-05, "logits/chosen": -3.225358247756958, "logits/rejected": -3.058028221130371, "logps/chosen": -466.07000732421875, "logps/rejected": -452.0672607421875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.7985290288925171, "rewards/margins": 5.651100158691406, "rewards/rejected": -6.449629306793213, "step": 3023 }, { "epoch": 0.47, "learning_rate": 1.1929410239120286e-05, "logits/chosen": -2.689049482345581, "logits/rejected": -3.00087308883667, "logps/chosen": -50.82579803466797, "logps/rejected": -146.5813446044922, "loss": 0.1534, "rewards/accuracies": 1.0, "rewards/chosen": -1.5442934036254883, "rewards/margins": 2.455453395843506, "rewards/rejected": -3.999746799468994, "step": 3024 }, { "epoch": 0.47, "learning_rate": 1.192867679858914e-05, "logits/chosen": -1.7263633012771606, "logits/rejected": -2.911442995071411, "logps/chosen": -86.45054626464844, "logps/rejected": -392.6280822753906, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -2.028679132461548, "rewards/margins": 4.959419250488281, "rewards/rejected": -6.98809814453125, "step": 3025 }, { "epoch": 0.47, "learning_rate": 1.1927943358057992e-05, "logits/chosen": -2.9723567962646484, "logits/rejected": -3.130251407623291, "logps/chosen": -717.8840942382812, "logps/rejected": -522.5501708984375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.7615204453468323, "rewards/margins": 5.034873962402344, "rewards/rejected": -5.796394348144531, "step": 3026 }, { "epoch": 0.47, "learning_rate": 1.1927209917526844e-05, "logits/chosen": -2.5385468006134033, "logits/rejected": -2.7531323432922363, "logps/chosen": -163.6543426513672, "logps/rejected": -303.33416748046875, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -1.7456059455871582, "rewards/margins": 4.025372505187988, "rewards/rejected": -5.770978927612305, "step": 3027 }, { "epoch": 0.47, "learning_rate": 1.1926476476995696e-05, "logits/chosen": -3.0912587642669678, "logits/rejected": -3.0157418251037598, "logps/chosen": -654.890625, "logps/rejected": -436.26019287109375, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -0.7378708124160767, "rewards/margins": 5.424964904785156, "rewards/rejected": -6.162835597991943, "step": 3028 }, { "epoch": 0.47, "learning_rate": 1.1925743036464547e-05, "logits/chosen": -1.7690807580947876, "logits/rejected": -2.8362698554992676, "logps/chosen": -141.05929565429688, "logps/rejected": -316.6219787597656, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": -3.970642566680908, "rewards/margins": 6.334112167358398, "rewards/rejected": -10.304754257202148, "step": 3029 }, { "epoch": 0.47, "learning_rate": 1.19250095959334e-05, "logits/chosen": -1.9319475889205933, "logits/rejected": -2.74782657623291, "logps/chosen": -441.55279541015625, "logps/rejected": -513.8447875976562, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.7514259815216064, "rewards/margins": 6.923009872436523, "rewards/rejected": -9.67443561553955, "step": 3030 }, { "epoch": 0.47, "learning_rate": 1.1924276155402251e-05, "logits/chosen": -2.446925401687622, "logits/rejected": -3.126591444015503, "logps/chosen": -620.0374755859375, "logps/rejected": -815.792724609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.8353195190429688, "rewards/margins": 6.858372688293457, "rewards/rejected": -8.693692207336426, "step": 3031 }, { "epoch": 0.47, "learning_rate": 1.1923542714871103e-05, "logits/chosen": -1.3485745191574097, "logits/rejected": -2.8087728023529053, "logps/chosen": -77.16366577148438, "logps/rejected": -262.9428405761719, "loss": 0.2841, "rewards/accuracies": 1.0, "rewards/chosen": -3.3084263801574707, "rewards/margins": 2.8141448497772217, "rewards/rejected": -6.122570991516113, "step": 3032 }, { "epoch": 0.47, "learning_rate": 1.1922809274339955e-05, "logits/chosen": -3.07102632522583, "logits/rejected": -3.0132999420166016, "logps/chosen": -262.6542663574219, "logps/rejected": -250.93508911132812, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.2208855152130127, "rewards/margins": 6.612520217895508, "rewards/rejected": -7.833405494689941, "step": 3033 }, { "epoch": 0.47, "learning_rate": 1.1922075833808809e-05, "logits/chosen": -2.9733645915985107, "logits/rejected": -1.8008822202682495, "logps/chosen": -295.9393615722656, "logps/rejected": -240.7066650390625, "loss": 2.4655, "rewards/accuracies": 0.5, "rewards/chosen": -4.447900295257568, "rewards/margins": -1.1657860279083252, "rewards/rejected": -3.2821145057678223, "step": 3034 }, { "epoch": 0.47, "learning_rate": 1.192134239327766e-05, "logits/chosen": -3.0055572986602783, "logits/rejected": -1.4316980838775635, "logps/chosen": -271.1334533691406, "logps/rejected": -154.4917755126953, "loss": 3.8858, "rewards/accuracies": 0.5, "rewards/chosen": -5.0262651443481445, "rewards/margins": 0.21611738204956055, "rewards/rejected": -5.242382526397705, "step": 3035 }, { "epoch": 0.47, "learning_rate": 1.1920608952746512e-05, "logits/chosen": -1.9981645345687866, "logits/rejected": -2.7863776683807373, "logps/chosen": -72.46233367919922, "logps/rejected": -237.11895751953125, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -1.9514367580413818, "rewards/margins": 4.028656005859375, "rewards/rejected": -5.980093002319336, "step": 3036 }, { "epoch": 0.47, "learning_rate": 1.1919875512215364e-05, "logits/chosen": -2.10780668258667, "logits/rejected": -3.181572437286377, "logps/chosen": -271.459716796875, "logps/rejected": -447.8085632324219, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.6733119487762451, "rewards/margins": 6.282877445220947, "rewards/rejected": -7.956189155578613, "step": 3037 }, { "epoch": 0.47, "learning_rate": 1.1919142071684216e-05, "logits/chosen": -3.0470123291015625, "logits/rejected": -2.9967689514160156, "logps/chosen": -229.36697387695312, "logps/rejected": -325.96258544921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.4907559156417847, "rewards/margins": 7.174286365509033, "rewards/rejected": -8.665042877197266, "step": 3038 }, { "epoch": 0.47, "learning_rate": 1.1918408631153068e-05, "logits/chosen": -2.731269359588623, "logits/rejected": -2.5140647888183594, "logps/chosen": -132.31411743164062, "logps/rejected": -261.2702941894531, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.2091789245605469, "rewards/margins": 7.307807445526123, "rewards/rejected": -8.516986846923828, "step": 3039 }, { "epoch": 0.47, "learning_rate": 1.191767519062192e-05, "logits/chosen": -2.9439430236816406, "logits/rejected": -2.824148654937744, "logps/chosen": -328.33270263671875, "logps/rejected": -409.08282470703125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.609789252281189, "rewards/margins": 4.558777809143066, "rewards/rejected": -6.168567180633545, "step": 3040 }, { "epoch": 0.47, "learning_rate": 1.1916941750090772e-05, "logits/chosen": -2.292616367340088, "logits/rejected": -3.1847636699676514, "logps/chosen": -276.8926696777344, "logps/rejected": -450.23004150390625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.819068193435669, "rewards/margins": 8.036616325378418, "rewards/rejected": -10.855684280395508, "step": 3041 }, { "epoch": 0.47, "learning_rate": 1.1916208309559624e-05, "logits/chosen": -1.2892451286315918, "logits/rejected": -3.109066963195801, "logps/chosen": -137.55841064453125, "logps/rejected": -455.54193115234375, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.5084161758422852, "rewards/margins": 6.984834671020508, "rewards/rejected": -8.493250846862793, "step": 3042 }, { "epoch": 0.47, "learning_rate": 1.1915474869028477e-05, "logits/chosen": -2.879669666290283, "logits/rejected": -2.00663685798645, "logps/chosen": -207.13844299316406, "logps/rejected": -162.92066955566406, "loss": 3.8709, "rewards/accuracies": 0.5, "rewards/chosen": -6.502161026000977, "rewards/margins": -1.4647293090820312, "rewards/rejected": -5.037431716918945, "step": 3043 }, { "epoch": 0.47, "learning_rate": 1.1914741428497329e-05, "logits/chosen": -2.9277517795562744, "logits/rejected": -2.8485498428344727, "logps/chosen": -286.0845031738281, "logps/rejected": -343.6512756347656, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.7457550764083862, "rewards/margins": 5.2402544021606445, "rewards/rejected": -6.98600959777832, "step": 3044 }, { "epoch": 0.47, "learning_rate": 1.1914007987966181e-05, "logits/chosen": -2.521973133087158, "logits/rejected": -3.110813856124878, "logps/chosen": -311.06695556640625, "logps/rejected": -601.1436767578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.1533005237579346, "rewards/margins": 8.684725761413574, "rewards/rejected": -10.83802604675293, "step": 3045 }, { "epoch": 0.47, "learning_rate": 1.1913274547435033e-05, "logits/chosen": -2.777862310409546, "logits/rejected": -2.7409281730651855, "logps/chosen": -96.31996154785156, "logps/rejected": -153.90284729003906, "loss": 0.0775, "rewards/accuracies": 1.0, "rewards/chosen": -1.47440767288208, "rewards/margins": 4.191356658935547, "rewards/rejected": -5.665763854980469, "step": 3046 }, { "epoch": 0.47, "learning_rate": 1.1912541106903885e-05, "logits/chosen": -2.761338710784912, "logits/rejected": -2.862114191055298, "logps/chosen": -371.30889892578125, "logps/rejected": -388.6943664550781, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3829551935195923, "rewards/margins": 7.28489351272583, "rewards/rejected": -8.667848587036133, "step": 3047 }, { "epoch": 0.47, "learning_rate": 1.1911807666372737e-05, "logits/chosen": -2.457733392715454, "logits/rejected": -2.703124761581421, "logps/chosen": -364.3299255371094, "logps/rejected": -545.067138671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.028496503829956, "rewards/margins": 7.143268585205078, "rewards/rejected": -9.171765327453613, "step": 3048 }, { "epoch": 0.47, "learning_rate": 1.1911074225841588e-05, "logits/chosen": -2.763207197189331, "logits/rejected": -2.131361484527588, "logps/chosen": -164.2516632080078, "logps/rejected": -202.8463134765625, "loss": 0.7947, "rewards/accuracies": 0.5, "rewards/chosen": -3.069140911102295, "rewards/margins": 1.7776081562042236, "rewards/rejected": -4.846749305725098, "step": 3049 }, { "epoch": 0.47, "learning_rate": 1.191034078531044e-05, "logits/chosen": -2.1461453437805176, "logits/rejected": -2.884340286254883, "logps/chosen": -272.4349060058594, "logps/rejected": -372.0182189941406, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.1973717212677, "rewards/margins": 5.60717248916626, "rewards/rejected": -7.804544448852539, "step": 3050 }, { "epoch": 0.47, "learning_rate": 1.1909607344779292e-05, "logits/chosen": -2.8574278354644775, "logits/rejected": -3.441481113433838, "logps/chosen": -54.35670471191406, "logps/rejected": -223.62643432617188, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.4622020721435547, "rewards/margins": 6.247617244720459, "rewards/rejected": -7.709819316864014, "step": 3051 }, { "epoch": 0.47, "learning_rate": 1.1908873904248146e-05, "logits/chosen": -2.4047679901123047, "logits/rejected": -3.116544246673584, "logps/chosen": -189.10546875, "logps/rejected": -319.94512939453125, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -1.286383867263794, "rewards/margins": 3.9881033897399902, "rewards/rejected": -5.274487495422363, "step": 3052 }, { "epoch": 0.47, "learning_rate": 1.1908140463716998e-05, "logits/chosen": -1.8761656284332275, "logits/rejected": -2.5844359397888184, "logps/chosen": -224.2360076904297, "logps/rejected": -369.20166015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.6008732318878174, "rewards/margins": 7.574323654174805, "rewards/rejected": -10.175196647644043, "step": 3053 }, { "epoch": 0.47, "learning_rate": 1.190740702318585e-05, "logits/chosen": -2.6091668605804443, "logits/rejected": -2.2326974868774414, "logps/chosen": -270.17999267578125, "logps/rejected": -363.9827880859375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.5675883293151855, "rewards/margins": 6.152737617492676, "rewards/rejected": -8.720325469970703, "step": 3054 }, { "epoch": 0.48, "learning_rate": 1.1906673582654701e-05, "logits/chosen": -2.4392178058624268, "logits/rejected": -2.7556512355804443, "logps/chosen": -779.384033203125, "logps/rejected": -693.3119506835938, "loss": 3.3843, "rewards/accuracies": 0.5, "rewards/chosen": -5.556929111480713, "rewards/margins": -0.5802276134490967, "rewards/rejected": -4.976701736450195, "step": 3055 }, { "epoch": 0.48, "learning_rate": 1.1905940142123555e-05, "logits/chosen": -2.968834400177002, "logits/rejected": -1.9078471660614014, "logps/chosen": -441.072998046875, "logps/rejected": -245.66612243652344, "loss": 0.0866, "rewards/accuracies": 1.0, "rewards/chosen": -1.699609398841858, "rewards/margins": 3.2378625869750977, "rewards/rejected": -4.937472343444824, "step": 3056 }, { "epoch": 0.48, "learning_rate": 1.1905206701592407e-05, "logits/chosen": -2.7972235679626465, "logits/rejected": -2.7103078365325928, "logps/chosen": -203.67755126953125, "logps/rejected": -287.51513671875, "loss": 0.1527, "rewards/accuracies": 1.0, "rewards/chosen": -1.412642002105713, "rewards/margins": 4.226066589355469, "rewards/rejected": -5.638708591461182, "step": 3057 }, { "epoch": 0.48, "learning_rate": 1.1904473261061259e-05, "logits/chosen": -2.9252350330352783, "logits/rejected": -3.1499621868133545, "logps/chosen": -393.3540954589844, "logps/rejected": -383.599365234375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7057335376739502, "rewards/margins": 6.2256340980529785, "rewards/rejected": -7.931367874145508, "step": 3058 }, { "epoch": 0.48, "learning_rate": 1.190373982053011e-05, "logits/chosen": -0.8583461046218872, "logits/rejected": -2.9847469329833984, "logps/chosen": -50.656105041503906, "logps/rejected": -290.31121826171875, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -2.1670002937316895, "rewards/margins": 3.112415075302124, "rewards/rejected": -5.279415130615234, "step": 3059 }, { "epoch": 0.48, "learning_rate": 1.1903006379998964e-05, "logits/chosen": -2.785285711288452, "logits/rejected": -2.5871458053588867, "logps/chosen": -387.94189453125, "logps/rejected": -305.091552734375, "loss": 2.4523, "rewards/accuracies": 0.5, "rewards/chosen": -5.215126991271973, "rewards/margins": 0.3729128837585449, "rewards/rejected": -5.588039875030518, "step": 3060 }, { "epoch": 0.48, "learning_rate": 1.1902272939467816e-05, "logits/chosen": -3.0662786960601807, "logits/rejected": -3.066311836242676, "logps/chosen": -552.815673828125, "logps/rejected": -597.17138671875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.454420566558838, "rewards/margins": 5.597651481628418, "rewards/rejected": -8.052072525024414, "step": 3061 }, { "epoch": 0.48, "learning_rate": 1.1901539498936668e-05, "logits/chosen": -2.3185110092163086, "logits/rejected": -2.866607904434204, "logps/chosen": -95.67298889160156, "logps/rejected": -166.93084716796875, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -2.3745689392089844, "rewards/margins": 4.528008460998535, "rewards/rejected": -6.9025774002075195, "step": 3062 }, { "epoch": 0.48, "learning_rate": 1.190080605840552e-05, "logits/chosen": -2.199711561203003, "logits/rejected": -3.0824646949768066, "logps/chosen": -372.427490234375, "logps/rejected": -713.477783203125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.433701753616333, "rewards/margins": 8.327531814575195, "rewards/rejected": -9.761234283447266, "step": 3063 }, { "epoch": 0.48, "learning_rate": 1.1900072617874372e-05, "logits/chosen": -1.9612517356872559, "logits/rejected": -2.8168647289276123, "logps/chosen": -78.03434753417969, "logps/rejected": -291.51715087890625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -2.202178716659546, "rewards/margins": 6.205273628234863, "rewards/rejected": -8.407451629638672, "step": 3064 }, { "epoch": 0.48, "learning_rate": 1.1899339177343224e-05, "logits/chosen": -2.4472687244415283, "logits/rejected": -2.9748785495758057, "logps/chosen": -161.81500244140625, "logps/rejected": -502.8926086425781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.923320770263672, "rewards/margins": 8.263765335083008, "rewards/rejected": -11.187087059020996, "step": 3065 }, { "epoch": 0.48, "learning_rate": 1.1898605736812075e-05, "logits/chosen": -2.5230023860931396, "logits/rejected": -3.0973641872406006, "logps/chosen": -154.9950408935547, "logps/rejected": -317.11749267578125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.3052101135253906, "rewards/margins": 5.453726768493652, "rewards/rejected": -6.758936882019043, "step": 3066 }, { "epoch": 0.48, "learning_rate": 1.1897872296280927e-05, "logits/chosen": -2.6170151233673096, "logits/rejected": -2.878959894180298, "logps/chosen": -288.0010070800781, "logps/rejected": -473.9615173339844, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -2.287663459777832, "rewards/margins": 5.3135576248168945, "rewards/rejected": -7.601221084594727, "step": 3067 }, { "epoch": 0.48, "learning_rate": 1.189713885574978e-05, "logits/chosen": -2.820998191833496, "logits/rejected": -2.467820882797241, "logps/chosen": -168.58987426757812, "logps/rejected": -318.52105712890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.294226884841919, "rewards/margins": 9.357226371765137, "rewards/rejected": -11.651453018188477, "step": 3068 }, { "epoch": 0.48, "learning_rate": 1.1896405415218633e-05, "logits/chosen": -2.9000604152679443, "logits/rejected": -1.9071335792541504, "logps/chosen": -230.9653778076172, "logps/rejected": -114.5215835571289, "loss": 0.1095, "rewards/accuracies": 1.0, "rewards/chosen": -2.6604361534118652, "rewards/margins": 3.155841588973999, "rewards/rejected": -5.816277980804443, "step": 3069 }, { "epoch": 0.48, "learning_rate": 1.1895671974687485e-05, "logits/chosen": -3.115048885345459, "logits/rejected": -3.0324697494506836, "logps/chosen": -123.37373352050781, "logps/rejected": -154.11727905273438, "loss": 1.8388, "rewards/accuracies": 0.5, "rewards/chosen": -2.2334532737731934, "rewards/margins": 1.6266705989837646, "rewards/rejected": -3.860123872756958, "step": 3070 }, { "epoch": 0.48, "learning_rate": 1.1894938534156337e-05, "logits/chosen": -2.5300872325897217, "logits/rejected": -2.9175314903259277, "logps/chosen": -39.71156311035156, "logps/rejected": -197.85321044921875, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -2.6129815578460693, "rewards/margins": 3.9251904487609863, "rewards/rejected": -6.538171768188477, "step": 3071 }, { "epoch": 0.48, "learning_rate": 1.1894205093625188e-05, "logits/chosen": -2.787612199783325, "logits/rejected": -0.797544538974762, "logps/chosen": -928.1162109375, "logps/rejected": -288.4996337890625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.8630121946334839, "rewards/margins": 5.82697057723999, "rewards/rejected": -7.6899824142456055, "step": 3072 }, { "epoch": 0.48, "learning_rate": 1.189347165309404e-05, "logits/chosen": -2.1475682258605957, "logits/rejected": -3.0905158519744873, "logps/chosen": -73.60633087158203, "logps/rejected": -210.57998657226562, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -2.768169641494751, "rewards/margins": 4.066761016845703, "rewards/rejected": -6.834930896759033, "step": 3073 }, { "epoch": 0.48, "learning_rate": 1.1892738212562892e-05, "logits/chosen": -3.4041075706481934, "logits/rejected": -3.3638596534729004, "logps/chosen": -121.25426483154297, "logps/rejected": -88.36559295654297, "loss": 1.6091, "rewards/accuracies": 0.5, "rewards/chosen": -3.719278335571289, "rewards/margins": 1.0260881185531616, "rewards/rejected": -4.74536657333374, "step": 3074 }, { "epoch": 0.48, "learning_rate": 1.1892004772031744e-05, "logits/chosen": -1.8152767419815063, "logits/rejected": -2.762730836868286, "logps/chosen": -171.00820922851562, "logps/rejected": -258.1489562988281, "loss": 0.3762, "rewards/accuracies": 0.5, "rewards/chosen": -2.527327537536621, "rewards/margins": 3.708878755569458, "rewards/rejected": -6.2362060546875, "step": 3075 }, { "epoch": 0.48, "learning_rate": 1.1891271331500596e-05, "logits/chosen": -1.8522686958312988, "logits/rejected": -2.7430150508880615, "logps/chosen": -251.46145629882812, "logps/rejected": -334.8485412597656, "loss": 0.7789, "rewards/accuracies": 0.5, "rewards/chosen": -3.3943328857421875, "rewards/margins": 3.1041646003723145, "rewards/rejected": -6.498497486114502, "step": 3076 }, { "epoch": 0.48, "learning_rate": 1.1890537890969448e-05, "logits/chosen": -2.887723207473755, "logits/rejected": -1.7312673330307007, "logps/chosen": -230.28419494628906, "logps/rejected": -51.798583984375, "loss": 0.7456, "rewards/accuracies": 0.5, "rewards/chosen": -2.354705333709717, "rewards/margins": 0.31077998876571655, "rewards/rejected": -2.665485382080078, "step": 3077 }, { "epoch": 0.48, "learning_rate": 1.1889804450438301e-05, "logits/chosen": -1.6604174375534058, "logits/rejected": -3.146289825439453, "logps/chosen": -78.00238800048828, "logps/rejected": -286.9404602050781, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": -2.1638734340667725, "rewards/margins": 4.1745195388793945, "rewards/rejected": -6.338393211364746, "step": 3078 }, { "epoch": 0.48, "learning_rate": 1.1889071009907153e-05, "logits/chosen": -2.5380961894989014, "logits/rejected": -2.8302321434020996, "logps/chosen": -612.4255981445312, "logps/rejected": -564.778076171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.8836133480072021, "rewards/margins": 8.712307929992676, "rewards/rejected": -10.595921516418457, "step": 3079 }, { "epoch": 0.48, "learning_rate": 1.1888337569376005e-05, "logits/chosen": -2.82266902923584, "logits/rejected": -2.5093369483947754, "logps/chosen": -129.7791748046875, "logps/rejected": -180.46487426757812, "loss": 1.9367, "rewards/accuracies": 0.5, "rewards/chosen": -3.1997406482696533, "rewards/margins": 2.815211057662964, "rewards/rejected": -6.014951705932617, "step": 3080 }, { "epoch": 0.48, "learning_rate": 1.1887604128844857e-05, "logits/chosen": -2.002454996109009, "logits/rejected": -2.8893067836761475, "logps/chosen": -135.3544158935547, "logps/rejected": -263.96630859375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.36137503385543823, "rewards/margins": 6.449896335601807, "rewards/rejected": -6.811271667480469, "step": 3081 }, { "epoch": 0.48, "learning_rate": 1.1886870688313709e-05, "logits/chosen": -3.111328601837158, "logits/rejected": -2.7917368412017822, "logps/chosen": -282.4208068847656, "logps/rejected": -239.6634979248047, "loss": 2.2805, "rewards/accuracies": 0.5, "rewards/chosen": -3.9919228553771973, "rewards/margins": 0.9209604263305664, "rewards/rejected": -4.912883281707764, "step": 3082 }, { "epoch": 0.48, "learning_rate": 1.188613724778256e-05, "logits/chosen": -2.5176682472229004, "logits/rejected": -2.9677627086639404, "logps/chosen": -70.89649963378906, "logps/rejected": -158.844970703125, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -1.3261082172393799, "rewards/margins": 3.7322583198547363, "rewards/rejected": -5.058366775512695, "step": 3083 }, { "epoch": 0.48, "learning_rate": 1.1885403807251413e-05, "logits/chosen": -2.407351493835449, "logits/rejected": -2.8438680171966553, "logps/chosen": -110.43753051757812, "logps/rejected": -207.75892639160156, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.121275544166565, "rewards/margins": 4.716624736785889, "rewards/rejected": -5.837900161743164, "step": 3084 }, { "epoch": 0.48, "learning_rate": 1.1884670366720264e-05, "logits/chosen": -2.974355459213257, "logits/rejected": -2.816685676574707, "logps/chosen": -190.56329345703125, "logps/rejected": -250.92623901367188, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.5401912927627563, "rewards/margins": 6.25327205657959, "rewards/rejected": -7.793463706970215, "step": 3085 }, { "epoch": 0.48, "learning_rate": 1.1883936926189116e-05, "logits/chosen": -2.8789851665496826, "logits/rejected": -2.4584567546844482, "logps/chosen": -220.73538208007812, "logps/rejected": -127.81060791015625, "loss": 2.7679, "rewards/accuracies": 0.5, "rewards/chosen": -3.8094356060028076, "rewards/margins": -0.648890495300293, "rewards/rejected": -3.1605451107025146, "step": 3086 }, { "epoch": 0.48, "learning_rate": 1.188320348565797e-05, "logits/chosen": -2.9987375736236572, "logits/rejected": -2.8990073204040527, "logps/chosen": -409.3656311035156, "logps/rejected": -414.81024169921875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.7760255932807922, "rewards/margins": 5.711094856262207, "rewards/rejected": -6.487120628356934, "step": 3087 }, { "epoch": 0.48, "learning_rate": 1.1882470045126822e-05, "logits/chosen": -2.2754311561584473, "logits/rejected": -2.992783784866333, "logps/chosen": -265.8157653808594, "logps/rejected": -488.5415344238281, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.3158748149871826, "rewards/margins": 6.033858299255371, "rewards/rejected": -7.349733352661133, "step": 3088 }, { "epoch": 0.48, "learning_rate": 1.1881736604595674e-05, "logits/chosen": -2.7635982036590576, "logits/rejected": -3.009338855743408, "logps/chosen": -246.7738494873047, "logps/rejected": -281.06610107421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.6300415992736816, "rewards/margins": 6.9423651695251465, "rewards/rejected": -9.572406768798828, "step": 3089 }, { "epoch": 0.48, "learning_rate": 1.1881003164064527e-05, "logits/chosen": -1.6088571548461914, "logits/rejected": -2.7786705493927, "logps/chosen": -78.86117553710938, "logps/rejected": -354.2080993652344, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.942649483680725, "rewards/margins": 6.726506233215332, "rewards/rejected": -8.66915512084961, "step": 3090 }, { "epoch": 0.48, "learning_rate": 1.1880269723533379e-05, "logits/chosen": -2.7354366779327393, "logits/rejected": -3.023285388946533, "logps/chosen": -221.39566040039062, "logps/rejected": -463.1644592285156, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.2999122440814972, "rewards/margins": 7.734867572784424, "rewards/rejected": -8.03477954864502, "step": 3091 }, { "epoch": 0.48, "learning_rate": 1.1879536283002231e-05, "logits/chosen": -2.8511576652526855, "logits/rejected": -2.562591552734375, "logps/chosen": -240.6750030517578, "logps/rejected": -241.51663208007812, "loss": 0.0335, "rewards/accuracies": 1.0, "rewards/chosen": -0.5548591613769531, "rewards/margins": 4.922215938568115, "rewards/rejected": -5.477075099945068, "step": 3092 }, { "epoch": 0.48, "learning_rate": 1.1878802842471083e-05, "logits/chosen": -2.7605559825897217, "logits/rejected": -2.9752817153930664, "logps/chosen": -712.2210693359375, "logps/rejected": -908.92041015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.6924080848693848, "rewards/margins": 9.664058685302734, "rewards/rejected": -12.356466293334961, "step": 3093 }, { "epoch": 0.48, "learning_rate": 1.1878069401939935e-05, "logits/chosen": -2.5987703800201416, "logits/rejected": -2.959869146347046, "logps/chosen": -263.22625732421875, "logps/rejected": -523.3601684570312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4218018054962158, "rewards/margins": 8.480829238891602, "rewards/rejected": -9.902630805969238, "step": 3094 }, { "epoch": 0.48, "learning_rate": 1.1877335961408787e-05, "logits/chosen": -2.835016965866089, "logits/rejected": -2.926922082901001, "logps/chosen": -138.00051879882812, "logps/rejected": -243.21401977539062, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3970803022384644, "rewards/margins": 6.80584716796875, "rewards/rejected": -8.202927589416504, "step": 3095 }, { "epoch": 0.48, "learning_rate": 1.187660252087764e-05, "logits/chosen": -2.4146294593811035, "logits/rejected": -2.8878679275512695, "logps/chosen": -133.99252319335938, "logps/rejected": -337.718505859375, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -2.450072765350342, "rewards/margins": 7.461503505706787, "rewards/rejected": -9.911576271057129, "step": 3096 }, { "epoch": 0.48, "learning_rate": 1.1875869080346492e-05, "logits/chosen": -2.7846968173980713, "logits/rejected": -3.0414879322052, "logps/chosen": -40.57152557373047, "logps/rejected": -132.1678924560547, "loss": 0.1301, "rewards/accuracies": 1.0, "rewards/chosen": -2.747732162475586, "rewards/margins": 2.04024076461792, "rewards/rejected": -4.787972927093506, "step": 3097 }, { "epoch": 0.48, "learning_rate": 1.1875135639815344e-05, "logits/chosen": -2.774388313293457, "logits/rejected": -2.8790221214294434, "logps/chosen": -56.654144287109375, "logps/rejected": -179.43978881835938, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.635571002960205, "rewards/margins": 5.183108329772949, "rewards/rejected": -6.8186798095703125, "step": 3098 }, { "epoch": 0.48, "learning_rate": 1.1874402199284196e-05, "logits/chosen": -2.841625452041626, "logits/rejected": -1.8580379486083984, "logps/chosen": -179.08848571777344, "logps/rejected": -117.11798095703125, "loss": 4.1203, "rewards/accuracies": 0.5, "rewards/chosen": -6.267538070678711, "rewards/margins": -1.5422446727752686, "rewards/rejected": -4.7252936363220215, "step": 3099 }, { "epoch": 0.48, "learning_rate": 1.1873668758753048e-05, "logits/chosen": -1.9522597789764404, "logits/rejected": -3.2845911979675293, "logps/chosen": -293.114501953125, "logps/rejected": -632.7537841796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.7059638500213623, "rewards/margins": 6.533660411834717, "rewards/rejected": -9.2396240234375, "step": 3100 }, { "epoch": 0.48, "learning_rate": 1.18729353182219e-05, "logits/chosen": -2.108309030532837, "logits/rejected": -2.7719244956970215, "logps/chosen": -179.78904724121094, "logps/rejected": -189.88961791992188, "loss": 1.8238, "rewards/accuracies": 0.5, "rewards/chosen": -4.316491603851318, "rewards/margins": 1.5014206171035767, "rewards/rejected": -5.817912578582764, "step": 3101 }, { "epoch": 0.48, "learning_rate": 1.1872201877690752e-05, "logits/chosen": -2.2624270915985107, "logits/rejected": -3.1909005641937256, "logps/chosen": -48.07417297363281, "logps/rejected": -253.39260864257812, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.4189300537109375, "rewards/margins": 6.794192314147949, "rewards/rejected": -8.213122367858887, "step": 3102 }, { "epoch": 0.48, "learning_rate": 1.1871468437159603e-05, "logits/chosen": -2.456653118133545, "logits/rejected": -3.004974126815796, "logps/chosen": -258.4237060546875, "logps/rejected": -496.2037048339844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1895618438720703, "rewards/margins": 8.300859451293945, "rewards/rejected": -9.490421295166016, "step": 3103 }, { "epoch": 0.48, "learning_rate": 1.1870734996628455e-05, "logits/chosen": -2.432548999786377, "logits/rejected": -3.0998010635375977, "logps/chosen": -144.0848846435547, "logps/rejected": -467.0762634277344, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.0512242317199707, "rewards/margins": 8.197128295898438, "rewards/rejected": -10.24835205078125, "step": 3104 }, { "epoch": 0.48, "learning_rate": 1.1870001556097309e-05, "logits/chosen": -3.1086366176605225, "logits/rejected": -2.693685531616211, "logps/chosen": -147.44534301757812, "logps/rejected": -59.375343322753906, "loss": 3.3443, "rewards/accuracies": 0.0, "rewards/chosen": -5.3814496994018555, "rewards/margins": -3.2394416332244873, "rewards/rejected": -2.142008066177368, "step": 3105 }, { "epoch": 0.48, "learning_rate": 1.186926811556616e-05, "logits/chosen": -2.9957942962646484, "logits/rejected": -3.0134310722351074, "logps/chosen": -928.94091796875, "logps/rejected": -645.6558837890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.361525058746338, "rewards/margins": 6.477998733520508, "rewards/rejected": -7.8395233154296875, "step": 3106 }, { "epoch": 0.48, "learning_rate": 1.1868534675035013e-05, "logits/chosen": -2.9324512481689453, "logits/rejected": -2.0241074562072754, "logps/chosen": -415.9934387207031, "logps/rejected": -262.4700012207031, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.9466904401779175, "rewards/margins": 4.870770454406738, "rewards/rejected": -6.817461013793945, "step": 3107 }, { "epoch": 0.48, "learning_rate": 1.1867801234503864e-05, "logits/chosen": -2.472522020339966, "logits/rejected": -2.9503374099731445, "logps/chosen": -941.4171142578125, "logps/rejected": -739.3160400390625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.8806915283203125, "rewards/margins": 7.3737640380859375, "rewards/rejected": -10.25445556640625, "step": 3108 }, { "epoch": 0.48, "learning_rate": 1.1867067793972716e-05, "logits/chosen": -0.45107585191726685, "logits/rejected": -1.6870490312576294, "logps/chosen": -170.6405029296875, "logps/rejected": -270.34228515625, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.0964417457580566, "rewards/margins": 6.749259948730469, "rewards/rejected": -7.845702171325684, "step": 3109 }, { "epoch": 0.48, "learning_rate": 1.1866334353441568e-05, "logits/chosen": -1.6836005449295044, "logits/rejected": -2.6938066482543945, "logps/chosen": -150.700927734375, "logps/rejected": -244.86737060546875, "loss": 1.782, "rewards/accuracies": 0.5, "rewards/chosen": -3.258258104324341, "rewards/margins": 0.9418879747390747, "rewards/rejected": -4.200146198272705, "step": 3110 }, { "epoch": 0.48, "learning_rate": 1.186560091291042e-05, "logits/chosen": -2.1165060997009277, "logits/rejected": -3.064073324203491, "logps/chosen": -352.8021240234375, "logps/rejected": -500.1390686035156, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.5153411626815796, "rewards/margins": 6.185632705688477, "rewards/rejected": -7.7009735107421875, "step": 3111 }, { "epoch": 0.48, "learning_rate": 1.1864867472379272e-05, "logits/chosen": -2.8905181884765625, "logits/rejected": -1.931869626045227, "logps/chosen": -401.16632080078125, "logps/rejected": -214.69314575195312, "loss": 6.9571, "rewards/accuracies": 0.0, "rewards/chosen": -8.195266723632812, "rewards/margins": -6.956025123596191, "rewards/rejected": -1.2392414808273315, "step": 3112 }, { "epoch": 0.48, "learning_rate": 1.1864134031848124e-05, "logits/chosen": -2.8375916481018066, "logits/rejected": -2.7780966758728027, "logps/chosen": -281.2149353027344, "logps/rejected": -191.541748046875, "loss": 1.8832, "rewards/accuracies": 0.5, "rewards/chosen": -4.2542572021484375, "rewards/margins": 1.710992455482483, "rewards/rejected": -5.965249538421631, "step": 3113 }, { "epoch": 0.48, "learning_rate": 1.1863400591316977e-05, "logits/chosen": -2.103785276412964, "logits/rejected": -2.65346097946167, "logps/chosen": -162.44595336914062, "logps/rejected": -202.7802734375, "loss": 0.8967, "rewards/accuracies": 0.5, "rewards/chosen": -2.3638811111450195, "rewards/margins": 2.7934763431549072, "rewards/rejected": -5.157357215881348, "step": 3114 }, { "epoch": 0.48, "learning_rate": 1.186266715078583e-05, "logits/chosen": -2.4240610599517822, "logits/rejected": -2.5811846256256104, "logps/chosen": -125.41558074951172, "logps/rejected": -320.982421875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.4616920948028564, "rewards/margins": 6.316000938415527, "rewards/rejected": -8.777692794799805, "step": 3115 }, { "epoch": 0.48, "learning_rate": 1.1861933710254681e-05, "logits/chosen": -1.291016697883606, "logits/rejected": -2.8403375148773193, "logps/chosen": -60.6868782043457, "logps/rejected": -305.6025390625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.4178493022918701, "rewards/margins": 7.221488952636719, "rewards/rejected": -8.639338493347168, "step": 3116 }, { "epoch": 0.48, "learning_rate": 1.1861200269723533e-05, "logits/chosen": -3.030726432800293, "logits/rejected": -1.8469687700271606, "logps/chosen": -555.6478271484375, "logps/rejected": -362.86688232421875, "loss": 2.7059, "rewards/accuracies": 0.5, "rewards/chosen": -3.7508544921875, "rewards/margins": 0.28966546058654785, "rewards/rejected": -4.040519714355469, "step": 3117 }, { "epoch": 0.48, "learning_rate": 1.1860466829192385e-05, "logits/chosen": -2.8297460079193115, "logits/rejected": -3.20949387550354, "logps/chosen": -343.06060791015625, "logps/rejected": -575.2122802734375, "loss": 3.0957, "rewards/accuracies": 0.5, "rewards/chosen": -4.771738529205322, "rewards/margins": -1.6493889093399048, "rewards/rejected": -3.122349500656128, "step": 3118 }, { "epoch": 0.49, "learning_rate": 1.1859733388661237e-05, "logits/chosen": -2.6891286373138428, "logits/rejected": -2.3135926723480225, "logps/chosen": -429.3108215332031, "logps/rejected": -468.8350830078125, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.782677412033081, "rewards/margins": 5.3306193351745605, "rewards/rejected": -7.1132965087890625, "step": 3119 }, { "epoch": 0.49, "learning_rate": 1.1858999948130089e-05, "logits/chosen": -2.909024477005005, "logits/rejected": -2.634181022644043, "logps/chosen": -505.77349853515625, "logps/rejected": -467.64801025390625, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -2.3174147605895996, "rewards/margins": 4.081568241119385, "rewards/rejected": -6.398983001708984, "step": 3120 }, { "epoch": 0.49, "learning_rate": 1.185826650759894e-05, "logits/chosen": -2.95112681388855, "logits/rejected": -1.8998206853866577, "logps/chosen": -182.05361938476562, "logps/rejected": -102.25127410888672, "loss": 2.8849, "rewards/accuracies": 0.5, "rewards/chosen": -3.8011550903320312, "rewards/margins": -0.9235035181045532, "rewards/rejected": -2.8776516914367676, "step": 3121 }, { "epoch": 0.49, "learning_rate": 1.1857533067067794e-05, "logits/chosen": -2.5507311820983887, "logits/rejected": -2.5254364013671875, "logps/chosen": -530.9984130859375, "logps/rejected": -415.62451171875, "loss": 0.0709, "rewards/accuracies": 1.0, "rewards/chosen": -1.5155251026153564, "rewards/margins": 5.873679161071777, "rewards/rejected": -7.389204025268555, "step": 3122 }, { "epoch": 0.49, "learning_rate": 1.1856799626536646e-05, "logits/chosen": -2.2409188747406006, "logits/rejected": -2.884904384613037, "logps/chosen": -114.63378143310547, "logps/rejected": -384.0693054199219, "loss": 0.1783, "rewards/accuracies": 1.0, "rewards/chosen": -1.8050378561019897, "rewards/margins": 4.5627570152282715, "rewards/rejected": -6.367794990539551, "step": 3123 }, { "epoch": 0.49, "learning_rate": 1.18560661860055e-05, "logits/chosen": -1.7853078842163086, "logits/rejected": -2.714595317840576, "logps/chosen": -192.35528564453125, "logps/rejected": -377.97027587890625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.8873317241668701, "rewards/margins": 5.735679626464844, "rewards/rejected": -7.623011589050293, "step": 3124 }, { "epoch": 0.49, "learning_rate": 1.1855332745474351e-05, "logits/chosen": -2.97489070892334, "logits/rejected": -3.366374969482422, "logps/chosen": -145.1794891357422, "logps/rejected": -301.5284423828125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.7528733015060425, "rewards/margins": 5.612545013427734, "rewards/rejected": -7.365418434143066, "step": 3125 }, { "epoch": 0.49, "learning_rate": 1.1854599304943203e-05, "logits/chosen": -1.9652060270309448, "logits/rejected": -3.0762245655059814, "logps/chosen": -224.743896484375, "logps/rejected": -361.97650146484375, "loss": 2.2058, "rewards/accuracies": 0.5, "rewards/chosen": -3.6180384159088135, "rewards/margins": -1.0165154933929443, "rewards/rejected": -2.601522922515869, "step": 3126 }, { "epoch": 0.49, "learning_rate": 1.1853865864412055e-05, "logits/chosen": -2.5800161361694336, "logits/rejected": -2.566100597381592, "logps/chosen": -187.62994384765625, "logps/rejected": -256.3728942871094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0193222761154175, "rewards/margins": 7.243452548980713, "rewards/rejected": -8.262774467468262, "step": 3127 }, { "epoch": 0.49, "learning_rate": 1.1853132423880907e-05, "logits/chosen": -2.7879137992858887, "logits/rejected": -0.9883548617362976, "logps/chosen": -293.18707275390625, "logps/rejected": -123.54893493652344, "loss": 1.2487, "rewards/accuracies": 0.5, "rewards/chosen": -4.023266792297363, "rewards/margins": 0.3575938940048218, "rewards/rejected": -4.380860328674316, "step": 3128 }, { "epoch": 0.49, "learning_rate": 1.1852398983349759e-05, "logits/chosen": -1.807092308998108, "logits/rejected": -2.9748027324676514, "logps/chosen": -80.31097412109375, "logps/rejected": -232.8114013671875, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -2.458484172821045, "rewards/margins": 3.469362258911133, "rewards/rejected": -5.927846431732178, "step": 3129 }, { "epoch": 0.49, "learning_rate": 1.1851665542818611e-05, "logits/chosen": -2.613361358642578, "logits/rejected": -2.854607105255127, "logps/chosen": -90.42947387695312, "logps/rejected": -281.3792724609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.5704498291015625, "rewards/margins": 7.18442440032959, "rewards/rejected": -8.754873275756836, "step": 3130 }, { "epoch": 0.49, "learning_rate": 1.1850932102287463e-05, "logits/chosen": -2.481199026107788, "logits/rejected": -3.1863722801208496, "logps/chosen": -305.2915344238281, "logps/rejected": -349.3551330566406, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -4.053128719329834, "rewards/margins": 4.072516441345215, "rewards/rejected": -8.12564468383789, "step": 3131 }, { "epoch": 0.49, "learning_rate": 1.1850198661756316e-05, "logits/chosen": -2.246910572052002, "logits/rejected": -2.7610890865325928, "logps/chosen": -62.19352722167969, "logps/rejected": -231.12310791015625, "loss": 0.1252, "rewards/accuracies": 1.0, "rewards/chosen": -2.0863075256347656, "rewards/margins": 4.846196174621582, "rewards/rejected": -6.932503700256348, "step": 3132 }, { "epoch": 0.49, "learning_rate": 1.1849465221225168e-05, "logits/chosen": -1.8997266292572021, "logits/rejected": -3.0961122512817383, "logps/chosen": -59.52703857421875, "logps/rejected": -448.191650390625, "loss": 0.8675, "rewards/accuracies": 0.5, "rewards/chosen": -3.2083964347839355, "rewards/margins": 1.2982264757156372, "rewards/rejected": -4.506623268127441, "step": 3133 }, { "epoch": 0.49, "learning_rate": 1.184873178069402e-05, "logits/chosen": -1.5389074087142944, "logits/rejected": -2.7941815853118896, "logps/chosen": -108.0562744140625, "logps/rejected": -323.707275390625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -2.45249080657959, "rewards/margins": 6.594461917877197, "rewards/rejected": -9.046953201293945, "step": 3134 }, { "epoch": 0.49, "learning_rate": 1.1847998340162872e-05, "logits/chosen": -2.010385036468506, "logits/rejected": -2.953914165496826, "logps/chosen": -165.2403106689453, "logps/rejected": -380.3661193847656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.6247239112854004, "rewards/margins": 8.07483959197998, "rewards/rejected": -9.699563026428223, "step": 3135 }, { "epoch": 0.49, "learning_rate": 1.1847264899631724e-05, "logits/chosen": -2.3600704669952393, "logits/rejected": -3.1038217544555664, "logps/chosen": -137.59786987304688, "logps/rejected": -301.40533447265625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.9832683801651, "rewards/margins": 5.958287239074707, "rewards/rejected": -7.941555976867676, "step": 3136 }, { "epoch": 0.49, "learning_rate": 1.1846531459100576e-05, "logits/chosen": -3.3214166164398193, "logits/rejected": -3.243337631225586, "logps/chosen": -108.1688003540039, "logps/rejected": -113.34650421142578, "loss": 2.4094, "rewards/accuracies": 0.5, "rewards/chosen": -5.203970909118652, "rewards/margins": 0.3662757873535156, "rewards/rejected": -5.570246696472168, "step": 3137 }, { "epoch": 0.49, "learning_rate": 1.1845798018569428e-05, "logits/chosen": -3.284230947494507, "logits/rejected": -3.024914503097534, "logps/chosen": -365.674560546875, "logps/rejected": -244.7468719482422, "loss": 2.8749, "rewards/accuracies": 0.5, "rewards/chosen": -5.556231498718262, "rewards/margins": 1.1573915481567383, "rewards/rejected": -6.713623046875, "step": 3138 }, { "epoch": 0.49, "learning_rate": 1.184506457803828e-05, "logits/chosen": -2.8120572566986084, "logits/rejected": -3.080930233001709, "logps/chosen": -47.459110260009766, "logps/rejected": -161.178955078125, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -2.75258469581604, "rewards/margins": 3.377929925918579, "rewards/rejected": -6.130514621734619, "step": 3139 }, { "epoch": 0.49, "learning_rate": 1.1844331137507131e-05, "logits/chosen": -2.678866147994995, "logits/rejected": -2.805112600326538, "logps/chosen": -200.95111083984375, "logps/rejected": -297.8384704589844, "loss": 3.3857, "rewards/accuracies": 0.5, "rewards/chosen": -5.654065132141113, "rewards/margins": -0.4605255126953125, "rewards/rejected": -5.193539619445801, "step": 3140 }, { "epoch": 0.49, "learning_rate": 1.1843597696975985e-05, "logits/chosen": -2.5324387550354004, "logits/rejected": -2.6830928325653076, "logps/chosen": -119.85504150390625, "logps/rejected": -232.9146728515625, "loss": 0.1989, "rewards/accuracies": 1.0, "rewards/chosen": -1.988020658493042, "rewards/margins": 5.416803359985352, "rewards/rejected": -7.4048237800598145, "step": 3141 }, { "epoch": 0.49, "learning_rate": 1.1842864256444837e-05, "logits/chosen": -3.164726495742798, "logits/rejected": -3.411146640777588, "logps/chosen": -47.89292907714844, "logps/rejected": -166.3892364501953, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.979956865310669, "rewards/margins": 6.291773796081543, "rewards/rejected": -9.271730422973633, "step": 3142 }, { "epoch": 0.49, "learning_rate": 1.1842130815913689e-05, "logits/chosen": -3.3609695434570312, "logits/rejected": -3.165717363357544, "logps/chosen": -107.75270080566406, "logps/rejected": -220.8445587158203, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.2847073078155518, "rewards/margins": 6.802858352661133, "rewards/rejected": -9.087565422058105, "step": 3143 }, { "epoch": 0.49, "learning_rate": 1.184139737538254e-05, "logits/chosen": -2.9722671508789062, "logits/rejected": -3.113191604614258, "logps/chosen": -191.48260498046875, "logps/rejected": -310.7711181640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.718621015548706, "rewards/margins": 7.256267070770264, "rewards/rejected": -8.97488784790039, "step": 3144 }, { "epoch": 0.49, "learning_rate": 1.1840663934851392e-05, "logits/chosen": -2.2906525135040283, "logits/rejected": -3.024015426635742, "logps/chosen": -241.65777587890625, "logps/rejected": -403.1561279296875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.6903610229492188, "rewards/margins": 4.766243934631348, "rewards/rejected": -6.456604957580566, "step": 3145 }, { "epoch": 0.49, "learning_rate": 1.1839930494320244e-05, "logits/chosen": -3.17236065864563, "logits/rejected": -2.231151819229126, "logps/chosen": -268.43682861328125, "logps/rejected": -169.97909545898438, "loss": 1.8447, "rewards/accuracies": 0.5, "rewards/chosen": -4.305267333984375, "rewards/margins": 0.6142696142196655, "rewards/rejected": -4.91953706741333, "step": 3146 }, { "epoch": 0.49, "learning_rate": 1.1839197053789096e-05, "logits/chosen": -2.9567060470581055, "logits/rejected": -3.1810247898101807, "logps/chosen": -167.4916534423828, "logps/rejected": -234.970947265625, "loss": 0.0761, "rewards/accuracies": 1.0, "rewards/chosen": -2.0517287254333496, "rewards/margins": 3.099454879760742, "rewards/rejected": -5.151183605194092, "step": 3147 }, { "epoch": 0.49, "learning_rate": 1.1838463613257948e-05, "logits/chosen": -2.0804696083068848, "logits/rejected": -2.7162654399871826, "logps/chosen": -108.43321228027344, "logps/rejected": -236.1259765625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.642828941345215, "rewards/margins": 5.786668300628662, "rewards/rejected": -8.429496765136719, "step": 3148 }, { "epoch": 0.49, "learning_rate": 1.1837730172726802e-05, "logits/chosen": -2.9766759872436523, "logits/rejected": -3.329643726348877, "logps/chosen": -163.6085662841797, "logps/rejected": -201.8875732421875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8225761651992798, "rewards/margins": 5.825254917144775, "rewards/rejected": -6.647830963134766, "step": 3149 }, { "epoch": 0.49, "learning_rate": 1.1836996732195654e-05, "logits/chosen": -1.5217626094818115, "logits/rejected": -2.994884967803955, "logps/chosen": -75.36170196533203, "logps/rejected": -241.13424682617188, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -4.071993350982666, "rewards/margins": 4.363551139831543, "rewards/rejected": -8.43554401397705, "step": 3150 }, { "epoch": 0.49, "learning_rate": 1.1836263291664505e-05, "logits/chosen": -2.6040210723876953, "logits/rejected": -3.0343058109283447, "logps/chosen": -577.4783325195312, "logps/rejected": -768.1942138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9136784076690674, "rewards/margins": 11.922357559204102, "rewards/rejected": -13.836034774780273, "step": 3151 }, { "epoch": 0.49, "learning_rate": 1.1835529851133357e-05, "logits/chosen": -2.352952241897583, "logits/rejected": -1.8960527181625366, "logps/chosen": -361.6737976074219, "logps/rejected": -346.94073486328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.486983060836792, "rewards/margins": 8.721439361572266, "rewards/rejected": -11.208422660827637, "step": 3152 }, { "epoch": 0.49, "learning_rate": 1.183479641060221e-05, "logits/chosen": -2.2175865173339844, "logits/rejected": -2.84657621383667, "logps/chosen": -118.89566802978516, "logps/rejected": -259.6351318359375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.6534578800201416, "rewards/margins": 6.467189788818359, "rewards/rejected": -8.120647430419922, "step": 3153 }, { "epoch": 0.49, "learning_rate": 1.1834062970071061e-05, "logits/chosen": -1.8672252893447876, "logits/rejected": -2.961939811706543, "logps/chosen": -310.1338806152344, "logps/rejected": -444.0127868652344, "loss": 2.8074, "rewards/accuracies": 0.5, "rewards/chosen": -5.383389472961426, "rewards/margins": 1.2681729793548584, "rewards/rejected": -6.651562690734863, "step": 3154 }, { "epoch": 0.49, "learning_rate": 1.1833329529539913e-05, "logits/chosen": -3.004978895187378, "logits/rejected": -3.008411407470703, "logps/chosen": -141.35166931152344, "logps/rejected": -181.30392456054688, "loss": 3.1575, "rewards/accuracies": 0.5, "rewards/chosen": -5.3182291984558105, "rewards/margins": 0.4458041191101074, "rewards/rejected": -5.764033317565918, "step": 3155 }, { "epoch": 0.49, "learning_rate": 1.1832596089008766e-05, "logits/chosen": -0.926936686038971, "logits/rejected": -2.9000487327575684, "logps/chosen": -102.95329284667969, "logps/rejected": -497.95233154296875, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": -1.5278191566467285, "rewards/margins": 3.710387706756592, "rewards/rejected": -5.23820686340332, "step": 3156 }, { "epoch": 0.49, "learning_rate": 1.1831862648477618e-05, "logits/chosen": -2.079158067703247, "logits/rejected": -2.745474100112915, "logps/chosen": -145.44992065429688, "logps/rejected": -246.36187744140625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.3542208671569824, "rewards/margins": 5.617501735687256, "rewards/rejected": -7.971722602844238, "step": 3157 }, { "epoch": 0.49, "learning_rate": 1.1831129207946472e-05, "logits/chosen": -2.7441904544830322, "logits/rejected": -2.9935462474823, "logps/chosen": -105.24120330810547, "logps/rejected": -227.01968383789062, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.9895853400230408, "rewards/margins": 5.285910606384277, "rewards/rejected": -6.275495529174805, "step": 3158 }, { "epoch": 0.49, "learning_rate": 1.1830395767415324e-05, "logits/chosen": -2.9717001914978027, "logits/rejected": -3.3153302669525146, "logps/chosen": -276.96185302734375, "logps/rejected": -367.9674072265625, "loss": 0.0657, "rewards/accuracies": 1.0, "rewards/chosen": -1.984391689300537, "rewards/margins": 2.7769713401794434, "rewards/rejected": -4.7613630294799805, "step": 3159 }, { "epoch": 0.49, "learning_rate": 1.1829662326884176e-05, "logits/chosen": -3.0919620990753174, "logits/rejected": -3.124387502670288, "logps/chosen": -187.16854858398438, "logps/rejected": -224.86183166503906, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -2.0439083576202393, "rewards/margins": 3.7112271785736084, "rewards/rejected": -5.755135536193848, "step": 3160 }, { "epoch": 0.49, "learning_rate": 1.1828928886353028e-05, "logits/chosen": -2.6848554611206055, "logits/rejected": -2.692636251449585, "logps/chosen": -59.56703186035156, "logps/rejected": -179.84768676757812, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -2.2070069313049316, "rewards/margins": 4.4691643714904785, "rewards/rejected": -6.67617130279541, "step": 3161 }, { "epoch": 0.49, "learning_rate": 1.182819544582188e-05, "logits/chosen": -1.1309645175933838, "logits/rejected": -2.8839457035064697, "logps/chosen": -150.86373901367188, "logps/rejected": -489.8138122558594, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.0162084102630615, "rewards/margins": 4.82581901550293, "rewards/rejected": -6.84202766418457, "step": 3162 }, { "epoch": 0.49, "learning_rate": 1.1827462005290731e-05, "logits/chosen": -2.9202821254730225, "logits/rejected": -3.4371280670166016, "logps/chosen": -197.25477600097656, "logps/rejected": -365.29302978515625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.4144591093063354, "rewards/margins": 5.264573097229004, "rewards/rejected": -6.679032325744629, "step": 3163 }, { "epoch": 0.49, "learning_rate": 1.1826728564759583e-05, "logits/chosen": -2.995109796524048, "logits/rejected": -3.0795676708221436, "logps/chosen": -72.14061737060547, "logps/rejected": -170.57102966308594, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.9876593351364136, "rewards/margins": 5.556188583374023, "rewards/rejected": -7.543848037719727, "step": 3164 }, { "epoch": 0.49, "learning_rate": 1.1825995124228435e-05, "logits/chosen": -2.6206705570220947, "logits/rejected": -3.1116750240325928, "logps/chosen": -169.32371520996094, "logps/rejected": -321.3752136230469, "loss": 0.383, "rewards/accuracies": 0.5, "rewards/chosen": -1.8195282220840454, "rewards/margins": 3.789510488510132, "rewards/rejected": -5.609038829803467, "step": 3165 }, { "epoch": 0.49, "learning_rate": 1.1825261683697287e-05, "logits/chosen": -2.165644645690918, "logits/rejected": -2.700413703918457, "logps/chosen": -62.308738708496094, "logps/rejected": -194.81723022460938, "loss": 0.1742, "rewards/accuracies": 1.0, "rewards/chosen": -2.7540080547332764, "rewards/margins": 4.629988193511963, "rewards/rejected": -7.38399600982666, "step": 3166 }, { "epoch": 0.49, "learning_rate": 1.182452824316614e-05, "logits/chosen": -2.9884867668151855, "logits/rejected": -2.9974570274353027, "logps/chosen": -183.63441467285156, "logps/rejected": -420.5216369628906, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": -1.1437690258026123, "rewards/margins": 6.644857406616211, "rewards/rejected": -7.788626670837402, "step": 3167 }, { "epoch": 0.49, "learning_rate": 1.1823794802634992e-05, "logits/chosen": -3.1817283630371094, "logits/rejected": -1.717136263847351, "logps/chosen": -453.1629333496094, "logps/rejected": -195.51519775390625, "loss": 0.0452, "rewards/accuracies": 1.0, "rewards/chosen": -1.548091173171997, "rewards/margins": 4.5176873207092285, "rewards/rejected": -6.065778732299805, "step": 3168 }, { "epoch": 0.49, "learning_rate": 1.1823061362103844e-05, "logits/chosen": -2.9147331714630127, "logits/rejected": -3.031886100769043, "logps/chosen": -192.13009643554688, "logps/rejected": -304.93731689453125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -2.156388521194458, "rewards/margins": 4.5558013916015625, "rewards/rejected": -6.712189674377441, "step": 3169 }, { "epoch": 0.49, "learning_rate": 1.1822327921572696e-05, "logits/chosen": -3.1218419075012207, "logits/rejected": -1.4862793684005737, "logps/chosen": -319.2906188964844, "logps/rejected": -126.13168334960938, "loss": 1.7373, "rewards/accuracies": 0.0, "rewards/chosen": -3.8281707763671875, "rewards/margins": -1.521986961364746, "rewards/rejected": -2.3061840534210205, "step": 3170 }, { "epoch": 0.49, "learning_rate": 1.1821594481041548e-05, "logits/chosen": -2.254762649536133, "logits/rejected": -2.63124942779541, "logps/chosen": -106.29953002929688, "logps/rejected": -264.3909912109375, "loss": 0.0245, "rewards/accuracies": 1.0, "rewards/chosen": -2.494694232940674, "rewards/margins": 5.153072357177734, "rewards/rejected": -7.647766590118408, "step": 3171 }, { "epoch": 0.49, "learning_rate": 1.18208610405104e-05, "logits/chosen": -2.833906650543213, "logits/rejected": -3.398200750350952, "logps/chosen": -162.65774536132812, "logps/rejected": -348.9092712402344, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.039614677429199, "rewards/margins": 5.58765983581543, "rewards/rejected": -7.627274513244629, "step": 3172 }, { "epoch": 0.49, "learning_rate": 1.1820127599979252e-05, "logits/chosen": -2.75750732421875, "logits/rejected": -3.0207204818725586, "logps/chosen": -195.63711547851562, "logps/rejected": -307.63751220703125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -3.1914398670196533, "rewards/margins": 6.093984127044678, "rewards/rejected": -9.28542423248291, "step": 3173 }, { "epoch": 0.49, "learning_rate": 1.1819394159448104e-05, "logits/chosen": -2.828970432281494, "logits/rejected": -2.7695813179016113, "logps/chosen": -287.33050537109375, "logps/rejected": -250.71267700195312, "loss": 3.5554, "rewards/accuracies": 0.5, "rewards/chosen": -5.864447593688965, "rewards/margins": -0.43756842613220215, "rewards/rejected": -5.426879405975342, "step": 3174 }, { "epoch": 0.49, "learning_rate": 1.1818660718916956e-05, "logits/chosen": -2.6763904094696045, "logits/rejected": -2.000464916229248, "logps/chosen": -437.51678466796875, "logps/rejected": -294.8063659667969, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -3.614931583404541, "rewards/margins": 4.04149866104126, "rewards/rejected": -7.656430244445801, "step": 3175 }, { "epoch": 0.49, "learning_rate": 1.1817927278385809e-05, "logits/chosen": -2.7851755619049072, "logits/rejected": -2.690084934234619, "logps/chosen": -310.90966796875, "logps/rejected": -378.7336730957031, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.0625815391540527, "rewards/margins": 5.751959800720215, "rewards/rejected": -7.814541816711426, "step": 3176 }, { "epoch": 0.49, "learning_rate": 1.1817193837854661e-05, "logits/chosen": -1.6011722087860107, "logits/rejected": -2.944622755050659, "logps/chosen": -118.48151397705078, "logps/rejected": -305.61932373046875, "loss": 4.4081, "rewards/accuracies": 0.5, "rewards/chosen": -7.176170825958252, "rewards/margins": -2.430236339569092, "rewards/rejected": -4.74593448638916, "step": 3177 }, { "epoch": 0.49, "learning_rate": 1.1816460397323513e-05, "logits/chosen": -2.4363770484924316, "logits/rejected": -2.829261064529419, "logps/chosen": -84.89247131347656, "logps/rejected": -307.5062255859375, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -1.8187350034713745, "rewards/margins": 4.579950332641602, "rewards/rejected": -6.398685455322266, "step": 3178 }, { "epoch": 0.49, "learning_rate": 1.1815726956792365e-05, "logits/chosen": -2.1978118419647217, "logits/rejected": -2.8270621299743652, "logps/chosen": -181.59226989746094, "logps/rejected": -210.81396484375, "loss": 2.692, "rewards/accuracies": 0.5, "rewards/chosen": -4.98708438873291, "rewards/margins": 0.4890577793121338, "rewards/rejected": -5.476142406463623, "step": 3179 }, { "epoch": 0.49, "learning_rate": 1.1814993516261217e-05, "logits/chosen": -2.696011543273926, "logits/rejected": -2.9025344848632812, "logps/chosen": -40.90998077392578, "logps/rejected": -150.77252197265625, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -2.289595127105713, "rewards/margins": 4.309898376464844, "rewards/rejected": -6.599493503570557, "step": 3180 }, { "epoch": 0.49, "learning_rate": 1.1814260075730069e-05, "logits/chosen": -3.2354021072387695, "logits/rejected": -3.025911808013916, "logps/chosen": -131.273193359375, "logps/rejected": -74.66879272460938, "loss": 2.8249, "rewards/accuracies": 0.5, "rewards/chosen": -4.899758815765381, "rewards/margins": -1.2987655401229858, "rewards/rejected": -3.6009931564331055, "step": 3181 }, { "epoch": 0.49, "learning_rate": 1.181352663519892e-05, "logits/chosen": -3.3233132362365723, "logits/rejected": -3.285983085632324, "logps/chosen": -134.63238525390625, "logps/rejected": -160.9522705078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.2483253479003906, "rewards/margins": 6.531207084655762, "rewards/rejected": -7.779531955718994, "step": 3182 }, { "epoch": 0.5, "learning_rate": 1.1812793194667772e-05, "logits/chosen": -1.6937274932861328, "logits/rejected": -3.2208364009857178, "logps/chosen": -320.855712890625, "logps/rejected": -730.1312255859375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9645326137542725, "rewards/margins": 6.872912406921387, "rewards/rejected": -8.837445259094238, "step": 3183 }, { "epoch": 0.5, "learning_rate": 1.1812059754136624e-05, "logits/chosen": -2.765397548675537, "logits/rejected": -2.274535655975342, "logps/chosen": -807.6937255859375, "logps/rejected": -399.1564025878906, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.723724365234375, "rewards/margins": 6.164872646331787, "rewards/rejected": -7.888597011566162, "step": 3184 }, { "epoch": 0.5, "learning_rate": 1.1811326313605478e-05, "logits/chosen": -2.7640509605407715, "logits/rejected": -2.9568746089935303, "logps/chosen": -119.18251037597656, "logps/rejected": -219.95928955078125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.706880807876587, "rewards/margins": 4.7225823402404785, "rewards/rejected": -7.4294633865356445, "step": 3185 }, { "epoch": 0.5, "learning_rate": 1.181059287307433e-05, "logits/chosen": -2.622661828994751, "logits/rejected": -2.8228378295898438, "logps/chosen": -42.29815673828125, "logps/rejected": -182.91250610351562, "loss": 0.2268, "rewards/accuracies": 1.0, "rewards/chosen": -2.1207549571990967, "rewards/margins": 2.2218704223632812, "rewards/rejected": -4.342625617980957, "step": 3186 }, { "epoch": 0.5, "learning_rate": 1.1809859432543181e-05, "logits/chosen": -2.423731803894043, "logits/rejected": -2.757260799407959, "logps/chosen": -265.4132080078125, "logps/rejected": -297.42535400390625, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.8573083877563477, "rewards/margins": 5.354180335998535, "rewards/rejected": -7.211488723754883, "step": 3187 }, { "epoch": 0.5, "learning_rate": 1.1809125992012033e-05, "logits/chosen": -2.221778631210327, "logits/rejected": -2.838071823120117, "logps/chosen": -58.84342956542969, "logps/rejected": -133.2503662109375, "loss": 0.3949, "rewards/accuracies": 0.5, "rewards/chosen": -2.972376585006714, "rewards/margins": 2.984670400619507, "rewards/rejected": -5.957046985626221, "step": 3188 }, { "epoch": 0.5, "learning_rate": 1.1808392551480885e-05, "logits/chosen": -2.645582675933838, "logits/rejected": -3.0434634685516357, "logps/chosen": -765.30322265625, "logps/rejected": -715.01513671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8044769763946533, "rewards/margins": 8.401873588562012, "rewards/rejected": -10.206350326538086, "step": 3189 }, { "epoch": 0.5, "learning_rate": 1.1807659110949739e-05, "logits/chosen": -3.013991117477417, "logits/rejected": -1.7173283100128174, "logps/chosen": -673.40380859375, "logps/rejected": -378.9112548828125, "loss": 1.31, "rewards/accuracies": 0.5, "rewards/chosen": -2.760864496231079, "rewards/margins": 2.4592714309692383, "rewards/rejected": -5.2201361656188965, "step": 3190 }, { "epoch": 0.5, "learning_rate": 1.180692567041859e-05, "logits/chosen": -3.156921148300171, "logits/rejected": -3.241053342819214, "logps/chosen": -201.36390686035156, "logps/rejected": -300.6136779785156, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -3.3337833881378174, "rewards/margins": 4.8168439865112305, "rewards/rejected": -8.150627136230469, "step": 3191 }, { "epoch": 0.5, "learning_rate": 1.1806192229887443e-05, "logits/chosen": -2.2417376041412354, "logits/rejected": -2.641390323638916, "logps/chosen": -350.17431640625, "logps/rejected": -356.4627685546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.708423137664795, "rewards/margins": 7.233852863311768, "rewards/rejected": -10.942276000976562, "step": 3192 }, { "epoch": 0.5, "learning_rate": 1.1805458789356294e-05, "logits/chosen": -2.0910189151763916, "logits/rejected": -3.093647003173828, "logps/chosen": -123.69801330566406, "logps/rejected": -384.564453125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.7061810493469238, "rewards/margins": 5.672616958618164, "rewards/rejected": -7.378798484802246, "step": 3193 }, { "epoch": 0.5, "learning_rate": 1.1804725348825148e-05, "logits/chosen": -2.7481300830841064, "logits/rejected": -2.546635389328003, "logps/chosen": -211.1082763671875, "logps/rejected": -333.60821533203125, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -3.230539321899414, "rewards/margins": 4.654074668884277, "rewards/rejected": -7.884613990783691, "step": 3194 }, { "epoch": 0.5, "learning_rate": 1.1803991908294e-05, "logits/chosen": -1.880419135093689, "logits/rejected": -3.064859628677368, "logps/chosen": -120.75992584228516, "logps/rejected": -357.30084228515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.647613286972046, "rewards/margins": 6.6907501220703125, "rewards/rejected": -8.338363647460938, "step": 3195 }, { "epoch": 0.5, "learning_rate": 1.1803258467762852e-05, "logits/chosen": -2.8287010192871094, "logits/rejected": -2.7221994400024414, "logps/chosen": -268.912109375, "logps/rejected": -378.9578857421875, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -2.8415367603302, "rewards/margins": 4.779233932495117, "rewards/rejected": -7.620770454406738, "step": 3196 }, { "epoch": 0.5, "learning_rate": 1.1802525027231704e-05, "logits/chosen": -2.636190891265869, "logits/rejected": -2.796473264694214, "logps/chosen": -187.77804565429688, "logps/rejected": -353.31048583984375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.321063995361328, "rewards/margins": 5.893872261047363, "rewards/rejected": -10.214936256408691, "step": 3197 }, { "epoch": 0.5, "learning_rate": 1.1801791586700556e-05, "logits/chosen": -2.0109570026397705, "logits/rejected": -2.912222146987915, "logps/chosen": -150.11669921875, "logps/rejected": -448.3995056152344, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.4817692041397095, "rewards/margins": 8.453903198242188, "rewards/rejected": -9.935672760009766, "step": 3198 }, { "epoch": 0.5, "learning_rate": 1.1801058146169407e-05, "logits/chosen": -3.2261223793029785, "logits/rejected": -3.394547700881958, "logps/chosen": -64.88070678710938, "logps/rejected": -179.06256103515625, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.9551798105239868, "rewards/margins": 4.94691276550293, "rewards/rejected": -6.902091979980469, "step": 3199 }, { "epoch": 0.5, "learning_rate": 1.180032470563826e-05, "logits/chosen": -2.700270175933838, "logits/rejected": -2.171664237976074, "logps/chosen": -315.29217529296875, "logps/rejected": -279.02490234375, "loss": 0.1607, "rewards/accuracies": 1.0, "rewards/chosen": -2.027984619140625, "rewards/margins": 4.398877143859863, "rewards/rejected": -6.426861763000488, "step": 3200 }, { "epoch": 0.5, "learning_rate": 1.1799591265107111e-05, "logits/chosen": -2.585634231567383, "logits/rejected": -2.7790608406066895, "logps/chosen": -632.67041015625, "logps/rejected": -441.17022705078125, "loss": 3.1182, "rewards/accuracies": 0.5, "rewards/chosen": -4.226240634918213, "rewards/margins": 0.7160096168518066, "rewards/rejected": -4.9422502517700195, "step": 3201 }, { "epoch": 0.5, "learning_rate": 1.1798857824575963e-05, "logits/chosen": -2.857503890991211, "logits/rejected": -2.319817543029785, "logps/chosen": -125.11947631835938, "logps/rejected": -192.86721801757812, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.3987836837768555, "rewards/margins": 5.3505706787109375, "rewards/rejected": -7.749353885650635, "step": 3202 }, { "epoch": 0.5, "learning_rate": 1.1798124384044817e-05, "logits/chosen": -1.1529579162597656, "logits/rejected": -2.552907705307007, "logps/chosen": -238.3985595703125, "logps/rejected": -436.0715637207031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.942305088043213, "rewards/margins": 8.849822044372559, "rewards/rejected": -10.792126655578613, "step": 3203 }, { "epoch": 0.5, "learning_rate": 1.1797390943513668e-05, "logits/chosen": -2.9606833457946777, "logits/rejected": -3.0908100605010986, "logps/chosen": -366.2082214355469, "logps/rejected": -301.32122802734375, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -2.302406072616577, "rewards/margins": 3.670012950897217, "rewards/rejected": -5.972418785095215, "step": 3204 }, { "epoch": 0.5, "learning_rate": 1.179665750298252e-05, "logits/chosen": -2.671535015106201, "logits/rejected": -1.9965760707855225, "logps/chosen": -307.2548828125, "logps/rejected": -243.45472717285156, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.4172439575195312, "rewards/margins": 5.934246063232422, "rewards/rejected": -7.351490020751953, "step": 3205 }, { "epoch": 0.5, "learning_rate": 1.1795924062451372e-05, "logits/chosen": -2.193171501159668, "logits/rejected": -3.2264466285705566, "logps/chosen": -59.96738052368164, "logps/rejected": -403.55523681640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9506562948226929, "rewards/margins": 8.704466819763184, "rewards/rejected": -10.655122756958008, "step": 3206 }, { "epoch": 0.5, "learning_rate": 1.1795190621920224e-05, "logits/chosen": -3.275973320007324, "logits/rejected": -3.059109687805176, "logps/chosen": -137.86990356445312, "logps/rejected": -135.64852905273438, "loss": 0.7111, "rewards/accuracies": 0.5, "rewards/chosen": -2.8300881385803223, "rewards/margins": 1.8123540878295898, "rewards/rejected": -4.642442226409912, "step": 3207 }, { "epoch": 0.5, "learning_rate": 1.1794457181389076e-05, "logits/chosen": -1.8906525373458862, "logits/rejected": -3.2300198078155518, "logps/chosen": -73.92535400390625, "logps/rejected": -249.59657287597656, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.8031846284866333, "rewards/margins": 5.5113115310668945, "rewards/rejected": -7.314496040344238, "step": 3208 }, { "epoch": 0.5, "learning_rate": 1.1793723740857928e-05, "logits/chosen": -2.4284603595733643, "logits/rejected": -2.8441433906555176, "logps/chosen": -199.8966827392578, "logps/rejected": -189.7637939453125, "loss": 3.3568, "rewards/accuracies": 0.5, "rewards/chosen": -5.685766220092773, "rewards/margins": -0.4691295623779297, "rewards/rejected": -5.216636657714844, "step": 3209 }, { "epoch": 0.5, "learning_rate": 1.179299030032678e-05, "logits/chosen": -1.945345163345337, "logits/rejected": -3.1382696628570557, "logps/chosen": -80.4140396118164, "logps/rejected": -462.77508544921875, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -2.3447659015655518, "rewards/margins": 7.08817195892334, "rewards/rejected": -9.432937622070312, "step": 3210 }, { "epoch": 0.5, "learning_rate": 1.1792256859795632e-05, "logits/chosen": -3.159381866455078, "logits/rejected": -2.7471466064453125, "logps/chosen": -328.19793701171875, "logps/rejected": -294.7005615234375, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": -1.775341510772705, "rewards/margins": 3.095346450805664, "rewards/rejected": -4.870687961578369, "step": 3211 }, { "epoch": 0.5, "learning_rate": 1.1791523419264485e-05, "logits/chosen": -2.804170608520508, "logits/rejected": -3.0756173133850098, "logps/chosen": -349.4518737792969, "logps/rejected": -329.1554260253906, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -2.3186116218566895, "rewards/margins": 4.238989353179932, "rewards/rejected": -6.557600975036621, "step": 3212 }, { "epoch": 0.5, "learning_rate": 1.1790789978733337e-05, "logits/chosen": -3.2814903259277344, "logits/rejected": -2.3596622943878174, "logps/chosen": -558.1368408203125, "logps/rejected": -302.9935302734375, "loss": 10.0201, "rewards/accuracies": 0.0, "rewards/chosen": -12.019830703735352, "rewards/margins": -10.019680976867676, "rewards/rejected": -2.000149726867676, "step": 3213 }, { "epoch": 0.5, "learning_rate": 1.1790056538202189e-05, "logits/chosen": -2.835679531097412, "logits/rejected": -2.557147979736328, "logps/chosen": -135.95433044433594, "logps/rejected": -389.7782287597656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.5845615863800049, "rewards/margins": 11.280996322631836, "rewards/rejected": -12.865558624267578, "step": 3214 }, { "epoch": 0.5, "learning_rate": 1.1789323097671041e-05, "logits/chosen": -2.761502742767334, "logits/rejected": -2.6735751628875732, "logps/chosen": -428.1515197753906, "logps/rejected": -367.4241638183594, "loss": 0.9351, "rewards/accuracies": 0.5, "rewards/chosen": -3.8396358489990234, "rewards/margins": 2.4820098876953125, "rewards/rejected": -6.321645736694336, "step": 3215 }, { "epoch": 0.5, "learning_rate": 1.1788589657139893e-05, "logits/chosen": -1.5856965780258179, "logits/rejected": -2.745351791381836, "logps/chosen": -238.85775756835938, "logps/rejected": -410.9653625488281, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -3.154721260070801, "rewards/margins": 5.296953201293945, "rewards/rejected": -8.451674461364746, "step": 3216 }, { "epoch": 0.5, "learning_rate": 1.1787856216608745e-05, "logits/chosen": -1.9918824434280396, "logits/rejected": -2.6917457580566406, "logps/chosen": -135.83578491210938, "logps/rejected": -309.716796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.6495583057403564, "rewards/margins": 8.236474990844727, "rewards/rejected": -9.886033058166504, "step": 3217 }, { "epoch": 0.5, "learning_rate": 1.1787122776077596e-05, "logits/chosen": -2.12762713432312, "logits/rejected": -3.0208523273468018, "logps/chosen": -378.6343994140625, "logps/rejected": -574.6246337890625, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -2.0632894039154053, "rewards/margins": 5.809150695800781, "rewards/rejected": -7.872439384460449, "step": 3218 }, { "epoch": 0.5, "learning_rate": 1.1786389335546448e-05, "logits/chosen": -2.7916810512542725, "logits/rejected": -2.791128158569336, "logps/chosen": -199.46824645996094, "logps/rejected": -310.2880554199219, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.6556146144866943, "rewards/margins": 8.355213165283203, "rewards/rejected": -11.010828018188477, "step": 3219 }, { "epoch": 0.5, "learning_rate": 1.17856558950153e-05, "logits/chosen": -2.142512083053589, "logits/rejected": -2.6497585773468018, "logps/chosen": -138.557373046875, "logps/rejected": -312.15069580078125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9352726936340332, "rewards/margins": 6.491464614868164, "rewards/rejected": -8.426736831665039, "step": 3220 }, { "epoch": 0.5, "learning_rate": 1.1784922454484154e-05, "logits/chosen": -2.2076592445373535, "logits/rejected": -3.044118642807007, "logps/chosen": -31.892478942871094, "logps/rejected": -241.3399658203125, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -1.3974053859710693, "rewards/margins": 3.2672629356384277, "rewards/rejected": -4.664668083190918, "step": 3221 }, { "epoch": 0.5, "learning_rate": 1.1784189013953006e-05, "logits/chosen": -2.7168707847595215, "logits/rejected": -1.8255926370620728, "logps/chosen": -323.9495849609375, "logps/rejected": -115.26054382324219, "loss": 2.1894, "rewards/accuracies": 0.5, "rewards/chosen": -4.340949535369873, "rewards/margins": -0.16074919700622559, "rewards/rejected": -4.180200576782227, "step": 3222 }, { "epoch": 0.5, "learning_rate": 1.1783455573421858e-05, "logits/chosen": -3.049855947494507, "logits/rejected": -3.400820732116699, "logps/chosen": -40.3105583190918, "logps/rejected": -179.7622833251953, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": -2.4794797897338867, "rewards/margins": 3.122462749481201, "rewards/rejected": -5.601942539215088, "step": 3223 }, { "epoch": 0.5, "learning_rate": 1.1782722132890711e-05, "logits/chosen": -1.489540457725525, "logits/rejected": -2.726611852645874, "logps/chosen": -76.86430358886719, "logps/rejected": -474.43585205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1184977293014526, "rewards/margins": 11.258174896240234, "rewards/rejected": -12.376672744750977, "step": 3224 }, { "epoch": 0.5, "learning_rate": 1.1781988692359563e-05, "logits/chosen": -2.622530937194824, "logits/rejected": -2.331629991531372, "logps/chosen": -130.75596618652344, "logps/rejected": -363.3290710449219, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.892073631286621, "rewards/margins": 5.861624717712402, "rewards/rejected": -8.753698348999023, "step": 3225 }, { "epoch": 0.5, "learning_rate": 1.1781255251828415e-05, "logits/chosen": -2.523921489715576, "logits/rejected": -2.6956100463867188, "logps/chosen": -226.9699249267578, "logps/rejected": -379.48858642578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.636616587638855, "rewards/margins": 8.051952362060547, "rewards/rejected": -9.688569068908691, "step": 3226 }, { "epoch": 0.5, "learning_rate": 1.1780521811297267e-05, "logits/chosen": -2.550088405609131, "logits/rejected": -2.9086995124816895, "logps/chosen": -505.7969970703125, "logps/rejected": -566.1129150390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.570765733718872, "rewards/margins": 7.016589164733887, "rewards/rejected": -8.58735466003418, "step": 3227 }, { "epoch": 0.5, "learning_rate": 1.1779788370766119e-05, "logits/chosen": -2.3482255935668945, "logits/rejected": -2.837679624557495, "logps/chosen": -30.531644821166992, "logps/rejected": -155.83319091796875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.4346007108688354, "rewards/margins": 5.725528717041016, "rewards/rejected": -7.160129547119141, "step": 3228 }, { "epoch": 0.5, "learning_rate": 1.177905493023497e-05, "logits/chosen": -2.281497001647949, "logits/rejected": -2.8867411613464355, "logps/chosen": -450.7969055175781, "logps/rejected": -368.7726745605469, "loss": 4.3331, "rewards/accuracies": 0.5, "rewards/chosen": -6.888023376464844, "rewards/margins": -1.373931884765625, "rewards/rejected": -5.514091491699219, "step": 3229 }, { "epoch": 0.5, "learning_rate": 1.1778321489703824e-05, "logits/chosen": -2.716259717941284, "logits/rejected": -3.2177398204803467, "logps/chosen": -144.71218872070312, "logps/rejected": -303.6011962890625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.992050051689148, "rewards/margins": 5.465338706970215, "rewards/rejected": -6.457388877868652, "step": 3230 }, { "epoch": 0.5, "learning_rate": 1.1777588049172676e-05, "logits/chosen": -1.7874127626419067, "logits/rejected": -2.471067428588867, "logps/chosen": -114.61001586914062, "logps/rejected": -226.30091857910156, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.544529438018799, "rewards/margins": 4.917302131652832, "rewards/rejected": -7.461832046508789, "step": 3231 }, { "epoch": 0.5, "learning_rate": 1.1776854608641528e-05, "logits/chosen": -2.8588578701019287, "logits/rejected": -1.9106857776641846, "logps/chosen": -403.14892578125, "logps/rejected": -524.351318359375, "loss": 4.7409, "rewards/accuracies": 0.5, "rewards/chosen": -5.715702056884766, "rewards/margins": 1.380373477935791, "rewards/rejected": -7.096075534820557, "step": 3232 }, { "epoch": 0.5, "learning_rate": 1.177612116811038e-05, "logits/chosen": -2.773353099822998, "logits/rejected": -1.8507858514785767, "logps/chosen": -391.95672607421875, "logps/rejected": -321.39495849609375, "loss": 5.5702, "rewards/accuracies": 0.5, "rewards/chosen": -6.394827365875244, "rewards/margins": -1.5189881324768066, "rewards/rejected": -4.8758392333984375, "step": 3233 }, { "epoch": 0.5, "learning_rate": 1.1775387727579232e-05, "logits/chosen": -2.1261560916900635, "logits/rejected": -2.9868757724761963, "logps/chosen": -110.21987915039062, "logps/rejected": -449.0478820800781, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -3.1548755168914795, "rewards/margins": 7.58559513092041, "rewards/rejected": -10.740470886230469, "step": 3234 }, { "epoch": 0.5, "learning_rate": 1.1774654287048084e-05, "logits/chosen": -2.9944632053375244, "logits/rejected": -2.5625996589660645, "logps/chosen": -247.9109344482422, "logps/rejected": -210.03890991210938, "loss": 1.7084, "rewards/accuracies": 0.5, "rewards/chosen": -2.557131052017212, "rewards/margins": 2.3613028526306152, "rewards/rejected": -4.918433666229248, "step": 3235 }, { "epoch": 0.5, "learning_rate": 1.1773920846516935e-05, "logits/chosen": -2.6916861534118652, "logits/rejected": -3.026658058166504, "logps/chosen": -68.574951171875, "logps/rejected": -174.41744995117188, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.7228429317474365, "rewards/margins": 4.980425834655762, "rewards/rejected": -7.703269004821777, "step": 3236 }, { "epoch": 0.5, "learning_rate": 1.1773187405985787e-05, "logits/chosen": -3.004300355911255, "logits/rejected": -1.9848113059997559, "logps/chosen": -681.2003173828125, "logps/rejected": -505.01611328125, "loss": 1.9751, "rewards/accuracies": 0.5, "rewards/chosen": -2.6838135719299316, "rewards/margins": 2.71838641166687, "rewards/rejected": -5.402200222015381, "step": 3237 }, { "epoch": 0.5, "learning_rate": 1.1772453965454639e-05, "logits/chosen": -3.0119197368621826, "logits/rejected": -3.223383665084839, "logps/chosen": -89.45426940917969, "logps/rejected": -235.7295379638672, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.9905807375907898, "rewards/margins": 4.985739707946777, "rewards/rejected": -5.976320743560791, "step": 3238 }, { "epoch": 0.5, "learning_rate": 1.1771720524923493e-05, "logits/chosen": -3.055180311203003, "logits/rejected": -3.1501245498657227, "logps/chosen": -101.69817352294922, "logps/rejected": -187.98683166503906, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.9390739798545837, "rewards/margins": 6.012462615966797, "rewards/rejected": -6.951536655426025, "step": 3239 }, { "epoch": 0.5, "learning_rate": 1.1770987084392345e-05, "logits/chosen": -2.699101686477661, "logits/rejected": -2.816598415374756, "logps/chosen": -103.77838134765625, "logps/rejected": -415.32806396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5247658491134644, "rewards/margins": 9.225409507751465, "rewards/rejected": -10.750175476074219, "step": 3240 }, { "epoch": 0.5, "learning_rate": 1.1770253643861196e-05, "logits/chosen": -3.139472723007202, "logits/rejected": -2.7925174236297607, "logps/chosen": -803.1517333984375, "logps/rejected": -325.56268310546875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.6628447771072388, "rewards/margins": 6.040610313415527, "rewards/rejected": -7.703454971313477, "step": 3241 }, { "epoch": 0.5, "learning_rate": 1.1769520203330048e-05, "logits/chosen": -2.7786552906036377, "logits/rejected": -3.3252553939819336, "logps/chosen": -311.21697998046875, "logps/rejected": -476.1383056640625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.3612351417541504, "rewards/margins": 7.320948123931885, "rewards/rejected": -9.682183265686035, "step": 3242 }, { "epoch": 0.5, "learning_rate": 1.17687867627989e-05, "logits/chosen": -2.641247272491455, "logits/rejected": -2.7320353984832764, "logps/chosen": -264.82763671875, "logps/rejected": -578.628662109375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.4394745826721191, "rewards/margins": 8.802295684814453, "rewards/rejected": -10.24177074432373, "step": 3243 }, { "epoch": 0.5, "learning_rate": 1.1768053322267752e-05, "logits/chosen": -1.3306037187576294, "logits/rejected": -2.9644551277160645, "logps/chosen": -144.58544921875, "logps/rejected": -409.4757080078125, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.9798076152801514, "rewards/margins": 6.715723037719727, "rewards/rejected": -8.695530891418457, "step": 3244 }, { "epoch": 0.5, "learning_rate": 1.1767319881736604e-05, "logits/chosen": -2.4004876613616943, "logits/rejected": -2.7137508392333984, "logps/chosen": -95.55732727050781, "logps/rejected": -233.4954376220703, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": -1.8288867473602295, "rewards/margins": 2.534794330596924, "rewards/rejected": -4.363681316375732, "step": 3245 }, { "epoch": 0.5, "learning_rate": 1.1766586441205456e-05, "logits/chosen": -3.074774742126465, "logits/rejected": -2.8802602291107178, "logps/chosen": -437.7545166015625, "logps/rejected": -495.50213623046875, "loss": 2.2647, "rewards/accuracies": 0.5, "rewards/chosen": -4.132836818695068, "rewards/margins": 3.1093368530273438, "rewards/rejected": -7.24217414855957, "step": 3246 }, { "epoch": 0.5, "learning_rate": 1.176585300067431e-05, "logits/chosen": -3.00307559967041, "logits/rejected": -3.130242109298706, "logps/chosen": -378.0059814453125, "logps/rejected": -434.7911071777344, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.577925205230713, "rewards/margins": 5.9532151222229, "rewards/rejected": -8.531140327453613, "step": 3247 }, { "epoch": 0.51, "learning_rate": 1.1765119560143161e-05, "logits/chosen": -1.5837103128433228, "logits/rejected": -2.8090603351593018, "logps/chosen": -75.46883392333984, "logps/rejected": -190.95944213867188, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.7028785943984985, "rewards/margins": 5.681127548217773, "rewards/rejected": -7.384006023406982, "step": 3248 }, { "epoch": 0.51, "learning_rate": 1.1764386119612013e-05, "logits/chosen": -1.7329413890838623, "logits/rejected": -2.639821767807007, "logps/chosen": -129.75765991210938, "logps/rejected": -309.6456604003906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1598783731460571, "rewards/margins": 7.0589375495910645, "rewards/rejected": -8.218815803527832, "step": 3249 }, { "epoch": 0.51, "learning_rate": 1.1763652679080865e-05, "logits/chosen": -2.8140852451324463, "logits/rejected": -2.349871873855591, "logps/chosen": -312.93377685546875, "logps/rejected": -202.9508819580078, "loss": 3.6246, "rewards/accuracies": 0.5, "rewards/chosen": -4.966780662536621, "rewards/margins": -1.112525463104248, "rewards/rejected": -3.854255437850952, "step": 3250 }, { "epoch": 0.51, "learning_rate": 1.1762919238549717e-05, "logits/chosen": -2.8327584266662598, "logits/rejected": -3.4015004634857178, "logps/chosen": -27.877452850341797, "logps/rejected": -216.05612182617188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.3719968795776367, "rewards/margins": 6.362102508544922, "rewards/rejected": -7.734099388122559, "step": 3251 }, { "epoch": 0.51, "learning_rate": 1.1762185798018569e-05, "logits/chosen": -3.215955972671509, "logits/rejected": -2.192077875137329, "logps/chosen": -667.2742919921875, "logps/rejected": -279.843505859375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.15019989013671875, "rewards/margins": 6.7055864334106445, "rewards/rejected": -6.555386543273926, "step": 3252 }, { "epoch": 0.51, "learning_rate": 1.176145235748742e-05, "logits/chosen": -1.894911527633667, "logits/rejected": -2.7581512928009033, "logps/chosen": -152.05245971679688, "logps/rejected": -368.337158203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.257384777069092, "rewards/margins": 8.74692440032959, "rewards/rejected": -11.004308700561523, "step": 3253 }, { "epoch": 0.51, "learning_rate": 1.1760718916956273e-05, "logits/chosen": -2.8855807781219482, "logits/rejected": -2.9800851345062256, "logps/chosen": -465.22003173828125, "logps/rejected": -882.40966796875, "loss": 4.5305, "rewards/accuracies": 0.5, "rewards/chosen": -5.847651958465576, "rewards/margins": 1.6179122924804688, "rewards/rejected": -7.465564250946045, "step": 3254 }, { "epoch": 0.51, "learning_rate": 1.1759985476425124e-05, "logits/chosen": -2.4650659561157227, "logits/rejected": -2.8312718868255615, "logps/chosen": -46.071006774902344, "logps/rejected": -178.23191833496094, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -2.277276039123535, "rewards/margins": 3.862212657928467, "rewards/rejected": -6.139488220214844, "step": 3255 }, { "epoch": 0.51, "learning_rate": 1.1759252035893978e-05, "logits/chosen": -2.7992076873779297, "logits/rejected": -3.4004082679748535, "logps/chosen": -620.3007202148438, "logps/rejected": -544.0250854492188, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.4183769226074219, "rewards/margins": 4.95077657699585, "rewards/rejected": -6.3691534996032715, "step": 3256 }, { "epoch": 0.51, "learning_rate": 1.175851859536283e-05, "logits/chosen": -1.6130311489105225, "logits/rejected": -3.087876558303833, "logps/chosen": -115.95034790039062, "logps/rejected": -400.82257080078125, "loss": 0.1202, "rewards/accuracies": 1.0, "rewards/chosen": -1.6080063581466675, "rewards/margins": 5.003220558166504, "rewards/rejected": -6.611227035522461, "step": 3257 }, { "epoch": 0.51, "learning_rate": 1.1757785154831683e-05, "logits/chosen": -2.669975757598877, "logits/rejected": -3.150456190109253, "logps/chosen": -57.81140899658203, "logps/rejected": -223.36590576171875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.0376784801483154, "rewards/margins": 5.578182220458984, "rewards/rejected": -6.615860939025879, "step": 3258 }, { "epoch": 0.51, "learning_rate": 1.1757051714300535e-05, "logits/chosen": -1.9551113843917847, "logits/rejected": -3.101496934890747, "logps/chosen": -183.61546325683594, "logps/rejected": -393.86602783203125, "loss": 3.4354, "rewards/accuracies": 0.5, "rewards/chosen": -5.576754570007324, "rewards/margins": 0.5984253883361816, "rewards/rejected": -6.175179958343506, "step": 3259 }, { "epoch": 0.51, "learning_rate": 1.1756318273769387e-05, "logits/chosen": -1.4261411428451538, "logits/rejected": -2.2668609619140625, "logps/chosen": -173.9444580078125, "logps/rejected": -364.69940185546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.557621479034424, "rewards/margins": 7.164846897125244, "rewards/rejected": -9.722468376159668, "step": 3260 }, { "epoch": 0.51, "learning_rate": 1.1755584833238239e-05, "logits/chosen": -2.559657335281372, "logits/rejected": -2.9033381938934326, "logps/chosen": -394.1028137207031, "logps/rejected": -506.90765380859375, "loss": 2.7174, "rewards/accuracies": 0.5, "rewards/chosen": -4.379450798034668, "rewards/margins": 0.7645440101623535, "rewards/rejected": -5.1439948081970215, "step": 3261 }, { "epoch": 0.51, "learning_rate": 1.1754851392707091e-05, "logits/chosen": -1.4179414510726929, "logits/rejected": -2.880337953567505, "logps/chosen": -153.68353271484375, "logps/rejected": -358.39642333984375, "loss": 3.4893, "rewards/accuracies": 0.5, "rewards/chosen": -4.226686477661133, "rewards/margins": -0.09339189529418945, "rewards/rejected": -4.133294582366943, "step": 3262 }, { "epoch": 0.51, "learning_rate": 1.1754117952175943e-05, "logits/chosen": -3.138317584991455, "logits/rejected": -2.721503973007202, "logps/chosen": -409.5162353515625, "logps/rejected": -442.911376953125, "loss": 2.4914, "rewards/accuracies": 0.5, "rewards/chosen": -4.670648097991943, "rewards/margins": 1.391951560974121, "rewards/rejected": -6.0625996589660645, "step": 3263 }, { "epoch": 0.51, "learning_rate": 1.1753384511644795e-05, "logits/chosen": -1.568499207496643, "logits/rejected": -2.050814151763916, "logps/chosen": -250.28945922851562, "logps/rejected": -353.6053466796875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.9105827808380127, "rewards/margins": 5.145517349243164, "rewards/rejected": -7.056099891662598, "step": 3264 }, { "epoch": 0.51, "learning_rate": 1.1752651071113648e-05, "logits/chosen": -2.1059274673461914, "logits/rejected": -2.9108495712280273, "logps/chosen": -132.5654754638672, "logps/rejected": -347.5223388671875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859291553497314, "rewards/margins": 7.2942376136779785, "rewards/rejected": -8.380167007446289, "step": 3265 }, { "epoch": 0.51, "learning_rate": 1.17519176305825e-05, "logits/chosen": -2.849531412124634, "logits/rejected": -2.783517837524414, "logps/chosen": -248.96652221679688, "logps/rejected": -178.38746643066406, "loss": 2.9061, "rewards/accuracies": 0.5, "rewards/chosen": -3.6400201320648193, "rewards/margins": 1.7631304264068604, "rewards/rejected": -5.40315055847168, "step": 3266 }, { "epoch": 0.51, "learning_rate": 1.1751184190051352e-05, "logits/chosen": -2.5285754203796387, "logits/rejected": -2.711880683898926, "logps/chosen": -143.42108154296875, "logps/rejected": -241.13804626464844, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.2481884956359863, "rewards/margins": 6.242037296295166, "rewards/rejected": -8.490225791931152, "step": 3267 }, { "epoch": 0.51, "learning_rate": 1.1750450749520204e-05, "logits/chosen": -2.871781826019287, "logits/rejected": -3.2375993728637695, "logps/chosen": -102.66514587402344, "logps/rejected": -266.8712158203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8473198413848877, "rewards/margins": 6.786458492279053, "rewards/rejected": -7.6337785720825195, "step": 3268 }, { "epoch": 0.51, "learning_rate": 1.1749717308989056e-05, "logits/chosen": -2.4531173706054688, "logits/rejected": -2.915031909942627, "logps/chosen": -179.52134704589844, "logps/rejected": -422.9158020019531, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.4344277381896973, "rewards/margins": 6.171602249145508, "rewards/rejected": -7.606030464172363, "step": 3269 }, { "epoch": 0.51, "learning_rate": 1.1748983868457908e-05, "logits/chosen": -2.6378355026245117, "logits/rejected": -3.040613889694214, "logps/chosen": -226.3580322265625, "logps/rejected": -267.02825927734375, "loss": 2.5968, "rewards/accuracies": 0.5, "rewards/chosen": -3.651463508605957, "rewards/margins": 0.8759102821350098, "rewards/rejected": -4.527373790740967, "step": 3270 }, { "epoch": 0.51, "learning_rate": 1.174825042792676e-05, "logits/chosen": -2.5400218963623047, "logits/rejected": -2.2095301151275635, "logps/chosen": -186.21029663085938, "logps/rejected": -264.4785461425781, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.9930710792541504, "rewards/margins": 4.474733829498291, "rewards/rejected": -6.467804908752441, "step": 3271 }, { "epoch": 0.51, "learning_rate": 1.1747516987395611e-05, "logits/chosen": -2.7855916023254395, "logits/rejected": -2.0678021907806396, "logps/chosen": -591.7896728515625, "logps/rejected": -354.1018371582031, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.0385655164718628, "rewards/margins": 4.814292907714844, "rewards/rejected": -5.852858543395996, "step": 3272 }, { "epoch": 0.51, "learning_rate": 1.1746783546864463e-05, "logits/chosen": -3.1130688190460205, "logits/rejected": -1.7741491794586182, "logps/chosen": -212.43032836914062, "logps/rejected": -57.889122009277344, "loss": 3.8344, "rewards/accuracies": 0.5, "rewards/chosen": -5.689756870269775, "rewards/margins": -3.213994026184082, "rewards/rejected": -2.4757628440856934, "step": 3273 }, { "epoch": 0.51, "learning_rate": 1.1746050106333317e-05, "logits/chosen": -1.783610224723816, "logits/rejected": -2.8401131629943848, "logps/chosen": -366.33062744140625, "logps/rejected": -543.419921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7435979843139648, "rewards/margins": 9.32144546508789, "rewards/rejected": -10.065044403076172, "step": 3274 }, { "epoch": 0.51, "learning_rate": 1.1745316665802169e-05, "logits/chosen": -3.105600118637085, "logits/rejected": -3.2681591510772705, "logps/chosen": -130.43601989746094, "logps/rejected": -119.90548706054688, "loss": 1.6236, "rewards/accuracies": 0.5, "rewards/chosen": -2.4739198684692383, "rewards/margins": 0.5859369039535522, "rewards/rejected": -3.059856653213501, "step": 3275 }, { "epoch": 0.51, "learning_rate": 1.174458322527102e-05, "logits/chosen": -1.8241902589797974, "logits/rejected": -2.433610439300537, "logps/chosen": -223.48875427246094, "logps/rejected": -266.97235107421875, "loss": 2.4406, "rewards/accuracies": 0.5, "rewards/chosen": -4.47308874130249, "rewards/margins": 0.6909892559051514, "rewards/rejected": -5.164078235626221, "step": 3276 }, { "epoch": 0.51, "learning_rate": 1.1743849784739873e-05, "logits/chosen": -2.326395034790039, "logits/rejected": -3.15221905708313, "logps/chosen": -233.5169677734375, "logps/rejected": -405.7922058105469, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6663234829902649, "rewards/margins": 7.867918968200684, "rewards/rejected": -8.534242630004883, "step": 3277 }, { "epoch": 0.51, "learning_rate": 1.1743116344208724e-05, "logits/chosen": -2.5038058757781982, "logits/rejected": -2.969646453857422, "logps/chosen": -245.84571838378906, "logps/rejected": -340.8792724609375, "loss": 1.8883, "rewards/accuracies": 0.5, "rewards/chosen": -4.96832275390625, "rewards/margins": 0.9739320278167725, "rewards/rejected": -5.942255020141602, "step": 3278 }, { "epoch": 0.51, "learning_rate": 1.1742382903677576e-05, "logits/chosen": -2.1613566875457764, "logits/rejected": -2.8238542079925537, "logps/chosen": -105.87101745605469, "logps/rejected": -489.7090759277344, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.3782981634140015, "rewards/margins": 6.383809566497803, "rewards/rejected": -7.762107849121094, "step": 3279 }, { "epoch": 0.51, "learning_rate": 1.1741649463146428e-05, "logits/chosen": -1.2084709405899048, "logits/rejected": -2.7902231216430664, "logps/chosen": -97.98941040039062, "logps/rejected": -175.25799560546875, "loss": 1.9984, "rewards/accuracies": 0.5, "rewards/chosen": -3.6498475074768066, "rewards/margins": 0.5003907680511475, "rewards/rejected": -4.150238037109375, "step": 3280 }, { "epoch": 0.51, "learning_rate": 1.174091602261528e-05, "logits/chosen": -2.6471188068389893, "logits/rejected": -2.787116050720215, "logps/chosen": -219.26194763183594, "logps/rejected": -326.05718994140625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.356728434562683, "rewards/margins": 5.815972328186035, "rewards/rejected": -7.172700881958008, "step": 3281 }, { "epoch": 0.51, "learning_rate": 1.1740182582084132e-05, "logits/chosen": -2.7170422077178955, "logits/rejected": -1.9509010314941406, "logps/chosen": -238.00978088378906, "logps/rejected": -131.6068115234375, "loss": 2.7379, "rewards/accuracies": 0.5, "rewards/chosen": -5.298190116882324, "rewards/margins": -2.261707305908203, "rewards/rejected": -3.0364830493927, "step": 3282 }, { "epoch": 0.51, "learning_rate": 1.1739449141552986e-05, "logits/chosen": -2.8030190467834473, "logits/rejected": -2.7957029342651367, "logps/chosen": -334.6494445800781, "logps/rejected": -504.9938049316406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.297541856765747, "rewards/margins": 9.946365356445312, "rewards/rejected": -11.243907928466797, "step": 3283 }, { "epoch": 0.51, "learning_rate": 1.1738715701021837e-05, "logits/chosen": -2.7982864379882812, "logits/rejected": -1.3276581764221191, "logps/chosen": -242.4242706298828, "logps/rejected": -119.75080871582031, "loss": 2.4771, "rewards/accuracies": 0.5, "rewards/chosen": -3.3648900985717773, "rewards/margins": -0.9078466892242432, "rewards/rejected": -2.457043409347534, "step": 3284 }, { "epoch": 0.51, "learning_rate": 1.173798226049069e-05, "logits/chosen": -2.9251222610473633, "logits/rejected": -2.744166851043701, "logps/chosen": -139.42697143554688, "logps/rejected": -241.88284301757812, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.5402358770370483, "rewards/margins": 5.608495235443115, "rewards/rejected": -6.148731231689453, "step": 3285 }, { "epoch": 0.51, "learning_rate": 1.1737248819959541e-05, "logits/chosen": -3.216754674911499, "logits/rejected": -2.873687505722046, "logps/chosen": -603.9817504882812, "logps/rejected": -501.31121826171875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.7999633550643921, "rewards/margins": 6.300589561462402, "rewards/rejected": -7.100552558898926, "step": 3286 }, { "epoch": 0.51, "learning_rate": 1.1736515379428393e-05, "logits/chosen": -1.875256061553955, "logits/rejected": -2.7097036838531494, "logps/chosen": -103.73033905029297, "logps/rejected": -207.546875, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": -1.7706100940704346, "rewards/margins": 3.2796530723571777, "rewards/rejected": -5.050262928009033, "step": 3287 }, { "epoch": 0.51, "learning_rate": 1.1735781938897245e-05, "logits/chosen": -1.769404411315918, "logits/rejected": -3.048412799835205, "logps/chosen": -317.99432373046875, "logps/rejected": -340.32763671875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.194811224937439, "rewards/margins": 5.160748481750488, "rewards/rejected": -6.355559349060059, "step": 3288 }, { "epoch": 0.51, "learning_rate": 1.1735048498366097e-05, "logits/chosen": -2.925595283508301, "logits/rejected": -2.910587787628174, "logps/chosen": -281.3094482421875, "logps/rejected": -342.11883544921875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.353681206703186, "rewards/margins": 7.804802894592285, "rewards/rejected": -9.158483505249023, "step": 3289 }, { "epoch": 0.51, "learning_rate": 1.173431505783495e-05, "logits/chosen": -3.0374419689178467, "logits/rejected": -2.472358226776123, "logps/chosen": -173.2636260986328, "logps/rejected": -312.34130859375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.1668994426727295, "rewards/margins": 6.308981895446777, "rewards/rejected": -8.475881576538086, "step": 3290 }, { "epoch": 0.51, "learning_rate": 1.1733581617303802e-05, "logits/chosen": -3.0878958702087402, "logits/rejected": -2.7166218757629395, "logps/chosen": -157.4439697265625, "logps/rejected": -162.1566162109375, "loss": 2.4548, "rewards/accuracies": 0.5, "rewards/chosen": -3.433135509490967, "rewards/margins": 0.34531474113464355, "rewards/rejected": -3.7784502506256104, "step": 3291 }, { "epoch": 0.51, "learning_rate": 1.1732848176772656e-05, "logits/chosen": -3.0741519927978516, "logits/rejected": -2.5764594078063965, "logps/chosen": -415.3551025390625, "logps/rejected": -380.1776428222656, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.7035088539123535, "rewards/margins": 5.664855003356934, "rewards/rejected": -7.368364334106445, "step": 3292 }, { "epoch": 0.51, "learning_rate": 1.1732114736241508e-05, "logits/chosen": -1.7879884243011475, "logits/rejected": -3.1341214179992676, "logps/chosen": -197.40841674804688, "logps/rejected": -457.505615234375, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.442051887512207, "rewards/margins": 7.755486965179443, "rewards/rejected": -9.197539329528809, "step": 3293 }, { "epoch": 0.51, "learning_rate": 1.173138129571036e-05, "logits/chosen": -1.786428451538086, "logits/rejected": -2.753939390182495, "logps/chosen": -78.4672622680664, "logps/rejected": -356.5927429199219, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6310165524482727, "rewards/margins": 8.38729476928711, "rewards/rejected": -9.018311500549316, "step": 3294 }, { "epoch": 0.51, "learning_rate": 1.1730647855179211e-05, "logits/chosen": -2.042454957962036, "logits/rejected": -2.91585373878479, "logps/chosen": -186.88302612304688, "logps/rejected": -509.81048583984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.27062684297561646, "rewards/margins": 9.252517700195312, "rewards/rejected": -9.523144721984863, "step": 3295 }, { "epoch": 0.51, "learning_rate": 1.1729914414648063e-05, "logits/chosen": -2.498727798461914, "logits/rejected": -3.0781097412109375, "logps/chosen": -146.44329833984375, "logps/rejected": -301.0072326660156, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.4817402362823486, "rewards/margins": 5.085892677307129, "rewards/rejected": -6.567633152008057, "step": 3296 }, { "epoch": 0.51, "learning_rate": 1.1729180974116915e-05, "logits/chosen": -2.8097667694091797, "logits/rejected": -3.1448917388916016, "logps/chosen": -272.449462890625, "logps/rejected": -249.10910034179688, "loss": 0.1215, "rewards/accuracies": 1.0, "rewards/chosen": -1.8708915710449219, "rewards/margins": 3.077826738357544, "rewards/rejected": -4.948718070983887, "step": 3297 }, { "epoch": 0.51, "learning_rate": 1.1728447533585767e-05, "logits/chosen": -2.4007363319396973, "logits/rejected": -2.956989049911499, "logps/chosen": -273.9998779296875, "logps/rejected": -279.2649841308594, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.212249994277954, "rewards/margins": 6.169377326965332, "rewards/rejected": -7.381627082824707, "step": 3298 }, { "epoch": 0.51, "learning_rate": 1.1727714093054619e-05, "logits/chosen": -2.6058781147003174, "logits/rejected": -3.281684160232544, "logps/chosen": -424.6476135253906, "logps/rejected": -611.0288696289062, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": -0.992603063583374, "rewards/margins": 4.116897106170654, "rewards/rejected": -5.109499931335449, "step": 3299 }, { "epoch": 0.51, "learning_rate": 1.172698065252347e-05, "logits/chosen": -2.6880886554718018, "logits/rejected": -2.291783332824707, "logps/chosen": -528.81005859375, "logps/rejected": -435.785400390625, "loss": 0.5844, "rewards/accuracies": 0.5, "rewards/chosen": -0.7293838858604431, "rewards/margins": 3.813169002532959, "rewards/rejected": -4.542552947998047, "step": 3300 }, { "epoch": 0.51, "learning_rate": 1.1726247211992324e-05, "logits/chosen": -2.8642678260803223, "logits/rejected": -2.78359055519104, "logps/chosen": -618.7759399414062, "logps/rejected": -342.8167419433594, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.7365661859512329, "rewards/margins": 4.708268165588379, "rewards/rejected": -5.444834232330322, "step": 3301 }, { "epoch": 0.51, "learning_rate": 1.1725513771461176e-05, "logits/chosen": -1.6641573905944824, "logits/rejected": -2.8636062145233154, "logps/chosen": -202.78834533691406, "logps/rejected": -502.4455871582031, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.724408745765686, "rewards/margins": 6.224367618560791, "rewards/rejected": -7.9487762451171875, "step": 3302 }, { "epoch": 0.51, "learning_rate": 1.1724780330930028e-05, "logits/chosen": -2.283372402191162, "logits/rejected": -2.594360589981079, "logps/chosen": -194.5983123779297, "logps/rejected": -260.4000244140625, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -1.2705742120742798, "rewards/margins": 3.8548665046691895, "rewards/rejected": -5.12544059753418, "step": 3303 }, { "epoch": 0.51, "learning_rate": 1.172404689039888e-05, "logits/chosen": -2.430924892425537, "logits/rejected": -2.6550285816192627, "logps/chosen": -299.0923767089844, "logps/rejected": -345.13006591796875, "loss": 1.161, "rewards/accuracies": 0.5, "rewards/chosen": -3.3253073692321777, "rewards/margins": 1.528588056564331, "rewards/rejected": -4.853895664215088, "step": 3304 }, { "epoch": 0.51, "learning_rate": 1.1723313449867732e-05, "logits/chosen": -1.582750678062439, "logits/rejected": -2.902878999710083, "logps/chosen": -55.547698974609375, "logps/rejected": -241.1256103515625, "loss": 0.0925, "rewards/accuracies": 1.0, "rewards/chosen": -1.832540512084961, "rewards/margins": 4.904204368591309, "rewards/rejected": -6.7367448806762695, "step": 3305 }, { "epoch": 0.51, "learning_rate": 1.1722580009336584e-05, "logits/chosen": -2.654265880584717, "logits/rejected": -1.2678370475769043, "logps/chosen": -232.68048095703125, "logps/rejected": -140.87139892578125, "loss": 1.7995, "rewards/accuracies": 0.5, "rewards/chosen": -2.814620018005371, "rewards/margins": 1.4573845863342285, "rewards/rejected": -4.2720046043396, "step": 3306 }, { "epoch": 0.51, "learning_rate": 1.1721846568805436e-05, "logits/chosen": -1.9531917572021484, "logits/rejected": -2.174457550048828, "logps/chosen": -301.11187744140625, "logps/rejected": -354.55609130859375, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.40118488669395447, "rewards/margins": 5.472617149353027, "rewards/rejected": -5.873802185058594, "step": 3307 }, { "epoch": 0.51, "learning_rate": 1.1721113128274288e-05, "logits/chosen": -2.6571455001831055, "logits/rejected": -2.907665491104126, "logps/chosen": -219.77621459960938, "logps/rejected": -249.83145141601562, "loss": 2.2055, "rewards/accuracies": 0.5, "rewards/chosen": -4.151648998260498, "rewards/margins": 1.4362430572509766, "rewards/rejected": -5.587892055511475, "step": 3308 }, { "epoch": 0.51, "learning_rate": 1.172037968774314e-05, "logits/chosen": -2.8680498600006104, "logits/rejected": -3.379944086074829, "logps/chosen": -220.1057891845703, "logps/rejected": -358.178955078125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.2071678638458252, "rewards/margins": 6.068112850189209, "rewards/rejected": -7.275280952453613, "step": 3309 }, { "epoch": 0.51, "learning_rate": 1.1719646247211993e-05, "logits/chosen": -2.6424810886383057, "logits/rejected": -3.18790340423584, "logps/chosen": -83.77848815917969, "logps/rejected": -244.96044921875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.7831120491027832, "rewards/margins": 5.498010635375977, "rewards/rejected": -6.28112268447876, "step": 3310 }, { "epoch": 0.51, "learning_rate": 1.1718912806680845e-05, "logits/chosen": -1.8867533206939697, "logits/rejected": -2.8236372470855713, "logps/chosen": -133.09327697753906, "logps/rejected": -236.25393676757812, "loss": 2.2743, "rewards/accuracies": 0.5, "rewards/chosen": -4.528794765472412, "rewards/margins": 0.05351710319519043, "rewards/rejected": -4.582311630249023, "step": 3311 }, { "epoch": 0.52, "learning_rate": 1.1718179366149697e-05, "logits/chosen": -2.9843358993530273, "logits/rejected": -2.3405232429504395, "logps/chosen": -257.621337890625, "logps/rejected": -210.15611267089844, "loss": 1.5483, "rewards/accuracies": 0.5, "rewards/chosen": -2.641191005706787, "rewards/margins": 0.32797324657440186, "rewards/rejected": -2.9691643714904785, "step": 3312 }, { "epoch": 0.52, "learning_rate": 1.1717445925618549e-05, "logits/chosen": -0.6236211657524109, "logits/rejected": -2.819464921951294, "logps/chosen": -38.98899841308594, "logps/rejected": -320.286376953125, "loss": 0.2443, "rewards/accuracies": 1.0, "rewards/chosen": -2.2975802421569824, "rewards/margins": 4.379759788513184, "rewards/rejected": -6.677340030670166, "step": 3313 }, { "epoch": 0.52, "learning_rate": 1.17167124850874e-05, "logits/chosen": -0.7978522777557373, "logits/rejected": -2.8558270931243896, "logps/chosen": -36.58441162109375, "logps/rejected": -490.094970703125, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -1.5558862686157227, "rewards/margins": 5.670530319213867, "rewards/rejected": -7.22641658782959, "step": 3314 }, { "epoch": 0.52, "learning_rate": 1.1715979044556252e-05, "logits/chosen": -2.4797093868255615, "logits/rejected": -3.0389702320098877, "logps/chosen": -63.35344314575195, "logps/rejected": -176.73779296875, "loss": 0.1096, "rewards/accuracies": 1.0, "rewards/chosen": -1.2738382816314697, "rewards/margins": 3.562927484512329, "rewards/rejected": -4.836765766143799, "step": 3315 }, { "epoch": 0.52, "learning_rate": 1.1715245604025104e-05, "logits/chosen": -2.113304853439331, "logits/rejected": -3.0979714393615723, "logps/chosen": -140.0834503173828, "logps/rejected": -195.42977905273438, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.8351527452468872, "rewards/margins": 4.808648109436035, "rewards/rejected": -5.643800735473633, "step": 3316 }, { "epoch": 0.52, "learning_rate": 1.1714512163493956e-05, "logits/chosen": -2.4923815727233887, "logits/rejected": -1.228367567062378, "logps/chosen": -346.40771484375, "logps/rejected": -176.40933227539062, "loss": 1.213, "rewards/accuracies": 0.5, "rewards/chosen": -1.3595107793807983, "rewards/margins": 1.113276720046997, "rewards/rejected": -2.472787618637085, "step": 3317 }, { "epoch": 0.52, "learning_rate": 1.1713778722962808e-05, "logits/chosen": -2.844499349594116, "logits/rejected": -2.5073869228363037, "logps/chosen": -199.0332489013672, "logps/rejected": -306.9046325683594, "loss": 1.7746, "rewards/accuracies": 0.5, "rewards/chosen": -2.782285451889038, "rewards/margins": 2.4538657665252686, "rewards/rejected": -5.236151218414307, "step": 3318 }, { "epoch": 0.52, "learning_rate": 1.1713045282431662e-05, "logits/chosen": -1.7994904518127441, "logits/rejected": -2.891601324081421, "logps/chosen": -65.5314712524414, "logps/rejected": -201.96539306640625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.2655229568481445, "rewards/margins": 6.077950954437256, "rewards/rejected": -7.3434739112854, "step": 3319 }, { "epoch": 0.52, "learning_rate": 1.1712311841900513e-05, "logits/chosen": -3.2401435375213623, "logits/rejected": -2.414067029953003, "logps/chosen": -352.6545104980469, "logps/rejected": -134.77285766601562, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.48114147782325745, "rewards/margins": 5.261231422424316, "rewards/rejected": -5.742372989654541, "step": 3320 }, { "epoch": 0.52, "learning_rate": 1.1711578401369365e-05, "logits/chosen": -2.77703595161438, "logits/rejected": -3.2563090324401855, "logps/chosen": -412.0942687988281, "logps/rejected": -438.65924072265625, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -2.4511544704437256, "rewards/margins": 6.599369049072266, "rewards/rejected": -9.05052375793457, "step": 3321 }, { "epoch": 0.52, "learning_rate": 1.1710844960838217e-05, "logits/chosen": -0.6419197916984558, "logits/rejected": -2.775942087173462, "logps/chosen": -89.86489868164062, "logps/rejected": -883.950927734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9995231628417969, "rewards/margins": 9.612939834594727, "rewards/rejected": -10.612462997436523, "step": 3322 }, { "epoch": 0.52, "learning_rate": 1.1710111520307069e-05, "logits/chosen": -1.0348005294799805, "logits/rejected": -3.050550699234009, "logps/chosen": -32.991737365722656, "logps/rejected": -392.6493835449219, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -1.8032498359680176, "rewards/margins": 6.395364761352539, "rewards/rejected": -8.198614120483398, "step": 3323 }, { "epoch": 0.52, "learning_rate": 1.1709378079775923e-05, "logits/chosen": -3.0645432472229004, "logits/rejected": -2.824336290359497, "logps/chosen": -452.78033447265625, "logps/rejected": -288.5247802734375, "loss": 1.8794, "rewards/accuracies": 0.5, "rewards/chosen": -2.9872970581054688, "rewards/margins": -0.23486328125, "rewards/rejected": -2.7524337768554688, "step": 3324 }, { "epoch": 0.52, "learning_rate": 1.1708644639244775e-05, "logits/chosen": -3.036320924758911, "logits/rejected": -2.0762875080108643, "logps/chosen": -197.48251342773438, "logps/rejected": -107.05377960205078, "loss": 2.7494, "rewards/accuracies": 0.5, "rewards/chosen": -4.1938676834106445, "rewards/margins": -0.9688878059387207, "rewards/rejected": -3.2249794006347656, "step": 3325 }, { "epoch": 0.52, "learning_rate": 1.1707911198713626e-05, "logits/chosen": -1.4660577774047852, "logits/rejected": -3.1856982707977295, "logps/chosen": -134.88671875, "logps/rejected": -403.4100036621094, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -1.8685684204101562, "rewards/margins": 3.403233528137207, "rewards/rejected": -5.271801948547363, "step": 3326 }, { "epoch": 0.52, "learning_rate": 1.1707177758182478e-05, "logits/chosen": -2.943795919418335, "logits/rejected": -3.0120737552642822, "logps/chosen": -120.72757720947266, "logps/rejected": -181.98968505859375, "loss": 0.0821, "rewards/accuracies": 1.0, "rewards/chosen": -1.5847737789154053, "rewards/margins": 3.2539174556732178, "rewards/rejected": -4.838691234588623, "step": 3327 }, { "epoch": 0.52, "learning_rate": 1.1706444317651332e-05, "logits/chosen": -2.548508882522583, "logits/rejected": -2.8755717277526855, "logps/chosen": -42.89445495605469, "logps/rejected": -240.44451904296875, "loss": 0.1517, "rewards/accuracies": 1.0, "rewards/chosen": -1.4503886699676514, "rewards/margins": 1.8390384912490845, "rewards/rejected": -3.2894272804260254, "step": 3328 }, { "epoch": 0.52, "learning_rate": 1.1705710877120184e-05, "logits/chosen": -2.7585878372192383, "logits/rejected": -2.2598156929016113, "logps/chosen": -110.49173736572266, "logps/rejected": -111.58344268798828, "loss": 1.6558, "rewards/accuracies": 0.5, "rewards/chosen": -2.633924722671509, "rewards/margins": 1.1826014518737793, "rewards/rejected": -3.816526174545288, "step": 3329 }, { "epoch": 0.52, "learning_rate": 1.1704977436589036e-05, "logits/chosen": -2.734356641769409, "logits/rejected": -1.3469605445861816, "logps/chosen": -451.2304382324219, "logps/rejected": -190.77853393554688, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.9413076639175415, "rewards/margins": 5.850096225738525, "rewards/rejected": -6.791403770446777, "step": 3330 }, { "epoch": 0.52, "learning_rate": 1.1704243996057888e-05, "logits/chosen": -3.4028007984161377, "logits/rejected": -3.3107848167419434, "logps/chosen": -152.4928436279297, "logps/rejected": -152.5240478515625, "loss": 0.0786, "rewards/accuracies": 1.0, "rewards/chosen": -0.3983898162841797, "rewards/margins": 4.110023498535156, "rewards/rejected": -4.508413314819336, "step": 3331 }, { "epoch": 0.52, "learning_rate": 1.170351055552674e-05, "logits/chosen": -2.0951666831970215, "logits/rejected": -2.8855836391448975, "logps/chosen": -112.85884094238281, "logps/rejected": -289.8844299316406, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -1.1837602853775024, "rewards/margins": 5.396842956542969, "rewards/rejected": -6.580603122711182, "step": 3332 }, { "epoch": 0.52, "learning_rate": 1.1702777114995591e-05, "logits/chosen": -0.8811192512512207, "logits/rejected": -3.0602190494537354, "logps/chosen": -180.78659057617188, "logps/rejected": -488.09515380859375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.3368637561798096, "rewards/margins": 5.992518424987793, "rewards/rejected": -7.329382419586182, "step": 3333 }, { "epoch": 0.52, "learning_rate": 1.1702043674464443e-05, "logits/chosen": -2.853480339050293, "logits/rejected": -2.037792921066284, "logps/chosen": -349.3589782714844, "logps/rejected": -304.00506591796875, "loss": 1.769, "rewards/accuracies": 0.5, "rewards/chosen": -2.7539212703704834, "rewards/margins": 1.6330862045288086, "rewards/rejected": -4.387007713317871, "step": 3334 }, { "epoch": 0.52, "learning_rate": 1.1701310233933295e-05, "logits/chosen": -3.1762897968292236, "logits/rejected": -2.2616829872131348, "logps/chosen": -489.4365234375, "logps/rejected": -407.7065124511719, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -3.865875482559204, "rewards/margins": 4.574758052825928, "rewards/rejected": -8.440633773803711, "step": 3335 }, { "epoch": 0.52, "learning_rate": 1.1700576793402149e-05, "logits/chosen": -2.618431329727173, "logits/rejected": -3.115598440170288, "logps/chosen": -346.6578674316406, "logps/rejected": -288.09857177734375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.5133712887763977, "rewards/margins": 5.945263862609863, "rewards/rejected": -6.458635330200195, "step": 3336 }, { "epoch": 0.52, "learning_rate": 1.1699843352871e-05, "logits/chosen": -3.050593376159668, "logits/rejected": -3.1718757152557373, "logps/chosen": -589.1007690429688, "logps/rejected": -536.4962768554688, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -0.5664219260215759, "rewards/margins": 3.4141080379486084, "rewards/rejected": -3.98052978515625, "step": 3337 }, { "epoch": 0.52, "learning_rate": 1.1699109912339852e-05, "logits/chosen": -3.10526704788208, "logits/rejected": -2.9364826679229736, "logps/chosen": -210.25595092773438, "logps/rejected": -138.80496215820312, "loss": 2.8566, "rewards/accuracies": 0.5, "rewards/chosen": -4.322930812835693, "rewards/margins": -0.5477437973022461, "rewards/rejected": -3.7751870155334473, "step": 3338 }, { "epoch": 0.52, "learning_rate": 1.1698376471808704e-05, "logits/chosen": -2.9035964012145996, "logits/rejected": -2.913649082183838, "logps/chosen": -118.03778076171875, "logps/rejected": -260.9222412109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.1496810913085938, "rewards/margins": 7.64759635925293, "rewards/rejected": -8.797277450561523, "step": 3339 }, { "epoch": 0.52, "learning_rate": 1.1697643031277556e-05, "logits/chosen": -3.2227485179901123, "logits/rejected": -2.3516459465026855, "logps/chosen": -589.51611328125, "logps/rejected": -428.7674865722656, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.5600861310958862, "rewards/margins": 4.765088081359863, "rewards/rejected": -6.325174331665039, "step": 3340 }, { "epoch": 0.52, "learning_rate": 1.1696909590746408e-05, "logits/chosen": -2.695732593536377, "logits/rejected": -3.183948040008545, "logps/chosen": -283.2811584472656, "logps/rejected": -359.95623779296875, "loss": 0.095, "rewards/accuracies": 1.0, "rewards/chosen": -0.669559895992279, "rewards/margins": 3.316005229949951, "rewards/rejected": -3.985565185546875, "step": 3341 }, { "epoch": 0.52, "learning_rate": 1.169617615021526e-05, "logits/chosen": -2.8783764839172363, "logits/rejected": -3.0917809009552, "logps/chosen": -458.95831298828125, "logps/rejected": -397.915771484375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.07361145317554474, "rewards/margins": 6.280949592590332, "rewards/rejected": -6.354560852050781, "step": 3342 }, { "epoch": 0.52, "learning_rate": 1.1695442709684112e-05, "logits/chosen": -2.674400806427002, "logits/rejected": -2.9223012924194336, "logps/chosen": -228.14842224121094, "logps/rejected": -203.19183349609375, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.3424705266952515, "rewards/margins": 4.537107944488525, "rewards/rejected": -5.879578590393066, "step": 3343 }, { "epoch": 0.52, "learning_rate": 1.1694709269152964e-05, "logits/chosen": -2.762305974960327, "logits/rejected": -2.9696204662323, "logps/chosen": -190.72183227539062, "logps/rejected": -286.1719970703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.3554673194885254, "rewards/margins": 6.507081985473633, "rewards/rejected": -7.862548828125, "step": 3344 }, { "epoch": 0.52, "learning_rate": 1.1693975828621817e-05, "logits/chosen": -1.656456708908081, "logits/rejected": -2.8180229663848877, "logps/chosen": -118.09534454345703, "logps/rejected": -450.67230224609375, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -1.0671306848526, "rewards/margins": 7.312812805175781, "rewards/rejected": -8.37994384765625, "step": 3345 }, { "epoch": 0.52, "learning_rate": 1.1693242388090669e-05, "logits/chosen": -2.703547716140747, "logits/rejected": -2.7094955444335938, "logps/chosen": -114.92514038085938, "logps/rejected": -307.5231018066406, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.5701541900634766, "rewards/margins": 5.95032262802124, "rewards/rejected": -7.520476818084717, "step": 3346 }, { "epoch": 0.52, "learning_rate": 1.1692508947559521e-05, "logits/chosen": -3.2257540225982666, "logits/rejected": -2.6231086254119873, "logps/chosen": -708.7327880859375, "logps/rejected": -429.4931640625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.9428223371505737, "rewards/margins": 5.769106864929199, "rewards/rejected": -6.7119293212890625, "step": 3347 }, { "epoch": 0.52, "learning_rate": 1.1691775507028373e-05, "logits/chosen": -3.016660451889038, "logits/rejected": -2.991779088973999, "logps/chosen": -614.2747802734375, "logps/rejected": -951.8157958984375, "loss": 2.6961, "rewards/accuracies": 0.5, "rewards/chosen": -3.813620090484619, "rewards/margins": 1.287886142730713, "rewards/rejected": -5.101506233215332, "step": 3348 }, { "epoch": 0.52, "learning_rate": 1.1691042066497225e-05, "logits/chosen": -2.8689284324645996, "logits/rejected": -2.1341028213500977, "logps/chosen": -203.73158264160156, "logps/rejected": -291.0692443847656, "loss": 2.6142, "rewards/accuracies": 0.5, "rewards/chosen": -5.541280746459961, "rewards/margins": 2.0796780586242676, "rewards/rejected": -7.62095832824707, "step": 3349 }, { "epoch": 0.52, "learning_rate": 1.1690308625966077e-05, "logits/chosen": -2.386770248413086, "logits/rejected": -2.945003032684326, "logps/chosen": -123.60993957519531, "logps/rejected": -184.64154052734375, "loss": 2.8939, "rewards/accuracies": 0.5, "rewards/chosen": -3.8789725303649902, "rewards/margins": 0.9795007705688477, "rewards/rejected": -4.858473300933838, "step": 3350 }, { "epoch": 0.52, "learning_rate": 1.1689575185434928e-05, "logits/chosen": -2.92252254486084, "logits/rejected": -1.4381710290908813, "logps/chosen": -981.9436645507812, "logps/rejected": -421.972412109375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6750046014785767, "rewards/margins": 6.730517387390137, "rewards/rejected": -8.405522346496582, "step": 3351 }, { "epoch": 0.52, "learning_rate": 1.168884174490378e-05, "logits/chosen": -1.9331775903701782, "logits/rejected": -2.858342409133911, "logps/chosen": -189.5455322265625, "logps/rejected": -351.01251220703125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.249437689781189, "rewards/margins": 6.272305488586426, "rewards/rejected": -7.521743297576904, "step": 3352 }, { "epoch": 0.52, "learning_rate": 1.1688108304372632e-05, "logits/chosen": -3.088942289352417, "logits/rejected": -2.986927032470703, "logps/chosen": -124.6072006225586, "logps/rejected": -144.95474243164062, "loss": 0.1928, "rewards/accuracies": 1.0, "rewards/chosen": -2.2294747829437256, "rewards/margins": 1.948509931564331, "rewards/rejected": -4.177984714508057, "step": 3353 }, { "epoch": 0.52, "learning_rate": 1.1687374863841486e-05, "logits/chosen": -2.354016065597534, "logits/rejected": -2.8521440029144287, "logps/chosen": -404.71868896484375, "logps/rejected": -446.024169921875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7970477938652039, "rewards/margins": 6.4043660163879395, "rewards/rejected": -7.201414108276367, "step": 3354 }, { "epoch": 0.52, "learning_rate": 1.1686641423310338e-05, "logits/chosen": -2.5538766384124756, "logits/rejected": -3.1475918292999268, "logps/chosen": -368.32562255859375, "logps/rejected": -405.4197082519531, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.1534385681152344, "rewards/margins": 5.660730838775635, "rewards/rejected": -6.814169406890869, "step": 3355 }, { "epoch": 0.52, "learning_rate": 1.168590798277919e-05, "logits/chosen": -2.9854319095611572, "logits/rejected": -3.025876760482788, "logps/chosen": -583.867431640625, "logps/rejected": -500.4532470703125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5435951352119446, "rewards/margins": 6.515167236328125, "rewards/rejected": -7.058762550354004, "step": 3356 }, { "epoch": 0.52, "learning_rate": 1.1685174542248041e-05, "logits/chosen": -1.694222092628479, "logits/rejected": -3.100964069366455, "logps/chosen": -150.51812744140625, "logps/rejected": -388.1374816894531, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.4425155818462372, "rewards/margins": 7.749822616577148, "rewards/rejected": -8.192337989807129, "step": 3357 }, { "epoch": 0.52, "learning_rate": 1.1684441101716895e-05, "logits/chosen": -2.8673272132873535, "logits/rejected": -2.6477513313293457, "logps/chosen": -158.37200927734375, "logps/rejected": -278.05078125, "loss": 2.202, "rewards/accuracies": 0.5, "rewards/chosen": -3.907939910888672, "rewards/margins": 2.156656503677368, "rewards/rejected": -6.064596652984619, "step": 3358 }, { "epoch": 0.52, "learning_rate": 1.1683707661185747e-05, "logits/chosen": -2.604729413986206, "logits/rejected": -3.0530457496643066, "logps/chosen": -249.5555419921875, "logps/rejected": -294.82177734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.3821450471878052, "rewards/margins": 6.391119956970215, "rewards/rejected": -7.7732648849487305, "step": 3359 }, { "epoch": 0.52, "learning_rate": 1.1682974220654599e-05, "logits/chosen": -2.7861900329589844, "logits/rejected": -2.005561351776123, "logps/chosen": -393.1512451171875, "logps/rejected": -278.9766845703125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.7847001552581787, "rewards/margins": 5.1868438720703125, "rewards/rejected": -6.97154426574707, "step": 3360 }, { "epoch": 0.52, "learning_rate": 1.168224078012345e-05, "logits/chosen": -3.005119562149048, "logits/rejected": -1.113825798034668, "logps/chosen": -481.162353515625, "logps/rejected": -90.56224060058594, "loss": 2.9851, "rewards/accuracies": 0.5, "rewards/chosen": -5.02987813949585, "rewards/margins": -2.6276845932006836, "rewards/rejected": -2.402193546295166, "step": 3361 }, { "epoch": 0.52, "learning_rate": 1.1681507339592303e-05, "logits/chosen": -3.275439500808716, "logits/rejected": -3.2438745498657227, "logps/chosen": -133.3486328125, "logps/rejected": -139.61618041992188, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -1.4960819482803345, "rewards/margins": 3.805158853530884, "rewards/rejected": -5.301240921020508, "step": 3362 }, { "epoch": 0.52, "learning_rate": 1.1680773899061156e-05, "logits/chosen": -3.0336289405822754, "logits/rejected": -2.720797061920166, "logps/chosen": -494.0904541015625, "logps/rejected": -378.37677001953125, "loss": 2.3297, "rewards/accuracies": 0.5, "rewards/chosen": -4.996654033660889, "rewards/margins": 0.6183280944824219, "rewards/rejected": -5.6149821281433105, "step": 3363 }, { "epoch": 0.52, "learning_rate": 1.1680040458530008e-05, "logits/chosen": -2.9454755783081055, "logits/rejected": -2.8448221683502197, "logps/chosen": -119.03539276123047, "logps/rejected": -251.80288696289062, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -2.0004031658172607, "rewards/margins": 5.112682342529297, "rewards/rejected": -7.113085746765137, "step": 3364 }, { "epoch": 0.52, "learning_rate": 1.167930701799886e-05, "logits/chosen": -2.260120153427124, "logits/rejected": -2.92822003364563, "logps/chosen": -97.67215728759766, "logps/rejected": -382.795654296875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.2219716310501099, "rewards/margins": 7.677497863769531, "rewards/rejected": -8.899469375610352, "step": 3365 }, { "epoch": 0.52, "learning_rate": 1.1678573577467712e-05, "logits/chosen": -2.9164443016052246, "logits/rejected": -3.111969232559204, "logps/chosen": -74.36031341552734, "logps/rejected": -155.7257080078125, "loss": 2.8056, "rewards/accuracies": 0.5, "rewards/chosen": -3.420093297958374, "rewards/margins": 0.80892014503479, "rewards/rejected": -4.229013442993164, "step": 3366 }, { "epoch": 0.52, "learning_rate": 1.1677840136936564e-05, "logits/chosen": -3.1987903118133545, "logits/rejected": -3.075093984603882, "logps/chosen": -182.0283203125, "logps/rejected": -160.91456604003906, "loss": 2.0751, "rewards/accuracies": 0.5, "rewards/chosen": -3.7525784969329834, "rewards/margins": 0.07375383377075195, "rewards/rejected": -3.8263320922851562, "step": 3367 }, { "epoch": 0.52, "learning_rate": 1.1677106696405416e-05, "logits/chosen": -2.3560986518859863, "logits/rejected": -2.910377264022827, "logps/chosen": -439.9676513671875, "logps/rejected": -454.564208984375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.126755475997925, "rewards/margins": 5.946835994720459, "rewards/rejected": -8.073591232299805, "step": 3368 }, { "epoch": 0.52, "learning_rate": 1.1676373255874267e-05, "logits/chosen": -2.844177007675171, "logits/rejected": -3.1436538696289062, "logps/chosen": -277.9031982421875, "logps/rejected": -264.0115966796875, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -3.0757789611816406, "rewards/margins": 3.8455047607421875, "rewards/rejected": -6.921283721923828, "step": 3369 }, { "epoch": 0.52, "learning_rate": 1.167563981534312e-05, "logits/chosen": -1.3419041633605957, "logits/rejected": -1.6954929828643799, "logps/chosen": -163.17514038085938, "logps/rejected": -269.1915283203125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.25063514709472656, "rewards/margins": 6.462419509887695, "rewards/rejected": -6.713054656982422, "step": 3370 }, { "epoch": 0.52, "learning_rate": 1.1674906374811971e-05, "logits/chosen": -1.3145756721496582, "logits/rejected": -2.7466084957122803, "logps/chosen": -128.05096435546875, "logps/rejected": -410.9194030761719, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -2.5497217178344727, "rewards/margins": 4.891547203063965, "rewards/rejected": -7.4412689208984375, "step": 3371 }, { "epoch": 0.52, "learning_rate": 1.1674172934280825e-05, "logits/chosen": -2.6928436756134033, "logits/rejected": -2.950958251953125, "logps/chosen": -72.35010528564453, "logps/rejected": -200.13778686523438, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.0187592506408691, "rewards/margins": 4.539249897003174, "rewards/rejected": -5.558009147644043, "step": 3372 }, { "epoch": 0.52, "learning_rate": 1.1673439493749677e-05, "logits/chosen": -2.536447286605835, "logits/rejected": -2.8308870792388916, "logps/chosen": -305.975830078125, "logps/rejected": -450.42376708984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.5781266689300537, "rewards/margins": 7.873295783996582, "rewards/rejected": -10.451421737670898, "step": 3373 }, { "epoch": 0.52, "learning_rate": 1.1672706053218528e-05, "logits/chosen": -2.3156139850616455, "logits/rejected": -2.7666265964508057, "logps/chosen": -169.95066833496094, "logps/rejected": -252.22621154785156, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.2446503639221191, "rewards/margins": 5.787782192230225, "rewards/rejected": -7.032432556152344, "step": 3374 }, { "epoch": 0.52, "learning_rate": 1.167197261268738e-05, "logits/chosen": -2.7690229415893555, "logits/rejected": -2.856896162033081, "logps/chosen": -40.23115158081055, "logps/rejected": -180.76266479492188, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -1.022281289100647, "rewards/margins": 5.002731800079346, "rewards/rejected": -6.025012969970703, "step": 3375 }, { "epoch": 0.53, "learning_rate": 1.1671239172156232e-05, "logits/chosen": -2.8732752799987793, "logits/rejected": -2.7349462509155273, "logps/chosen": -156.18829345703125, "logps/rejected": -218.4344940185547, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": -2.0912578105926514, "rewards/margins": 4.52137565612793, "rewards/rejected": -6.612633228302002, "step": 3376 }, { "epoch": 0.53, "learning_rate": 1.1670505731625084e-05, "logits/chosen": -2.9460675716400146, "logits/rejected": -3.1454672813415527, "logps/chosen": -80.8520278930664, "logps/rejected": -159.54931640625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -1.3244264125823975, "rewards/margins": 4.310948848724365, "rewards/rejected": -5.635375499725342, "step": 3377 }, { "epoch": 0.53, "learning_rate": 1.1669772291093936e-05, "logits/chosen": -1.9901036024093628, "logits/rejected": -2.861827850341797, "logps/chosen": -133.35719299316406, "logps/rejected": -321.25897216796875, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -2.198228359222412, "rewards/margins": 3.7199525833129883, "rewards/rejected": -5.9181809425354, "step": 3378 }, { "epoch": 0.53, "learning_rate": 1.1669038850562788e-05, "logits/chosen": -1.9423824548721313, "logits/rejected": -3.1099355220794678, "logps/chosen": -72.0215835571289, "logps/rejected": -241.54747009277344, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.9206089973449707, "rewards/margins": 4.909846305847168, "rewards/rejected": -6.830455303192139, "step": 3379 }, { "epoch": 0.53, "learning_rate": 1.166830541003164e-05, "logits/chosen": -2.339359760284424, "logits/rejected": -2.865046977996826, "logps/chosen": -166.0049591064453, "logps/rejected": -228.43861389160156, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -1.6621414422988892, "rewards/margins": 4.7321882247924805, "rewards/rejected": -6.394329071044922, "step": 3380 }, { "epoch": 0.53, "learning_rate": 1.1667571969500493e-05, "logits/chosen": -1.812635898590088, "logits/rejected": -2.9389593601226807, "logps/chosen": -90.25627136230469, "logps/rejected": -156.87796020507812, "loss": 1.5211, "rewards/accuracies": 0.5, "rewards/chosen": -4.044776439666748, "rewards/margins": 0.24231719970703125, "rewards/rejected": -4.287093639373779, "step": 3381 }, { "epoch": 0.53, "learning_rate": 1.1666838528969345e-05, "logits/chosen": -2.9124131202697754, "logits/rejected": -2.6741068363189697, "logps/chosen": -374.0998840332031, "logps/rejected": -360.16937255859375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.46698611974716187, "rewards/margins": 6.846260070800781, "rewards/rejected": -6.379273891448975, "step": 3382 }, { "epoch": 0.53, "learning_rate": 1.1666105088438197e-05, "logits/chosen": -2.92240834236145, "logits/rejected": -1.3533003330230713, "logps/chosen": -262.49761962890625, "logps/rejected": -194.50759887695312, "loss": 0.2352, "rewards/accuracies": 1.0, "rewards/chosen": -2.2096810340881348, "rewards/margins": 3.2600066661834717, "rewards/rejected": -5.4696879386901855, "step": 3383 }, { "epoch": 0.53, "learning_rate": 1.1665371647907049e-05, "logits/chosen": -2.881985664367676, "logits/rejected": -2.5121986865997314, "logps/chosen": -1140.139404296875, "logps/rejected": -800.5170288085938, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -2.5981857776641846, "rewards/margins": 5.645195007324219, "rewards/rejected": -8.243380546569824, "step": 3384 }, { "epoch": 0.53, "learning_rate": 1.16646382073759e-05, "logits/chosen": -1.8716232776641846, "logits/rejected": -3.042346239089966, "logps/chosen": -59.418373107910156, "logps/rejected": -403.51055908203125, "loss": 0.1825, "rewards/accuracies": 1.0, "rewards/chosen": -2.1268129348754883, "rewards/margins": 2.9214963912963867, "rewards/rejected": -5.048309326171875, "step": 3385 }, { "epoch": 0.53, "learning_rate": 1.1663904766844753e-05, "logits/chosen": -2.7066726684570312, "logits/rejected": -2.8856122493743896, "logps/chosen": -629.6705932617188, "logps/rejected": -540.470947265625, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -2.003772020339966, "rewards/margins": 5.299172878265381, "rewards/rejected": -7.302945137023926, "step": 3386 }, { "epoch": 0.53, "learning_rate": 1.1663171326313605e-05, "logits/chosen": -1.9518766403198242, "logits/rejected": -2.4528164863586426, "logps/chosen": -122.59911346435547, "logps/rejected": -284.851318359375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.3361785411834717, "rewards/margins": 6.014181137084961, "rewards/rejected": -7.350359916687012, "step": 3387 }, { "epoch": 0.53, "learning_rate": 1.1662437885782456e-05, "logits/chosen": -2.997907876968384, "logits/rejected": -3.2605953216552734, "logps/chosen": -253.8899688720703, "logps/rejected": -307.7248840332031, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.0979725122451782, "rewards/margins": 5.852790832519531, "rewards/rejected": -6.950763702392578, "step": 3388 }, { "epoch": 0.53, "learning_rate": 1.1661704445251308e-05, "logits/chosen": -2.746570110321045, "logits/rejected": -2.54231858253479, "logps/chosen": -160.26556396484375, "logps/rejected": -120.55287170410156, "loss": 4.7366, "rewards/accuracies": 0.5, "rewards/chosen": -6.478814125061035, "rewards/margins": -2.232403039932251, "rewards/rejected": -4.246411323547363, "step": 3389 }, { "epoch": 0.53, "learning_rate": 1.1660971004720162e-05, "logits/chosen": -2.5850918292999268, "logits/rejected": -3.1282196044921875, "logps/chosen": -158.8896942138672, "logps/rejected": -328.4415283203125, "loss": 0.812, "rewards/accuracies": 0.5, "rewards/chosen": -1.9860312938690186, "rewards/margins": 4.095024108886719, "rewards/rejected": -6.081055641174316, "step": 3390 }, { "epoch": 0.53, "learning_rate": 1.1660237564189014e-05, "logits/chosen": -2.324312925338745, "logits/rejected": -1.7334678173065186, "logps/chosen": -1867.596435546875, "logps/rejected": -371.23876953125, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": -2.9403719902038574, "rewards/margins": 3.7742552757263184, "rewards/rejected": -6.714627265930176, "step": 3391 }, { "epoch": 0.53, "learning_rate": 1.1659504123657867e-05, "logits/chosen": -2.6299548149108887, "logits/rejected": -3.3816068172454834, "logps/chosen": -37.626953125, "logps/rejected": -264.5697937011719, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.7351992130279541, "rewards/margins": 5.594151496887207, "rewards/rejected": -6.32935094833374, "step": 3392 }, { "epoch": 0.53, "learning_rate": 1.165877068312672e-05, "logits/chosen": -1.873981237411499, "logits/rejected": -2.913315773010254, "logps/chosen": -165.44757080078125, "logps/rejected": -362.69732666015625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -2.0080323219299316, "rewards/margins": 5.215616226196289, "rewards/rejected": -7.2236480712890625, "step": 3393 }, { "epoch": 0.53, "learning_rate": 1.1658037242595571e-05, "logits/chosen": -1.6982287168502808, "logits/rejected": -3.093585729598999, "logps/chosen": -194.3467254638672, "logps/rejected": -431.334716796875, "loss": 4.7179, "rewards/accuracies": 0.5, "rewards/chosen": -6.136207580566406, "rewards/margins": -2.570359468460083, "rewards/rejected": -3.565847873687744, "step": 3394 }, { "epoch": 0.53, "learning_rate": 1.1657303802064423e-05, "logits/chosen": -3.0615289211273193, "logits/rejected": -2.7352352142333984, "logps/chosen": -125.98171997070312, "logps/rejected": -189.76083374023438, "loss": 0.0814, "rewards/accuracies": 1.0, "rewards/chosen": -2.399559497833252, "rewards/margins": 6.761351585388184, "rewards/rejected": -9.160910606384277, "step": 3395 }, { "epoch": 0.53, "learning_rate": 1.1656570361533275e-05, "logits/chosen": -2.5705220699310303, "logits/rejected": -2.5661847591400146, "logps/chosen": -439.80389404296875, "logps/rejected": -489.1653747558594, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.4777405261993408, "rewards/margins": 6.4522504806518555, "rewards/rejected": -7.929991245269775, "step": 3396 }, { "epoch": 0.53, "learning_rate": 1.1655836921002127e-05, "logits/chosen": -2.935971975326538, "logits/rejected": -3.49495530128479, "logps/chosen": -71.47772216796875, "logps/rejected": -221.74322509765625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.8335968255996704, "rewards/margins": 4.8242340087890625, "rewards/rejected": -5.657831192016602, "step": 3397 }, { "epoch": 0.53, "learning_rate": 1.1655103480470979e-05, "logits/chosen": -2.1177070140838623, "logits/rejected": -3.175832748413086, "logps/chosen": -117.33476257324219, "logps/rejected": -463.05987548828125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0392041206359863, "rewards/margins": 7.891917705535889, "rewards/rejected": -8.931121826171875, "step": 3398 }, { "epoch": 0.53, "learning_rate": 1.1654370039939832e-05, "logits/chosen": -1.6865997314453125, "logits/rejected": -1.6532354354858398, "logps/chosen": -327.3823547363281, "logps/rejected": -366.75262451171875, "loss": 1.5524, "rewards/accuracies": 0.5, "rewards/chosen": -4.300071716308594, "rewards/margins": 0.8579978942871094, "rewards/rejected": -5.158069610595703, "step": 3399 }, { "epoch": 0.53, "learning_rate": 1.1653636599408684e-05, "logits/chosen": -2.8144311904907227, "logits/rejected": -2.942883014678955, "logps/chosen": -418.39654541015625, "logps/rejected": -473.4082946777344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8054702877998352, "rewards/margins": 8.594095230102539, "rewards/rejected": -9.399564743041992, "step": 3400 }, { "epoch": 0.53, "learning_rate": 1.1652903158877536e-05, "logits/chosen": -2.6570372581481934, "logits/rejected": -2.8214738368988037, "logps/chosen": -202.95632934570312, "logps/rejected": -237.43142700195312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.38871538639068604, "rewards/margins": 6.898426055908203, "rewards/rejected": -7.287141799926758, "step": 3401 }, { "epoch": 0.53, "learning_rate": 1.1652169718346388e-05, "logits/chosen": -2.0777735710144043, "logits/rejected": -2.8824472427368164, "logps/chosen": -169.75131225585938, "logps/rejected": -393.2172546386719, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.8768886923789978, "rewards/margins": 6.519238471984863, "rewards/rejected": -7.396127700805664, "step": 3402 }, { "epoch": 0.53, "learning_rate": 1.165143627781524e-05, "logits/chosen": -2.3597476482391357, "logits/rejected": -2.914746046066284, "logps/chosen": -718.015380859375, "logps/rejected": -496.7875671386719, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8515808582305908, "rewards/margins": 6.417391777038574, "rewards/rejected": -7.268972873687744, "step": 3403 }, { "epoch": 0.53, "learning_rate": 1.1650702837284092e-05, "logits/chosen": -2.301666259765625, "logits/rejected": -2.91446852684021, "logps/chosen": -74.14744567871094, "logps/rejected": -246.36215209960938, "loss": 0.076, "rewards/accuracies": 1.0, "rewards/chosen": -4.184666156768799, "rewards/margins": 2.8718299865722656, "rewards/rejected": -7.056495666503906, "step": 3404 }, { "epoch": 0.53, "learning_rate": 1.1649969396752943e-05, "logits/chosen": -2.4640588760375977, "logits/rejected": -2.7833149433135986, "logps/chosen": -216.58729553222656, "logps/rejected": -130.3759765625, "loss": 3.1665, "rewards/accuracies": 0.5, "rewards/chosen": -6.113342761993408, "rewards/margins": -1.842768669128418, "rewards/rejected": -4.27057409286499, "step": 3405 }, { "epoch": 0.53, "learning_rate": 1.1649235956221795e-05, "logits/chosen": -1.2395520210266113, "logits/rejected": -2.5771586894989014, "logps/chosen": -153.29637145996094, "logps/rejected": -326.12982177734375, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.7042747735977173, "rewards/margins": 5.770353317260742, "rewards/rejected": -7.47462797164917, "step": 3406 }, { "epoch": 0.53, "learning_rate": 1.1648502515690647e-05, "logits/chosen": -2.04911208152771, "logits/rejected": -2.7545323371887207, "logps/chosen": -117.54441833496094, "logps/rejected": -314.087158203125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -2.264537811279297, "rewards/margins": 5.273948669433594, "rewards/rejected": -7.538486480712891, "step": 3407 }, { "epoch": 0.53, "learning_rate": 1.16477690751595e-05, "logits/chosen": -2.5116159915924072, "logits/rejected": -3.1878609657287598, "logps/chosen": -145.8280029296875, "logps/rejected": -417.39154052734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5105628967285156, "rewards/margins": 10.36044692993164, "rewards/rejected": -10.871009826660156, "step": 3408 }, { "epoch": 0.53, "learning_rate": 1.1647035634628353e-05, "logits/chosen": -3.0975735187530518, "logits/rejected": -2.374875545501709, "logps/chosen": -466.71405029296875, "logps/rejected": -413.2432861328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.16326674818992615, "rewards/margins": 8.648214340209961, "rewards/rejected": -8.484947204589844, "step": 3409 }, { "epoch": 0.53, "learning_rate": 1.1646302194097205e-05, "logits/chosen": -1.5634022951126099, "logits/rejected": -3.088501214981079, "logps/chosen": -56.74915313720703, "logps/rejected": -272.05267333984375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.859325647354126, "rewards/margins": 4.956204891204834, "rewards/rejected": -6.815530776977539, "step": 3410 }, { "epoch": 0.53, "learning_rate": 1.1645568753566056e-05, "logits/chosen": -2.1433446407318115, "logits/rejected": -3.005443811416626, "logps/chosen": -104.07266235351562, "logps/rejected": -226.27545166015625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.871948003768921, "rewards/margins": 6.362455368041992, "rewards/rejected": -8.234403610229492, "step": 3411 }, { "epoch": 0.53, "learning_rate": 1.1644835313034908e-05, "logits/chosen": -2.7634193897247314, "logits/rejected": -2.7672367095947266, "logps/chosen": -264.1006774902344, "logps/rejected": -222.72300720214844, "loss": 1.9379, "rewards/accuracies": 0.5, "rewards/chosen": -4.172083854675293, "rewards/margins": 0.4996190071105957, "rewards/rejected": -4.671702861785889, "step": 3412 }, { "epoch": 0.53, "learning_rate": 1.164410187250376e-05, "logits/chosen": -2.165205478668213, "logits/rejected": -2.917921781539917, "logps/chosen": -39.003868103027344, "logps/rejected": -245.32537841796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8446671962738037, "rewards/margins": 6.729302883148193, "rewards/rejected": -7.573969841003418, "step": 3413 }, { "epoch": 0.53, "learning_rate": 1.1643368431972612e-05, "logits/chosen": -2.9035236835479736, "logits/rejected": -2.8857905864715576, "logps/chosen": -136.04083251953125, "logps/rejected": -144.51324462890625, "loss": 2.388, "rewards/accuracies": 0.5, "rewards/chosen": -3.661996841430664, "rewards/margins": -1.4757989645004272, "rewards/rejected": -2.1861977577209473, "step": 3414 }, { "epoch": 0.53, "learning_rate": 1.1642634991441464e-05, "logits/chosen": -2.5439109802246094, "logits/rejected": -3.2396621704101562, "logps/chosen": -105.59004211425781, "logps/rejected": -516.380859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.3291358947753906, "rewards/margins": 7.428591728210449, "rewards/rejected": -7.75772762298584, "step": 3415 }, { "epoch": 0.53, "learning_rate": 1.1641901550910316e-05, "logits/chosen": -2.4331443309783936, "logits/rejected": -2.8072407245635986, "logps/chosen": -154.68760681152344, "logps/rejected": -228.52267456054688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.17600297927856445, "rewards/margins": 9.091043472290039, "rewards/rejected": -8.915040016174316, "step": 3416 }, { "epoch": 0.53, "learning_rate": 1.164116811037917e-05, "logits/chosen": -3.1675374507904053, "logits/rejected": -3.057926654815674, "logps/chosen": -456.7770690917969, "logps/rejected": -478.37957763671875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4412841796875, "rewards/margins": 6.379380702972412, "rewards/rejected": -7.82066535949707, "step": 3417 }, { "epoch": 0.53, "learning_rate": 1.1640434669848021e-05, "logits/chosen": -2.0715463161468506, "logits/rejected": -3.0489978790283203, "logps/chosen": -53.22914123535156, "logps/rejected": -252.45285034179688, "loss": 1.3957, "rewards/accuracies": 0.5, "rewards/chosen": -2.147151470184326, "rewards/margins": -0.3761630654335022, "rewards/rejected": -1.7709884643554688, "step": 3418 }, { "epoch": 0.53, "learning_rate": 1.1639701229316873e-05, "logits/chosen": -2.0405869483947754, "logits/rejected": -3.021898031234741, "logps/chosen": -147.05455017089844, "logps/rejected": -322.4982604980469, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9159157276153564, "rewards/margins": 6.260214805603027, "rewards/rejected": -7.176130771636963, "step": 3419 }, { "epoch": 0.53, "learning_rate": 1.1638967788785725e-05, "logits/chosen": -2.9333362579345703, "logits/rejected": -2.711493492126465, "logps/chosen": -248.47994995117188, "logps/rejected": -301.1095275878906, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.24447935819625854, "rewards/margins": 4.921927452087402, "rewards/rejected": -5.166407108306885, "step": 3420 }, { "epoch": 0.53, "learning_rate": 1.1638234348254577e-05, "logits/chosen": -2.6491854190826416, "logits/rejected": -3.245434522628784, "logps/chosen": -255.79812622070312, "logps/rejected": -498.8031311035156, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6347297430038452, "rewards/margins": 7.144049644470215, "rewards/rejected": -7.778779983520508, "step": 3421 }, { "epoch": 0.53, "learning_rate": 1.1637500907723429e-05, "logits/chosen": -2.4921600818634033, "logits/rejected": -3.0786373615264893, "logps/chosen": -62.2744026184082, "logps/rejected": -337.2582092285156, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7556187510490417, "rewards/margins": 7.316842555999756, "rewards/rejected": -8.072461128234863, "step": 3422 }, { "epoch": 0.53, "learning_rate": 1.163676746719228e-05, "logits/chosen": -3.023535966873169, "logits/rejected": -2.391693592071533, "logps/chosen": -506.3330078125, "logps/rejected": -437.6559753417969, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0009269714355469, "rewards/margins": 6.430296421051025, "rewards/rejected": -7.431223392486572, "step": 3423 }, { "epoch": 0.53, "learning_rate": 1.1636034026661133e-05, "logits/chosen": -2.8785581588745117, "logits/rejected": -2.367705821990967, "logps/chosen": -592.0740966796875, "logps/rejected": -394.655517578125, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.632930040359497, "rewards/margins": 4.150170803070068, "rewards/rejected": -5.7831010818481445, "step": 3424 }, { "epoch": 0.53, "learning_rate": 1.1635300586129986e-05, "logits/chosen": -2.3233704566955566, "logits/rejected": -3.096971035003662, "logps/chosen": -209.19482421875, "logps/rejected": -334.0345153808594, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.32209548354148865, "rewards/margins": 7.111188888549805, "rewards/rejected": -7.433283805847168, "step": 3425 }, { "epoch": 0.53, "learning_rate": 1.1634567145598838e-05, "logits/chosen": -2.109605550765991, "logits/rejected": -2.7770960330963135, "logps/chosen": -44.76580047607422, "logps/rejected": -233.5640869140625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.22806578874588013, "rewards/margins": 5.423871040344238, "rewards/rejected": -5.6519365310668945, "step": 3426 }, { "epoch": 0.53, "learning_rate": 1.1633833705067692e-05, "logits/chosen": -2.807441234588623, "logits/rejected": -2.747459650039673, "logps/chosen": -356.600341796875, "logps/rejected": -278.5797119140625, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.291491985321045, "rewards/margins": 3.941420078277588, "rewards/rejected": -7.232912063598633, "step": 3427 }, { "epoch": 0.53, "learning_rate": 1.1633100264536543e-05, "logits/chosen": -2.912094831466675, "logits/rejected": -3.302216053009033, "logps/chosen": -1094.34423828125, "logps/rejected": -807.5039672851562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6657440662384033, "rewards/margins": 10.526559829711914, "rewards/rejected": -12.192303657531738, "step": 3428 }, { "epoch": 0.53, "learning_rate": 1.1632366824005395e-05, "logits/chosen": -2.800433874130249, "logits/rejected": -2.8805506229400635, "logps/chosen": -39.25895690917969, "logps/rejected": -137.61289978027344, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.1059216856956482, "rewards/margins": 4.958322048187256, "rewards/rejected": -5.064243793487549, "step": 3429 }, { "epoch": 0.53, "learning_rate": 1.1631633383474247e-05, "logits/chosen": -0.7964562773704529, "logits/rejected": -2.596872568130493, "logps/chosen": -93.43801879882812, "logps/rejected": -366.6302490234375, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -2.7411680221557617, "rewards/margins": 4.309841156005859, "rewards/rejected": -7.051009178161621, "step": 3430 }, { "epoch": 0.53, "learning_rate": 1.1630899942943099e-05, "logits/chosen": -2.4689114093780518, "logits/rejected": -3.027756452560425, "logps/chosen": -261.39666748046875, "logps/rejected": -389.1038513183594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.9595688581466675, "rewards/margins": 6.326014041900635, "rewards/rejected": -8.285582542419434, "step": 3431 }, { "epoch": 0.53, "learning_rate": 1.1630166502411951e-05, "logits/chosen": -3.0781586170196533, "logits/rejected": -2.214446544647217, "logps/chosen": -114.46694946289062, "logps/rejected": -32.94087219238281, "loss": 0.9106, "rewards/accuracies": 0.5, "rewards/chosen": -2.203268051147461, "rewards/margins": -0.35441842675209045, "rewards/rejected": -1.8488496541976929, "step": 3432 }, { "epoch": 0.53, "learning_rate": 1.1629433061880803e-05, "logits/chosen": -2.5372114181518555, "logits/rejected": -3.40946626663208, "logps/chosen": -77.02204132080078, "logps/rejected": -429.1086730957031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.22314491868019104, "rewards/margins": 9.264932632446289, "rewards/rejected": -9.041788101196289, "step": 3433 }, { "epoch": 0.53, "learning_rate": 1.1628699621349656e-05, "logits/chosen": -2.7141852378845215, "logits/rejected": -2.40643310546875, "logps/chosen": -107.37384033203125, "logps/rejected": -320.30859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.1851723194122314, "rewards/margins": 7.335677146911621, "rewards/rejected": -8.52085018157959, "step": 3434 }, { "epoch": 0.53, "learning_rate": 1.1627966180818508e-05, "logits/chosen": -2.771486520767212, "logits/rejected": -2.0852887630462646, "logps/chosen": -446.9369201660156, "logps/rejected": -433.5301208496094, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.377798318862915, "rewards/margins": 7.550999641418457, "rewards/rejected": -8.92879867553711, "step": 3435 }, { "epoch": 0.53, "learning_rate": 1.162723274028736e-05, "logits/chosen": -2.400233745574951, "logits/rejected": -2.774965763092041, "logps/chosen": -212.41851806640625, "logps/rejected": -248.476806640625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.5299999713897705, "rewards/margins": 6.619028568267822, "rewards/rejected": -8.149028778076172, "step": 3436 }, { "epoch": 0.53, "learning_rate": 1.1626499299756212e-05, "logits/chosen": -2.868530750274658, "logits/rejected": -3.212496519088745, "logps/chosen": -93.29337310791016, "logps/rejected": -309.6393127441406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.49870362877845764, "rewards/margins": 7.735462665557861, "rewards/rejected": -8.234166145324707, "step": 3437 }, { "epoch": 0.53, "learning_rate": 1.1625765859225064e-05, "logits/chosen": -2.4857280254364014, "logits/rejected": -3.0792980194091797, "logps/chosen": -148.78155517578125, "logps/rejected": -373.26434326171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.46128082275390625, "rewards/margins": 6.235443115234375, "rewards/rejected": -6.696723937988281, "step": 3438 }, { "epoch": 0.53, "learning_rate": 1.1625032418693916e-05, "logits/chosen": -2.2799582481384277, "logits/rejected": -2.8571579456329346, "logps/chosen": -543.5736083984375, "logps/rejected": -540.7808227539062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.85675048828125, "rewards/margins": 8.505938529968262, "rewards/rejected": -10.362689018249512, "step": 3439 }, { "epoch": 0.53, "learning_rate": 1.1624298978162768e-05, "logits/chosen": -3.0488638877868652, "logits/rejected": -3.226865530014038, "logps/chosen": -135.23361206054688, "logps/rejected": -241.5887908935547, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9031057357788086, "rewards/margins": 7.5714921951293945, "rewards/rejected": -8.474597930908203, "step": 3440 }, { "epoch": 0.54, "learning_rate": 1.162356553763162e-05, "logits/chosen": -3.2121083736419678, "logits/rejected": -2.7237865924835205, "logps/chosen": -544.222412109375, "logps/rejected": -351.16156005859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9500102400779724, "rewards/margins": 6.9182915687561035, "rewards/rejected": -7.8683013916015625, "step": 3441 }, { "epoch": 0.54, "learning_rate": 1.1622832097100471e-05, "logits/chosen": -2.985109329223633, "logits/rejected": -2.9833052158355713, "logps/chosen": -167.45481872558594, "logps/rejected": -292.33935546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.848796010017395, "rewards/margins": 9.140785217285156, "rewards/rejected": -9.989582061767578, "step": 3442 }, { "epoch": 0.54, "learning_rate": 1.1622098656569325e-05, "logits/chosen": -3.0897610187530518, "logits/rejected": -2.467101573944092, "logps/chosen": -280.0877990722656, "logps/rejected": -244.69216918945312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5937225818634033, "rewards/margins": 8.92216682434082, "rewards/rejected": -10.515889167785645, "step": 3443 }, { "epoch": 0.54, "learning_rate": 1.1621365216038177e-05, "logits/chosen": -1.7223501205444336, "logits/rejected": -3.022742509841919, "logps/chosen": -35.683082580566406, "logps/rejected": -221.72817993164062, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.20995460450649261, "rewards/margins": 6.933117866516113, "rewards/rejected": -6.723163604736328, "step": 3444 }, { "epoch": 0.54, "learning_rate": 1.1620631775507029e-05, "logits/chosen": -2.465960741043091, "logits/rejected": -2.816091537475586, "logps/chosen": -118.94483947753906, "logps/rejected": -169.99005126953125, "loss": 0.6245, "rewards/accuracies": 0.5, "rewards/chosen": -0.5782287120819092, "rewards/margins": 2.4686341285705566, "rewards/rejected": -3.046862840652466, "step": 3445 }, { "epoch": 0.54, "learning_rate": 1.161989833497588e-05, "logits/chosen": -1.7474303245544434, "logits/rejected": -2.9657835960388184, "logps/chosen": -33.131649017333984, "logps/rejected": -257.270263671875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.022214703261852264, "rewards/margins": 7.628462791442871, "rewards/rejected": -7.650677680969238, "step": 3446 }, { "epoch": 0.54, "learning_rate": 1.1619164894444733e-05, "logits/chosen": -2.4743621349334717, "logits/rejected": -3.0787417888641357, "logps/chosen": -154.51800537109375, "logps/rejected": -281.8847351074219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.32682913541793823, "rewards/margins": 8.611490249633789, "rewards/rejected": -8.938318252563477, "step": 3447 }, { "epoch": 0.54, "learning_rate": 1.1618431453913584e-05, "logits/chosen": -2.2166924476623535, "logits/rejected": -2.7114903926849365, "logps/chosen": -202.98098754882812, "logps/rejected": -213.16075134277344, "loss": 2.6005, "rewards/accuracies": 0.5, "rewards/chosen": -3.256622314453125, "rewards/margins": 0.19865655899047852, "rewards/rejected": -3.4552788734436035, "step": 3448 }, { "epoch": 0.54, "learning_rate": 1.1617698013382436e-05, "logits/chosen": -3.066206932067871, "logits/rejected": -2.6252410411834717, "logps/chosen": -188.67120361328125, "logps/rejected": -61.65189743041992, "loss": 1.9056, "rewards/accuracies": 0.0, "rewards/chosen": -2.738938570022583, "rewards/margins": -1.6986682415008545, "rewards/rejected": -1.040270209312439, "step": 3449 }, { "epoch": 0.54, "learning_rate": 1.1616964572851288e-05, "logits/chosen": -1.6584521532058716, "logits/rejected": -2.6876823902130127, "logps/chosen": -211.2166748046875, "logps/rejected": -400.9697265625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4363715648651123, "rewards/margins": 6.526395320892334, "rewards/rejected": -8.962766647338867, "step": 3450 }, { "epoch": 0.54, "learning_rate": 1.161623113232014e-05, "logits/chosen": -2.9318981170654297, "logits/rejected": -3.014552354812622, "logps/chosen": -367.4774169921875, "logps/rejected": -450.876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.787947177886963, "rewards/margins": 8.626142501831055, "rewards/rejected": -10.414090156555176, "step": 3451 }, { "epoch": 0.54, "learning_rate": 1.1615497691788994e-05, "logits/chosen": -1.5321753025054932, "logits/rejected": -2.6833441257476807, "logps/chosen": -139.2526092529297, "logps/rejected": -402.79376220703125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.492462158203125, "rewards/margins": 8.515059471130371, "rewards/rejected": -10.007521629333496, "step": 3452 }, { "epoch": 0.54, "learning_rate": 1.1614764251257845e-05, "logits/chosen": -2.683471202850342, "logits/rejected": -1.7415153980255127, "logps/chosen": -709.4216918945312, "logps/rejected": -471.70916748046875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.2249202728271484, "rewards/margins": 6.568307876586914, "rewards/rejected": -7.7932281494140625, "step": 3453 }, { "epoch": 0.54, "learning_rate": 1.1614030810726697e-05, "logits/chosen": -2.8128764629364014, "logits/rejected": -3.1763343811035156, "logps/chosen": -97.96482849121094, "logps/rejected": -188.4627227783203, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -1.302789568901062, "rewards/margins": 4.798450946807861, "rewards/rejected": -6.101240634918213, "step": 3454 }, { "epoch": 0.54, "learning_rate": 1.161329737019555e-05, "logits/chosen": -3.0056674480438232, "logits/rejected": -2.1599278450012207, "logps/chosen": -120.24142456054688, "logps/rejected": -226.89634704589844, "loss": 0.1152, "rewards/accuracies": 1.0, "rewards/chosen": -2.072360038757324, "rewards/margins": 5.410496711730957, "rewards/rejected": -7.482856750488281, "step": 3455 }, { "epoch": 0.54, "learning_rate": 1.1612563929664401e-05, "logits/chosen": -2.9909090995788574, "logits/rejected": -3.093597650527954, "logps/chosen": -333.4172058105469, "logps/rejected": -335.81634521484375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.1135199069976807, "rewards/margins": 5.246871471405029, "rewards/rejected": -6.360391616821289, "step": 3456 }, { "epoch": 0.54, "learning_rate": 1.1611830489133253e-05, "logits/chosen": -2.556467056274414, "logits/rejected": -2.4761979579925537, "logps/chosen": -117.0707015991211, "logps/rejected": -167.9081573486328, "loss": 0.7509, "rewards/accuracies": 0.5, "rewards/chosen": -1.5422999858856201, "rewards/margins": 3.61367130279541, "rewards/rejected": -5.155971527099609, "step": 3457 }, { "epoch": 0.54, "learning_rate": 1.1611097048602105e-05, "logits/chosen": -2.3287179470062256, "logits/rejected": -3.251267433166504, "logps/chosen": -75.60289001464844, "logps/rejected": -364.59710693359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.998434841632843, "rewards/margins": 8.514375686645508, "rewards/rejected": -9.512809753417969, "step": 3458 }, { "epoch": 0.54, "learning_rate": 1.1610363608070958e-05, "logits/chosen": -1.2547557353973389, "logits/rejected": -2.917773962020874, "logps/chosen": -52.95667266845703, "logps/rejected": -272.566162109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8785282373428345, "rewards/margins": 7.717265605926514, "rewards/rejected": -8.595793724060059, "step": 3459 }, { "epoch": 0.54, "learning_rate": 1.160963016753981e-05, "logits/chosen": -1.5672255754470825, "logits/rejected": -2.927455186843872, "logps/chosen": -63.44955062866211, "logps/rejected": -337.16387939453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.008098840713501, "rewards/margins": 8.48904037475586, "rewards/rejected": -9.497138977050781, "step": 3460 }, { "epoch": 0.54, "learning_rate": 1.1608896727008664e-05, "logits/chosen": -2.747596263885498, "logits/rejected": -2.9191644191741943, "logps/chosen": -163.15927124023438, "logps/rejected": -324.39764404296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1799649000167847, "rewards/margins": 8.013736724853516, "rewards/rejected": -9.19370174407959, "step": 3461 }, { "epoch": 0.54, "learning_rate": 1.1608163286477516e-05, "logits/chosen": -2.705493688583374, "logits/rejected": -2.061882734298706, "logps/chosen": -88.81282806396484, "logps/rejected": -133.30780029296875, "loss": 0.2952, "rewards/accuracies": 1.0, "rewards/chosen": -1.53666353225708, "rewards/margins": 2.739985466003418, "rewards/rejected": -4.276648998260498, "step": 3462 }, { "epoch": 0.54, "learning_rate": 1.1607429845946368e-05, "logits/chosen": -1.1454603672027588, "logits/rejected": -2.7063844203948975, "logps/chosen": -92.12067413330078, "logps/rejected": -396.4898681640625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.190555453300476, "rewards/margins": 7.243305206298828, "rewards/rejected": -8.433860778808594, "step": 3463 }, { "epoch": 0.54, "learning_rate": 1.160669640541522e-05, "logits/chosen": -2.791677236557007, "logits/rejected": -3.330444812774658, "logps/chosen": -113.38716125488281, "logps/rejected": -297.648681640625, "loss": 2.3365, "rewards/accuracies": 0.5, "rewards/chosen": -4.114426612854004, "rewards/margins": -0.4774996042251587, "rewards/rejected": -3.6369271278381348, "step": 3464 }, { "epoch": 0.54, "learning_rate": 1.1605962964884071e-05, "logits/chosen": -2.1558477878570557, "logits/rejected": -3.056229829788208, "logps/chosen": -128.6741943359375, "logps/rejected": -689.2188110351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.06919746100902557, "rewards/margins": 12.533220291137695, "rewards/rejected": -12.46402359008789, "step": 3465 }, { "epoch": 0.54, "learning_rate": 1.1605229524352923e-05, "logits/chosen": -2.160907506942749, "logits/rejected": -2.980403184890747, "logps/chosen": -196.9056396484375, "logps/rejected": -339.7081604003906, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.8262115716934204, "rewards/margins": 5.8595805168151855, "rewards/rejected": -7.685791969299316, "step": 3466 }, { "epoch": 0.54, "learning_rate": 1.1604496083821775e-05, "logits/chosen": -2.5645055770874023, "logits/rejected": -2.8628811836242676, "logps/chosen": -395.4340515136719, "logps/rejected": -351.6625061035156, "loss": 2.8153, "rewards/accuracies": 0.5, "rewards/chosen": -4.045068740844727, "rewards/margins": 1.8643913269042969, "rewards/rejected": -5.909460067749023, "step": 3467 }, { "epoch": 0.54, "learning_rate": 1.1603762643290627e-05, "logits/chosen": -1.0431491136550903, "logits/rejected": -2.7255923748016357, "logps/chosen": -48.06330871582031, "logps/rejected": -330.0379943847656, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.3896335363388062, "rewards/margins": 5.710513114929199, "rewards/rejected": -7.100146293640137, "step": 3468 }, { "epoch": 0.54, "learning_rate": 1.1603029202759479e-05, "logits/chosen": -2.4368388652801514, "logits/rejected": -2.9844205379486084, "logps/chosen": -171.43609619140625, "logps/rejected": -385.74090576171875, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.4244126081466675, "rewards/margins": 7.230169296264648, "rewards/rejected": -8.654582023620605, "step": 3469 }, { "epoch": 0.54, "learning_rate": 1.1602295762228332e-05, "logits/chosen": -2.485818386077881, "logits/rejected": -2.414824962615967, "logps/chosen": -252.68325805664062, "logps/rejected": -242.65863037109375, "loss": 0.4745, "rewards/accuracies": 0.5, "rewards/chosen": -2.310185432434082, "rewards/margins": 4.093625068664551, "rewards/rejected": -6.403810501098633, "step": 3470 }, { "epoch": 0.54, "learning_rate": 1.1601562321697184e-05, "logits/chosen": -2.938164234161377, "logits/rejected": -1.927390694618225, "logps/chosen": -180.26206970214844, "logps/rejected": -211.69964599609375, "loss": 1.8073, "rewards/accuracies": 0.5, "rewards/chosen": -1.8750187158584595, "rewards/margins": 3.0854740142822266, "rewards/rejected": -4.960493087768555, "step": 3471 }, { "epoch": 0.54, "learning_rate": 1.1600828881166036e-05, "logits/chosen": -3.164149761199951, "logits/rejected": -3.3466367721557617, "logps/chosen": -373.42559814453125, "logps/rejected": -310.2767333984375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.7319514751434326, "rewards/margins": 5.232081890106201, "rewards/rejected": -5.964033126831055, "step": 3472 }, { "epoch": 0.54, "learning_rate": 1.1600095440634888e-05, "logits/chosen": -2.5266270637512207, "logits/rejected": -2.873009204864502, "logps/chosen": -752.4234619140625, "logps/rejected": -767.9119873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6699981689453125, "rewards/margins": 8.805009841918945, "rewards/rejected": -9.475008010864258, "step": 3473 }, { "epoch": 0.54, "learning_rate": 1.159936200010374e-05, "logits/chosen": -3.0077908039093018, "logits/rejected": -1.615397334098816, "logps/chosen": -683.1397094726562, "logps/rejected": -357.6563720703125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.8446075916290283, "rewards/margins": 6.146820068359375, "rewards/rejected": -7.991427898406982, "step": 3474 }, { "epoch": 0.54, "learning_rate": 1.1598628559572592e-05, "logits/chosen": -2.329824447631836, "logits/rejected": -2.897813320159912, "logps/chosen": -85.41127014160156, "logps/rejected": -349.5218505859375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -2.850623369216919, "rewards/margins": 5.2632598876953125, "rewards/rejected": -8.113883018493652, "step": 3475 }, { "epoch": 0.54, "learning_rate": 1.1597895119041444e-05, "logits/chosen": -2.3541061878204346, "logits/rejected": -2.840308666229248, "logps/chosen": -260.68182373046875, "logps/rejected": -366.722900390625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.106670379638672, "rewards/margins": 6.923776626586914, "rewards/rejected": -9.030447006225586, "step": 3476 }, { "epoch": 0.54, "learning_rate": 1.1597161678510296e-05, "logits/chosen": -1.5917237997055054, "logits/rejected": -2.998105049133301, "logps/chosen": -52.24812316894531, "logps/rejected": -237.10995483398438, "loss": 0.2386, "rewards/accuracies": 1.0, "rewards/chosen": -2.164651870727539, "rewards/margins": 4.585147380828857, "rewards/rejected": -6.7497992515563965, "step": 3477 }, { "epoch": 0.54, "learning_rate": 1.1596428237979148e-05, "logits/chosen": -2.592811107635498, "logits/rejected": -1.9535621404647827, "logps/chosen": -77.00275421142578, "logps/rejected": -125.34037017822266, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8306192755699158, "rewards/margins": 6.349936008453369, "rewards/rejected": -7.18055534362793, "step": 3478 }, { "epoch": 0.54, "learning_rate": 1.1595694797448001e-05, "logits/chosen": -1.6626249551773071, "logits/rejected": -2.890744686126709, "logps/chosen": -54.80449676513672, "logps/rejected": -334.70977783203125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.909428596496582, "rewards/margins": 7.44317626953125, "rewards/rejected": -9.352604866027832, "step": 3479 }, { "epoch": 0.54, "learning_rate": 1.1594961356916853e-05, "logits/chosen": -2.632415771484375, "logits/rejected": -2.9376819133758545, "logps/chosen": -213.79116821289062, "logps/rejected": -315.2803955078125, "loss": 0.2652, "rewards/accuracies": 1.0, "rewards/chosen": -2.4149115085601807, "rewards/margins": 2.5914623737335205, "rewards/rejected": -5.006373882293701, "step": 3480 }, { "epoch": 0.54, "learning_rate": 1.1594227916385705e-05, "logits/chosen": -1.8391002416610718, "logits/rejected": -2.762331485748291, "logps/chosen": -42.40162658691406, "logps/rejected": -189.32435607910156, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.576233386993408, "rewards/margins": 5.041039943695068, "rewards/rejected": -7.617273330688477, "step": 3481 }, { "epoch": 0.54, "learning_rate": 1.1593494475854557e-05, "logits/chosen": -2.9896738529205322, "logits/rejected": -2.8905370235443115, "logps/chosen": -107.83811950683594, "logps/rejected": -86.56314849853516, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -2.209949016571045, "rewards/margins": 3.6824610233306885, "rewards/rejected": -5.8924102783203125, "step": 3482 }, { "epoch": 0.54, "learning_rate": 1.1592761035323409e-05, "logits/chosen": -3.0519511699676514, "logits/rejected": -2.4911386966705322, "logps/chosen": -358.04608154296875, "logps/rejected": -248.24356079101562, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.025099992752075, "rewards/margins": 6.753576278686523, "rewards/rejected": -8.77867603302002, "step": 3483 }, { "epoch": 0.54, "learning_rate": 1.159202759479226e-05, "logits/chosen": -1.875014066696167, "logits/rejected": -3.0921146869659424, "logps/chosen": -166.27810668945312, "logps/rejected": -274.02728271484375, "loss": 2.1618, "rewards/accuracies": 0.5, "rewards/chosen": -5.294458866119385, "rewards/margins": -1.467753529548645, "rewards/rejected": -3.82670521736145, "step": 3484 }, { "epoch": 0.54, "learning_rate": 1.1591294154261112e-05, "logits/chosen": -2.5526037216186523, "logits/rejected": -2.68902850151062, "logps/chosen": -199.79580688476562, "logps/rejected": -282.45172119140625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.838111162185669, "rewards/margins": 8.667036056518555, "rewards/rejected": -11.505146980285645, "step": 3485 }, { "epoch": 0.54, "learning_rate": 1.1590560713729964e-05, "logits/chosen": -2.1450002193450928, "logits/rejected": -3.0735702514648438, "logps/chosen": -83.8709945678711, "logps/rejected": -247.758056640625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.8611423969268799, "rewards/margins": 5.9367499351501465, "rewards/rejected": -7.7978925704956055, "step": 3486 }, { "epoch": 0.54, "learning_rate": 1.1589827273198816e-05, "logits/chosen": -2.914597272872925, "logits/rejected": -2.422736644744873, "logps/chosen": -290.64605712890625, "logps/rejected": -285.85137939453125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.3592400550842285, "rewards/margins": 6.149870872497559, "rewards/rejected": -8.509111404418945, "step": 3487 }, { "epoch": 0.54, "learning_rate": 1.158909383266767e-05, "logits/chosen": -3.0329935550689697, "logits/rejected": -2.1295344829559326, "logps/chosen": -217.80435180664062, "logps/rejected": -191.32969665527344, "loss": 1.4812, "rewards/accuracies": 0.5, "rewards/chosen": -4.2876763343811035, "rewards/margins": 1.1235804557800293, "rewards/rejected": -5.411256790161133, "step": 3488 }, { "epoch": 0.54, "learning_rate": 1.1588360392136522e-05, "logits/chosen": -3.0377771854400635, "logits/rejected": -2.8893637657165527, "logps/chosen": -168.09115600585938, "logps/rejected": -166.389404296875, "loss": 3.3951, "rewards/accuracies": 0.5, "rewards/chosen": -5.009366035461426, "rewards/margins": -0.420093297958374, "rewards/rejected": -4.589272499084473, "step": 3489 }, { "epoch": 0.54, "learning_rate": 1.1587626951605373e-05, "logits/chosen": -2.5010147094726562, "logits/rejected": -2.8724417686462402, "logps/chosen": -49.1568603515625, "logps/rejected": -242.0810546875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4724656343460083, "rewards/margins": 7.988681316375732, "rewards/rejected": -9.461146354675293, "step": 3490 }, { "epoch": 0.54, "learning_rate": 1.1586893511074225e-05, "logits/chosen": -2.8240790367126465, "logits/rejected": -2.6388039588928223, "logps/chosen": -198.4793701171875, "logps/rejected": -328.1888122558594, "loss": 1.5438, "rewards/accuracies": 0.5, "rewards/chosen": -5.777134895324707, "rewards/margins": 2.488631010055542, "rewards/rejected": -8.265766143798828, "step": 3491 }, { "epoch": 0.54, "learning_rate": 1.1586160070543077e-05, "logits/chosen": -2.234938383102417, "logits/rejected": -2.748831272125244, "logps/chosen": -402.45806884765625, "logps/rejected": -518.04296875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.9130144119262695, "rewards/margins": 5.560850620269775, "rewards/rejected": -9.473864555358887, "step": 3492 }, { "epoch": 0.54, "learning_rate": 1.158542663001193e-05, "logits/chosen": -2.817262649536133, "logits/rejected": -2.479537010192871, "logps/chosen": -129.01846313476562, "logps/rejected": -154.02688598632812, "loss": 1.3582, "rewards/accuracies": 0.5, "rewards/chosen": -3.269854784011841, "rewards/margins": 1.5819365978240967, "rewards/rejected": -4.8517913818359375, "step": 3493 }, { "epoch": 0.54, "learning_rate": 1.1584693189480783e-05, "logits/chosen": -2.6981313228607178, "logits/rejected": -2.934420347213745, "logps/chosen": -108.74896240234375, "logps/rejected": -278.0558166503906, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.8234901428222656, "rewards/margins": 5.522915840148926, "rewards/rejected": -7.346405982971191, "step": 3494 }, { "epoch": 0.54, "learning_rate": 1.1583959748949635e-05, "logits/chosen": -2.9471633434295654, "logits/rejected": -2.739804744720459, "logps/chosen": -329.30029296875, "logps/rejected": -331.19451904296875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.290499687194824, "rewards/margins": 6.154997825622559, "rewards/rejected": -9.445497512817383, "step": 3495 }, { "epoch": 0.54, "learning_rate": 1.1583226308418486e-05, "logits/chosen": -2.735250473022461, "logits/rejected": -2.6268184185028076, "logps/chosen": -90.58234405517578, "logps/rejected": -154.0467529296875, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": -4.42264461517334, "rewards/margins": 3.543325662612915, "rewards/rejected": -7.965970516204834, "step": 3496 }, { "epoch": 0.54, "learning_rate": 1.158249286788734e-05, "logits/chosen": -2.03171443939209, "logits/rejected": -2.6476573944091797, "logps/chosen": -168.6415252685547, "logps/rejected": -290.38922119140625, "loss": 3.5196, "rewards/accuracies": 0.5, "rewards/chosen": -6.074625015258789, "rewards/margins": -0.5823254585266113, "rewards/rejected": -5.492299556732178, "step": 3497 }, { "epoch": 0.54, "learning_rate": 1.1581759427356192e-05, "logits/chosen": -2.6501379013061523, "logits/rejected": -2.7716591358184814, "logps/chosen": -181.1242218017578, "logps/rejected": -245.18601989746094, "loss": 0.5026, "rewards/accuracies": 0.5, "rewards/chosen": -2.8577866554260254, "rewards/margins": 3.2884716987609863, "rewards/rejected": -6.146258354187012, "step": 3498 }, { "epoch": 0.54, "learning_rate": 1.1581025986825044e-05, "logits/chosen": -1.812896490097046, "logits/rejected": -2.7723989486694336, "logps/chosen": -250.05886840820312, "logps/rejected": -364.28704833984375, "loss": 3.5957, "rewards/accuracies": 0.5, "rewards/chosen": -6.879014492034912, "rewards/margins": 1.64036226272583, "rewards/rejected": -8.519376754760742, "step": 3499 }, { "epoch": 0.54, "learning_rate": 1.1580292546293896e-05, "logits/chosen": -2.894545078277588, "logits/rejected": -3.1982851028442383, "logps/chosen": -289.12823486328125, "logps/rejected": -408.4632568359375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.435449242591858, "rewards/margins": 6.246557235717773, "rewards/rejected": -7.6820068359375, "step": 3500 }, { "epoch": 0.54, "learning_rate": 1.1579559105762748e-05, "logits/chosen": -2.3365061283111572, "logits/rejected": -2.98248028755188, "logps/chosen": -113.26289367675781, "logps/rejected": -266.22955322265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.176107168197632, "rewards/margins": 6.752254009246826, "rewards/rejected": -8.928360939025879, "step": 3501 }, { "epoch": 0.54, "learning_rate": 1.15788256652316e-05, "logits/chosen": -2.8996386528015137, "logits/rejected": -2.0821783542633057, "logps/chosen": -208.92117309570312, "logps/rejected": -132.25672912597656, "loss": 3.1255, "rewards/accuracies": 0.5, "rewards/chosen": -4.914212703704834, "rewards/margins": -1.8884881734848022, "rewards/rejected": -3.025724411010742, "step": 3502 }, { "epoch": 0.54, "learning_rate": 1.1578092224700451e-05, "logits/chosen": -2.562055826187134, "logits/rejected": -3.0246479511260986, "logps/chosen": -304.8929748535156, "logps/rejected": -221.2220916748047, "loss": 5.8738, "rewards/accuracies": 0.0, "rewards/chosen": -7.254706382751465, "rewards/margins": -5.869574546813965, "rewards/rejected": -1.3851318359375, "step": 3503 }, { "epoch": 0.54, "learning_rate": 1.1577358784169303e-05, "logits/chosen": -2.454489231109619, "logits/rejected": -2.832261085510254, "logps/chosen": -212.6045379638672, "logps/rejected": -379.95428466796875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.55397367477417, "rewards/margins": 5.307342529296875, "rewards/rejected": -8.861316680908203, "step": 3504 }, { "epoch": 0.55, "learning_rate": 1.1576625343638155e-05, "logits/chosen": -2.4541780948638916, "logits/rejected": -3.1753034591674805, "logps/chosen": -411.66143798828125, "logps/rejected": -523.7425537109375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.7401742935180664, "rewards/margins": 5.385821342468262, "rewards/rejected": -8.125995635986328, "step": 3505 }, { "epoch": 0.55, "learning_rate": 1.1575891903107009e-05, "logits/chosen": -3.0857224464416504, "logits/rejected": -3.4475553035736084, "logps/chosen": -80.70787048339844, "logps/rejected": -205.82537841796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5906769037246704, "rewards/margins": 7.081966400146484, "rewards/rejected": -8.672642707824707, "step": 3506 }, { "epoch": 0.55, "learning_rate": 1.157515846257586e-05, "logits/chosen": -2.2707340717315674, "logits/rejected": -2.82057785987854, "logps/chosen": -290.8055419921875, "logps/rejected": -430.9138488769531, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.4818276166915894, "rewards/margins": 4.145804405212402, "rewards/rejected": -5.627632141113281, "step": 3507 }, { "epoch": 0.55, "learning_rate": 1.1574425022044712e-05, "logits/chosen": -2.8401777744293213, "logits/rejected": -2.9381251335144043, "logps/chosen": -126.67498779296875, "logps/rejected": -353.698974609375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.1720558404922485, "rewards/margins": 6.8515753746032715, "rewards/rejected": -8.02363109588623, "step": 3508 }, { "epoch": 0.55, "learning_rate": 1.1573691581513564e-05, "logits/chosen": -2.772397518157959, "logits/rejected": -2.1641974449157715, "logps/chosen": -548.08642578125, "logps/rejected": -247.69903564453125, "loss": 5.9539, "rewards/accuracies": 0.5, "rewards/chosen": -7.813554763793945, "rewards/margins": -2.1855320930480957, "rewards/rejected": -5.62802267074585, "step": 3509 }, { "epoch": 0.55, "learning_rate": 1.1572958140982416e-05, "logits/chosen": -2.652979850769043, "logits/rejected": -3.162320137023926, "logps/chosen": -178.96971130371094, "logps/rejected": -292.1346435546875, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.58026123046875, "rewards/margins": 4.159853935241699, "rewards/rejected": -5.740115165710449, "step": 3510 }, { "epoch": 0.55, "learning_rate": 1.1572224700451268e-05, "logits/chosen": -0.9318769574165344, "logits/rejected": -2.93392014503479, "logps/chosen": -209.47003173828125, "logps/rejected": -526.1456909179688, "loss": 2.6652, "rewards/accuracies": 0.5, "rewards/chosen": -7.86381721496582, "rewards/margins": -0.5239763259887695, "rewards/rejected": -7.339840888977051, "step": 3511 }, { "epoch": 0.55, "learning_rate": 1.157149125992012e-05, "logits/chosen": -1.5498785972595215, "logits/rejected": -2.6987202167510986, "logps/chosen": -217.1848602294922, "logps/rejected": -401.5852355957031, "loss": 1.7359, "rewards/accuracies": 0.5, "rewards/chosen": -3.1220366954803467, "rewards/margins": 3.4794578552246094, "rewards/rejected": -6.601494312286377, "step": 3512 }, { "epoch": 0.55, "learning_rate": 1.1570757819388972e-05, "logits/chosen": -2.5864036083221436, "logits/rejected": -3.0035548210144043, "logps/chosen": -113.21055603027344, "logps/rejected": -214.99679565429688, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -2.3271191120147705, "rewards/margins": 4.410556793212891, "rewards/rejected": -6.737675666809082, "step": 3513 }, { "epoch": 0.55, "learning_rate": 1.1570024378857824e-05, "logits/chosen": -1.7365182638168335, "logits/rejected": -2.8588738441467285, "logps/chosen": -154.5150146484375, "logps/rejected": -434.34320068359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2095954418182373, "rewards/margins": 9.085884094238281, "rewards/rejected": -11.295480728149414, "step": 3514 }, { "epoch": 0.55, "learning_rate": 1.1569290938326677e-05, "logits/chosen": -2.71016788482666, "logits/rejected": -2.479123592376709, "logps/chosen": -514.7596435546875, "logps/rejected": -577.7150268554688, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -4.058270454406738, "rewards/margins": 6.062403678894043, "rewards/rejected": -10.120674133300781, "step": 3515 }, { "epoch": 0.55, "learning_rate": 1.1568557497795529e-05, "logits/chosen": -2.246619701385498, "logits/rejected": -3.0616579055786133, "logps/chosen": -262.40032958984375, "logps/rejected": -380.76995849609375, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -0.9406360387802124, "rewards/margins": 3.3251657485961914, "rewards/rejected": -4.265801906585693, "step": 3516 }, { "epoch": 0.55, "learning_rate": 1.1567824057264381e-05, "logits/chosen": -2.576178550720215, "logits/rejected": -2.8808891773223877, "logps/chosen": -286.753662109375, "logps/rejected": -408.8078918457031, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.81145977973938, "rewards/margins": 6.214248180389404, "rewards/rejected": -9.025708198547363, "step": 3517 }, { "epoch": 0.55, "learning_rate": 1.1567090616733233e-05, "logits/chosen": -2.3334157466888428, "logits/rejected": -3.1429786682128906, "logps/chosen": -154.90673828125, "logps/rejected": -264.2619934082031, "loss": 0.1124, "rewards/accuracies": 1.0, "rewards/chosen": -1.9303841590881348, "rewards/margins": 2.1296567916870117, "rewards/rejected": -4.0600409507751465, "step": 3518 }, { "epoch": 0.55, "learning_rate": 1.1566357176202085e-05, "logits/chosen": -2.043952703475952, "logits/rejected": -2.789252758026123, "logps/chosen": -103.62478637695312, "logps/rejected": -241.70635986328125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.0640921592712402, "rewards/margins": 5.903481483459473, "rewards/rejected": -7.967574119567871, "step": 3519 }, { "epoch": 0.55, "learning_rate": 1.1565623735670937e-05, "logits/chosen": -3.115227699279785, "logits/rejected": -2.2386043071746826, "logps/chosen": -257.5993957519531, "logps/rejected": -220.4127197265625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.549562931060791, "rewards/margins": 6.355005264282227, "rewards/rejected": -8.90456771850586, "step": 3520 }, { "epoch": 0.55, "learning_rate": 1.1564890295139788e-05, "logits/chosen": -2.8075547218322754, "logits/rejected": -2.874185562133789, "logps/chosen": -152.00929260253906, "logps/rejected": -462.27978515625, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -2.4679951667785645, "rewards/margins": 8.895124435424805, "rewards/rejected": -11.363119125366211, "step": 3521 }, { "epoch": 0.55, "learning_rate": 1.156415685460864e-05, "logits/chosen": -2.140414237976074, "logits/rejected": -2.622993230819702, "logps/chosen": -371.49267578125, "logps/rejected": -318.97314453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.558435082435608, "rewards/margins": 8.246864318847656, "rewards/rejected": -9.805298805236816, "step": 3522 }, { "epoch": 0.55, "learning_rate": 1.1563423414077494e-05, "logits/chosen": -3.0989677906036377, "logits/rejected": -2.507838726043701, "logps/chosen": -115.13755798339844, "logps/rejected": -138.933349609375, "loss": 1.3456, "rewards/accuracies": 0.5, "rewards/chosen": -5.424078941345215, "rewards/margins": 0.6281143426895142, "rewards/rejected": -6.0521931648254395, "step": 3523 }, { "epoch": 0.55, "learning_rate": 1.1562689973546346e-05, "logits/chosen": -2.8758344650268555, "logits/rejected": -3.095820188522339, "logps/chosen": -119.7873306274414, "logps/rejected": -243.67233276367188, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.977724552154541, "rewards/margins": 6.338565349578857, "rewards/rejected": -8.316289901733398, "step": 3524 }, { "epoch": 0.55, "learning_rate": 1.1561956533015198e-05, "logits/chosen": -2.9800796508789062, "logits/rejected": -2.9009132385253906, "logps/chosen": -54.19536209106445, "logps/rejected": -139.09994506835938, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.1992329359054565, "rewards/margins": 4.880261421203613, "rewards/rejected": -6.079494476318359, "step": 3525 }, { "epoch": 0.55, "learning_rate": 1.156122309248405e-05, "logits/chosen": -1.8446646928787231, "logits/rejected": -2.5727431774139404, "logps/chosen": -84.3207778930664, "logps/rejected": -343.8507080078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.375911235809326, "rewards/margins": 8.456939697265625, "rewards/rejected": -10.83285140991211, "step": 3526 }, { "epoch": 0.55, "learning_rate": 1.1560489651952903e-05, "logits/chosen": -1.4504905939102173, "logits/rejected": -2.752824544906616, "logps/chosen": -52.483577728271484, "logps/rejected": -280.23004150390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.518167018890381, "rewards/margins": 6.945550441741943, "rewards/rejected": -9.463717460632324, "step": 3527 }, { "epoch": 0.55, "learning_rate": 1.1559756211421755e-05, "logits/chosen": -2.7757976055145264, "logits/rejected": -3.029366970062256, "logps/chosen": -47.191158294677734, "logps/rejected": -167.63182067871094, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -2.6785478591918945, "rewards/margins": 4.325390338897705, "rewards/rejected": -7.0039381980896, "step": 3528 }, { "epoch": 0.55, "learning_rate": 1.1559022770890607e-05, "logits/chosen": -3.046393871307373, "logits/rejected": -3.2523248195648193, "logps/chosen": -69.27349853515625, "logps/rejected": -182.63009643554688, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.7212920188903809, "rewards/margins": 4.479842662811279, "rewards/rejected": -6.20113468170166, "step": 3529 }, { "epoch": 0.55, "learning_rate": 1.1558289330359459e-05, "logits/chosen": -2.860262155532837, "logits/rejected": -2.7585625648498535, "logps/chosen": -197.6365966796875, "logps/rejected": -270.6941833496094, "loss": 0.541, "rewards/accuracies": 0.5, "rewards/chosen": -1.4089000225067139, "rewards/margins": 1.8682736158370972, "rewards/rejected": -3.2771737575531006, "step": 3530 }, { "epoch": 0.55, "learning_rate": 1.155755588982831e-05, "logits/chosen": -2.8861515522003174, "logits/rejected": -2.979865789413452, "logps/chosen": -88.67060852050781, "logps/rejected": -218.3836669921875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.0200430154800415, "rewards/margins": 5.871187686920166, "rewards/rejected": -6.891230583190918, "step": 3531 }, { "epoch": 0.55, "learning_rate": 1.1556822449297164e-05, "logits/chosen": -2.4902820587158203, "logits/rejected": -2.801790714263916, "logps/chosen": -213.6873321533203, "logps/rejected": -261.77044677734375, "loss": 0.5271, "rewards/accuracies": 0.5, "rewards/chosen": -2.135971784591675, "rewards/margins": 3.95065975189209, "rewards/rejected": -6.086631774902344, "step": 3532 }, { "epoch": 0.55, "learning_rate": 1.1556089008766016e-05, "logits/chosen": -2.6984784603118896, "logits/rejected": -3.196991443634033, "logps/chosen": -131.02439880371094, "logps/rejected": -317.5541076660156, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -0.7577497959136963, "rewards/margins": 5.195804119110107, "rewards/rejected": -5.953554153442383, "step": 3533 }, { "epoch": 0.55, "learning_rate": 1.1555355568234868e-05, "logits/chosen": -2.5171878337860107, "logits/rejected": -2.9484684467315674, "logps/chosen": -179.80023193359375, "logps/rejected": -203.563720703125, "loss": 1.6246, "rewards/accuracies": 0.5, "rewards/chosen": -3.3076584339141846, "rewards/margins": 1.2600501775741577, "rewards/rejected": -4.567708492279053, "step": 3534 }, { "epoch": 0.55, "learning_rate": 1.155462212770372e-05, "logits/chosen": -2.8056931495666504, "logits/rejected": -2.941124200820923, "logps/chosen": -241.86807250976562, "logps/rejected": -358.12286376953125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.1830955445766449, "rewards/margins": 6.898349761962891, "rewards/rejected": -6.715254306793213, "step": 3535 }, { "epoch": 0.55, "learning_rate": 1.1553888687172572e-05, "logits/chosen": -2.868297815322876, "logits/rejected": -3.146483898162842, "logps/chosen": -174.9700927734375, "logps/rejected": -285.4112243652344, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -1.6519737243652344, "rewards/margins": 4.639981269836426, "rewards/rejected": -6.29195499420166, "step": 3536 }, { "epoch": 0.55, "learning_rate": 1.1553155246641424e-05, "logits/chosen": -2.3994436264038086, "logits/rejected": -3.0793910026550293, "logps/chosen": -64.62821197509766, "logps/rejected": -189.26385498046875, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -1.858372449874878, "rewards/margins": 3.4680583477020264, "rewards/rejected": -5.326430320739746, "step": 3537 }, { "epoch": 0.55, "learning_rate": 1.1552421806110275e-05, "logits/chosen": -2.814697027206421, "logits/rejected": -2.9058237075805664, "logps/chosen": -41.48700714111328, "logps/rejected": -228.52308654785156, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.0446699857711792, "rewards/margins": 6.285755634307861, "rewards/rejected": -7.33042573928833, "step": 3538 }, { "epoch": 0.55, "learning_rate": 1.1551688365579127e-05, "logits/chosen": -2.7120583057403564, "logits/rejected": -2.7645437717437744, "logps/chosen": -116.87340545654297, "logps/rejected": -258.93438720703125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -1.884875774383545, "rewards/margins": 5.302783966064453, "rewards/rejected": -7.18765926361084, "step": 3539 }, { "epoch": 0.55, "learning_rate": 1.155095492504798e-05, "logits/chosen": -2.872483491897583, "logits/rejected": -1.863945722579956, "logps/chosen": -961.6580810546875, "logps/rejected": -410.1192932128906, "loss": 3.6522, "rewards/accuracies": 0.5, "rewards/chosen": -5.1653289794921875, "rewards/margins": 0.30974316596984863, "rewards/rejected": -5.475072383880615, "step": 3540 }, { "epoch": 0.55, "learning_rate": 1.1550221484516833e-05, "logits/chosen": -2.8965959548950195, "logits/rejected": -2.961771249771118, "logps/chosen": -41.78657913208008, "logps/rejected": -167.97511291503906, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.0283721685409546, "rewards/margins": 5.409780979156494, "rewards/rejected": -6.438153266906738, "step": 3541 }, { "epoch": 0.55, "learning_rate": 1.1549488043985685e-05, "logits/chosen": -2.811692953109741, "logits/rejected": -3.1169824600219727, "logps/chosen": -363.0144958496094, "logps/rejected": -563.4995727539062, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.8446632623672485, "rewards/margins": 10.679302215576172, "rewards/rejected": -12.523965835571289, "step": 3542 }, { "epoch": 0.55, "learning_rate": 1.1548754603454537e-05, "logits/chosen": -2.7806262969970703, "logits/rejected": -2.442892551422119, "logps/chosen": -279.53302001953125, "logps/rejected": -330.597900390625, "loss": 3.2245, "rewards/accuracies": 0.0, "rewards/chosen": -6.360001564025879, "rewards/margins": -3.058655023574829, "rewards/rejected": -3.3013463020324707, "step": 3543 }, { "epoch": 0.55, "learning_rate": 1.1548021162923388e-05, "logits/chosen": -1.4825372695922852, "logits/rejected": -2.7779548168182373, "logps/chosen": -99.1695556640625, "logps/rejected": -328.6201171875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.2475406676530838, "rewards/margins": 6.453248023986816, "rewards/rejected": -6.700788974761963, "step": 3544 }, { "epoch": 0.55, "learning_rate": 1.154728772239224e-05, "logits/chosen": -2.869774580001831, "logits/rejected": -2.6952085494995117, "logps/chosen": -772.5701904296875, "logps/rejected": -751.93359375, "loss": 5.0119, "rewards/accuracies": 0.5, "rewards/chosen": -6.992010593414307, "rewards/margins": -1.4464144706726074, "rewards/rejected": -5.545596122741699, "step": 3545 }, { "epoch": 0.55, "learning_rate": 1.1546554281861092e-05, "logits/chosen": -2.5266287326812744, "logits/rejected": -3.111041307449341, "logps/chosen": -93.10968780517578, "logps/rejected": -249.70245361328125, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -1.1320213079452515, "rewards/margins": 6.189783096313477, "rewards/rejected": -7.321804046630859, "step": 3546 }, { "epoch": 0.55, "learning_rate": 1.1545820841329944e-05, "logits/chosen": -3.009071111679077, "logits/rejected": -3.033220052719116, "logps/chosen": -567.3965454101562, "logps/rejected": -446.1208190917969, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.8357445001602173, "rewards/margins": 6.39532995223999, "rewards/rejected": -7.231074333190918, "step": 3547 }, { "epoch": 0.55, "learning_rate": 1.1545087400798796e-05, "logits/chosen": -2.6062698364257812, "logits/rejected": -3.0488741397857666, "logps/chosen": -184.78636169433594, "logps/rejected": -282.2948303222656, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": 0.19732171297073364, "rewards/margins": 4.904919624328613, "rewards/rejected": -4.7075982093811035, "step": 3548 }, { "epoch": 0.55, "learning_rate": 1.1544353960267648e-05, "logits/chosen": -1.9433399438858032, "logits/rejected": -3.1135151386260986, "logps/chosen": -106.73199462890625, "logps/rejected": -496.24041748046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.2243373990058899, "rewards/margins": 7.79876708984375, "rewards/rejected": -7.574429512023926, "step": 3549 }, { "epoch": 0.55, "learning_rate": 1.1543620519736501e-05, "logits/chosen": -2.5780797004699707, "logits/rejected": -3.155304193496704, "logps/chosen": -137.56448364257812, "logps/rejected": -272.4344482421875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.12040576338768005, "rewards/margins": 7.475331783294678, "rewards/rejected": -7.354926109313965, "step": 3550 }, { "epoch": 0.55, "learning_rate": 1.1542887079205353e-05, "logits/chosen": -2.8513340950012207, "logits/rejected": -2.887493848800659, "logps/chosen": -142.50184631347656, "logps/rejected": -238.241943359375, "loss": 2.5709, "rewards/accuracies": 0.5, "rewards/chosen": -2.188455581665039, "rewards/margins": 2.6506636142730713, "rewards/rejected": -4.839118957519531, "step": 3551 }, { "epoch": 0.55, "learning_rate": 1.1542153638674205e-05, "logits/chosen": -3.0223541259765625, "logits/rejected": -2.189542055130005, "logps/chosen": -410.98895263671875, "logps/rejected": -250.03707885742188, "loss": 2.6039, "rewards/accuracies": 0.5, "rewards/chosen": -3.5793120861053467, "rewards/margins": 1.1569490432739258, "rewards/rejected": -4.736260890960693, "step": 3552 }, { "epoch": 0.55, "learning_rate": 1.1541420198143057e-05, "logits/chosen": -2.728790044784546, "logits/rejected": -2.922184944152832, "logps/chosen": -103.10559844970703, "logps/rejected": -250.64913940429688, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": -0.9384064078330994, "rewards/margins": 5.058472633361816, "rewards/rejected": -5.996878623962402, "step": 3553 }, { "epoch": 0.55, "learning_rate": 1.1540686757611909e-05, "logits/chosen": -2.718268871307373, "logits/rejected": -2.8800160884857178, "logps/chosen": -304.5029602050781, "logps/rejected": -421.667724609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6688232421875, "rewards/margins": 8.313880920410156, "rewards/rejected": -8.982704162597656, "step": 3554 }, { "epoch": 0.55, "learning_rate": 1.153995331708076e-05, "logits/chosen": -1.7517139911651611, "logits/rejected": -2.6244750022888184, "logps/chosen": -278.057373046875, "logps/rejected": -312.9630432128906, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9423469305038452, "rewards/margins": 8.101885795593262, "rewards/rejected": -9.044233322143555, "step": 3555 }, { "epoch": 0.55, "learning_rate": 1.1539219876549613e-05, "logits/chosen": -1.4082396030426025, "logits/rejected": -2.875988721847534, "logps/chosen": -153.7906494140625, "logps/rejected": -280.1953125, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": 0.05567246675491333, "rewards/margins": 4.594384670257568, "rewards/rejected": -4.538712501525879, "step": 3556 }, { "epoch": 0.55, "learning_rate": 1.1538486436018465e-05, "logits/chosen": -2.7537312507629395, "logits/rejected": -1.724683403968811, "logps/chosen": -248.79869079589844, "logps/rejected": -86.68083190917969, "loss": 0.4957, "rewards/accuracies": 0.5, "rewards/chosen": 0.06125295162200928, "rewards/margins": 2.1586148738861084, "rewards/rejected": -2.0973620414733887, "step": 3557 }, { "epoch": 0.55, "learning_rate": 1.1537752995487316e-05, "logits/chosen": -2.6792311668395996, "logits/rejected": -3.0937533378601074, "logps/chosen": -446.8138732910156, "logps/rejected": -439.36737060546875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7487777471542358, "rewards/margins": 6.807255744934082, "rewards/rejected": -7.556033134460449, "step": 3558 }, { "epoch": 0.55, "learning_rate": 1.153701955495617e-05, "logits/chosen": -1.8495360612869263, "logits/rejected": -2.9961206912994385, "logps/chosen": -82.13156127929688, "logps/rejected": -312.589599609375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.8968879580497742, "rewards/margins": 8.019282341003418, "rewards/rejected": -8.916170120239258, "step": 3559 }, { "epoch": 0.55, "learning_rate": 1.1536286114425022e-05, "logits/chosen": -3.0267345905303955, "logits/rejected": -1.6450124979019165, "logps/chosen": -404.5588073730469, "logps/rejected": -146.04653930664062, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": -1.9544731378555298, "rewards/margins": 3.7110953330993652, "rewards/rejected": -5.6655683517456055, "step": 3560 }, { "epoch": 0.55, "learning_rate": 1.1535552673893875e-05, "logits/chosen": -2.0479350090026855, "logits/rejected": -2.470003843307495, "logps/chosen": -516.3636474609375, "logps/rejected": -646.166015625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.2505395412445068, "rewards/margins": 6.092345237731934, "rewards/rejected": -7.3428850173950195, "step": 3561 }, { "epoch": 0.55, "learning_rate": 1.1534819233362727e-05, "logits/chosen": -2.414638042449951, "logits/rejected": -3.147588014602661, "logps/chosen": -117.3491439819336, "logps/rejected": -341.0675354003906, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.31192752718925476, "rewards/margins": 5.46394681930542, "rewards/rejected": -5.152019500732422, "step": 3562 }, { "epoch": 0.55, "learning_rate": 1.153408579283158e-05, "logits/chosen": -2.710988998413086, "logits/rejected": -2.921255350112915, "logps/chosen": -178.3585968017578, "logps/rejected": -325.0157775878906, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.9897211790084839, "rewards/margins": 6.086294174194336, "rewards/rejected": -7.076015472412109, "step": 3563 }, { "epoch": 0.55, "learning_rate": 1.1533352352300431e-05, "logits/chosen": -2.784876823425293, "logits/rejected": -2.4683618545532227, "logps/chosen": -200.71453857421875, "logps/rejected": -359.8085021972656, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.9418464303016663, "rewards/margins": 6.121338844299316, "rewards/rejected": -7.063185691833496, "step": 3564 }, { "epoch": 0.55, "learning_rate": 1.1532618911769283e-05, "logits/chosen": -2.948603868484497, "logits/rejected": -2.211928129196167, "logps/chosen": -680.6713256835938, "logps/rejected": -372.455322265625, "loss": 1.0396, "rewards/accuracies": 0.5, "rewards/chosen": -2.834095001220703, "rewards/margins": 1.6832520961761475, "rewards/rejected": -4.51734733581543, "step": 3565 }, { "epoch": 0.55, "learning_rate": 1.1531885471238135e-05, "logits/chosen": -2.9437427520751953, "logits/rejected": -2.5562403202056885, "logps/chosen": -146.59933471679688, "logps/rejected": -177.3673095703125, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 0.23156318068504333, "rewards/margins": 4.394133567810059, "rewards/rejected": -4.162569999694824, "step": 3566 }, { "epoch": 0.55, "learning_rate": 1.1531152030706987e-05, "logits/chosen": -3.0687124729156494, "logits/rejected": -3.2090957164764404, "logps/chosen": -205.6918487548828, "logps/rejected": -332.37579345703125, "loss": 3.1132, "rewards/accuracies": 0.5, "rewards/chosen": -4.543035984039307, "rewards/margins": -0.685504674911499, "rewards/rejected": -3.8575310707092285, "step": 3567 }, { "epoch": 0.55, "learning_rate": 1.153041859017584e-05, "logits/chosen": -2.842928886413574, "logits/rejected": -2.2036635875701904, "logps/chosen": -250.0449676513672, "logps/rejected": -292.4444580078125, "loss": 3.5678, "rewards/accuracies": 0.5, "rewards/chosen": -3.7301344871520996, "rewards/margins": -0.7094359397888184, "rewards/rejected": -3.0206985473632812, "step": 3568 }, { "epoch": 0.56, "learning_rate": 1.1529685149644692e-05, "logits/chosen": -1.9409328699111938, "logits/rejected": -3.124558448791504, "logps/chosen": -189.55392456054688, "logps/rejected": -507.3275146484375, "loss": 1.5362, "rewards/accuracies": 0.5, "rewards/chosen": -3.7995998859405518, "rewards/margins": 0.44250524044036865, "rewards/rejected": -4.242105007171631, "step": 3569 }, { "epoch": 0.56, "learning_rate": 1.1528951709113544e-05, "logits/chosen": -2.2581446170806885, "logits/rejected": -2.924480438232422, "logps/chosen": -236.60494995117188, "logps/rejected": -296.1471252441406, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.4361778497695923, "rewards/margins": 6.403825283050537, "rewards/rejected": -7.84000301361084, "step": 3570 }, { "epoch": 0.56, "learning_rate": 1.1528218268582396e-05, "logits/chosen": -2.9254348278045654, "logits/rejected": -3.066348075866699, "logps/chosen": -167.93338012695312, "logps/rejected": -306.62188720703125, "loss": 2.1944, "rewards/accuracies": 0.5, "rewards/chosen": -3.3821446895599365, "rewards/margins": 0.6416041851043701, "rewards/rejected": -4.023748874664307, "step": 3571 }, { "epoch": 0.56, "learning_rate": 1.1527484828051248e-05, "logits/chosen": -2.667591094970703, "logits/rejected": -2.241734027862549, "logps/chosen": -594.7822875976562, "logps/rejected": -511.8438720703125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.7385982275009155, "rewards/margins": 6.4175872802734375, "rewards/rejected": -7.156185150146484, "step": 3572 }, { "epoch": 0.56, "learning_rate": 1.15267513875201e-05, "logits/chosen": -2.1872665882110596, "logits/rejected": -2.9131107330322266, "logps/chosen": -135.95176696777344, "logps/rejected": -368.3753662109375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.28790515661239624, "rewards/margins": 7.962096691131592, "rewards/rejected": -8.250001907348633, "step": 3573 }, { "epoch": 0.56, "learning_rate": 1.1526017946988952e-05, "logits/chosen": -3.0719032287597656, "logits/rejected": -3.1279914379119873, "logps/chosen": -263.0968017578125, "logps/rejected": -412.94195556640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.42108842730522156, "rewards/margins": 7.770993709564209, "rewards/rejected": -8.192082405090332, "step": 3574 }, { "epoch": 0.56, "learning_rate": 1.1525284506457803e-05, "logits/chosen": -2.283454656600952, "logits/rejected": -2.905170202255249, "logps/chosen": -20.817947387695312, "logps/rejected": -248.76937866210938, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.07873964309692383, "rewards/margins": 6.804776191711426, "rewards/rejected": -6.88351583480835, "step": 3575 }, { "epoch": 0.56, "learning_rate": 1.1524551065926655e-05, "logits/chosen": -2.7888457775115967, "logits/rejected": -3.31065034866333, "logps/chosen": -329.8723449707031, "logps/rejected": -395.68914794921875, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.9006576538085938, "rewards/margins": 4.427975654602051, "rewards/rejected": -5.3286333084106445, "step": 3576 }, { "epoch": 0.56, "learning_rate": 1.1523817625395509e-05, "logits/chosen": -2.948148250579834, "logits/rejected": -3.398667097091675, "logps/chosen": -114.03034973144531, "logps/rejected": -213.62042236328125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8099108934402466, "rewards/margins": 5.463476181030273, "rewards/rejected": -6.2733869552612305, "step": 3577 }, { "epoch": 0.56, "learning_rate": 1.152308418486436e-05, "logits/chosen": -2.4418389797210693, "logits/rejected": -3.1762561798095703, "logps/chosen": -178.74349975585938, "logps/rejected": -291.0714416503906, "loss": 0.571, "rewards/accuracies": 0.5, "rewards/chosen": -1.6331653594970703, "rewards/margins": 3.2676830291748047, "rewards/rejected": -4.900848388671875, "step": 3578 }, { "epoch": 0.56, "learning_rate": 1.1522350744333213e-05, "logits/chosen": -1.5364216566085815, "logits/rejected": -2.941159248352051, "logps/chosen": -96.97911834716797, "logps/rejected": -301.1761474609375, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -0.32535821199417114, "rewards/margins": 3.9303243160247803, "rewards/rejected": -4.255682468414307, "step": 3579 }, { "epoch": 0.56, "learning_rate": 1.1521617303802065e-05, "logits/chosen": -2.0951273441314697, "logits/rejected": -3.084017515182495, "logps/chosen": -136.63394165039062, "logps/rejected": -333.61175537109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7446339130401611, "rewards/margins": 6.703282356262207, "rewards/rejected": -7.447916507720947, "step": 3580 }, { "epoch": 0.56, "learning_rate": 1.1520883863270916e-05, "logits/chosen": -2.8899426460266113, "logits/rejected": -3.145259380340576, "logps/chosen": -190.29794311523438, "logps/rejected": -266.96429443359375, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -0.8957565426826477, "rewards/margins": 4.631370544433594, "rewards/rejected": -5.527127265930176, "step": 3581 }, { "epoch": 0.56, "learning_rate": 1.1520150422739768e-05, "logits/chosen": -3.0312023162841797, "logits/rejected": -2.261871576309204, "logps/chosen": -222.08447265625, "logps/rejected": -175.43533325195312, "loss": 0.1171, "rewards/accuracies": 1.0, "rewards/chosen": -0.3620169460773468, "rewards/margins": 5.808126926422119, "rewards/rejected": -6.170144081115723, "step": 3582 }, { "epoch": 0.56, "learning_rate": 1.151941698220862e-05, "logits/chosen": -2.670024871826172, "logits/rejected": -1.4892913103103638, "logps/chosen": -275.6931457519531, "logps/rejected": -272.7283020019531, "loss": 4.6279, "rewards/accuracies": 0.5, "rewards/chosen": -5.363761901855469, "rewards/margins": -0.4472212791442871, "rewards/rejected": -4.916540622711182, "step": 3583 }, { "epoch": 0.56, "learning_rate": 1.1518683541677472e-05, "logits/chosen": -2.228148937225342, "logits/rejected": -2.3717494010925293, "logps/chosen": -181.98255920410156, "logps/rejected": -372.6626892089844, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": -0.24775201082229614, "rewards/margins": 3.308809757232666, "rewards/rejected": -3.5565617084503174, "step": 3584 }, { "epoch": 0.56, "learning_rate": 1.1517950101146324e-05, "logits/chosen": -2.710663080215454, "logits/rejected": -2.8591716289520264, "logps/chosen": -200.5808563232422, "logps/rejected": -411.987548828125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.0552860498428345, "rewards/margins": 8.3753080368042, "rewards/rejected": -9.430594444274902, "step": 3585 }, { "epoch": 0.56, "learning_rate": 1.1517216660615177e-05, "logits/chosen": -3.1887974739074707, "logits/rejected": -2.568798303604126, "logps/chosen": -635.5300903320312, "logps/rejected": -310.71099853515625, "loss": 2.8273, "rewards/accuracies": 0.5, "rewards/chosen": -2.437086582183838, "rewards/margins": -0.06404829025268555, "rewards/rejected": -2.3730382919311523, "step": 3586 }, { "epoch": 0.56, "learning_rate": 1.151648322008403e-05, "logits/chosen": -2.7499349117279053, "logits/rejected": -2.3971047401428223, "logps/chosen": -404.34490966796875, "logps/rejected": -381.5263671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8496421575546265, "rewards/margins": 8.151142120361328, "rewards/rejected": -9.000783920288086, "step": 3587 }, { "epoch": 0.56, "learning_rate": 1.1515749779552881e-05, "logits/chosen": -3.084784746170044, "logits/rejected": -3.0397067070007324, "logps/chosen": -50.57012176513672, "logps/rejected": -107.57779693603516, "loss": 1.5906, "rewards/accuracies": 0.5, "rewards/chosen": -1.5644598007202148, "rewards/margins": 2.999037027359009, "rewards/rejected": -4.563497066497803, "step": 3588 }, { "epoch": 0.56, "learning_rate": 1.1515016339021733e-05, "logits/chosen": -0.6926606893539429, "logits/rejected": -2.198566436767578, "logps/chosen": -123.18266296386719, "logps/rejected": -592.05224609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.3758317828178406, "rewards/margins": 10.71086597442627, "rewards/rejected": -11.086697578430176, "step": 3589 }, { "epoch": 0.56, "learning_rate": 1.1514282898490585e-05, "logits/chosen": -2.9388797283172607, "logits/rejected": -2.202955722808838, "logps/chosen": -97.00006103515625, "logps/rejected": -238.13356018066406, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.3103303909301758, "rewards/margins": 7.696066856384277, "rewards/rejected": -7.385736465454102, "step": 3590 }, { "epoch": 0.56, "learning_rate": 1.1513549457959437e-05, "logits/chosen": -2.3320441246032715, "logits/rejected": -2.7223856449127197, "logps/chosen": -278.55120849609375, "logps/rejected": -248.07000732421875, "loss": 3.2575, "rewards/accuracies": 0.0, "rewards/chosen": -6.445965766906738, "rewards/margins": -3.216808795928955, "rewards/rejected": -3.229156970977783, "step": 3591 }, { "epoch": 0.56, "learning_rate": 1.1512816017428289e-05, "logits/chosen": -2.8397650718688965, "logits/rejected": -2.619905710220337, "logps/chosen": -271.1109619140625, "logps/rejected": -280.43243408203125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.03286054730415344, "rewards/margins": 5.091454982757568, "rewards/rejected": -5.1243157386779785, "step": 3592 }, { "epoch": 0.56, "learning_rate": 1.1512082576897142e-05, "logits/chosen": -2.2695703506469727, "logits/rejected": -3.0312373638153076, "logps/chosen": -46.06536102294922, "logps/rejected": -288.02197265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.1392170786857605, "rewards/margins": 6.926115989685059, "rewards/rejected": -6.786898612976074, "step": 3593 }, { "epoch": 0.56, "learning_rate": 1.1511349136365994e-05, "logits/chosen": -2.569779396057129, "logits/rejected": -2.9015121459960938, "logps/chosen": -176.7389678955078, "logps/rejected": -278.3711853027344, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.18716202676296234, "rewards/margins": 5.751720428466797, "rewards/rejected": -5.938882827758789, "step": 3594 }, { "epoch": 0.56, "learning_rate": 1.1510615695834848e-05, "logits/chosen": -2.844862461090088, "logits/rejected": -2.933436155319214, "logps/chosen": -115.42578125, "logps/rejected": -239.88510131835938, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -1.3188958168029785, "rewards/margins": 5.331787109375, "rewards/rejected": -6.6506829261779785, "step": 3595 }, { "epoch": 0.56, "learning_rate": 1.15098822553037e-05, "logits/chosen": -2.712975025177002, "logits/rejected": -1.341817855834961, "logps/chosen": -415.0667724609375, "logps/rejected": -260.7398986816406, "loss": 4.6074, "rewards/accuracies": 0.5, "rewards/chosen": -4.426190376281738, "rewards/margins": -0.8072113990783691, "rewards/rejected": -3.618978977203369, "step": 3596 }, { "epoch": 0.56, "learning_rate": 1.1509148814772552e-05, "logits/chosen": -2.279024839401245, "logits/rejected": -3.0614092350006104, "logps/chosen": -83.61913299560547, "logps/rejected": -253.46066284179688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.1666572093963623, "rewards/margins": 6.621434688568115, "rewards/rejected": -7.788091659545898, "step": 3597 }, { "epoch": 0.56, "learning_rate": 1.1508415374241403e-05, "logits/chosen": -2.7408225536346436, "logits/rejected": -3.3218019008636475, "logps/chosen": -247.96041870117188, "logps/rejected": -397.3216552734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.9593032598495483, "rewards/margins": 7.977226257324219, "rewards/rejected": -9.936529159545898, "step": 3598 }, { "epoch": 0.56, "learning_rate": 1.1507681933710255e-05, "logits/chosen": -2.3954265117645264, "logits/rejected": -2.9568347930908203, "logps/chosen": -104.59247589111328, "logps/rejected": -234.1117706298828, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.03875885158777237, "rewards/margins": 7.558202743530273, "rewards/rejected": -7.596961498260498, "step": 3599 }, { "epoch": 0.56, "learning_rate": 1.1506948493179107e-05, "logits/chosen": -1.621800184249878, "logits/rejected": -3.203953742980957, "logps/chosen": -77.0787124633789, "logps/rejected": -434.1968688964844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6561375856399536, "rewards/margins": 7.987480163574219, "rewards/rejected": -8.643617630004883, "step": 3600 }, { "epoch": 0.56, "learning_rate": 1.1506215052647959e-05, "logits/chosen": -2.0300698280334473, "logits/rejected": -3.013566493988037, "logps/chosen": -51.95050811767578, "logps/rejected": -304.843994140625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.021481133997440338, "rewards/margins": 6.3820343017578125, "rewards/rejected": -6.360552787780762, "step": 3601 }, { "epoch": 0.56, "learning_rate": 1.1505481612116811e-05, "logits/chosen": -3.213918924331665, "logits/rejected": -2.809009313583374, "logps/chosen": -162.51145935058594, "logps/rejected": -178.8002471923828, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.4512416124343872, "rewards/margins": 6.937339782714844, "rewards/rejected": -5.486097812652588, "step": 3602 }, { "epoch": 0.56, "learning_rate": 1.1504748171585663e-05, "logits/chosen": -2.750843048095703, "logits/rejected": -2.1858842372894287, "logps/chosen": -191.06884765625, "logps/rejected": -195.87596130371094, "loss": 3.7344, "rewards/accuracies": 0.5, "rewards/chosen": -3.8916962146759033, "rewards/margins": -0.7487583160400391, "rewards/rejected": -3.1429378986358643, "step": 3603 }, { "epoch": 0.56, "learning_rate": 1.1504014731054516e-05, "logits/chosen": -2.3954532146453857, "logits/rejected": -3.1377131938934326, "logps/chosen": -143.78713989257812, "logps/rejected": -356.6495056152344, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.700836181640625, "rewards/margins": 4.679973125457764, "rewards/rejected": -5.380809307098389, "step": 3604 }, { "epoch": 0.56, "learning_rate": 1.1503281290523368e-05, "logits/chosen": -2.2448582649230957, "logits/rejected": -3.084928274154663, "logps/chosen": -159.78854370117188, "logps/rejected": -347.0975341796875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.12930068373680115, "rewards/margins": 5.566921234130859, "rewards/rejected": -5.696221828460693, "step": 3605 }, { "epoch": 0.56, "learning_rate": 1.150254784999222e-05, "logits/chosen": -3.010108709335327, "logits/rejected": -2.3036904335021973, "logps/chosen": -133.42356872558594, "logps/rejected": -46.24968719482422, "loss": 4.2981, "rewards/accuracies": 0.0, "rewards/chosen": -5.207681655883789, "rewards/margins": -4.205456256866455, "rewards/rejected": -1.0022255182266235, "step": 3606 }, { "epoch": 0.56, "learning_rate": 1.1501814409461072e-05, "logits/chosen": -1.6822890043258667, "logits/rejected": -3.0268681049346924, "logps/chosen": -455.499267578125, "logps/rejected": -646.683837890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.5554840564727783, "rewards/margins": 6.948957443237305, "rewards/rejected": -8.50444221496582, "step": 3607 }, { "epoch": 0.56, "learning_rate": 1.1501080968929924e-05, "logits/chosen": -1.7660374641418457, "logits/rejected": -2.8712785243988037, "logps/chosen": -226.2014617919922, "logps/rejected": -283.62841796875, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.31031036376953125, "rewards/margins": 4.659479141235352, "rewards/rejected": -4.969789505004883, "step": 3608 }, { "epoch": 0.56, "learning_rate": 1.1500347528398776e-05, "logits/chosen": -2.9147369861602783, "logits/rejected": -3.4957923889160156, "logps/chosen": -32.49550247192383, "logps/rejected": -296.72393798828125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.0213991403579712, "rewards/margins": 4.697114944458008, "rewards/rejected": -5.718514442443848, "step": 3609 }, { "epoch": 0.56, "learning_rate": 1.1499614087867628e-05, "logits/chosen": -2.5357494354248047, "logits/rejected": -3.0988690853118896, "logps/chosen": -83.64364624023438, "logps/rejected": -270.2882995605469, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6338939666748047, "rewards/margins": 6.81395149230957, "rewards/rejected": -7.447845458984375, "step": 3610 }, { "epoch": 0.56, "learning_rate": 1.149888064733648e-05, "logits/chosen": -2.1794676780700684, "logits/rejected": -2.9332351684570312, "logps/chosen": -190.3892059326172, "logps/rejected": -354.669677734375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.4274789094924927, "rewards/margins": 8.504491806030273, "rewards/rejected": -8.931970596313477, "step": 3611 }, { "epoch": 0.56, "learning_rate": 1.1498147206805331e-05, "logits/chosen": -2.567484140396118, "logits/rejected": -2.9015047550201416, "logps/chosen": -229.56503295898438, "logps/rejected": -348.7979431152344, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.0239129066467285, "rewards/margins": 5.794474124908447, "rewards/rejected": -7.818387031555176, "step": 3612 }, { "epoch": 0.56, "learning_rate": 1.1497413766274185e-05, "logits/chosen": -2.9039201736450195, "logits/rejected": -2.2407984733581543, "logps/chosen": -138.690673828125, "logps/rejected": -209.079345703125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.381927490234375, "rewards/margins": 5.715810775756836, "rewards/rejected": -7.097738265991211, "step": 3613 }, { "epoch": 0.56, "learning_rate": 1.1496680325743037e-05, "logits/chosen": -2.6215245723724365, "logits/rejected": -3.0402607917785645, "logps/chosen": -33.72333908081055, "logps/rejected": -325.75006103515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.19648200273513794, "rewards/margins": 7.878411293029785, "rewards/rejected": -8.074893951416016, "step": 3614 }, { "epoch": 0.56, "learning_rate": 1.1495946885211889e-05, "logits/chosen": -2.0183935165405273, "logits/rejected": -3.030491828918457, "logps/chosen": -75.23988342285156, "logps/rejected": -186.90731811523438, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.2683025896549225, "rewards/margins": 5.700909614562988, "rewards/rejected": -5.969212532043457, "step": 3615 }, { "epoch": 0.56, "learning_rate": 1.149521344468074e-05, "logits/chosen": -2.6820688247680664, "logits/rejected": -3.0492072105407715, "logps/chosen": -323.5047912597656, "logps/rejected": -474.0713806152344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3425232172012329, "rewards/margins": 8.130760192871094, "rewards/rejected": -8.473283767700195, "step": 3616 }, { "epoch": 0.56, "learning_rate": 1.1494480004149592e-05, "logits/chosen": -3.266205310821533, "logits/rejected": -2.8610541820526123, "logps/chosen": -225.77581787109375, "logps/rejected": -144.1474609375, "loss": 1.2193, "rewards/accuracies": 0.5, "rewards/chosen": -2.3762893676757812, "rewards/margins": 1.9988946914672852, "rewards/rejected": -4.375184059143066, "step": 3617 }, { "epoch": 0.56, "learning_rate": 1.1493746563618444e-05, "logits/chosen": -2.678774833679199, "logits/rejected": -2.6627914905548096, "logps/chosen": -241.95648193359375, "logps/rejected": -373.51812744140625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.1199485063552856, "rewards/margins": 5.7370476722717285, "rewards/rejected": -6.856996536254883, "step": 3618 }, { "epoch": 0.56, "learning_rate": 1.1493013123087296e-05, "logits/chosen": -3.033330202102661, "logits/rejected": -2.158620834350586, "logps/chosen": -187.00096130371094, "logps/rejected": -84.57606506347656, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -2.053251266479492, "rewards/margins": 3.1988155841827393, "rewards/rejected": -5.252066612243652, "step": 3619 }, { "epoch": 0.56, "learning_rate": 1.1492279682556148e-05, "logits/chosen": -1.2247024774551392, "logits/rejected": -2.864168882369995, "logps/chosen": -88.08840942382812, "logps/rejected": -333.15850830078125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.6006162762641907, "rewards/margins": 5.721260070800781, "rewards/rejected": -6.321876525878906, "step": 3620 }, { "epoch": 0.56, "learning_rate": 1.1491546242025002e-05, "logits/chosen": -2.9861037731170654, "logits/rejected": -2.953362226486206, "logps/chosen": -149.4151611328125, "logps/rejected": -125.91172790527344, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -1.2246079444885254, "rewards/margins": 4.210921287536621, "rewards/rejected": -5.4355292320251465, "step": 3621 }, { "epoch": 0.56, "learning_rate": 1.1490812801493854e-05, "logits/chosen": -1.478609561920166, "logits/rejected": -2.9942924976348877, "logps/chosen": -203.45260620117188, "logps/rejected": -495.3186340332031, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/chosen": -0.7605587244033813, "rewards/margins": 6.832756519317627, "rewards/rejected": -7.593315124511719, "step": 3622 }, { "epoch": 0.56, "learning_rate": 1.1490079360962705e-05, "logits/chosen": -3.2821285724639893, "logits/rejected": -2.897423505783081, "logps/chosen": -595.4776611328125, "logps/rejected": -380.4130554199219, "loss": 0.0369, "rewards/accuracies": 1.0, "rewards/chosen": -0.5659561157226562, "rewards/margins": 3.5769448280334473, "rewards/rejected": -4.1429009437561035, "step": 3623 }, { "epoch": 0.56, "learning_rate": 1.1489345920431557e-05, "logits/chosen": -2.1024866104125977, "logits/rejected": -2.8680458068847656, "logps/chosen": -72.6918716430664, "logps/rejected": -155.0550537109375, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -2.1955950260162354, "rewards/margins": 3.6871814727783203, "rewards/rejected": -5.882776260375977, "step": 3624 }, { "epoch": 0.56, "learning_rate": 1.148861247990041e-05, "logits/chosen": -3.0525407791137695, "logits/rejected": -2.665735960006714, "logps/chosen": -306.3592529296875, "logps/rejected": -329.9140930175781, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.502851128578186, "rewards/margins": 6.616260528564453, "rewards/rejected": -7.119112014770508, "step": 3625 }, { "epoch": 0.56, "learning_rate": 1.1487879039369261e-05, "logits/chosen": -3.02938175201416, "logits/rejected": -2.95457124710083, "logps/chosen": -353.90167236328125, "logps/rejected": -238.23574829101562, "loss": 2.5134, "rewards/accuracies": 0.5, "rewards/chosen": -1.453912377357483, "rewards/margins": -0.13252544403076172, "rewards/rejected": -1.321386694908142, "step": 3626 }, { "epoch": 0.56, "learning_rate": 1.1487145598838115e-05, "logits/chosen": -3.057558059692383, "logits/rejected": -2.1614134311676025, "logps/chosen": -208.94708251953125, "logps/rejected": -149.02560424804688, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.11908894777297974, "rewards/margins": 6.103164196014404, "rewards/rejected": -6.22225284576416, "step": 3627 }, { "epoch": 0.56, "learning_rate": 1.1486412158306967e-05, "logits/chosen": -2.7646775245666504, "logits/rejected": -2.533830165863037, "logps/chosen": -176.30055236816406, "logps/rejected": -215.90982055664062, "loss": 2.8768, "rewards/accuracies": 0.5, "rewards/chosen": -3.6058385372161865, "rewards/margins": 1.7094390392303467, "rewards/rejected": -5.315277576446533, "step": 3628 }, { "epoch": 0.56, "learning_rate": 1.1485678717775818e-05, "logits/chosen": -2.8171305656433105, "logits/rejected": -2.942185640335083, "logps/chosen": -541.6962890625, "logps/rejected": -649.72314453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1628127098083496, "rewards/margins": 7.6055378913879395, "rewards/rejected": -9.768350601196289, "step": 3629 }, { "epoch": 0.56, "learning_rate": 1.1484945277244672e-05, "logits/chosen": -3.289752960205078, "logits/rejected": -3.3004047870635986, "logps/chosen": -227.86550903320312, "logps/rejected": -242.65316772460938, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.2132446765899658, "rewards/margins": 5.140607833862305, "rewards/rejected": -6.353852272033691, "step": 3630 }, { "epoch": 0.56, "learning_rate": 1.1484211836713524e-05, "logits/chosen": -3.3450889587402344, "logits/rejected": -2.3063488006591797, "logps/chosen": -301.23089599609375, "logps/rejected": -138.222900390625, "loss": 4.9558, "rewards/accuracies": 0.0, "rewards/chosen": -4.773563385009766, "rewards/margins": -4.947580337524414, "rewards/rejected": 0.1740165650844574, "step": 3631 }, { "epoch": 0.56, "learning_rate": 1.1483478396182376e-05, "logits/chosen": -3.3107712268829346, "logits/rejected": -2.7451610565185547, "logps/chosen": -308.18438720703125, "logps/rejected": -115.31187438964844, "loss": 0.1583, "rewards/accuracies": 1.0, "rewards/chosen": -3.9851067066192627, "rewards/margins": 2.212101459503174, "rewards/rejected": -6.197208404541016, "step": 3632 }, { "epoch": 0.57, "learning_rate": 1.1482744955651228e-05, "logits/chosen": -2.6898093223571777, "logits/rejected": -2.775649309158325, "logps/chosen": -96.81461334228516, "logps/rejected": -187.6792449951172, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -2.9199466705322266, "rewards/margins": 6.004398345947266, "rewards/rejected": -8.924345016479492, "step": 3633 }, { "epoch": 0.57, "learning_rate": 1.148201151512008e-05, "logits/chosen": -2.22652268409729, "logits/rejected": -3.1506245136260986, "logps/chosen": -49.945735931396484, "logps/rejected": -236.52371215820312, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -1.054041862487793, "rewards/margins": 5.57056999206543, "rewards/rejected": -6.624611854553223, "step": 3634 }, { "epoch": 0.57, "learning_rate": 1.1481278074588931e-05, "logits/chosen": -2.36037015914917, "logits/rejected": -2.9215939044952393, "logps/chosen": -503.7220153808594, "logps/rejected": -499.6175842285156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.86894953250885, "rewards/margins": 9.111019134521484, "rewards/rejected": -10.979969024658203, "step": 3635 }, { "epoch": 0.57, "learning_rate": 1.1480544634057783e-05, "logits/chosen": -1.0533759593963623, "logits/rejected": -2.3555867671966553, "logps/chosen": -587.6279296875, "logps/rejected": -417.2546081542969, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.343085289001465, "rewards/margins": 7.337990760803223, "rewards/rejected": -9.681076049804688, "step": 3636 }, { "epoch": 0.57, "learning_rate": 1.1479811193526635e-05, "logits/chosen": -2.919807195663452, "logits/rejected": -3.053163766860962, "logps/chosen": -61.51112747192383, "logps/rejected": -171.12875366210938, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.3125152587890625, "rewards/margins": 4.3420820236206055, "rewards/rejected": -5.654597282409668, "step": 3637 }, { "epoch": 0.57, "learning_rate": 1.1479077752995487e-05, "logits/chosen": -2.110647678375244, "logits/rejected": -2.768350839614868, "logps/chosen": -153.3618927001953, "logps/rejected": -177.77316284179688, "loss": 0.3859, "rewards/accuracies": 0.5, "rewards/chosen": -3.066035747528076, "rewards/margins": 2.5135226249694824, "rewards/rejected": -5.579558372497559, "step": 3638 }, { "epoch": 0.57, "learning_rate": 1.147834431246434e-05, "logits/chosen": -2.606215000152588, "logits/rejected": -3.1847643852233887, "logps/chosen": -85.51934051513672, "logps/rejected": -187.8975830078125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.16213268041610718, "rewards/margins": 4.291805744171143, "rewards/rejected": -4.4539384841918945, "step": 3639 }, { "epoch": 0.57, "learning_rate": 1.1477610871933192e-05, "logits/chosen": -2.251753330230713, "logits/rejected": -3.1070172786712646, "logps/chosen": -304.1242370605469, "logps/rejected": -473.6994934082031, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.6355682611465454, "rewards/margins": 6.152870178222656, "rewards/rejected": -6.78843879699707, "step": 3640 }, { "epoch": 0.57, "learning_rate": 1.1476877431402044e-05, "logits/chosen": -2.909855365753174, "logits/rejected": -2.3683907985687256, "logps/chosen": -412.9145202636719, "logps/rejected": -376.7316589355469, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.4569137394428253, "rewards/margins": 7.463541507720947, "rewards/rejected": -7.006628036499023, "step": 3641 }, { "epoch": 0.57, "learning_rate": 1.1476143990870896e-05, "logits/chosen": -2.7764079570770264, "logits/rejected": -3.091585397720337, "logps/chosen": -92.02510070800781, "logps/rejected": -199.89051818847656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.347132921218872, "rewards/margins": 8.521112442016602, "rewards/rejected": -9.868245124816895, "step": 3642 }, { "epoch": 0.57, "learning_rate": 1.1475410550339748e-05, "logits/chosen": -2.919173240661621, "logits/rejected": -2.136899709701538, "logps/chosen": -226.4412078857422, "logps/rejected": -96.61764526367188, "loss": 1.6744, "rewards/accuracies": 0.5, "rewards/chosen": -5.142521858215332, "rewards/margins": 0.1969224214553833, "rewards/rejected": -5.339444160461426, "step": 3643 }, { "epoch": 0.57, "learning_rate": 1.14746771098086e-05, "logits/chosen": -1.9346380233764648, "logits/rejected": -3.0067219734191895, "logps/chosen": -114.9818115234375, "logps/rejected": -265.854736328125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.24518856406211853, "rewards/margins": 8.658058166503906, "rewards/rejected": -8.90324592590332, "step": 3644 }, { "epoch": 0.57, "learning_rate": 1.1473943669277452e-05, "logits/chosen": -2.8112034797668457, "logits/rejected": -3.2033979892730713, "logps/chosen": -66.14786529541016, "logps/rejected": -184.42440795898438, "loss": 0.0848, "rewards/accuracies": 1.0, "rewards/chosen": -3.3793678283691406, "rewards/margins": 3.9689605236053467, "rewards/rejected": -7.348328590393066, "step": 3645 }, { "epoch": 0.57, "learning_rate": 1.1473210228746304e-05, "logits/chosen": -3.0365982055664062, "logits/rejected": -2.681995153427124, "logps/chosen": -82.7668685913086, "logps/rejected": -98.66925811767578, "loss": 0.0816, "rewards/accuracies": 1.0, "rewards/chosen": -4.109956741333008, "rewards/margins": 3.2065277099609375, "rewards/rejected": -7.316484451293945, "step": 3646 }, { "epoch": 0.57, "learning_rate": 1.1472476788215156e-05, "logits/chosen": -1.4073419570922852, "logits/rejected": -3.270174503326416, "logps/chosen": -139.64974975585938, "logps/rejected": -759.593017578125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.8697624206542969, "rewards/margins": 6.87597131729126, "rewards/rejected": -7.745733737945557, "step": 3647 }, { "epoch": 0.57, "learning_rate": 1.147174334768401e-05, "logits/chosen": -2.5174198150634766, "logits/rejected": -3.098111152648926, "logps/chosen": -372.84527587890625, "logps/rejected": -445.56427001953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.395040512084961, "rewards/margins": 8.308713912963867, "rewards/rejected": -9.703754425048828, "step": 3648 }, { "epoch": 0.57, "learning_rate": 1.1471009907152861e-05, "logits/chosen": -2.8581206798553467, "logits/rejected": -3.2922935485839844, "logps/chosen": -249.5047149658203, "logps/rejected": -311.6187744140625, "loss": 0.3688, "rewards/accuracies": 0.5, "rewards/chosen": 0.05087052285671234, "rewards/margins": 2.1929757595062256, "rewards/rejected": -2.1421051025390625, "step": 3649 }, { "epoch": 0.57, "learning_rate": 1.1470276466621713e-05, "logits/chosen": -3.1362736225128174, "logits/rejected": -3.3212087154388428, "logps/chosen": -557.891845703125, "logps/rejected": -274.5126953125, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -1.0552040338516235, "rewards/margins": 3.64448618888855, "rewards/rejected": -4.699689865112305, "step": 3650 }, { "epoch": 0.57, "learning_rate": 1.1469543026090565e-05, "logits/chosen": -2.7758121490478516, "logits/rejected": -3.327831268310547, "logps/chosen": -120.50717163085938, "logps/rejected": -186.7388916015625, "loss": 1.7168, "rewards/accuracies": 0.5, "rewards/chosen": -4.629946708679199, "rewards/margins": -0.3398432731628418, "rewards/rejected": -4.290103435516357, "step": 3651 }, { "epoch": 0.57, "learning_rate": 1.1468809585559417e-05, "logits/chosen": -3.0315918922424316, "logits/rejected": -3.1691741943359375, "logps/chosen": -519.2080078125, "logps/rejected": -546.10205078125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.1631606817245483, "rewards/margins": 4.911870956420898, "rewards/rejected": -6.0750322341918945, "step": 3652 }, { "epoch": 0.57, "learning_rate": 1.1468076145028269e-05, "logits/chosen": -2.2725253105163574, "logits/rejected": -3.0207998752593994, "logps/chosen": -117.02262878417969, "logps/rejected": -341.3874816894531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.15045110881328583, "rewards/margins": 8.73904037475586, "rewards/rejected": -8.889491081237793, "step": 3653 }, { "epoch": 0.57, "learning_rate": 1.146734270449712e-05, "logits/chosen": -2.929058074951172, "logits/rejected": -1.981368899345398, "logps/chosen": -311.083740234375, "logps/rejected": -315.3280944824219, "loss": 2.4441, "rewards/accuracies": 0.5, "rewards/chosen": -2.8609824180603027, "rewards/margins": 3.3617730140686035, "rewards/rejected": -6.222755432128906, "step": 3654 }, { "epoch": 0.57, "learning_rate": 1.1466609263965972e-05, "logits/chosen": -2.854520082473755, "logits/rejected": -2.280911684036255, "logps/chosen": -566.8525390625, "logps/rejected": -428.7984924316406, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.57440185546875, "rewards/margins": 6.660764694213867, "rewards/rejected": -8.235166549682617, "step": 3655 }, { "epoch": 0.57, "learning_rate": 1.1465875823434824e-05, "logits/chosen": -1.8576786518096924, "logits/rejected": -3.2216084003448486, "logps/chosen": -100.5013656616211, "logps/rejected": -397.0823669433594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.20163726806640625, "rewards/margins": 9.251197814941406, "rewards/rejected": -9.452835083007812, "step": 3656 }, { "epoch": 0.57, "learning_rate": 1.1465142382903678e-05, "logits/chosen": -1.9310969114303589, "logits/rejected": -2.9272077083587646, "logps/chosen": -63.19510269165039, "logps/rejected": -347.79974365234375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -4.522522449493408, "rewards/margins": 6.116100311279297, "rewards/rejected": -10.638622283935547, "step": 3657 }, { "epoch": 0.57, "learning_rate": 1.146440894237253e-05, "logits/chosen": -2.800262689590454, "logits/rejected": -2.551231622695923, "logps/chosen": -334.9428405761719, "logps/rejected": -314.87432861328125, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -3.008251667022705, "rewards/margins": 6.094901084899902, "rewards/rejected": -9.103153228759766, "step": 3658 }, { "epoch": 0.57, "learning_rate": 1.1463675501841382e-05, "logits/chosen": -2.8722469806671143, "logits/rejected": -3.0494980812072754, "logps/chosen": -269.51239013671875, "logps/rejected": -206.06944274902344, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.7218647003173828, "rewards/margins": 5.188348770141602, "rewards/rejected": -5.910213470458984, "step": 3659 }, { "epoch": 0.57, "learning_rate": 1.1462942061310233e-05, "logits/chosen": -2.710308790206909, "logits/rejected": -2.0675902366638184, "logps/chosen": -425.1526794433594, "logps/rejected": -266.9228515625, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -2.576786518096924, "rewards/margins": 3.6111161708831787, "rewards/rejected": -6.187902450561523, "step": 3660 }, { "epoch": 0.57, "learning_rate": 1.1462208620779087e-05, "logits/chosen": -2.4162559509277344, "logits/rejected": -3.029879093170166, "logps/chosen": -143.41937255859375, "logps/rejected": -258.22613525390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6769976615905762, "rewards/margins": 6.876097679138184, "rewards/rejected": -8.553094863891602, "step": 3661 }, { "epoch": 0.57, "learning_rate": 1.1461475180247939e-05, "logits/chosen": -2.8172080516815186, "logits/rejected": -1.2394380569458008, "logps/chosen": -218.17535400390625, "logps/rejected": -265.153076171875, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -3.3929078578948975, "rewards/margins": 5.230575084686279, "rewards/rejected": -8.623482704162598, "step": 3662 }, { "epoch": 0.57, "learning_rate": 1.146074173971679e-05, "logits/chosen": -1.9704283475875854, "logits/rejected": -2.8272085189819336, "logps/chosen": -47.3111572265625, "logps/rejected": -292.1805419921875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.3998842239379883, "rewards/margins": 5.12802267074585, "rewards/rejected": -7.527906894683838, "step": 3663 }, { "epoch": 0.57, "learning_rate": 1.1460008299185643e-05, "logits/chosen": -2.9057576656341553, "logits/rejected": -2.264547348022461, "logps/chosen": -723.0548095703125, "logps/rejected": -566.8521728515625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.2153183221817017, "rewards/margins": 6.377742290496826, "rewards/rejected": -7.593060493469238, "step": 3664 }, { "epoch": 0.57, "learning_rate": 1.1459274858654495e-05, "logits/chosen": -1.8491861820220947, "logits/rejected": -2.899919271469116, "logps/chosen": -161.78482055664062, "logps/rejected": -496.1041259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3100314140319824, "rewards/margins": 11.59673023223877, "rewards/rejected": -13.906761169433594, "step": 3665 }, { "epoch": 0.57, "learning_rate": 1.1458541418123348e-05, "logits/chosen": -2.649243116378784, "logits/rejected": -3.0133345127105713, "logps/chosen": -61.09877014160156, "logps/rejected": -219.6842498779297, "loss": 0.1637, "rewards/accuracies": 1.0, "rewards/chosen": -3.506840705871582, "rewards/margins": 2.4373793601989746, "rewards/rejected": -5.944220066070557, "step": 3666 }, { "epoch": 0.57, "learning_rate": 1.14578079775922e-05, "logits/chosen": -3.130528688430786, "logits/rejected": -3.0749173164367676, "logps/chosen": -536.1361083984375, "logps/rejected": -426.5137939453125, "loss": 3.5129, "rewards/accuracies": 0.5, "rewards/chosen": -5.244291305541992, "rewards/margins": -1.0388052463531494, "rewards/rejected": -4.205486297607422, "step": 3667 }, { "epoch": 0.57, "learning_rate": 1.1457074537061052e-05, "logits/chosen": -2.586115837097168, "logits/rejected": -3.02400541305542, "logps/chosen": -38.0880126953125, "logps/rejected": -164.76889038085938, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.1888656616210938, "rewards/margins": 5.721883773803711, "rewards/rejected": -7.910749435424805, "step": 3668 }, { "epoch": 0.57, "learning_rate": 1.1456341096529904e-05, "logits/chosen": -2.962292194366455, "logits/rejected": -1.2756141424179077, "logps/chosen": -242.7659149169922, "logps/rejected": -200.7233428955078, "loss": 2.4346, "rewards/accuracies": 0.5, "rewards/chosen": -5.161035537719727, "rewards/margins": 0.6359329223632812, "rewards/rejected": -5.796968460083008, "step": 3669 }, { "epoch": 0.57, "learning_rate": 1.1455607655998756e-05, "logits/chosen": -2.144202709197998, "logits/rejected": -2.9907772541046143, "logps/chosen": -114.28335571289062, "logps/rejected": -210.5489959716797, "loss": 0.7163, "rewards/accuracies": 0.5, "rewards/chosen": -3.7613072395324707, "rewards/margins": 4.3879570960998535, "rewards/rejected": -8.149264335632324, "step": 3670 }, { "epoch": 0.57, "learning_rate": 1.1454874215467607e-05, "logits/chosen": -2.843526840209961, "logits/rejected": -2.954180955886841, "logps/chosen": -107.89018249511719, "logps/rejected": -268.72222900390625, "loss": 0.1887, "rewards/accuracies": 1.0, "rewards/chosen": -4.210546493530273, "rewards/margins": 4.429804801940918, "rewards/rejected": -8.640351295471191, "step": 3671 }, { "epoch": 0.57, "learning_rate": 1.145414077493646e-05, "logits/chosen": -3.167894124984741, "logits/rejected": -3.181662082672119, "logps/chosen": -793.4202270507812, "logps/rejected": -480.2547607421875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.038381814956665, "rewards/margins": 5.6020002365112305, "rewards/rejected": -8.640382766723633, "step": 3672 }, { "epoch": 0.57, "learning_rate": 1.1453407334405311e-05, "logits/chosen": -2.4678430557250977, "logits/rejected": -2.9486052989959717, "logps/chosen": -95.6083984375, "logps/rejected": -328.95611572265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.355377197265625, "rewards/margins": 7.264391899108887, "rewards/rejected": -9.619769096374512, "step": 3673 }, { "epoch": 0.57, "learning_rate": 1.1452673893874163e-05, "logits/chosen": -2.517446756362915, "logits/rejected": -3.116194248199463, "logps/chosen": -197.9132537841797, "logps/rejected": -304.98980712890625, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": -1.6969335079193115, "rewards/margins": 3.925764799118042, "rewards/rejected": -5.6226983070373535, "step": 3674 }, { "epoch": 0.57, "learning_rate": 1.1451940453343017e-05, "logits/chosen": -3.181102991104126, "logits/rejected": -3.071378469467163, "logps/chosen": -167.18226623535156, "logps/rejected": -167.50106811523438, "loss": 3.4506, "rewards/accuracies": 0.5, "rewards/chosen": -7.063874244689941, "rewards/margins": -0.5658838748931885, "rewards/rejected": -6.497990608215332, "step": 3675 }, { "epoch": 0.57, "learning_rate": 1.1451207012811869e-05, "logits/chosen": -2.6906728744506836, "logits/rejected": -3.048421621322632, "logps/chosen": -163.29647827148438, "logps/rejected": -292.9314880371094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3782901763916016, "rewards/margins": 8.896905899047852, "rewards/rejected": -10.275196075439453, "step": 3676 }, { "epoch": 0.57, "learning_rate": 1.145047357228072e-05, "logits/chosen": -2.7308318614959717, "logits/rejected": -3.0955512523651123, "logps/chosen": -77.88533020019531, "logps/rejected": -265.44085693359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.2044248580932617, "rewards/margins": 6.470876693725586, "rewards/rejected": -9.675301551818848, "step": 3677 }, { "epoch": 0.57, "learning_rate": 1.1449740131749572e-05, "logits/chosen": -1.684183120727539, "logits/rejected": -2.0723588466644287, "logps/chosen": -133.72354125976562, "logps/rejected": -288.736328125, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -3.0447511672973633, "rewards/margins": 4.505980968475342, "rewards/rejected": -7.550732612609863, "step": 3678 }, { "epoch": 0.57, "learning_rate": 1.1449006691218424e-05, "logits/chosen": -2.25405216217041, "logits/rejected": -3.0919735431671143, "logps/chosen": -510.7430419921875, "logps/rejected": -538.7088623046875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.3312393426895142, "rewards/margins": 6.707015037536621, "rewards/rejected": -8.038253784179688, "step": 3679 }, { "epoch": 0.57, "learning_rate": 1.1448273250687276e-05, "logits/chosen": -2.4826433658599854, "logits/rejected": -2.906816005706787, "logps/chosen": -96.53173065185547, "logps/rejected": -240.89149475097656, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.832246780395508, "rewards/margins": 7.083261013031006, "rewards/rejected": -9.915508270263672, "step": 3680 }, { "epoch": 0.57, "learning_rate": 1.1447539810156128e-05, "logits/chosen": -2.238316535949707, "logits/rejected": -3.2560739517211914, "logps/chosen": -83.95933532714844, "logps/rejected": -373.1824951171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.113811492919922, "rewards/margins": 7.657749176025391, "rewards/rejected": -10.771560668945312, "step": 3681 }, { "epoch": 0.57, "learning_rate": 1.144680636962498e-05, "logits/chosen": -2.5596766471862793, "logits/rejected": -3.2229881286621094, "logps/chosen": -300.23431396484375, "logps/rejected": -277.8503112792969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5095603466033936, "rewards/margins": 9.420394897460938, "rewards/rejected": -9.929954528808594, "step": 3682 }, { "epoch": 0.57, "learning_rate": 1.1446072929093832e-05, "logits/chosen": -2.8884875774383545, "logits/rejected": -2.8916499614715576, "logps/chosen": -49.4478759765625, "logps/rejected": -366.23138427734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.4310462474823, "rewards/margins": 10.581690788269043, "rewards/rejected": -13.012737274169922, "step": 3683 }, { "epoch": 0.57, "learning_rate": 1.1445339488562685e-05, "logits/chosen": -2.8228232860565186, "logits/rejected": -3.078441858291626, "logps/chosen": -71.05062866210938, "logps/rejected": -216.0810546875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.5462098121643066, "rewards/margins": 6.597106456756592, "rewards/rejected": -9.143316268920898, "step": 3684 }, { "epoch": 0.57, "learning_rate": 1.1444606048031537e-05, "logits/chosen": -1.36605703830719, "logits/rejected": -2.8972675800323486, "logps/chosen": -84.21157836914062, "logps/rejected": -420.8668212890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.4260621070861816, "rewards/margins": 8.297136306762695, "rewards/rejected": -10.723198890686035, "step": 3685 }, { "epoch": 0.57, "learning_rate": 1.1443872607500389e-05, "logits/chosen": -1.9203271865844727, "logits/rejected": -2.7995738983154297, "logps/chosen": -140.70843505859375, "logps/rejected": -210.0196075439453, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.676382303237915, "rewards/margins": 5.965939521789551, "rewards/rejected": -9.642321586608887, "step": 3686 }, { "epoch": 0.57, "learning_rate": 1.1443139166969241e-05, "logits/chosen": -2.6428983211517334, "logits/rejected": -3.037198543548584, "logps/chosen": -243.327880859375, "logps/rejected": -260.1278381347656, "loss": 3.0931, "rewards/accuracies": 0.5, "rewards/chosen": -5.6196417808532715, "rewards/margins": 2.405794620513916, "rewards/rejected": -8.025436401367188, "step": 3687 }, { "epoch": 0.57, "learning_rate": 1.1442405726438093e-05, "logits/chosen": -2.638932704925537, "logits/rejected": -3.059769630432129, "logps/chosen": -150.32601928710938, "logps/rejected": -163.82908630371094, "loss": 1.429, "rewards/accuracies": 0.5, "rewards/chosen": -4.6229729652404785, "rewards/margins": 1.4051222801208496, "rewards/rejected": -6.028095245361328, "step": 3688 }, { "epoch": 0.57, "learning_rate": 1.1441672285906945e-05, "logits/chosen": -2.9606475830078125, "logits/rejected": -1.9648549556732178, "logps/chosen": -295.57501220703125, "logps/rejected": -152.69668579101562, "loss": 3.5072, "rewards/accuracies": 0.5, "rewards/chosen": -7.314465522766113, "rewards/margins": -0.14809942245483398, "rewards/rejected": -7.166366100311279, "step": 3689 }, { "epoch": 0.57, "learning_rate": 1.1440938845375797e-05, "logits/chosen": -2.89032244682312, "logits/rejected": -3.291581392288208, "logps/chosen": -87.0169448852539, "logps/rejected": -218.05604553222656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.227975845336914, "rewards/margins": 7.999592304229736, "rewards/rejected": -9.227568626403809, "step": 3690 }, { "epoch": 0.57, "learning_rate": 1.1440205404844648e-05, "logits/chosen": -2.923431158065796, "logits/rejected": -2.996544599533081, "logps/chosen": -233.9734649658203, "logps/rejected": -504.3010559082031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8872756958007812, "rewards/margins": 9.985610961914062, "rewards/rejected": -11.872886657714844, "step": 3691 }, { "epoch": 0.57, "learning_rate": 1.14394719643135e-05, "logits/chosen": -2.2214934825897217, "logits/rejected": -2.8065876960754395, "logps/chosen": -176.7254180908203, "logps/rejected": -271.9826354980469, "loss": 1.1909, "rewards/accuracies": 0.5, "rewards/chosen": -4.216615676879883, "rewards/margins": 4.0485029220581055, "rewards/rejected": -8.265118598937988, "step": 3692 }, { "epoch": 0.57, "learning_rate": 1.1438738523782354e-05, "logits/chosen": -2.9210240840911865, "logits/rejected": -2.789639472961426, "logps/chosen": -96.80683898925781, "logps/rejected": -204.04531860351562, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.5512545108795166, "rewards/margins": 6.836930274963379, "rewards/rejected": -9.388184547424316, "step": 3693 }, { "epoch": 0.57, "learning_rate": 1.1438005083251206e-05, "logits/chosen": -2.8390097618103027, "logits/rejected": -2.691348075866699, "logps/chosen": -267.6994934082031, "logps/rejected": -440.93646240234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7299301624298096, "rewards/margins": 6.784799575805664, "rewards/rejected": -8.514729499816895, "step": 3694 }, { "epoch": 0.57, "learning_rate": 1.143727164272006e-05, "logits/chosen": -3.1344432830810547, "logits/rejected": -2.5019237995147705, "logps/chosen": -360.8463134765625, "logps/rejected": -401.80438232421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8262726068496704, "rewards/margins": 7.665502548217773, "rewards/rejected": -8.491775512695312, "step": 3695 }, { "epoch": 0.57, "learning_rate": 1.1436538202188911e-05, "logits/chosen": -2.25154709815979, "logits/rejected": -2.9653687477111816, "logps/chosen": -92.00611877441406, "logps/rejected": -351.8104248046875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -3.1130619049072266, "rewards/margins": 7.134984016418457, "rewards/rejected": -10.248045921325684, "step": 3696 }, { "epoch": 0.57, "learning_rate": 1.1435804761657763e-05, "logits/chosen": -2.2912757396698, "logits/rejected": -2.9697861671447754, "logps/chosen": -74.64611053466797, "logps/rejected": -211.98046875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.835230827331543, "rewards/margins": 5.897984027862549, "rewards/rejected": -8.73321533203125, "step": 3697 }, { "epoch": 0.58, "learning_rate": 1.1435071321126615e-05, "logits/chosen": -2.504549026489258, "logits/rejected": -2.9920356273651123, "logps/chosen": -239.0300750732422, "logps/rejected": -448.08831787109375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.0951454639434814, "rewards/margins": 6.879698753356934, "rewards/rejected": -9.974843978881836, "step": 3698 }, { "epoch": 0.58, "learning_rate": 1.1434337880595467e-05, "logits/chosen": -2.913557767868042, "logits/rejected": -3.192413568496704, "logps/chosen": -163.9542236328125, "logps/rejected": -272.2907409667969, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.2455075979232788, "rewards/margins": 6.371999740600586, "rewards/rejected": -7.617506980895996, "step": 3699 }, { "epoch": 0.58, "learning_rate": 1.1433604440064319e-05, "logits/chosen": -2.7013278007507324, "logits/rejected": -2.8756980895996094, "logps/chosen": -215.89749145507812, "logps/rejected": -222.4218292236328, "loss": 1.0573, "rewards/accuracies": 0.5, "rewards/chosen": -3.2923996448516846, "rewards/margins": 2.246358633041382, "rewards/rejected": -5.538758277893066, "step": 3700 }, { "epoch": 0.58, "learning_rate": 1.143287099953317e-05, "logits/chosen": -1.9247546195983887, "logits/rejected": -2.954864978790283, "logps/chosen": -238.21475219726562, "logps/rejected": -365.7162780761719, "loss": 2.7379, "rewards/accuracies": 0.5, "rewards/chosen": -4.948610305786133, "rewards/margins": -1.3131139278411865, "rewards/rejected": -3.635496139526367, "step": 3701 }, { "epoch": 0.58, "learning_rate": 1.1432137559002024e-05, "logits/chosen": -2.7677035331726074, "logits/rejected": -2.4855446815490723, "logps/chosen": -295.5372009277344, "logps/rejected": -370.8128662109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2889816164970398, "rewards/margins": 10.697381973266602, "rewards/rejected": -10.98636245727539, "step": 3702 }, { "epoch": 0.58, "learning_rate": 1.1431404118470876e-05, "logits/chosen": -2.3272464275360107, "logits/rejected": -3.121476411819458, "logps/chosen": -153.09840393066406, "logps/rejected": -335.51202392578125, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": -3.1076595783233643, "rewards/margins": 6.736283302307129, "rewards/rejected": -9.843942642211914, "step": 3703 }, { "epoch": 0.58, "learning_rate": 1.1430670677939728e-05, "logits/chosen": -1.1682543754577637, "logits/rejected": -2.3691279888153076, "logps/chosen": -98.14081573486328, "logps/rejected": -337.33013916015625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.659430980682373, "rewards/margins": 6.00596284866333, "rewards/rejected": -8.665393829345703, "step": 3704 }, { "epoch": 0.58, "learning_rate": 1.142993723740858e-05, "logits/chosen": -2.400590658187866, "logits/rejected": -3.278468370437622, "logps/chosen": -94.47712707519531, "logps/rejected": -278.2532958984375, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -1.235079288482666, "rewards/margins": 5.02341365814209, "rewards/rejected": -6.258492469787598, "step": 3705 }, { "epoch": 0.58, "learning_rate": 1.1429203796877432e-05, "logits/chosen": -1.9306976795196533, "logits/rejected": -3.012580394744873, "logps/chosen": -158.03628540039062, "logps/rejected": -350.06695556640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3205032348632812, "rewards/margins": 8.133659362792969, "rewards/rejected": -9.45416259765625, "step": 3706 }, { "epoch": 0.58, "learning_rate": 1.1428470356346284e-05, "logits/chosen": -3.1186580657958984, "logits/rejected": -2.5033106803894043, "logps/chosen": -185.52859497070312, "logps/rejected": -219.7071990966797, "loss": 0.1601, "rewards/accuracies": 1.0, "rewards/chosen": -1.722190022468567, "rewards/margins": 1.7705435752868652, "rewards/rejected": -3.4927334785461426, "step": 3707 }, { "epoch": 0.58, "learning_rate": 1.1427736915815135e-05, "logits/chosen": -2.965749502182007, "logits/rejected": -3.152998685836792, "logps/chosen": -90.7530517578125, "logps/rejected": -282.8446960449219, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.7458938956260681, "rewards/margins": 6.250984191894531, "rewards/rejected": -6.996878147125244, "step": 3708 }, { "epoch": 0.58, "learning_rate": 1.1427003475283987e-05, "logits/chosen": -2.201585054397583, "logits/rejected": -3.0611701011657715, "logps/chosen": -202.52450561523438, "logps/rejected": -316.7652587890625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.779754638671875, "rewards/margins": 4.961587905883789, "rewards/rejected": -7.741342544555664, "step": 3709 }, { "epoch": 0.58, "learning_rate": 1.1426270034752841e-05, "logits/chosen": -2.2516798973083496, "logits/rejected": -3.2853264808654785, "logps/chosen": -200.15231323242188, "logps/rejected": -429.57623291015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.6086020469665527, "rewards/margins": 7.990416526794434, "rewards/rejected": -9.599019050598145, "step": 3710 }, { "epoch": 0.58, "learning_rate": 1.1425536594221693e-05, "logits/chosen": -3.0303337574005127, "logits/rejected": -2.257476806640625, "logps/chosen": -355.6817321777344, "logps/rejected": -319.04486083984375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.5003163814544678, "rewards/margins": 6.885374546051025, "rewards/rejected": -10.385690689086914, "step": 3711 }, { "epoch": 0.58, "learning_rate": 1.1424803153690545e-05, "logits/chosen": -2.158031702041626, "logits/rejected": -3.2202346324920654, "logps/chosen": -187.9502410888672, "logps/rejected": -293.68218994140625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -0.509538471698761, "rewards/margins": 4.209855079650879, "rewards/rejected": -4.719393253326416, "step": 3712 }, { "epoch": 0.58, "learning_rate": 1.1424069713159397e-05, "logits/chosen": -2.858954906463623, "logits/rejected": -1.4725412130355835, "logps/chosen": -333.9150085449219, "logps/rejected": -152.5198974609375, "loss": 1.3687, "rewards/accuracies": 0.5, "rewards/chosen": -4.119793891906738, "rewards/margins": 1.5983549356460571, "rewards/rejected": -5.718148708343506, "step": 3713 }, { "epoch": 0.58, "learning_rate": 1.1423336272628248e-05, "logits/chosen": -1.637101173400879, "logits/rejected": -2.9801716804504395, "logps/chosen": -135.9988250732422, "logps/rejected": -595.8887939453125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4469181299209595, "rewards/margins": 8.955915451049805, "rewards/rejected": -10.402833938598633, "step": 3714 }, { "epoch": 0.58, "learning_rate": 1.14226028320971e-05, "logits/chosen": -2.3404688835144043, "logits/rejected": -3.0952470302581787, "logps/chosen": -280.7158203125, "logps/rejected": -313.41461181640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9037874341011047, "rewards/margins": 6.6253814697265625, "rewards/rejected": -7.529169082641602, "step": 3715 }, { "epoch": 0.58, "learning_rate": 1.1421869391565952e-05, "logits/chosen": -3.255060911178589, "logits/rejected": -2.2586350440979004, "logps/chosen": -301.46136474609375, "logps/rejected": -110.16950988769531, "loss": 3.5037, "rewards/accuracies": 0.5, "rewards/chosen": -3.2156171798706055, "rewards/margins": 0.5608341693878174, "rewards/rejected": -3.776451587677002, "step": 3716 }, { "epoch": 0.58, "learning_rate": 1.1421135951034804e-05, "logits/chosen": -1.9326907396316528, "logits/rejected": -3.1581103801727295, "logps/chosen": -104.2134017944336, "logps/rejected": -294.6053771972656, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.3459213376045227, "rewards/margins": 6.910398006439209, "rewards/rejected": -7.256319522857666, "step": 3717 }, { "epoch": 0.58, "learning_rate": 1.1420402510503656e-05, "logits/chosen": -2.2176711559295654, "logits/rejected": -2.7988240718841553, "logps/chosen": -137.59732055664062, "logps/rejected": -444.21600341796875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.8273632526397705, "rewards/margins": 5.52903938293457, "rewards/rejected": -7.35640287399292, "step": 3718 }, { "epoch": 0.58, "learning_rate": 1.141966906997251e-05, "logits/chosen": -2.864562749862671, "logits/rejected": -2.530531644821167, "logps/chosen": -796.8557739257812, "logps/rejected": -622.06640625, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -1.0745913982391357, "rewards/margins": 5.224212646484375, "rewards/rejected": -6.29880428314209, "step": 3719 }, { "epoch": 0.58, "learning_rate": 1.1418935629441361e-05, "logits/chosen": -2.5414905548095703, "logits/rejected": -3.149233818054199, "logps/chosen": -259.1197204589844, "logps/rejected": -460.56280517578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.6844418048858643, "rewards/margins": 7.822142601013184, "rewards/rejected": -8.506584167480469, "step": 3720 }, { "epoch": 0.58, "learning_rate": 1.1418202188910213e-05, "logits/chosen": -2.5021092891693115, "logits/rejected": -3.233792543411255, "logps/chosen": -335.0982971191406, "logps/rejected": -462.2969970703125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2217857837677002, "rewards/margins": 7.7471137046813965, "rewards/rejected": -8.968899726867676, "step": 3721 }, { "epoch": 0.58, "learning_rate": 1.1417468748379065e-05, "logits/chosen": -2.558738946914673, "logits/rejected": -2.9860262870788574, "logps/chosen": -182.5458526611328, "logps/rejected": -283.04132080078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.6822417974472046, "rewards/margins": 7.484594345092773, "rewards/rejected": -9.16683578491211, "step": 3722 }, { "epoch": 0.58, "learning_rate": 1.1416735307847917e-05, "logits/chosen": -2.0306785106658936, "logits/rejected": -2.787123441696167, "logps/chosen": -276.725341796875, "logps/rejected": -420.13421630859375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.7593140006065369, "rewards/margins": 5.6011810302734375, "rewards/rejected": -6.360495090484619, "step": 3723 }, { "epoch": 0.58, "learning_rate": 1.1416001867316769e-05, "logits/chosen": -2.95013689994812, "logits/rejected": -3.1806800365448, "logps/chosen": -96.41339111328125, "logps/rejected": -239.76954650878906, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.1507399082183838, "rewards/margins": 5.041219711303711, "rewards/rejected": -6.191959381103516, "step": 3724 }, { "epoch": 0.58, "learning_rate": 1.141526842678562e-05, "logits/chosen": -2.8260879516601562, "logits/rejected": -1.9117258787155151, "logps/chosen": -412.3056640625, "logps/rejected": -311.0234375, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -3.963408946990967, "rewards/margins": 6.033494472503662, "rewards/rejected": -9.996903419494629, "step": 3725 }, { "epoch": 0.58, "learning_rate": 1.1414534986254473e-05, "logits/chosen": -1.6806602478027344, "logits/rejected": -2.8357927799224854, "logps/chosen": -100.24462890625, "logps/rejected": -308.3668212890625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.532572031021118, "rewards/margins": 5.832361221313477, "rewards/rejected": -8.364933013916016, "step": 3726 }, { "epoch": 0.58, "learning_rate": 1.1413801545723326e-05, "logits/chosen": -2.641112804412842, "logits/rejected": -2.767359495162964, "logps/chosen": -41.25077819824219, "logps/rejected": -217.2710418701172, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -2.2496156692504883, "rewards/margins": 5.078789710998535, "rewards/rejected": -7.328405380249023, "step": 3727 }, { "epoch": 0.58, "learning_rate": 1.1413068105192178e-05, "logits/chosen": -1.628723382949829, "logits/rejected": -2.8468925952911377, "logps/chosen": -168.84690856933594, "logps/rejected": -174.94454956054688, "loss": 4.8984, "rewards/accuracies": 0.5, "rewards/chosen": -7.810830116271973, "rewards/margins": -3.6274056434631348, "rewards/rejected": -4.183424949645996, "step": 3728 }, { "epoch": 0.58, "learning_rate": 1.1412334664661032e-05, "logits/chosen": -2.0230460166931152, "logits/rejected": -3.2717251777648926, "logps/chosen": -110.33770751953125, "logps/rejected": -315.70172119140625, "loss": 0.0472, "rewards/accuracies": 1.0, "rewards/chosen": -2.998784065246582, "rewards/margins": 5.2999267578125, "rewards/rejected": -8.298710823059082, "step": 3729 }, { "epoch": 0.58, "learning_rate": 1.1411601224129884e-05, "logits/chosen": -2.8519020080566406, "logits/rejected": -3.0665528774261475, "logps/chosen": -215.06112670898438, "logps/rejected": -361.27276611328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.074470281600952, "rewards/margins": 9.943780899047852, "rewards/rejected": -13.018251419067383, "step": 3730 }, { "epoch": 0.58, "learning_rate": 1.1410867783598735e-05, "logits/chosen": -3.0043528079986572, "logits/rejected": -3.1166203022003174, "logps/chosen": -134.72996520996094, "logps/rejected": -189.41799926757812, "loss": 5.438, "rewards/accuracies": 0.5, "rewards/chosen": -6.581544876098633, "rewards/margins": -2.589790105819702, "rewards/rejected": -3.9917545318603516, "step": 3731 }, { "epoch": 0.58, "learning_rate": 1.1410134343067587e-05, "logits/chosen": -2.4474406242370605, "logits/rejected": -2.9160454273223877, "logps/chosen": -266.722412109375, "logps/rejected": -323.4549865722656, "loss": 1.7504, "rewards/accuracies": 0.5, "rewards/chosen": -2.7838566303253174, "rewards/margins": 0.7104305028915405, "rewards/rejected": -3.4942870140075684, "step": 3732 }, { "epoch": 0.58, "learning_rate": 1.140940090253644e-05, "logits/chosen": -2.0420989990234375, "logits/rejected": -3.037001371383667, "logps/chosen": -87.71670532226562, "logps/rejected": -256.0386047363281, "loss": 0.2645, "rewards/accuracies": 1.0, "rewards/chosen": -5.395872116088867, "rewards/margins": 3.1229705810546875, "rewards/rejected": -8.518842697143555, "step": 3733 }, { "epoch": 0.58, "learning_rate": 1.1408667462005291e-05, "logits/chosen": -2.7959072589874268, "logits/rejected": -2.953319549560547, "logps/chosen": -292.52740478515625, "logps/rejected": -383.80267333984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.302863836288452, "rewards/margins": 7.72726583480835, "rewards/rejected": -11.030130386352539, "step": 3734 }, { "epoch": 0.58, "learning_rate": 1.1407934021474143e-05, "logits/chosen": -3.234248161315918, "logits/rejected": -2.9511005878448486, "logps/chosen": -511.6772155761719, "logps/rejected": -370.1541748046875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.045818328857422, "rewards/margins": 7.499096870422363, "rewards/rejected": -10.544915199279785, "step": 3735 }, { "epoch": 0.58, "learning_rate": 1.1407200580942995e-05, "logits/chosen": -2.827334403991699, "logits/rejected": -1.2357333898544312, "logps/chosen": -274.0373840332031, "logps/rejected": -129.59567260742188, "loss": 0.4883, "rewards/accuracies": 0.5, "rewards/chosen": -3.762556552886963, "rewards/margins": 2.8550517559051514, "rewards/rejected": -6.617608070373535, "step": 3736 }, { "epoch": 0.58, "learning_rate": 1.1406467140411848e-05, "logits/chosen": -2.8137478828430176, "logits/rejected": -2.8911218643188477, "logps/chosen": -80.25770568847656, "logps/rejected": -179.62356567382812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.1711459159851074, "rewards/margins": 6.657601356506348, "rewards/rejected": -9.828746795654297, "step": 3737 }, { "epoch": 0.58, "learning_rate": 1.14057336998807e-05, "logits/chosen": -2.6440165042877197, "logits/rejected": -2.851260185241699, "logps/chosen": -210.76370239257812, "logps/rejected": -342.39892578125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.6515052318572998, "rewards/margins": 5.948895454406738, "rewards/rejected": -7.600400924682617, "step": 3738 }, { "epoch": 0.58, "learning_rate": 1.1405000259349552e-05, "logits/chosen": -2.440688133239746, "logits/rejected": -2.9678523540496826, "logps/chosen": -383.80560302734375, "logps/rejected": -423.22259521484375, "loss": 1.8854, "rewards/accuracies": 0.5, "rewards/chosen": -4.340610980987549, "rewards/margins": 2.390836000442505, "rewards/rejected": -6.731447219848633, "step": 3739 }, { "epoch": 0.58, "learning_rate": 1.1404266818818404e-05, "logits/chosen": -2.911580801010132, "logits/rejected": -2.6110341548919678, "logps/chosen": -495.112060546875, "logps/rejected": -430.32659912109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.3135406970977783, "rewards/margins": 8.373693466186523, "rewards/rejected": -10.687234878540039, "step": 3740 }, { "epoch": 0.58, "learning_rate": 1.1403533378287256e-05, "logits/chosen": -2.2601075172424316, "logits/rejected": -2.9293668270111084, "logps/chosen": -92.18328094482422, "logps/rejected": -156.81304931640625, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -4.070364475250244, "rewards/margins": 2.1401548385620117, "rewards/rejected": -6.210519790649414, "step": 3741 }, { "epoch": 0.58, "learning_rate": 1.1402799937756108e-05, "logits/chosen": -2.9938619136810303, "logits/rejected": -1.7029954195022583, "logps/chosen": -336.09271240234375, "logps/rejected": -238.6060791015625, "loss": 3.2334, "rewards/accuracies": 0.5, "rewards/chosen": -5.254147052764893, "rewards/margins": 1.3897960186004639, "rewards/rejected": -6.643942832946777, "step": 3742 }, { "epoch": 0.58, "learning_rate": 1.140206649722496e-05, "logits/chosen": -2.385284662246704, "logits/rejected": -2.9470462799072266, "logps/chosen": -203.80349731445312, "logps/rejected": -207.08326721191406, "loss": 1.8309, "rewards/accuracies": 0.5, "rewards/chosen": -6.291018009185791, "rewards/margins": -0.3959696292877197, "rewards/rejected": -5.895048141479492, "step": 3743 }, { "epoch": 0.58, "learning_rate": 1.1401333056693812e-05, "logits/chosen": -2.2771573066711426, "logits/rejected": -2.788601875305176, "logps/chosen": -358.47052001953125, "logps/rejected": -406.1453857421875, "loss": 4.2859, "rewards/accuracies": 0.5, "rewards/chosen": -6.334016799926758, "rewards/margins": -0.9352004528045654, "rewards/rejected": -5.398816108703613, "step": 3744 }, { "epoch": 0.58, "learning_rate": 1.1400599616162663e-05, "logits/chosen": -1.7817378044128418, "logits/rejected": -2.9154858589172363, "logps/chosen": -185.39678955078125, "logps/rejected": -289.68280029296875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.3070147037506104, "rewards/margins": 6.175509452819824, "rewards/rejected": -8.482524871826172, "step": 3745 }, { "epoch": 0.58, "learning_rate": 1.1399866175631517e-05, "logits/chosen": -2.8212099075317383, "logits/rejected": -2.188408613204956, "logps/chosen": -313.0325622558594, "logps/rejected": -318.84173583984375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.600295066833496, "rewards/margins": 5.545995712280273, "rewards/rejected": -9.14629077911377, "step": 3746 }, { "epoch": 0.58, "learning_rate": 1.1399132735100369e-05, "logits/chosen": -2.41265606880188, "logits/rejected": -3.175403118133545, "logps/chosen": -86.6352767944336, "logps/rejected": -167.05372619628906, "loss": 0.578, "rewards/accuracies": 0.5, "rewards/chosen": -4.396503925323486, "rewards/margins": 2.476611852645874, "rewards/rejected": -6.873115539550781, "step": 3747 }, { "epoch": 0.58, "learning_rate": 1.139839929456922e-05, "logits/chosen": -1.9891610145568848, "logits/rejected": -3.1965839862823486, "logps/chosen": -263.7124938964844, "logps/rejected": -544.6969604492188, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -3.3301408290863037, "rewards/margins": 5.73439884185791, "rewards/rejected": -9.064539909362793, "step": 3748 }, { "epoch": 0.58, "learning_rate": 1.1397665854038073e-05, "logits/chosen": -1.785681128501892, "logits/rejected": -2.805290937423706, "logps/chosen": -240.92745971679688, "logps/rejected": -523.591064453125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.419156551361084, "rewards/margins": 8.984769821166992, "rewards/rejected": -11.403926849365234, "step": 3749 }, { "epoch": 0.58, "learning_rate": 1.1396932413506924e-05, "logits/chosen": -3.017350435256958, "logits/rejected": -3.1873385906219482, "logps/chosen": -139.48660278320312, "logps/rejected": -143.9655303955078, "loss": 3.6334, "rewards/accuracies": 0.5, "rewards/chosen": -8.500768661499023, "rewards/margins": -1.0275962352752686, "rewards/rejected": -7.473172664642334, "step": 3750 }, { "epoch": 0.58, "learning_rate": 1.1396198972975776e-05, "logits/chosen": -3.1171584129333496, "logits/rejected": -3.037137746810913, "logps/chosen": -314.0421142578125, "logps/rejected": -239.92721557617188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.291525363922119, "rewards/margins": 8.063156127929688, "rewards/rejected": -10.354681015014648, "step": 3751 }, { "epoch": 0.58, "learning_rate": 1.1395465532444628e-05, "logits/chosen": -3.209437608718872, "logits/rejected": -2.669268846511841, "logps/chosen": -778.071533203125, "logps/rejected": -565.123291015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.3691468238830566, "rewards/margins": 9.315837860107422, "rewards/rejected": -12.68498420715332, "step": 3752 }, { "epoch": 0.58, "learning_rate": 1.139473209191348e-05, "logits/chosen": -2.942998170852661, "logits/rejected": -2.404130458831787, "logps/chosen": -314.47149658203125, "logps/rejected": -298.33056640625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.6816681623458862, "rewards/margins": 5.252923965454102, "rewards/rejected": -6.934592247009277, "step": 3753 }, { "epoch": 0.58, "learning_rate": 1.1393998651382332e-05, "logits/chosen": -2.7463324069976807, "logits/rejected": -3.0441653728485107, "logps/chosen": -214.57696533203125, "logps/rejected": -220.9471435546875, "loss": 2.603, "rewards/accuracies": 0.5, "rewards/chosen": -5.355958938598633, "rewards/margins": 2.639362335205078, "rewards/rejected": -7.995321750640869, "step": 3754 }, { "epoch": 0.58, "learning_rate": 1.1393265210851186e-05, "logits/chosen": -1.956192135810852, "logits/rejected": -3.046992540359497, "logps/chosen": -105.3440170288086, "logps/rejected": -259.63360595703125, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -3.3019323348999023, "rewards/margins": 4.836931228637695, "rewards/rejected": -8.138862609863281, "step": 3755 }, { "epoch": 0.58, "learning_rate": 1.1392531770320037e-05, "logits/chosen": -1.7964317798614502, "logits/rejected": -3.204399347305298, "logps/chosen": -185.10455322265625, "logps/rejected": -430.3843078613281, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -3.1338913440704346, "rewards/margins": 5.887667655944824, "rewards/rejected": -9.02155876159668, "step": 3756 }, { "epoch": 0.58, "learning_rate": 1.139179832978889e-05, "logits/chosen": -2.5883655548095703, "logits/rejected": -2.8964807987213135, "logps/chosen": -602.8572998046875, "logps/rejected": -596.037109375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.2961678504943848, "rewards/margins": 6.61505651473999, "rewards/rejected": -8.911224365234375, "step": 3757 }, { "epoch": 0.58, "learning_rate": 1.1391064889257741e-05, "logits/chosen": -2.1803667545318604, "logits/rejected": -3.224015951156616, "logps/chosen": -56.76649475097656, "logps/rejected": -261.793701171875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.108952522277832, "rewards/margins": 7.097247123718262, "rewards/rejected": -11.206199645996094, "step": 3758 }, { "epoch": 0.58, "learning_rate": 1.1390331448726593e-05, "logits/chosen": -2.8507959842681885, "logits/rejected": -2.865173101425171, "logps/chosen": -68.39875030517578, "logps/rejected": -177.09097290039062, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -2.088801383972168, "rewards/margins": 5.342013835906982, "rewards/rejected": -7.43081521987915, "step": 3759 }, { "epoch": 0.58, "learning_rate": 1.1389598008195445e-05, "logits/chosen": -2.9657351970672607, "logits/rejected": -2.37990140914917, "logps/chosen": -506.4150390625, "logps/rejected": -398.0498046875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.385456919670105, "rewards/margins": 8.978435516357422, "rewards/rejected": -10.363893508911133, "step": 3760 }, { "epoch": 0.58, "learning_rate": 1.1388864567664299e-05, "logits/chosen": -2.453338384628296, "logits/rejected": -2.9606237411499023, "logps/chosen": -172.2955322265625, "logps/rejected": -306.6884460449219, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.7223129272460938, "rewards/margins": 6.839934349060059, "rewards/rejected": -9.562247276306152, "step": 3761 }, { "epoch": 0.59, "learning_rate": 1.138813112713315e-05, "logits/chosen": -2.7783656120300293, "logits/rejected": -3.0191054344177246, "logps/chosen": -386.33306884765625, "logps/rejected": -426.7893981933594, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -2.1019599437713623, "rewards/margins": 4.378424167633057, "rewards/rejected": -6.48038387298584, "step": 3762 }, { "epoch": 0.59, "learning_rate": 1.1387397686602002e-05, "logits/chosen": -2.2417523860931396, "logits/rejected": -2.90035343170166, "logps/chosen": -81.28016662597656, "logps/rejected": -219.58111572265625, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -3.634911298751831, "rewards/margins": 3.6055703163146973, "rewards/rejected": -7.240481376647949, "step": 3763 }, { "epoch": 0.59, "learning_rate": 1.1386664246070856e-05, "logits/chosen": -1.7220195531845093, "logits/rejected": -3.038853883743286, "logps/chosen": -523.3615112304688, "logps/rejected": -675.689208984375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.672511339187622, "rewards/margins": 6.155308723449707, "rewards/rejected": -7.82781982421875, "step": 3764 }, { "epoch": 0.59, "learning_rate": 1.1385930805539708e-05, "logits/chosen": -2.7683491706848145, "logits/rejected": -2.850734233856201, "logps/chosen": -286.0385437011719, "logps/rejected": -173.98696899414062, "loss": 2.3946, "rewards/accuracies": 0.5, "rewards/chosen": -5.421591281890869, "rewards/margins": -0.7316375970840454, "rewards/rejected": -4.689953804016113, "step": 3765 }, { "epoch": 0.59, "learning_rate": 1.138519736500856e-05, "logits/chosen": -2.8873465061187744, "logits/rejected": -2.483502149581909, "logps/chosen": -475.14532470703125, "logps/rejected": -427.32928466796875, "loss": 1.2195, "rewards/accuracies": 0.5, "rewards/chosen": -5.134860992431641, "rewards/margins": 1.6708309650421143, "rewards/rejected": -6.805691719055176, "step": 3766 }, { "epoch": 0.59, "learning_rate": 1.1384463924477412e-05, "logits/chosen": -2.084303855895996, "logits/rejected": -2.8398995399475098, "logps/chosen": -351.4514465332031, "logps/rejected": -511.80401611328125, "loss": 0.7131, "rewards/accuracies": 0.5, "rewards/chosen": -3.031036615371704, "rewards/margins": 3.63470196723938, "rewards/rejected": -6.665738582611084, "step": 3767 }, { "epoch": 0.59, "learning_rate": 1.1383730483946263e-05, "logits/chosen": -2.4374170303344727, "logits/rejected": -1.7385483980178833, "logps/chosen": -281.8446350097656, "logps/rejected": -317.89080810546875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.0088257789611816, "rewards/margins": 7.990514278411865, "rewards/rejected": -9.999340057373047, "step": 3768 }, { "epoch": 0.59, "learning_rate": 1.1382997043415115e-05, "logits/chosen": -3.139448881149292, "logits/rejected": -1.7495685815811157, "logps/chosen": -459.2395324707031, "logps/rejected": -285.97174072265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.0838661193847656, "rewards/margins": 7.868006229400635, "rewards/rejected": -9.951871871948242, "step": 3769 }, { "epoch": 0.59, "learning_rate": 1.1382263602883967e-05, "logits/chosen": -2.8497493267059326, "logits/rejected": -1.855318307876587, "logps/chosen": -484.9773864746094, "logps/rejected": -319.0884094238281, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.355676174163818, "rewards/margins": 7.279804229736328, "rewards/rejected": -11.635480880737305, "step": 3770 }, { "epoch": 0.59, "learning_rate": 1.1381530162352819e-05, "logits/chosen": -2.289659261703491, "logits/rejected": -3.1489903926849365, "logps/chosen": -112.54673767089844, "logps/rejected": -297.87213134765625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.578592300415039, "rewards/margins": 5.451725006103516, "rewards/rejected": -8.030317306518555, "step": 3771 }, { "epoch": 0.59, "learning_rate": 1.1380796721821671e-05, "logits/chosen": -2.812912940979004, "logits/rejected": -2.486579418182373, "logps/chosen": -141.6285858154297, "logps/rejected": -258.6893310546875, "loss": 2.9848, "rewards/accuracies": 0.5, "rewards/chosen": -5.808703899383545, "rewards/margins": 0.3544025421142578, "rewards/rejected": -6.163106441497803, "step": 3772 }, { "epoch": 0.59, "learning_rate": 1.1380063281290524e-05, "logits/chosen": -3.148965358734131, "logits/rejected": -2.795053005218506, "logps/chosen": -114.4146728515625, "logps/rejected": -200.74703979492188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.648360252380371, "rewards/margins": 6.665919303894043, "rewards/rejected": -8.314279556274414, "step": 3773 }, { "epoch": 0.59, "learning_rate": 1.1379329840759376e-05, "logits/chosen": -2.8587725162506104, "logits/rejected": -3.155881881713867, "logps/chosen": -93.25181579589844, "logps/rejected": -225.88604736328125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.148594379425049, "rewards/margins": 5.741822242736816, "rewards/rejected": -8.890417098999023, "step": 3774 }, { "epoch": 0.59, "learning_rate": 1.1378596400228228e-05, "logits/chosen": -2.8135159015655518, "logits/rejected": -2.3139264583587646, "logps/chosen": -162.2673797607422, "logps/rejected": -211.44940185546875, "loss": 2.7719, "rewards/accuracies": 0.5, "rewards/chosen": -5.3007941246032715, "rewards/margins": 0.3827056884765625, "rewards/rejected": -5.683499813079834, "step": 3775 }, { "epoch": 0.59, "learning_rate": 1.137786295969708e-05, "logits/chosen": -2.052095413208008, "logits/rejected": -3.1890103816986084, "logps/chosen": -179.9556884765625, "logps/rejected": -335.82611083984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.4467625617980957, "rewards/margins": 6.878811359405518, "rewards/rejected": -9.325573921203613, "step": 3776 }, { "epoch": 0.59, "learning_rate": 1.1377129519165932e-05, "logits/chosen": -3.0707225799560547, "logits/rejected": -2.9922397136688232, "logps/chosen": -323.8525390625, "logps/rejected": -448.4826354980469, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.741748809814453, "rewards/margins": 7.687811374664307, "rewards/rejected": -10.429559707641602, "step": 3777 }, { "epoch": 0.59, "learning_rate": 1.1376396078634784e-05, "logits/chosen": -2.980926990509033, "logits/rejected": -2.658893346786499, "logps/chosen": -343.551513671875, "logps/rejected": -189.45223999023438, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -1.9560333490371704, "rewards/margins": 4.836963653564453, "rewards/rejected": -6.792997360229492, "step": 3778 }, { "epoch": 0.59, "learning_rate": 1.1375662638103636e-05, "logits/chosen": -2.9615044593811035, "logits/rejected": -2.3296711444854736, "logps/chosen": -194.2808837890625, "logps/rejected": -211.849853515625, "loss": 4.3546, "rewards/accuracies": 0.5, "rewards/chosen": -7.448457717895508, "rewards/margins": -0.52530837059021, "rewards/rejected": -6.923149108886719, "step": 3779 }, { "epoch": 0.59, "learning_rate": 1.1374929197572488e-05, "logits/chosen": -3.258516311645508, "logits/rejected": -2.7986769676208496, "logps/chosen": -533.5994873046875, "logps/rejected": -459.4504699707031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0777161121368408, "rewards/margins": 7.016687393188477, "rewards/rejected": -8.094403266906738, "step": 3780 }, { "epoch": 0.59, "learning_rate": 1.137419575704134e-05, "logits/chosen": -2.871000051498413, "logits/rejected": -3.3930654525756836, "logps/chosen": -86.29936981201172, "logps/rejected": -339.2493896484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.39951753616333, "rewards/margins": 6.6476030349731445, "rewards/rejected": -9.047121047973633, "step": 3781 }, { "epoch": 0.59, "learning_rate": 1.1373462316510193e-05, "logits/chosen": -2.7628543376922607, "logits/rejected": -2.203073501586914, "logps/chosen": -192.04623413085938, "logps/rejected": -205.4157257080078, "loss": 2.2037, "rewards/accuracies": 0.5, "rewards/chosen": -5.486850738525391, "rewards/margins": -0.19551753997802734, "rewards/rejected": -5.291333198547363, "step": 3782 }, { "epoch": 0.59, "learning_rate": 1.1372728875979045e-05, "logits/chosen": -2.9025156497955322, "logits/rejected": -1.9771910905838013, "logps/chosen": -482.138671875, "logps/rejected": -381.3697204589844, "loss": 3.8077, "rewards/accuracies": 0.5, "rewards/chosen": -5.6130218505859375, "rewards/margins": -3.223773241043091, "rewards/rejected": -2.389248847961426, "step": 3783 }, { "epoch": 0.59, "learning_rate": 1.1371995435447897e-05, "logits/chosen": -2.783231258392334, "logits/rejected": -2.8445184230804443, "logps/chosen": -110.36475372314453, "logps/rejected": -283.575927734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.079300403594971, "rewards/margins": 6.563937187194824, "rewards/rejected": -10.643237113952637, "step": 3784 }, { "epoch": 0.59, "learning_rate": 1.1371261994916749e-05, "logits/chosen": -2.471632957458496, "logits/rejected": -3.2184531688690186, "logps/chosen": -143.8885955810547, "logps/rejected": -334.9945068359375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.240877151489258, "rewards/margins": 4.549601078033447, "rewards/rejected": -6.790477752685547, "step": 3785 }, { "epoch": 0.59, "learning_rate": 1.13705285543856e-05, "logits/chosen": -2.4016871452331543, "logits/rejected": -2.9131088256835938, "logps/chosen": -47.83314895629883, "logps/rejected": -286.340087890625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.1191680431365967, "rewards/margins": 6.442755222320557, "rewards/rejected": -9.56192398071289, "step": 3786 }, { "epoch": 0.59, "learning_rate": 1.1369795113854452e-05, "logits/chosen": -2.3099284172058105, "logits/rejected": -2.9942433834075928, "logps/chosen": -138.2125701904297, "logps/rejected": -352.056884765625, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": -2.147681474685669, "rewards/margins": 4.076122283935547, "rewards/rejected": -6.223803520202637, "step": 3787 }, { "epoch": 0.59, "learning_rate": 1.1369061673323304e-05, "logits/chosen": -1.0153640508651733, "logits/rejected": -2.6281542778015137, "logps/chosen": -136.7032470703125, "logps/rejected": -479.2372741699219, "loss": 0.4453, "rewards/accuracies": 0.5, "rewards/chosen": -3.6644887924194336, "rewards/margins": 2.521726608276367, "rewards/rejected": -6.186215400695801, "step": 3788 }, { "epoch": 0.59, "learning_rate": 1.1368328232792156e-05, "logits/chosen": -2.9046974182128906, "logits/rejected": -2.3699772357940674, "logps/chosen": -288.7195739746094, "logps/rejected": -86.915283203125, "loss": 6.039, "rewards/accuracies": 0.0, "rewards/chosen": -8.489408493041992, "rewards/margins": -6.036134719848633, "rewards/rejected": -2.4532744884490967, "step": 3789 }, { "epoch": 0.59, "learning_rate": 1.1367594792261008e-05, "logits/chosen": -3.0338306427001953, "logits/rejected": -2.7417895793914795, "logps/chosen": -390.56109619140625, "logps/rejected": -381.7865905761719, "loss": 4.2031, "rewards/accuracies": 0.0, "rewards/chosen": -7.306227207183838, "rewards/margins": -4.08674430847168, "rewards/rejected": -3.219482660293579, "step": 3790 }, { "epoch": 0.59, "learning_rate": 1.1366861351729862e-05, "logits/chosen": -2.9256017208099365, "logits/rejected": -3.346935749053955, "logps/chosen": -69.05440521240234, "logps/rejected": -146.88702392578125, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -2.7363808155059814, "rewards/margins": 3.816941499710083, "rewards/rejected": -6.5533223152160645, "step": 3791 }, { "epoch": 0.59, "learning_rate": 1.1366127911198714e-05, "logits/chosen": -3.066807508468628, "logits/rejected": -3.3907039165496826, "logps/chosen": -481.9176940917969, "logps/rejected": -525.98486328125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.5845017433166504, "rewards/margins": 5.275301933288574, "rewards/rejected": -6.859804153442383, "step": 3792 }, { "epoch": 0.59, "learning_rate": 1.1365394470667565e-05, "logits/chosen": -2.5684914588928223, "logits/rejected": -3.1208324432373047, "logps/chosen": -415.00079345703125, "logps/rejected": -403.9771423339844, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.992483913898468, "rewards/margins": 5.101643085479736, "rewards/rejected": -6.0941267013549805, "step": 3793 }, { "epoch": 0.59, "learning_rate": 1.1364661030136417e-05, "logits/chosen": -3.0849790573120117, "logits/rejected": -3.024681329727173, "logps/chosen": -94.19522094726562, "logps/rejected": -132.9316864013672, "loss": 1.359, "rewards/accuracies": 0.5, "rewards/chosen": -4.4967265129089355, "rewards/margins": 1.1685669422149658, "rewards/rejected": -5.6652936935424805, "step": 3794 }, { "epoch": 0.59, "learning_rate": 1.136392758960527e-05, "logits/chosen": -2.9734718799591064, "logits/rejected": -2.9369494915008545, "logps/chosen": -289.5185546875, "logps/rejected": -322.4537353515625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.47236967086792, "rewards/margins": 6.223978042602539, "rewards/rejected": -7.696347236633301, "step": 3795 }, { "epoch": 0.59, "learning_rate": 1.1363194149074123e-05, "logits/chosen": -2.0347132682800293, "logits/rejected": -3.0957717895507812, "logps/chosen": -382.9113464355469, "logps/rejected": -606.059814453125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7642974853515625, "rewards/margins": 6.35329008102417, "rewards/rejected": -7.117587566375732, "step": 3796 }, { "epoch": 0.59, "learning_rate": 1.1362460708542975e-05, "logits/chosen": -3.113016366958618, "logits/rejected": -3.1218607425689697, "logps/chosen": -108.55606079101562, "logps/rejected": -121.36599731445312, "loss": 1.7211, "rewards/accuracies": 0.5, "rewards/chosen": -5.4425530433654785, "rewards/margins": 0.44272756576538086, "rewards/rejected": -5.885280609130859, "step": 3797 }, { "epoch": 0.59, "learning_rate": 1.1361727268011827e-05, "logits/chosen": -2.3694710731506348, "logits/rejected": -3.1709091663360596, "logps/chosen": -136.21707153320312, "logps/rejected": -284.56011962890625, "loss": 0.4783, "rewards/accuracies": 1.0, "rewards/chosen": -2.8827643394470215, "rewards/margins": 0.5848724842071533, "rewards/rejected": -3.467636823654175, "step": 3798 }, { "epoch": 0.59, "learning_rate": 1.1360993827480678e-05, "logits/chosen": -3.0789427757263184, "logits/rejected": -2.625091314315796, "logps/chosen": -553.0535278320312, "logps/rejected": -435.98468017578125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -2.255251407623291, "rewards/margins": 5.311933517456055, "rewards/rejected": -7.5671844482421875, "step": 3799 }, { "epoch": 0.59, "learning_rate": 1.1360260386949532e-05, "logits/chosen": -2.432265043258667, "logits/rejected": -2.844573736190796, "logps/chosen": -79.20022583007812, "logps/rejected": -259.4814147949219, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.7517848014831543, "rewards/margins": 6.0672736167907715, "rewards/rejected": -8.819058418273926, "step": 3800 }, { "epoch": 0.59, "learning_rate": 1.1359526946418384e-05, "logits/chosen": -2.5194754600524902, "logits/rejected": -3.211479663848877, "logps/chosen": -790.029541015625, "logps/rejected": -727.0862426757812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.54815673828125, "rewards/margins": 7.041447639465332, "rewards/rejected": -6.493290901184082, "step": 3801 }, { "epoch": 0.59, "learning_rate": 1.1358793505887236e-05, "logits/chosen": -1.6462807655334473, "logits/rejected": -2.9470531940460205, "logps/chosen": -62.10527801513672, "logps/rejected": -249.44268798828125, "loss": 0.32, "rewards/accuracies": 1.0, "rewards/chosen": -2.5988476276397705, "rewards/margins": 2.506157398223877, "rewards/rejected": -5.105004787445068, "step": 3802 }, { "epoch": 0.59, "learning_rate": 1.1358060065356088e-05, "logits/chosen": -3.284130334854126, "logits/rejected": -2.8912978172302246, "logps/chosen": -203.859130859375, "logps/rejected": -183.46128845214844, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.12652587890625, "rewards/margins": 5.170866966247559, "rewards/rejected": -7.297392845153809, "step": 3803 }, { "epoch": 0.59, "learning_rate": 1.135732662482494e-05, "logits/chosen": -2.9059836864471436, "logits/rejected": -1.7040486335754395, "logps/chosen": -285.917236328125, "logps/rejected": -203.66017150878906, "loss": 1.67, "rewards/accuracies": 0.5, "rewards/chosen": -3.325733184814453, "rewards/margins": 1.4455097913742065, "rewards/rejected": -4.771243095397949, "step": 3804 }, { "epoch": 0.59, "learning_rate": 1.1356593184293791e-05, "logits/chosen": -2.1412289142608643, "logits/rejected": -3.1714229583740234, "logps/chosen": -256.43609619140625, "logps/rejected": -411.2609558105469, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.9790306091308594, "rewards/margins": 4.599856853485107, "rewards/rejected": -7.578886985778809, "step": 3805 }, { "epoch": 0.59, "learning_rate": 1.1355859743762643e-05, "logits/chosen": -2.7775046825408936, "logits/rejected": -2.3570914268493652, "logps/chosen": -162.3367919921875, "logps/rejected": -131.379150390625, "loss": 0.452, "rewards/accuracies": 0.5, "rewards/chosen": -4.250762939453125, "rewards/margins": 1.0870940685272217, "rewards/rejected": -5.337857246398926, "step": 3806 }, { "epoch": 0.59, "learning_rate": 1.1355126303231495e-05, "logits/chosen": -1.7611613273620605, "logits/rejected": -2.527693510055542, "logps/chosen": -231.48423767089844, "logps/rejected": -373.8289794921875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.7196091413497925, "rewards/margins": 6.4896135330200195, "rewards/rejected": -8.209222793579102, "step": 3807 }, { "epoch": 0.59, "learning_rate": 1.1354392862700349e-05, "logits/chosen": -2.2363979816436768, "logits/rejected": -3.003906726837158, "logps/chosen": -473.049560546875, "logps/rejected": -648.5032958984375, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.3188978433609009, "rewards/margins": 4.602695465087891, "rewards/rejected": -5.92159366607666, "step": 3808 }, { "epoch": 0.59, "learning_rate": 1.13536594221692e-05, "logits/chosen": -2.892659902572632, "logits/rejected": -3.0341365337371826, "logps/chosen": -103.47822570800781, "logps/rejected": -155.1496124267578, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -1.2302546501159668, "rewards/margins": 4.794427871704102, "rewards/rejected": -6.02468204498291, "step": 3809 }, { "epoch": 0.59, "learning_rate": 1.1352925981638052e-05, "logits/chosen": -2.8492369651794434, "logits/rejected": -2.7672064304351807, "logps/chosen": -138.1631622314453, "logps/rejected": -313.09588623046875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.2338775396347046, "rewards/margins": 7.871538162231445, "rewards/rejected": -9.105415344238281, "step": 3810 }, { "epoch": 0.59, "learning_rate": 1.1352192541106904e-05, "logits/chosen": -3.1269874572753906, "logits/rejected": -2.6481845378875732, "logps/chosen": -255.770751953125, "logps/rejected": -84.87005615234375, "loss": 1.9364, "rewards/accuracies": 0.5, "rewards/chosen": -4.441409111022949, "rewards/margins": -1.034205675125122, "rewards/rejected": -3.4072036743164062, "step": 3811 }, { "epoch": 0.59, "learning_rate": 1.1351459100575756e-05, "logits/chosen": -2.113144874572754, "logits/rejected": -2.736832618713379, "logps/chosen": -84.00997924804688, "logps/rejected": -271.7889404296875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -3.6594910621643066, "rewards/margins": 6.699710845947266, "rewards/rejected": -10.359201431274414, "step": 3812 }, { "epoch": 0.59, "learning_rate": 1.1350725660044608e-05, "logits/chosen": -2.096243143081665, "logits/rejected": -2.72208833694458, "logps/chosen": -176.159912109375, "logps/rejected": -278.52923583984375, "loss": 1.9345, "rewards/accuracies": 0.5, "rewards/chosen": -4.752583980560303, "rewards/margins": 2.2986645698547363, "rewards/rejected": -7.051248550415039, "step": 3813 }, { "epoch": 0.59, "learning_rate": 1.134999221951346e-05, "logits/chosen": -3.0015499591827393, "logits/rejected": -3.018350601196289, "logps/chosen": -345.48297119140625, "logps/rejected": -290.8227233886719, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.6518433094024658, "rewards/margins": 5.432872772216797, "rewards/rejected": -7.084715843200684, "step": 3814 }, { "epoch": 0.59, "learning_rate": 1.1349258778982312e-05, "logits/chosen": -2.0747992992401123, "logits/rejected": -3.003516674041748, "logps/chosen": -196.12606811523438, "logps/rejected": -372.8465270996094, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.7009949684143066, "rewards/margins": 4.748960971832275, "rewards/rejected": -6.449955940246582, "step": 3815 }, { "epoch": 0.59, "learning_rate": 1.1348525338451164e-05, "logits/chosen": -2.8231282234191895, "logits/rejected": -2.2179856300354004, "logps/chosen": -162.10922241210938, "logps/rejected": -299.1278381347656, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2040767669677734, "rewards/margins": 7.846374988555908, "rewards/rejected": -9.050451278686523, "step": 3816 }, { "epoch": 0.59, "learning_rate": 1.1347791897920017e-05, "logits/chosen": -2.401155710220337, "logits/rejected": -3.0336484909057617, "logps/chosen": -131.35195922851562, "logps/rejected": -366.782470703125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.9170767068862915, "rewards/margins": 5.949051856994629, "rewards/rejected": -7.866128921508789, "step": 3817 }, { "epoch": 0.59, "learning_rate": 1.1347058457388869e-05, "logits/chosen": -1.8670402765274048, "logits/rejected": -1.9972831010818481, "logps/chosen": -223.65673828125, "logps/rejected": -159.47113037109375, "loss": 2.4665, "rewards/accuracies": 0.5, "rewards/chosen": -4.149089336395264, "rewards/margins": -0.7690749168395996, "rewards/rejected": -3.380014419555664, "step": 3818 }, { "epoch": 0.59, "learning_rate": 1.1346325016857721e-05, "logits/chosen": -2.683781623840332, "logits/rejected": -1.7806833982467651, "logps/chosen": -465.13677978515625, "logps/rejected": -334.827880859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2881927490234375, "rewards/margins": 8.487504959106445, "rewards/rejected": -10.775697708129883, "step": 3819 }, { "epoch": 0.59, "learning_rate": 1.1345591576326573e-05, "logits/chosen": -3.2144103050231934, "logits/rejected": -3.4383299350738525, "logps/chosen": -58.611778259277344, "logps/rejected": -241.12371826171875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.8716449737548828, "rewards/margins": 5.612725257873535, "rewards/rejected": -7.484370231628418, "step": 3820 }, { "epoch": 0.59, "learning_rate": 1.1344858135795425e-05, "logits/chosen": -2.9695491790771484, "logits/rejected": -2.9837026596069336, "logps/chosen": -123.4378433227539, "logps/rejected": -100.16981506347656, "loss": 0.9848, "rewards/accuracies": 0.5, "rewards/chosen": -4.469911098480225, "rewards/margins": 1.452331304550171, "rewards/rejected": -5.922242641448975, "step": 3821 }, { "epoch": 0.59, "learning_rate": 1.1344124695264277e-05, "logits/chosen": -2.4001474380493164, "logits/rejected": -3.0836780071258545, "logps/chosen": -253.4063262939453, "logps/rejected": -375.82708740234375, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -0.9708000421524048, "rewards/margins": 3.9196908473968506, "rewards/rejected": -4.890491008758545, "step": 3822 }, { "epoch": 0.59, "learning_rate": 1.1343391254733129e-05, "logits/chosen": -1.8966623544692993, "logits/rejected": -3.078758478164673, "logps/chosen": -102.60893249511719, "logps/rejected": -498.59368896484375, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -2.808811902999878, "rewards/margins": 6.39202880859375, "rewards/rejected": -9.200840950012207, "step": 3823 }, { "epoch": 0.59, "learning_rate": 1.134265781420198e-05, "logits/chosen": -1.8662474155426025, "logits/rejected": -2.872769355773926, "logps/chosen": -161.82020568847656, "logps/rejected": -361.5374755859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.44132232666015625, "rewards/margins": 7.648825168609619, "rewards/rejected": -8.090147972106934, "step": 3824 }, { "epoch": 0.59, "learning_rate": 1.1341924373670832e-05, "logits/chosen": -2.325178623199463, "logits/rejected": -3.0770328044891357, "logps/chosen": -507.2879333496094, "logps/rejected": -535.7103881835938, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": -2.2459120750427246, "rewards/margins": 6.650696277618408, "rewards/rejected": -8.896608352661133, "step": 3825 }, { "epoch": 0.6, "learning_rate": 1.1341190933139686e-05, "logits/chosen": -3.2194430828094482, "logits/rejected": -1.1519800424575806, "logps/chosen": -421.1298522949219, "logps/rejected": -145.26927185058594, "loss": 5.6363, "rewards/accuracies": 0.0, "rewards/chosen": -7.193617343902588, "rewards/margins": -5.631857872009277, "rewards/rejected": -1.5617599487304688, "step": 3826 }, { "epoch": 0.6, "learning_rate": 1.1340457492608538e-05, "logits/chosen": -2.794233560562134, "logits/rejected": -1.3430386781692505, "logps/chosen": -396.3179626464844, "logps/rejected": -264.4306640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1447555422782898, "rewards/margins": 7.939354419708252, "rewards/rejected": -8.084110260009766, "step": 3827 }, { "epoch": 0.6, "learning_rate": 1.133972405207739e-05, "logits/chosen": -2.8510594367980957, "logits/rejected": -3.132737636566162, "logps/chosen": -79.31163024902344, "logps/rejected": -225.75582885742188, "loss": 0.122, "rewards/accuracies": 1.0, "rewards/chosen": -2.186999797821045, "rewards/margins": 2.2994351387023926, "rewards/rejected": -4.4864349365234375, "step": 3828 }, { "epoch": 0.6, "learning_rate": 1.1338990611546242e-05, "logits/chosen": -2.90686297416687, "logits/rejected": -3.0688250064849854, "logps/chosen": -427.21630859375, "logps/rejected": -434.22344970703125, "loss": 0.1099, "rewards/accuracies": 1.0, "rewards/chosen": -1.535016655921936, "rewards/margins": 3.4541895389556885, "rewards/rejected": -4.989206314086914, "step": 3829 }, { "epoch": 0.6, "learning_rate": 1.1338257171015095e-05, "logits/chosen": -3.057321071624756, "logits/rejected": -2.7397236824035645, "logps/chosen": -205.25933837890625, "logps/rejected": -109.49266052246094, "loss": 1.5511, "rewards/accuracies": 0.0, "rewards/chosen": -1.7055672407150269, "rewards/margins": -1.308423638343811, "rewards/rejected": -0.39714357256889343, "step": 3830 }, { "epoch": 0.6, "learning_rate": 1.1337523730483947e-05, "logits/chosen": -2.844992160797119, "logits/rejected": -1.4398324489593506, "logps/chosen": -149.74574279785156, "logps/rejected": -157.52175903320312, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -3.612424373626709, "rewards/margins": 4.362489700317383, "rewards/rejected": -7.97491455078125, "step": 3831 }, { "epoch": 0.6, "learning_rate": 1.1336790289952799e-05, "logits/chosen": -2.9935355186462402, "logits/rejected": -2.352473258972168, "logps/chosen": -176.177490234375, "logps/rejected": -157.3241729736328, "loss": 0.1119, "rewards/accuracies": 1.0, "rewards/chosen": -1.50504469871521, "rewards/margins": 3.0673513412475586, "rewards/rejected": -4.572396278381348, "step": 3832 }, { "epoch": 0.6, "learning_rate": 1.133605684942165e-05, "logits/chosen": -1.483755111694336, "logits/rejected": -2.974074602127075, "logps/chosen": -87.15834045410156, "logps/rejected": -415.39434814453125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.9753434658050537, "rewards/margins": 5.672842502593994, "rewards/rejected": -7.648185729980469, "step": 3833 }, { "epoch": 0.6, "learning_rate": 1.1335323408890503e-05, "logits/chosen": -1.4148956537246704, "logits/rejected": -3.0404982566833496, "logps/chosen": -100.0013656616211, "logps/rejected": -315.9381103515625, "loss": 0.08, "rewards/accuracies": 1.0, "rewards/chosen": -1.4906318187713623, "rewards/margins": 3.7594308853149414, "rewards/rejected": -5.250062942504883, "step": 3834 }, { "epoch": 0.6, "learning_rate": 1.1334589968359356e-05, "logits/chosen": -1.0909090042114258, "logits/rejected": -2.9786131381988525, "logps/chosen": -140.8876953125, "logps/rejected": -363.95318603515625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.7263599634170532, "rewards/margins": 5.4825053215026855, "rewards/rejected": -6.208865165710449, "step": 3835 }, { "epoch": 0.6, "learning_rate": 1.1333856527828208e-05, "logits/chosen": -2.40051531791687, "logits/rejected": -3.220998764038086, "logps/chosen": -235.1065673828125, "logps/rejected": -385.9633483886719, "loss": 1.9576, "rewards/accuracies": 0.5, "rewards/chosen": -2.8986454010009766, "rewards/margins": -0.2881641387939453, "rewards/rejected": -2.6104812622070312, "step": 3836 }, { "epoch": 0.6, "learning_rate": 1.133312308729706e-05, "logits/chosen": -3.2704944610595703, "logits/rejected": -2.1753718852996826, "logps/chosen": -191.87481689453125, "logps/rejected": -56.499874114990234, "loss": 0.4444, "rewards/accuracies": 1.0, "rewards/chosen": -3.3259949684143066, "rewards/margins": 0.7186582088470459, "rewards/rejected": -4.044652938842773, "step": 3837 }, { "epoch": 0.6, "learning_rate": 1.1332389646765912e-05, "logits/chosen": -2.705209493637085, "logits/rejected": -2.5131990909576416, "logps/chosen": -200.41183471679688, "logps/rejected": -347.7033996582031, "loss": 2.5738, "rewards/accuracies": 0.5, "rewards/chosen": -3.1170248985290527, "rewards/margins": 1.9089007377624512, "rewards/rejected": -5.025925636291504, "step": 3838 }, { "epoch": 0.6, "learning_rate": 1.1331656206234764e-05, "logits/chosen": -2.638701915740967, "logits/rejected": -2.9026596546173096, "logps/chosen": -159.08731079101562, "logps/rejected": -233.9174346923828, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.16672517359256744, "rewards/margins": 7.377845287322998, "rewards/rejected": -7.211119651794434, "step": 3839 }, { "epoch": 0.6, "learning_rate": 1.1330922765703616e-05, "logits/chosen": -2.7519102096557617, "logits/rejected": -2.3147218227386475, "logps/chosen": -825.014404296875, "logps/rejected": -537.9427490234375, "loss": 0.0956, "rewards/accuracies": 1.0, "rewards/chosen": -3.7766551971435547, "rewards/margins": 3.4681553840637207, "rewards/rejected": -7.244810581207275, "step": 3840 }, { "epoch": 0.6, "learning_rate": 1.1330189325172467e-05, "logits/chosen": -2.1491591930389404, "logits/rejected": -2.7898387908935547, "logps/chosen": -230.79580688476562, "logps/rejected": -230.1001739501953, "loss": 2.0899, "rewards/accuracies": 0.5, "rewards/chosen": -3.8036696910858154, "rewards/margins": 2.022773027420044, "rewards/rejected": -5.826442718505859, "step": 3841 }, { "epoch": 0.6, "learning_rate": 1.132945588464132e-05, "logits/chosen": -3.1702418327331543, "logits/rejected": -1.6217983961105347, "logps/chosen": -495.0688171386719, "logps/rejected": -197.14608764648438, "loss": 4.1595, "rewards/accuracies": 0.0, "rewards/chosen": -4.489519119262695, "rewards/margins": -4.140552043914795, "rewards/rejected": -0.3489669859409332, "step": 3842 }, { "epoch": 0.6, "learning_rate": 1.1328722444110171e-05, "logits/chosen": -2.0453414916992188, "logits/rejected": -3.3371379375457764, "logps/chosen": -149.0727996826172, "logps/rejected": -324.235107421875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.24754029512405396, "rewards/margins": 6.079195976257324, "rewards/rejected": -5.831655979156494, "step": 3843 }, { "epoch": 0.6, "learning_rate": 1.1327989003579025e-05, "logits/chosen": -2.798243284225464, "logits/rejected": -2.9308581352233887, "logps/chosen": -464.9422607421875, "logps/rejected": -421.90386962890625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.667083740234375, "rewards/margins": 6.117079257965088, "rewards/rejected": -6.784162998199463, "step": 3844 }, { "epoch": 0.6, "learning_rate": 1.1327255563047877e-05, "logits/chosen": -3.0225064754486084, "logits/rejected": -3.3763821125030518, "logps/chosen": -95.83289337158203, "logps/rejected": -242.76638793945312, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": 0.46646520495414734, "rewards/margins": 3.980525493621826, "rewards/rejected": -3.5140602588653564, "step": 3845 }, { "epoch": 0.6, "learning_rate": 1.1326522122516729e-05, "logits/chosen": -2.9136552810668945, "logits/rejected": -0.7591527104377747, "logps/chosen": -323.78729248046875, "logps/rejected": -183.95901489257812, "loss": 1.3202, "rewards/accuracies": 0.5, "rewards/chosen": -3.7808353900909424, "rewards/margins": 2.0016095638275146, "rewards/rejected": -5.782444953918457, "step": 3846 }, { "epoch": 0.6, "learning_rate": 1.132578868198558e-05, "logits/chosen": -3.137930393218994, "logits/rejected": -2.929858684539795, "logps/chosen": -159.62855529785156, "logps/rejected": -179.12631225585938, "loss": 1.62, "rewards/accuracies": 0.5, "rewards/chosen": -4.3319807052612305, "rewards/margins": 0.6260933876037598, "rewards/rejected": -4.958074569702148, "step": 3847 }, { "epoch": 0.6, "learning_rate": 1.1325055241454432e-05, "logits/chosen": -3.3454020023345947, "logits/rejected": -2.854191541671753, "logps/chosen": -796.6625366210938, "logps/rejected": -336.1961975097656, "loss": 0.542, "rewards/accuracies": 0.5, "rewards/chosen": -1.6272690296173096, "rewards/margins": 0.9462294578552246, "rewards/rejected": -2.5734987258911133, "step": 3848 }, { "epoch": 0.6, "learning_rate": 1.1324321800923284e-05, "logits/chosen": -1.661501169204712, "logits/rejected": -3.0114200115203857, "logps/chosen": -105.55189514160156, "logps/rejected": -394.7645263671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.41928598284721375, "rewards/margins": 6.2574462890625, "rewards/rejected": -6.676732063293457, "step": 3849 }, { "epoch": 0.6, "learning_rate": 1.1323588360392136e-05, "logits/chosen": -2.841932535171509, "logits/rejected": -2.9923195838928223, "logps/chosen": -319.1539611816406, "logps/rejected": -470.8682556152344, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": 0.8133773803710938, "rewards/margins": 4.426983833312988, "rewards/rejected": -3.6136062145233154, "step": 3850 }, { "epoch": 0.6, "learning_rate": 1.1322854919860988e-05, "logits/chosen": -2.225391149520874, "logits/rejected": -2.7687370777130127, "logps/chosen": -145.2491455078125, "logps/rejected": -266.3743896484375, "loss": 1.3513, "rewards/accuracies": 0.5, "rewards/chosen": -4.458342552185059, "rewards/margins": 1.0594072341918945, "rewards/rejected": -5.517749786376953, "step": 3851 }, { "epoch": 0.6, "learning_rate": 1.132212147932984e-05, "logits/chosen": -3.1798593997955322, "logits/rejected": -2.8159902095794678, "logps/chosen": -109.9598388671875, "logps/rejected": -201.28367614746094, "loss": 1.3813, "rewards/accuracies": 0.5, "rewards/chosen": -1.6400047540664673, "rewards/margins": 3.981292247772217, "rewards/rejected": -5.6212968826293945, "step": 3852 }, { "epoch": 0.6, "learning_rate": 1.1321388038798693e-05, "logits/chosen": -1.9937382936477661, "logits/rejected": -3.163499116897583, "logps/chosen": -396.19342041015625, "logps/rejected": -460.94970703125, "loss": 3.0362, "rewards/accuracies": 0.5, "rewards/chosen": -2.256866931915283, "rewards/margins": 0.007506132125854492, "rewards/rejected": -2.2643730640411377, "step": 3853 }, { "epoch": 0.6, "learning_rate": 1.1320654598267545e-05, "logits/chosen": -2.9233949184417725, "logits/rejected": -3.00470232963562, "logps/chosen": -332.76507568359375, "logps/rejected": -361.78826904296875, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": -1.089190125465393, "rewards/margins": 3.3308675289154053, "rewards/rejected": -4.420057773590088, "step": 3854 }, { "epoch": 0.6, "learning_rate": 1.1319921157736397e-05, "logits/chosen": -2.7810964584350586, "logits/rejected": -2.8991849422454834, "logps/chosen": -141.44290161132812, "logps/rejected": -197.51907348632812, "loss": 1.7685, "rewards/accuracies": 0.5, "rewards/chosen": -1.9582462310791016, "rewards/margins": -1.2101794481277466, "rewards/rejected": -0.748066782951355, "step": 3855 }, { "epoch": 0.6, "learning_rate": 1.1319187717205249e-05, "logits/chosen": -3.0651018619537354, "logits/rejected": -3.193899631500244, "logps/chosen": -221.77371215820312, "logps/rejected": -224.09735107421875, "loss": 3.0363, "rewards/accuracies": 0.5, "rewards/chosen": -3.3651299476623535, "rewards/margins": -0.9479401111602783, "rewards/rejected": -2.417189836502075, "step": 3856 }, { "epoch": 0.6, "learning_rate": 1.1318454276674101e-05, "logits/chosen": -2.9622528553009033, "logits/rejected": -3.154073476791382, "logps/chosen": -330.4715881347656, "logps/rejected": -464.36328125, "loss": 1.2445, "rewards/accuracies": 0.5, "rewards/chosen": -2.4739954471588135, "rewards/margins": 0.5555182695388794, "rewards/rejected": -3.0295135974884033, "step": 3857 }, { "epoch": 0.6, "learning_rate": 1.1317720836142953e-05, "logits/chosen": -2.930081844329834, "logits/rejected": -2.7946414947509766, "logps/chosen": -244.73159790039062, "logps/rejected": -249.6361083984375, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.2090072631835938, "rewards/margins": 4.245425701141357, "rewards/rejected": -5.454432964324951, "step": 3858 }, { "epoch": 0.6, "learning_rate": 1.1316987395611805e-05, "logits/chosen": -2.926335334777832, "logits/rejected": -3.1796224117279053, "logps/chosen": -104.57454681396484, "logps/rejected": -193.72262573242188, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -1.578556776046753, "rewards/margins": 3.9246597290039062, "rewards/rejected": -5.503216743469238, "step": 3859 }, { "epoch": 0.6, "learning_rate": 1.1316253955080657e-05, "logits/chosen": -2.965651750564575, "logits/rejected": -3.0735361576080322, "logps/chosen": -37.22947692871094, "logps/rejected": -124.14289855957031, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -2.2191641330718994, "rewards/margins": 3.5707716941833496, "rewards/rejected": -5.789936065673828, "step": 3860 }, { "epoch": 0.6, "learning_rate": 1.1315520514549508e-05, "logits/chosen": -2.4640283584594727, "logits/rejected": -3.1037778854370117, "logps/chosen": -101.09746551513672, "logps/rejected": -139.92515563964844, "loss": 2.5217, "rewards/accuracies": 0.5, "rewards/chosen": -3.619030475616455, "rewards/margins": -1.0618103742599487, "rewards/rejected": -2.557220220565796, "step": 3861 }, { "epoch": 0.6, "learning_rate": 1.1314787074018362e-05, "logits/chosen": -2.299762487411499, "logits/rejected": -3.138329267501831, "logps/chosen": -518.7109375, "logps/rejected": -669.0897827148438, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.773681640625, "rewards/margins": 7.300634384155273, "rewards/rejected": -8.074316024780273, "step": 3862 }, { "epoch": 0.6, "learning_rate": 1.1314053633487214e-05, "logits/chosen": -2.29244327545166, "logits/rejected": -2.943817615509033, "logps/chosen": -607.4160766601562, "logps/rejected": -547.3241577148438, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.1314560174942017, "rewards/margins": 4.116491317749023, "rewards/rejected": -5.2479472160339355, "step": 3863 }, { "epoch": 0.6, "learning_rate": 1.1313320192956067e-05, "logits/chosen": -2.3409342765808105, "logits/rejected": -3.19960355758667, "logps/chosen": -153.46780395507812, "logps/rejected": -332.199462890625, "loss": 0.1175, "rewards/accuracies": 1.0, "rewards/chosen": -0.9546013474464417, "rewards/margins": 3.618206024169922, "rewards/rejected": -4.572807312011719, "step": 3864 }, { "epoch": 0.6, "learning_rate": 1.131258675242492e-05, "logits/chosen": -1.4592366218566895, "logits/rejected": -2.988877773284912, "logps/chosen": -203.3714599609375, "logps/rejected": -282.9185791015625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.10882759839296341, "rewards/margins": 8.112761497497559, "rewards/rejected": -8.003934860229492, "step": 3865 }, { "epoch": 0.6, "learning_rate": 1.1311853311893771e-05, "logits/chosen": -2.2459139823913574, "logits/rejected": -3.2716121673583984, "logps/chosen": -78.32632446289062, "logps/rejected": -323.4661865234375, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -1.509468913078308, "rewards/margins": 3.132946729660034, "rewards/rejected": -4.642415523529053, "step": 3866 }, { "epoch": 0.6, "learning_rate": 1.1311119871362623e-05, "logits/chosen": -2.106348752975464, "logits/rejected": -2.750547170639038, "logps/chosen": -127.244873046875, "logps/rejected": -111.57488250732422, "loss": 1.4307, "rewards/accuracies": 0.5, "rewards/chosen": -4.470761299133301, "rewards/margins": -0.1888521909713745, "rewards/rejected": -4.281908988952637, "step": 3867 }, { "epoch": 0.6, "learning_rate": 1.1310386430831475e-05, "logits/chosen": -3.0957956314086914, "logits/rejected": -2.676241397857666, "logps/chosen": -354.0926513671875, "logps/rejected": -494.8096923828125, "loss": 2.3823, "rewards/accuracies": 0.5, "rewards/chosen": -2.9706473350524902, "rewards/margins": -1.0947235822677612, "rewards/rejected": -1.875923991203308, "step": 3868 }, { "epoch": 0.6, "learning_rate": 1.1309652990300327e-05, "logits/chosen": -2.835853099822998, "logits/rejected": -3.050281047821045, "logps/chosen": -141.18063354492188, "logps/rejected": -282.0349426269531, "loss": 3.0685, "rewards/accuracies": 0.0, "rewards/chosen": -3.9581289291381836, "rewards/margins": -2.9374518394470215, "rewards/rejected": -1.020677089691162, "step": 3869 }, { "epoch": 0.6, "learning_rate": 1.1308919549769179e-05, "logits/chosen": -1.9195928573608398, "logits/rejected": -2.537653923034668, "logps/chosen": -136.7377166748047, "logps/rejected": -206.19308471679688, "loss": 1.6968, "rewards/accuracies": 0.5, "rewards/chosen": -3.2425737380981445, "rewards/margins": -1.1188690662384033, "rewards/rejected": -2.123704433441162, "step": 3870 }, { "epoch": 0.6, "learning_rate": 1.1308186109238032e-05, "logits/chosen": -2.551459312438965, "logits/rejected": -2.9517550468444824, "logps/chosen": -144.12124633789062, "logps/rejected": -303.08685302734375, "loss": 0.0976, "rewards/accuracies": 1.0, "rewards/chosen": -0.11058962345123291, "rewards/margins": 3.967480421066284, "rewards/rejected": -4.078070163726807, "step": 3871 }, { "epoch": 0.6, "learning_rate": 1.1307452668706884e-05, "logits/chosen": -3.0600342750549316, "logits/rejected": -3.143254280090332, "logps/chosen": -241.07826232910156, "logps/rejected": -195.88650512695312, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.4909319281578064, "rewards/margins": 4.922614097595215, "rewards/rejected": -5.413546085357666, "step": 3872 }, { "epoch": 0.6, "learning_rate": 1.1306719228175736e-05, "logits/chosen": -2.9927260875701904, "logits/rejected": -3.049501657485962, "logps/chosen": -116.81341552734375, "logps/rejected": -125.73460388183594, "loss": 0.5199, "rewards/accuracies": 0.5, "rewards/chosen": -2.6502914428710938, "rewards/margins": 2.119633674621582, "rewards/rejected": -4.769925117492676, "step": 3873 }, { "epoch": 0.6, "learning_rate": 1.1305985787644588e-05, "logits/chosen": -1.2650861740112305, "logits/rejected": -2.7987616062164307, "logps/chosen": -156.99984741210938, "logps/rejected": -292.6755065917969, "loss": 2.0135, "rewards/accuracies": 0.5, "rewards/chosen": -2.889397144317627, "rewards/margins": -1.2777546644210815, "rewards/rejected": -1.6116424798965454, "step": 3874 }, { "epoch": 0.6, "learning_rate": 1.130525234711344e-05, "logits/chosen": -2.585305690765381, "logits/rejected": -3.164712429046631, "logps/chosen": -178.64585876464844, "logps/rejected": -363.3363037109375, "loss": 0.0772, "rewards/accuracies": 1.0, "rewards/chosen": -0.10710029304027557, "rewards/margins": 4.824814319610596, "rewards/rejected": -4.931914329528809, "step": 3875 }, { "epoch": 0.6, "learning_rate": 1.1304518906582292e-05, "logits/chosen": -2.870666027069092, "logits/rejected": -3.28902530670166, "logps/chosen": -47.75986099243164, "logps/rejected": -169.0861358642578, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.4728250801563263, "rewards/margins": 4.9507036209106445, "rewards/rejected": -5.423528671264648, "step": 3876 }, { "epoch": 0.6, "learning_rate": 1.1303785466051144e-05, "logits/chosen": -1.465413212776184, "logits/rejected": -2.879265308380127, "logps/chosen": -178.3244171142578, "logps/rejected": -316.92010498046875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.9846389889717102, "rewards/margins": 5.301544189453125, "rewards/rejected": -6.2861833572387695, "step": 3877 }, { "epoch": 0.6, "learning_rate": 1.1303052025519995e-05, "logits/chosen": -3.1338984966278076, "logits/rejected": -2.7336318492889404, "logps/chosen": -205.6801300048828, "logps/rejected": -91.76631927490234, "loss": 3.155, "rewards/accuracies": 0.5, "rewards/chosen": -3.817363977432251, "rewards/margins": -1.1261646747589111, "rewards/rejected": -2.69119930267334, "step": 3878 }, { "epoch": 0.6, "learning_rate": 1.1302318584988847e-05, "logits/chosen": -2.414865493774414, "logits/rejected": -2.82002329826355, "logps/chosen": -51.57634353637695, "logps/rejected": -247.79661560058594, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -2.0834319591522217, "rewards/margins": 4.9455766677856445, "rewards/rejected": -7.029008388519287, "step": 3879 }, { "epoch": 0.6, "learning_rate": 1.1301585144457701e-05, "logits/chosen": -2.856140375137329, "logits/rejected": -2.951230525970459, "logps/chosen": -444.79095458984375, "logps/rejected": -351.7503356933594, "loss": 0.0828, "rewards/accuracies": 1.0, "rewards/chosen": -2.3455116748809814, "rewards/margins": 3.3017640113830566, "rewards/rejected": -5.647275924682617, "step": 3880 }, { "epoch": 0.6, "learning_rate": 1.1300851703926553e-05, "logits/chosen": -2.5512568950653076, "logits/rejected": -3.0836257934570312, "logps/chosen": -100.9012222290039, "logps/rejected": -246.1087646484375, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.1470959186553955, "rewards/margins": 4.346148490905762, "rewards/rejected": -5.493244171142578, "step": 3881 }, { "epoch": 0.6, "learning_rate": 1.1300118263395405e-05, "logits/chosen": -2.27651309967041, "logits/rejected": -1.9686939716339111, "logps/chosen": -561.5164794921875, "logps/rejected": -507.553466796875, "loss": 0.1561, "rewards/accuracies": 1.0, "rewards/chosen": -2.2574265003204346, "rewards/margins": 2.7658019065856934, "rewards/rejected": -5.023228645324707, "step": 3882 }, { "epoch": 0.6, "learning_rate": 1.1299384822864256e-05, "logits/chosen": -2.3507285118103027, "logits/rejected": -3.2054824829101562, "logps/chosen": -107.63780212402344, "logps/rejected": -274.62310791015625, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -0.07470627129077911, "rewards/margins": 5.8823394775390625, "rewards/rejected": -5.957046031951904, "step": 3883 }, { "epoch": 0.6, "learning_rate": 1.1298651382333108e-05, "logits/chosen": -2.317112922668457, "logits/rejected": -2.5736312866210938, "logps/chosen": -119.22876739501953, "logps/rejected": -284.91455078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.7611698508262634, "rewards/margins": 7.161773204803467, "rewards/rejected": -6.400603294372559, "step": 3884 }, { "epoch": 0.6, "learning_rate": 1.129791794180196e-05, "logits/chosen": -3.071183681488037, "logits/rejected": -2.8964836597442627, "logps/chosen": -140.25985717773438, "logps/rejected": -41.12327575683594, "loss": 2.2655, "rewards/accuracies": 0.0, "rewards/chosen": -3.7456681728363037, "rewards/margins": -2.027333974838257, "rewards/rejected": -1.7183341979980469, "step": 3885 }, { "epoch": 0.6, "learning_rate": 1.1297184501270812e-05, "logits/chosen": -3.0369415283203125, "logits/rejected": -3.138587713241577, "logps/chosen": -236.33201599121094, "logps/rejected": -219.09506225585938, "loss": 1.0855, "rewards/accuracies": 0.5, "rewards/chosen": -1.1731605529785156, "rewards/margins": -0.06491243839263916, "rewards/rejected": -1.1082481145858765, "step": 3886 }, { "epoch": 0.6, "learning_rate": 1.1296451060739664e-05, "logits/chosen": -1.7864917516708374, "logits/rejected": -2.4768552780151367, "logps/chosen": -313.3138427734375, "logps/rejected": -478.02056884765625, "loss": 1.9773, "rewards/accuracies": 0.5, "rewards/chosen": -2.294487237930298, "rewards/margins": 3.507235527038574, "rewards/rejected": -5.801722526550293, "step": 3887 }, { "epoch": 0.6, "learning_rate": 1.1295717620208516e-05, "logits/chosen": -2.770477294921875, "logits/rejected": -2.913198232650757, "logps/chosen": -41.704097747802734, "logps/rejected": -121.21185302734375, "loss": 0.0349, "rewards/accuracies": 1.0, "rewards/chosen": -1.7309012413024902, "rewards/margins": 3.588460922241211, "rewards/rejected": -5.319362163543701, "step": 3888 }, { "epoch": 0.6, "learning_rate": 1.129498417967737e-05, "logits/chosen": -3.26627254486084, "logits/rejected": -2.9601547718048096, "logps/chosen": -176.1329803466797, "logps/rejected": -250.47525024414062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.31951218843460083, "rewards/margins": 7.725247383117676, "rewards/rejected": -8.044759750366211, "step": 3889 }, { "epoch": 0.6, "learning_rate": 1.1294250739146221e-05, "logits/chosen": -2.8509674072265625, "logits/rejected": -2.805884838104248, "logps/chosen": -90.0737533569336, "logps/rejected": -114.71149444580078, "loss": 1.7812, "rewards/accuracies": 0.5, "rewards/chosen": -2.5769457817077637, "rewards/margins": -0.351733922958374, "rewards/rejected": -2.2252118587493896, "step": 3890 }, { "epoch": 0.61, "learning_rate": 1.1293517298615073e-05, "logits/chosen": -1.883669376373291, "logits/rejected": -3.0206594467163086, "logps/chosen": -115.70333862304688, "logps/rejected": -186.23220825195312, "loss": 1.7934, "rewards/accuracies": 0.5, "rewards/chosen": -2.8388895988464355, "rewards/margins": -0.31091463565826416, "rewards/rejected": -2.527975082397461, "step": 3891 }, { "epoch": 0.61, "learning_rate": 1.1292783858083925e-05, "logits/chosen": -1.6612036228179932, "logits/rejected": -2.0332257747650146, "logps/chosen": -247.23443603515625, "logps/rejected": -390.5746154785156, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.590380072593689, "rewards/margins": 6.678796768188477, "rewards/rejected": -7.269177436828613, "step": 3892 }, { "epoch": 0.61, "learning_rate": 1.1292050417552777e-05, "logits/chosen": -1.2694807052612305, "logits/rejected": -2.7180075645446777, "logps/chosen": -101.81356048583984, "logps/rejected": -354.3893737792969, "loss": 0.0508, "rewards/accuracies": 1.0, "rewards/chosen": -1.3629353046417236, "rewards/margins": 3.3479602336883545, "rewards/rejected": -4.710895538330078, "step": 3893 }, { "epoch": 0.61, "learning_rate": 1.1291316977021629e-05, "logits/chosen": -1.3063379526138306, "logits/rejected": -2.748926877975464, "logps/chosen": -132.29864501953125, "logps/rejected": -280.62310791015625, "loss": 3.0505, "rewards/accuracies": 0.5, "rewards/chosen": -4.671891689300537, "rewards/margins": -1.804160237312317, "rewards/rejected": -2.8677315711975098, "step": 3894 }, { "epoch": 0.61, "learning_rate": 1.129058353649048e-05, "logits/chosen": -3.027470111846924, "logits/rejected": -2.3401272296905518, "logps/chosen": -393.7093505859375, "logps/rejected": -217.11228942871094, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.8277328610420227, "rewards/margins": 5.675360679626465, "rewards/rejected": -4.847627639770508, "step": 3895 }, { "epoch": 0.61, "learning_rate": 1.1289850095959334e-05, "logits/chosen": -1.7570825815200806, "logits/rejected": -3.1871485710144043, "logps/chosen": -121.82379150390625, "logps/rejected": -430.4486083984375, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 0.0758972093462944, "rewards/margins": 5.005975723266602, "rewards/rejected": -4.930078506469727, "step": 3896 }, { "epoch": 0.61, "learning_rate": 1.1289116655428186e-05, "logits/chosen": -2.8347115516662598, "logits/rejected": -2.445127010345459, "logps/chosen": -181.1497802734375, "logps/rejected": -215.0966796875, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -0.7262169122695923, "rewards/margins": 4.566350936889648, "rewards/rejected": -5.292568206787109, "step": 3897 }, { "epoch": 0.61, "learning_rate": 1.128838321489704e-05, "logits/chosen": -3.288525342941284, "logits/rejected": -2.9590659141540527, "logps/chosen": -216.9969024658203, "logps/rejected": -88.14620971679688, "loss": 3.1349, "rewards/accuracies": 0.5, "rewards/chosen": -3.8854410648345947, "rewards/margins": -1.5138250589370728, "rewards/rejected": -2.3716158866882324, "step": 3898 }, { "epoch": 0.61, "learning_rate": 1.1287649774365892e-05, "logits/chosen": -2.1316168308258057, "logits/rejected": -2.9504520893096924, "logps/chosen": -85.90694427490234, "logps/rejected": -137.6337890625, "loss": 1.3819, "rewards/accuracies": 0.5, "rewards/chosen": -2.7378273010253906, "rewards/margins": 0.21001756191253662, "rewards/rejected": -2.947844982147217, "step": 3899 }, { "epoch": 0.61, "learning_rate": 1.1286916333834744e-05, "logits/chosen": -2.750833034515381, "logits/rejected": -2.862969160079956, "logps/chosen": -191.56857299804688, "logps/rejected": -171.14779663085938, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -0.37634506821632385, "rewards/margins": 3.110128879547119, "rewards/rejected": -3.48647403717041, "step": 3900 }, { "epoch": 0.61, "learning_rate": 1.1286182893303595e-05, "logits/chosen": -2.078364133834839, "logits/rejected": -3.009854555130005, "logps/chosen": -117.02737426757812, "logps/rejected": -382.5371398925781, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.36325302720069885, "rewards/margins": 9.368635177612305, "rewards/rejected": -9.005382537841797, "step": 3901 }, { "epoch": 0.61, "learning_rate": 1.1285449452772447e-05, "logits/chosen": -3.060800075531006, "logits/rejected": -2.7606570720672607, "logps/chosen": -352.04058837890625, "logps/rejected": -296.2684326171875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 0.3583465814590454, "rewards/margins": 6.018024444580078, "rewards/rejected": -5.659677982330322, "step": 3902 }, { "epoch": 0.61, "learning_rate": 1.1284716012241299e-05, "logits/chosen": -1.9627560377120972, "logits/rejected": -2.8948447704315186, "logps/chosen": -163.91940307617188, "logps/rejected": -415.1997375488281, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.519998550415039, "rewards/margins": 6.235137462615967, "rewards/rejected": -7.755136489868164, "step": 3903 }, { "epoch": 0.61, "learning_rate": 1.1283982571710151e-05, "logits/chosen": -2.8925058841705322, "logits/rejected": -2.032442569732666, "logps/chosen": -252.67868041992188, "logps/rejected": -187.5664825439453, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": 0.115636445581913, "rewards/margins": 5.02073860168457, "rewards/rejected": -4.905101776123047, "step": 3904 }, { "epoch": 0.61, "learning_rate": 1.1283249131179003e-05, "logits/chosen": -2.0470473766326904, "logits/rejected": -2.9076406955718994, "logps/chosen": -251.50787353515625, "logps/rejected": -282.13494873046875, "loss": 0.7198, "rewards/accuracies": 0.5, "rewards/chosen": -1.419148325920105, "rewards/margins": -0.02556614577770233, "rewards/rejected": -1.3935821056365967, "step": 3905 }, { "epoch": 0.61, "learning_rate": 1.1282515690647856e-05, "logits/chosen": -2.715125322341919, "logits/rejected": -2.923192262649536, "logps/chosen": -95.12433624267578, "logps/rejected": -178.74163818359375, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": 0.7330665588378906, "rewards/margins": 3.845242500305176, "rewards/rejected": -3.1121761798858643, "step": 3906 }, { "epoch": 0.61, "learning_rate": 1.1281782250116708e-05, "logits/chosen": -2.9464519023895264, "logits/rejected": -3.2888128757476807, "logps/chosen": -93.26718139648438, "logps/rejected": -223.00379943847656, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": -1.021776556968689, "rewards/margins": 3.3876147270202637, "rewards/rejected": -4.409391403198242, "step": 3907 }, { "epoch": 0.61, "learning_rate": 1.128104880958556e-05, "logits/chosen": -2.746140480041504, "logits/rejected": -1.6849595308303833, "logps/chosen": -182.58587646484375, "logps/rejected": -103.34420013427734, "loss": 2.0539, "rewards/accuracies": 0.5, "rewards/chosen": -3.007615327835083, "rewards/margins": 0.6673519611358643, "rewards/rejected": -3.6749672889709473, "step": 3908 }, { "epoch": 0.61, "learning_rate": 1.1280315369054412e-05, "logits/chosen": -2.352478265762329, "logits/rejected": -2.788816452026367, "logps/chosen": -303.6651611328125, "logps/rejected": -375.38946533203125, "loss": 3.6663, "rewards/accuracies": 0.5, "rewards/chosen": -4.015003204345703, "rewards/margins": 0.1210317611694336, "rewards/rejected": -4.136034965515137, "step": 3909 }, { "epoch": 0.61, "learning_rate": 1.1279581928523264e-05, "logits/chosen": -2.5455710887908936, "logits/rejected": -2.869521141052246, "logps/chosen": -54.356300354003906, "logps/rejected": -142.20973205566406, "loss": 0.4186, "rewards/accuracies": 0.5, "rewards/chosen": -2.6014931201934814, "rewards/margins": 1.1627554893493652, "rewards/rejected": -3.7642486095428467, "step": 3910 }, { "epoch": 0.61, "learning_rate": 1.1278848487992116e-05, "logits/chosen": -3.0415866374969482, "logits/rejected": -2.508087635040283, "logps/chosen": -238.719482421875, "logps/rejected": -195.47657775878906, "loss": 1.6925, "rewards/accuracies": 0.5, "rewards/chosen": -1.5594688653945923, "rewards/margins": 0.7288079261779785, "rewards/rejected": -2.2882766723632812, "step": 3911 }, { "epoch": 0.61, "learning_rate": 1.1278115047460968e-05, "logits/chosen": -3.0268776416778564, "logits/rejected": -2.6885828971862793, "logps/chosen": -718.406005859375, "logps/rejected": -493.2301940917969, "loss": 2.7868, "rewards/accuracies": 0.5, "rewards/chosen": -2.821725368499756, "rewards/margins": 0.16803622245788574, "rewards/rejected": -2.9897615909576416, "step": 3912 }, { "epoch": 0.61, "learning_rate": 1.127738160692982e-05, "logits/chosen": -2.962581157684326, "logits/rejected": -2.5283656120300293, "logps/chosen": -283.0514221191406, "logps/rejected": -196.66810607910156, "loss": 0.0487, "rewards/accuracies": 1.0, "rewards/chosen": 0.12126083672046661, "rewards/margins": 3.5134506225585938, "rewards/rejected": -3.3921899795532227, "step": 3913 }, { "epoch": 0.61, "learning_rate": 1.1276648166398671e-05, "logits/chosen": -3.2909369468688965, "logits/rejected": -3.2322638034820557, "logps/chosen": -176.0740509033203, "logps/rejected": -264.3721618652344, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.627861738204956, "rewards/margins": 6.954584121704102, "rewards/rejected": -8.58244514465332, "step": 3914 }, { "epoch": 0.61, "learning_rate": 1.1275914725867525e-05, "logits/chosen": -3.3418405055999756, "logits/rejected": -3.0387654304504395, "logps/chosen": -309.95111083984375, "logps/rejected": -206.2626190185547, "loss": 2.6855, "rewards/accuracies": 0.5, "rewards/chosen": -3.0163168907165527, "rewards/margins": -0.5083491802215576, "rewards/rejected": -2.507967948913574, "step": 3915 }, { "epoch": 0.61, "learning_rate": 1.1275181285336377e-05, "logits/chosen": -1.7966582775115967, "logits/rejected": -2.8519623279571533, "logps/chosen": -68.54230499267578, "logps/rejected": -210.43130493164062, "loss": 0.1347, "rewards/accuracies": 1.0, "rewards/chosen": -4.088109016418457, "rewards/margins": 2.035317897796631, "rewards/rejected": -6.123426914215088, "step": 3916 }, { "epoch": 0.61, "learning_rate": 1.1274447844805229e-05, "logits/chosen": -1.8772327899932861, "logits/rejected": -3.0837981700897217, "logps/chosen": -190.84066772460938, "logps/rejected": -560.6573486328125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": 0.328909307718277, "rewards/margins": 5.069012641906738, "rewards/rejected": -4.740103244781494, "step": 3917 }, { "epoch": 0.61, "learning_rate": 1.127371440427408e-05, "logits/chosen": -2.921339988708496, "logits/rejected": -2.6237480640411377, "logps/chosen": -124.77452087402344, "logps/rejected": -191.76344299316406, "loss": 0.8753, "rewards/accuracies": 0.5, "rewards/chosen": -2.7008609771728516, "rewards/margins": 2.927985906600952, "rewards/rejected": -5.628846645355225, "step": 3918 }, { "epoch": 0.61, "learning_rate": 1.1272980963742933e-05, "logits/chosen": -2.9812211990356445, "logits/rejected": -3.312464714050293, "logps/chosen": -289.9338684082031, "logps/rejected": -466.49188232421875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.2147090882062912, "rewards/margins": 6.305081367492676, "rewards/rejected": -6.5197906494140625, "step": 3919 }, { "epoch": 0.61, "learning_rate": 1.1272247523211784e-05, "logits/chosen": -3.0237035751342773, "logits/rejected": -2.4232053756713867, "logps/chosen": -497.9859619140625, "logps/rejected": -342.1025695800781, "loss": 0.08, "rewards/accuracies": 1.0, "rewards/chosen": -0.6271835565567017, "rewards/margins": 4.251715183258057, "rewards/rejected": -4.878898620605469, "step": 3920 }, { "epoch": 0.61, "learning_rate": 1.1271514082680636e-05, "logits/chosen": -2.8347480297088623, "logits/rejected": -2.9823641777038574, "logps/chosen": -76.16474151611328, "logps/rejected": -259.5994873046875, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.4669952392578125, "rewards/margins": 5.871818542480469, "rewards/rejected": -7.338813304901123, "step": 3921 }, { "epoch": 0.61, "learning_rate": 1.1270780642149488e-05, "logits/chosen": -2.9558444023132324, "logits/rejected": -2.301020383834839, "logps/chosen": -151.88473510742188, "logps/rejected": -234.54452514648438, "loss": 0.1471, "rewards/accuracies": 1.0, "rewards/chosen": -3.717288017272949, "rewards/margins": 3.2853171825408936, "rewards/rejected": -7.002605438232422, "step": 3922 }, { "epoch": 0.61, "learning_rate": 1.127004720161834e-05, "logits/chosen": -3.2976748943328857, "logits/rejected": -2.1835103034973145, "logps/chosen": -275.1523742675781, "logps/rejected": -61.95954132080078, "loss": 1.3498, "rewards/accuracies": 0.5, "rewards/chosen": -1.411917805671692, "rewards/margins": 2.211240768432617, "rewards/rejected": -3.6231584548950195, "step": 3923 }, { "epoch": 0.61, "learning_rate": 1.1269313761087194e-05, "logits/chosen": -2.767490863800049, "logits/rejected": -3.251504421234131, "logps/chosen": -587.7713623046875, "logps/rejected": -628.0230712890625, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": -1.3561843633651733, "rewards/margins": 3.1203231811523438, "rewards/rejected": -4.476507663726807, "step": 3924 }, { "epoch": 0.61, "learning_rate": 1.1268580320556046e-05, "logits/chosen": -2.7353322505950928, "logits/rejected": -2.9343903064727783, "logps/chosen": -436.86663818359375, "logps/rejected": -454.96612548828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.1289806365966797, "rewards/margins": 7.537284851074219, "rewards/rejected": -7.408304214477539, "step": 3925 }, { "epoch": 0.61, "learning_rate": 1.1267846880024897e-05, "logits/chosen": -2.243049144744873, "logits/rejected": -3.021148204803467, "logps/chosen": -81.13430786132812, "logps/rejected": -187.92576599121094, "loss": 0.3043, "rewards/accuracies": 1.0, "rewards/chosen": -2.3607378005981445, "rewards/margins": 2.0465455055236816, "rewards/rejected": -4.407283306121826, "step": 3926 }, { "epoch": 0.61, "learning_rate": 1.126711343949375e-05, "logits/chosen": -2.7387278079986572, "logits/rejected": -3.1524245738983154, "logps/chosen": -200.9192657470703, "logps/rejected": -413.9040222167969, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.809513509273529, "rewards/margins": 7.44227933883667, "rewards/rejected": -8.251792907714844, "step": 3927 }, { "epoch": 0.61, "learning_rate": 1.1266379998962601e-05, "logits/chosen": -3.048694133758545, "logits/rejected": -3.091362953186035, "logps/chosen": -96.4549789428711, "logps/rejected": -173.35092163085938, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": -1.6806073188781738, "rewards/margins": 3.7338035106658936, "rewards/rejected": -5.414410591125488, "step": 3928 }, { "epoch": 0.61, "learning_rate": 1.1265646558431453e-05, "logits/chosen": -2.9902100563049316, "logits/rejected": -2.7820920944213867, "logps/chosen": -346.0469970703125, "logps/rejected": -194.34840393066406, "loss": 2.5465, "rewards/accuracies": 0.5, "rewards/chosen": -3.65216064453125, "rewards/margins": 0.16675925254821777, "rewards/rejected": -3.8189198970794678, "step": 3929 }, { "epoch": 0.61, "learning_rate": 1.1264913117900307e-05, "logits/chosen": -2.9849040508270264, "logits/rejected": -2.523277759552002, "logps/chosen": -170.650634765625, "logps/rejected": -128.1167755126953, "loss": 0.9904, "rewards/accuracies": 0.5, "rewards/chosen": -2.0411765575408936, "rewards/margins": 2.9162936210632324, "rewards/rejected": -4.957469940185547, "step": 3930 }, { "epoch": 0.61, "learning_rate": 1.1264179677369159e-05, "logits/chosen": -2.8381507396698, "logits/rejected": -2.879485607147217, "logps/chosen": -165.34202575683594, "logps/rejected": -240.5644073486328, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.6742610931396484, "rewards/margins": 4.905538558959961, "rewards/rejected": -6.579799652099609, "step": 3931 }, { "epoch": 0.61, "learning_rate": 1.126344623683801e-05, "logits/chosen": -2.6108486652374268, "logits/rejected": -2.8768575191497803, "logps/chosen": -124.91305541992188, "logps/rejected": -196.52996826171875, "loss": 0.6067, "rewards/accuracies": 0.5, "rewards/chosen": -2.366507053375244, "rewards/margins": 1.8491146564483643, "rewards/rejected": -4.2156219482421875, "step": 3932 }, { "epoch": 0.61, "learning_rate": 1.1262712796306864e-05, "logits/chosen": -3.3700225353240967, "logits/rejected": -2.379610538482666, "logps/chosen": -608.3162841796875, "logps/rejected": -90.48516082763672, "loss": 0.4048, "rewards/accuracies": 0.5, "rewards/chosen": -1.2425727844238281, "rewards/margins": 2.4479880332946777, "rewards/rejected": -3.690560817718506, "step": 3933 }, { "epoch": 0.61, "learning_rate": 1.1261979355775716e-05, "logits/chosen": -2.974008321762085, "logits/rejected": -2.1152496337890625, "logps/chosen": -158.11582946777344, "logps/rejected": -119.79647827148438, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": -1.245370864868164, "rewards/margins": 3.0509378910064697, "rewards/rejected": -4.296308994293213, "step": 3934 }, { "epoch": 0.61, "learning_rate": 1.1261245915244568e-05, "logits/chosen": -2.694220781326294, "logits/rejected": -3.2234137058258057, "logps/chosen": -510.5723876953125, "logps/rejected": -502.5959777832031, "loss": 0.1805, "rewards/accuracies": 1.0, "rewards/chosen": -1.1676362752914429, "rewards/margins": 2.2810580730438232, "rewards/rejected": -3.4486942291259766, "step": 3935 }, { "epoch": 0.61, "learning_rate": 1.126051247471342e-05, "logits/chosen": -2.0144832134246826, "logits/rejected": -2.927173137664795, "logps/chosen": -240.4687042236328, "logps/rejected": -334.2959899902344, "loss": 1.8378, "rewards/accuracies": 0.5, "rewards/chosen": -2.8147659301757812, "rewards/margins": 2.0644357204437256, "rewards/rejected": -4.879201889038086, "step": 3936 }, { "epoch": 0.61, "learning_rate": 1.1259779034182271e-05, "logits/chosen": -3.296894073486328, "logits/rejected": -2.8499062061309814, "logps/chosen": -306.94744873046875, "logps/rejected": -302.8265380859375, "loss": 1.5866, "rewards/accuracies": 0.5, "rewards/chosen": -3.5800251960754395, "rewards/margins": -0.6361919641494751, "rewards/rejected": -2.943833351135254, "step": 3937 }, { "epoch": 0.61, "learning_rate": 1.1259045593651123e-05, "logits/chosen": -2.8002307415008545, "logits/rejected": -3.05141282081604, "logps/chosen": -616.6724853515625, "logps/rejected": -539.169921875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.9405930042266846, "rewards/margins": 5.725750923156738, "rewards/rejected": -6.666343688964844, "step": 3938 }, { "epoch": 0.61, "learning_rate": 1.1258312153119975e-05, "logits/chosen": -3.207172155380249, "logits/rejected": -2.9631826877593994, "logps/chosen": -498.3229675292969, "logps/rejected": -358.45013427734375, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": 0.1915535032749176, "rewards/margins": 3.873347759246826, "rewards/rejected": -3.6817941665649414, "step": 3939 }, { "epoch": 0.61, "learning_rate": 1.1257578712588827e-05, "logits/chosen": -1.724705457687378, "logits/rejected": -2.8929152488708496, "logps/chosen": -133.93374633789062, "logps/rejected": -290.5638427734375, "loss": 0.1218, "rewards/accuracies": 1.0, "rewards/chosen": -1.407435417175293, "rewards/margins": 3.439664125442505, "rewards/rejected": -4.847099304199219, "step": 3940 }, { "epoch": 0.61, "learning_rate": 1.1256845272057679e-05, "logits/chosen": -2.780738115310669, "logits/rejected": -3.1856331825256348, "logps/chosen": -159.35922241210938, "logps/rejected": -272.22613525390625, "loss": 0.0767, "rewards/accuracies": 1.0, "rewards/chosen": -1.8317265510559082, "rewards/margins": 3.753312349319458, "rewards/rejected": -5.585038661956787, "step": 3941 }, { "epoch": 0.61, "learning_rate": 1.1256111831526533e-05, "logits/chosen": -2.8747196197509766, "logits/rejected": -2.7087740898132324, "logps/chosen": -178.77935791015625, "logps/rejected": -160.9072265625, "loss": 0.9177, "rewards/accuracies": 0.5, "rewards/chosen": -2.591341018676758, "rewards/margins": -0.16229116916656494, "rewards/rejected": -2.4290497303009033, "step": 3942 }, { "epoch": 0.61, "learning_rate": 1.1255378390995384e-05, "logits/chosen": -1.892016887664795, "logits/rejected": -2.2806031703948975, "logps/chosen": -189.57266235351562, "logps/rejected": -324.3529968261719, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.1467244625091553, "rewards/margins": 5.699883460998535, "rewards/rejected": -6.8466081619262695, "step": 3943 }, { "epoch": 0.61, "learning_rate": 1.1254644950464236e-05, "logits/chosen": -2.9393393993377686, "logits/rejected": -2.4606432914733887, "logps/chosen": -379.3324890136719, "logps/rejected": -499.1001281738281, "loss": 2.0207, "rewards/accuracies": 0.5, "rewards/chosen": -3.539158582687378, "rewards/margins": -0.11713385581970215, "rewards/rejected": -3.422024726867676, "step": 3944 }, { "epoch": 0.61, "learning_rate": 1.1253911509933088e-05, "logits/chosen": -2.5735230445861816, "logits/rejected": -2.8957555294036865, "logps/chosen": -571.5149536132812, "logps/rejected": -618.7806396484375, "loss": 0.7656, "rewards/accuracies": 0.5, "rewards/chosen": -1.803534746170044, "rewards/margins": 2.0091023445129395, "rewards/rejected": -3.8126373291015625, "step": 3945 }, { "epoch": 0.61, "learning_rate": 1.125317806940194e-05, "logits/chosen": -2.430751085281372, "logits/rejected": -2.9331905841827393, "logps/chosen": -87.39372253417969, "logps/rejected": -241.69984436035156, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": -3.1306869983673096, "rewards/margins": 4.149126052856445, "rewards/rejected": -7.279812812805176, "step": 3946 }, { "epoch": 0.61, "learning_rate": 1.1252444628870792e-05, "logits/chosen": -3.0237317085266113, "logits/rejected": -2.9443776607513428, "logps/chosen": -320.7308349609375, "logps/rejected": -270.39373779296875, "loss": 0.1445, "rewards/accuracies": 1.0, "rewards/chosen": -1.2743656635284424, "rewards/margins": 2.364893913269043, "rewards/rejected": -3.6392593383789062, "step": 3947 }, { "epoch": 0.61, "learning_rate": 1.1251711188339644e-05, "logits/chosen": -2.5190811157226562, "logits/rejected": -2.844480037689209, "logps/chosen": -125.86907196044922, "logps/rejected": -335.2242736816406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.1940224170684814, "rewards/margins": 8.60369873046875, "rewards/rejected": -9.797720909118652, "step": 3948 }, { "epoch": 0.61, "learning_rate": 1.1250977747808496e-05, "logits/chosen": -2.912766218185425, "logits/rejected": -2.9854137897491455, "logps/chosen": -282.10498046875, "logps/rejected": -406.4267883300781, "loss": 2.1804, "rewards/accuracies": 0.0, "rewards/chosen": -4.378761291503906, "rewards/margins": -1.973246455192566, "rewards/rejected": -2.405514717102051, "step": 3949 }, { "epoch": 0.61, "learning_rate": 1.1250244307277348e-05, "logits/chosen": -2.2799487113952637, "logits/rejected": -2.935269355773926, "logps/chosen": -54.463775634765625, "logps/rejected": -273.2449035644531, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.89404296875, "rewards/margins": 4.8433332443237305, "rewards/rejected": -7.737375736236572, "step": 3950 }, { "epoch": 0.61, "learning_rate": 1.1249510866746201e-05, "logits/chosen": -2.523433208465576, "logits/rejected": -3.041250705718994, "logps/chosen": -84.76036834716797, "logps/rejected": -331.9014892578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.6770912408828735, "rewards/margins": 6.793468475341797, "rewards/rejected": -8.470560073852539, "step": 3951 }, { "epoch": 0.61, "learning_rate": 1.1248777426215053e-05, "logits/chosen": -3.0734241008758545, "logits/rejected": -2.799956798553467, "logps/chosen": -467.386474609375, "logps/rejected": -527.4923095703125, "loss": 2.2133, "rewards/accuracies": 0.5, "rewards/chosen": -2.5376648902893066, "rewards/margins": -1.0789856910705566, "rewards/rejected": -1.45867919921875, "step": 3952 }, { "epoch": 0.61, "learning_rate": 1.1248043985683905e-05, "logits/chosen": -2.617410898208618, "logits/rejected": -3.2805469036102295, "logps/chosen": -731.0698852539062, "logps/rejected": -1027.64404296875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.06282806396484375, "rewards/margins": 6.445426940917969, "rewards/rejected": -6.5082550048828125, "step": 3953 }, { "epoch": 0.61, "learning_rate": 1.1247310545152757e-05, "logits/chosen": -2.0286998748779297, "logits/rejected": -2.884814500808716, "logps/chosen": -81.90064239501953, "logps/rejected": -216.11111450195312, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -0.7919510006904602, "rewards/margins": 5.093491077423096, "rewards/rejected": -5.885441780090332, "step": 3954 }, { "epoch": 0.62, "learning_rate": 1.1246577104621609e-05, "logits/chosen": -2.9577689170837402, "logits/rejected": -3.0627384185791016, "logps/chosen": -166.47451782226562, "logps/rejected": -303.25579833984375, "loss": 0.0916, "rewards/accuracies": 1.0, "rewards/chosen": -1.6255508661270142, "rewards/margins": 2.6036276817321777, "rewards/rejected": -4.229178428649902, "step": 3955 }, { "epoch": 0.62, "learning_rate": 1.124584366409046e-05, "logits/chosen": -3.410179615020752, "logits/rejected": -3.359463930130005, "logps/chosen": -202.08522033691406, "logps/rejected": -219.6368408203125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -3.08791446685791, "rewards/margins": 5.156253814697266, "rewards/rejected": -8.244168281555176, "step": 3956 }, { "epoch": 0.62, "learning_rate": 1.1245110223559312e-05, "logits/chosen": -1.94948148727417, "logits/rejected": -3.1127476692199707, "logps/chosen": -111.61028289794922, "logps/rejected": -393.92132568359375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0695377588272095, "rewards/margins": 6.582761764526367, "rewards/rejected": -7.652299880981445, "step": 3957 }, { "epoch": 0.62, "learning_rate": 1.1244376783028164e-05, "logits/chosen": -2.5827372074127197, "logits/rejected": -3.1276190280914307, "logps/chosen": -119.45449829101562, "logps/rejected": -343.74798583984375, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -2.3648054599761963, "rewards/margins": 4.567439079284668, "rewards/rejected": -6.932244300842285, "step": 3958 }, { "epoch": 0.62, "learning_rate": 1.1243643342497016e-05, "logits/chosen": -1.9488987922668457, "logits/rejected": -2.843637704849243, "logps/chosen": -151.53436279296875, "logps/rejected": -312.28033447265625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.1492347717285156, "rewards/margins": 5.130732536315918, "rewards/rejected": -6.279967308044434, "step": 3959 }, { "epoch": 0.62, "learning_rate": 1.124290990196587e-05, "logits/chosen": -3.40244460105896, "logits/rejected": -2.3026180267333984, "logps/chosen": -419.86065673828125, "logps/rejected": -162.83387756347656, "loss": 1.2038, "rewards/accuracies": 0.5, "rewards/chosen": -3.4076874256134033, "rewards/margins": -0.2345828413963318, "rewards/rejected": -3.1731045246124268, "step": 3960 }, { "epoch": 0.62, "learning_rate": 1.1242176461434722e-05, "logits/chosen": -2.774679660797119, "logits/rejected": -2.810124397277832, "logps/chosen": -128.878173828125, "logps/rejected": -87.66981506347656, "loss": 0.2218, "rewards/accuracies": 1.0, "rewards/chosen": -2.31664776802063, "rewards/margins": 2.128662586212158, "rewards/rejected": -4.445310115814209, "step": 3961 }, { "epoch": 0.62, "learning_rate": 1.1241443020903574e-05, "logits/chosen": -3.2845959663391113, "logits/rejected": -3.4693310260772705, "logps/chosen": -359.61285400390625, "logps/rejected": -310.65155029296875, "loss": 0.1089, "rewards/accuracies": 1.0, "rewards/chosen": -0.6954008340835571, "rewards/margins": 3.9257636070251465, "rewards/rejected": -4.621164321899414, "step": 3962 }, { "epoch": 0.62, "learning_rate": 1.1240709580372425e-05, "logits/chosen": -2.260805368423462, "logits/rejected": -2.954617738723755, "logps/chosen": -131.98020935058594, "logps/rejected": -327.1648254394531, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.2018020749092102, "rewards/margins": 7.677480697631836, "rewards/rejected": -7.879282474517822, "step": 3963 }, { "epoch": 0.62, "learning_rate": 1.1239976139841279e-05, "logits/chosen": -3.136486291885376, "logits/rejected": -2.9824368953704834, "logps/chosen": -164.71359252929688, "logps/rejected": -184.0829315185547, "loss": 1.6639, "rewards/accuracies": 0.5, "rewards/chosen": -3.5373353958129883, "rewards/margins": 0.39086389541625977, "rewards/rejected": -3.928199291229248, "step": 3964 }, { "epoch": 0.62, "learning_rate": 1.123924269931013e-05, "logits/chosen": -2.832808494567871, "logits/rejected": -3.1277248859405518, "logps/chosen": -91.86215209960938, "logps/rejected": -206.3621826171875, "loss": 0.2518, "rewards/accuracies": 1.0, "rewards/chosen": -1.2166881561279297, "rewards/margins": 2.369365692138672, "rewards/rejected": -3.5860538482666016, "step": 3965 }, { "epoch": 0.62, "learning_rate": 1.1238509258778983e-05, "logits/chosen": -2.9716551303863525, "logits/rejected": -2.9728541374206543, "logps/chosen": -196.515869140625, "logps/rejected": -335.29400634765625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.0816314220428467, "rewards/margins": 6.5573530197143555, "rewards/rejected": -7.638984680175781, "step": 3966 }, { "epoch": 0.62, "learning_rate": 1.1237775818247835e-05, "logits/chosen": -2.304549217224121, "logits/rejected": -3.0046846866607666, "logps/chosen": -78.73098754882812, "logps/rejected": -199.01889038085938, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": -1.4978861808776855, "rewards/margins": 2.6433897018432617, "rewards/rejected": -4.141275882720947, "step": 3967 }, { "epoch": 0.62, "learning_rate": 1.1237042377716686e-05, "logits/chosen": -2.759542942047119, "logits/rejected": -2.9172556400299072, "logps/chosen": -111.87088012695312, "logps/rejected": -134.96286010742188, "loss": 0.9251, "rewards/accuracies": 0.5, "rewards/chosen": -1.2224034070968628, "rewards/margins": 2.5925278663635254, "rewards/rejected": -3.8149311542510986, "step": 3968 }, { "epoch": 0.62, "learning_rate": 1.123630893718554e-05, "logits/chosen": -2.1018760204315186, "logits/rejected": -2.9340591430664062, "logps/chosen": -306.18695068359375, "logps/rejected": -417.8577880859375, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": 0.2901668846607208, "rewards/margins": 5.32691764831543, "rewards/rejected": -5.036750793457031, "step": 3969 }, { "epoch": 0.62, "learning_rate": 1.1235575496654392e-05, "logits/chosen": -2.681086540222168, "logits/rejected": -2.812086343765259, "logps/chosen": -102.1933822631836, "logps/rejected": -139.6841583251953, "loss": 0.9907, "rewards/accuracies": 0.5, "rewards/chosen": -2.5636610984802246, "rewards/margins": 2.3242602348327637, "rewards/rejected": -4.887921333312988, "step": 3970 }, { "epoch": 0.62, "learning_rate": 1.1234842056123244e-05, "logits/chosen": -2.8359315395355225, "logits/rejected": -2.638519287109375, "logps/chosen": -154.40243530273438, "logps/rejected": -225.72645568847656, "loss": 2.3032, "rewards/accuracies": 0.5, "rewards/chosen": -4.1692657470703125, "rewards/margins": 0.6001858711242676, "rewards/rejected": -4.76945161819458, "step": 3971 }, { "epoch": 0.62, "learning_rate": 1.1234108615592096e-05, "logits/chosen": -3.422072649002075, "logits/rejected": -2.9813337326049805, "logps/chosen": -268.9785461425781, "logps/rejected": -144.28562927246094, "loss": 2.3842, "rewards/accuracies": 0.5, "rewards/chosen": -2.3264122009277344, "rewards/margins": -0.3056216239929199, "rewards/rejected": -2.0207905769348145, "step": 3972 }, { "epoch": 0.62, "learning_rate": 1.1233375175060948e-05, "logits/chosen": -2.8768322467803955, "logits/rejected": -2.1962153911590576, "logps/chosen": -383.27691650390625, "logps/rejected": -347.5052795410156, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.06291806697845459, "rewards/margins": 4.060325622558594, "rewards/rejected": -4.123243808746338, "step": 3973 }, { "epoch": 0.62, "learning_rate": 1.12326417345298e-05, "logits/chosen": -2.9807207584381104, "logits/rejected": -2.109022617340088, "logps/chosen": -178.09121704101562, "logps/rejected": -71.76436614990234, "loss": 4.4821, "rewards/accuracies": 0.0, "rewards/chosen": -6.457712650299072, "rewards/margins": -4.456084251403809, "rewards/rejected": -2.0016283988952637, "step": 3974 }, { "epoch": 0.62, "learning_rate": 1.1231908293998651e-05, "logits/chosen": -2.763782024383545, "logits/rejected": -3.4921483993530273, "logps/chosen": -62.536590576171875, "logps/rejected": -320.6613464355469, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5503570437431335, "rewards/margins": 6.809713840484619, "rewards/rejected": -7.360071182250977, "step": 3975 }, { "epoch": 0.62, "learning_rate": 1.1231174853467503e-05, "logits/chosen": -0.632583498954773, "logits/rejected": -2.5700156688690186, "logps/chosen": -80.01277923583984, "logps/rejected": -463.94842529296875, "loss": 0.3526, "rewards/accuracies": 1.0, "rewards/chosen": -2.8383076190948486, "rewards/margins": 0.8657482862472534, "rewards/rejected": -3.7040557861328125, "step": 3976 }, { "epoch": 0.62, "learning_rate": 1.1230441412936355e-05, "logits/chosen": -3.219810962677002, "logits/rejected": -2.63547682762146, "logps/chosen": -552.825927734375, "logps/rejected": -484.76885986328125, "loss": 1.3866, "rewards/accuracies": 0.5, "rewards/chosen": -2.559722900390625, "rewards/margins": 0.9243897199630737, "rewards/rejected": -3.484112501144409, "step": 3977 }, { "epoch": 0.62, "learning_rate": 1.1229707972405209e-05, "logits/chosen": -2.399116039276123, "logits/rejected": -2.878307580947876, "logps/chosen": -48.26954650878906, "logps/rejected": -184.50897216796875, "loss": 1.9841, "rewards/accuracies": 0.5, "rewards/chosen": -3.1844210624694824, "rewards/margins": -0.17540407180786133, "rewards/rejected": -3.009016990661621, "step": 3978 }, { "epoch": 0.62, "learning_rate": 1.122897453187406e-05, "logits/chosen": -2.935030698776245, "logits/rejected": -2.565692901611328, "logps/chosen": -445.3765869140625, "logps/rejected": -399.6075134277344, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -0.5962783694267273, "rewards/margins": 4.054608345031738, "rewards/rejected": -4.650886535644531, "step": 3979 }, { "epoch": 0.62, "learning_rate": 1.1228241091342912e-05, "logits/chosen": -2.830134391784668, "logits/rejected": -3.3078575134277344, "logps/chosen": -78.5091552734375, "logps/rejected": -174.69432067871094, "loss": 0.0839, "rewards/accuracies": 1.0, "rewards/chosen": -1.3096003532409668, "rewards/margins": 2.640453815460205, "rewards/rejected": -3.950054168701172, "step": 3980 }, { "epoch": 0.62, "learning_rate": 1.1227507650811764e-05, "logits/chosen": -1.8073136806488037, "logits/rejected": -3.08074951171875, "logps/chosen": -120.33195495605469, "logps/rejected": -403.74139404296875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.5661495327949524, "rewards/margins": 5.878772258758545, "rewards/rejected": -6.444921970367432, "step": 3981 }, { "epoch": 0.62, "learning_rate": 1.1226774210280616e-05, "logits/chosen": -2.877063751220703, "logits/rejected": -3.322766065597534, "logps/chosen": -474.107177734375, "logps/rejected": -510.9100341796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.235876441001892, "rewards/margins": 6.422462463378906, "rewards/rejected": -7.658339023590088, "step": 3982 }, { "epoch": 0.62, "learning_rate": 1.1226040769749468e-05, "logits/chosen": -2.198016405105591, "logits/rejected": -2.886892318725586, "logps/chosen": -91.3906478881836, "logps/rejected": -429.6050720214844, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.46131688356399536, "rewards/margins": 6.949936866760254, "rewards/rejected": -7.411253452301025, "step": 3983 }, { "epoch": 0.62, "learning_rate": 1.122530732921832e-05, "logits/chosen": -1.9212400913238525, "logits/rejected": -2.695199966430664, "logps/chosen": -133.1707000732422, "logps/rejected": -252.21923828125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.475581407546997, "rewards/margins": 5.190776824951172, "rewards/rejected": -6.66635799407959, "step": 3984 }, { "epoch": 0.62, "learning_rate": 1.1224573888687172e-05, "logits/chosen": -2.8526360988616943, "logits/rejected": -3.211398124694824, "logps/chosen": -126.83018493652344, "logps/rejected": -240.69699096679688, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4052762985229492, "rewards/margins": 4.800106525421143, "rewards/rejected": -6.205382823944092, "step": 3985 }, { "epoch": 0.62, "learning_rate": 1.1223840448156024e-05, "logits/chosen": -2.6954238414764404, "logits/rejected": -3.2622616291046143, "logps/chosen": -73.27017211914062, "logps/rejected": -294.23760986328125, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -1.2802302837371826, "rewards/margins": 5.176328659057617, "rewards/rejected": -6.456559181213379, "step": 3986 }, { "epoch": 0.62, "learning_rate": 1.1223107007624877e-05, "logits/chosen": -2.501438617706299, "logits/rejected": -3.163900375366211, "logps/chosen": -277.74652099609375, "logps/rejected": -393.08184814453125, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.5364219546318054, "rewards/margins": 5.426794052124023, "rewards/rejected": -5.9632158279418945, "step": 3987 }, { "epoch": 0.62, "learning_rate": 1.1222373567093729e-05, "logits/chosen": -3.246323347091675, "logits/rejected": -2.953136682510376, "logps/chosen": -289.1648254394531, "logps/rejected": -154.59286499023438, "loss": 2.3391, "rewards/accuracies": 0.5, "rewards/chosen": -2.6216881275177, "rewards/margins": -0.4199432134628296, "rewards/rejected": -2.201744794845581, "step": 3988 }, { "epoch": 0.62, "learning_rate": 1.1221640126562581e-05, "logits/chosen": -3.022939443588257, "logits/rejected": -2.659817695617676, "logps/chosen": -242.121337890625, "logps/rejected": -278.8496398925781, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -1.6519935131072998, "rewards/margins": 4.809089660644531, "rewards/rejected": -6.46108341217041, "step": 3989 }, { "epoch": 0.62, "learning_rate": 1.1220906686031433e-05, "logits/chosen": -2.6305503845214844, "logits/rejected": -3.311521291732788, "logps/chosen": -209.6475830078125, "logps/rejected": -429.6455078125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.6830556392669678, "rewards/margins": 5.018056869506836, "rewards/rejected": -7.701112747192383, "step": 3990 }, { "epoch": 0.62, "learning_rate": 1.1220173245500285e-05, "logits/chosen": -2.718533515930176, "logits/rejected": -1.872636079788208, "logps/chosen": -468.1292724609375, "logps/rejected": -468.8604736328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0544689893722534, "rewards/margins": 8.217283248901367, "rewards/rejected": -9.271751403808594, "step": 3991 }, { "epoch": 0.62, "learning_rate": 1.1219439804969137e-05, "logits/chosen": -1.4314112663269043, "logits/rejected": -2.993195056915283, "logps/chosen": -48.886940002441406, "logps/rejected": -380.962158203125, "loss": 1.4028, "rewards/accuracies": 0.5, "rewards/chosen": -3.285285472869873, "rewards/margins": 0.18178141117095947, "rewards/rejected": -3.467067003250122, "step": 3992 }, { "epoch": 0.62, "learning_rate": 1.1218706364437989e-05, "logits/chosen": -2.7213711738586426, "logits/rejected": -3.0565357208251953, "logps/chosen": -78.04014587402344, "logps/rejected": -336.9935302734375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1440415382385254, "rewards/margins": 7.295219421386719, "rewards/rejected": -8.439260482788086, "step": 3993 }, { "epoch": 0.62, "learning_rate": 1.121797292390684e-05, "logits/chosen": -1.7454195022583008, "logits/rejected": -3.198056936264038, "logps/chosen": -102.22474670410156, "logps/rejected": -349.67572021484375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.40177345275878906, "rewards/margins": 4.121815204620361, "rewards/rejected": -4.52358865737915, "step": 3994 }, { "epoch": 0.62, "learning_rate": 1.1217239483375694e-05, "logits/chosen": -2.824705123901367, "logits/rejected": -3.381910800933838, "logps/chosen": -24.04330825805664, "logps/rejected": -263.63873291015625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.8712281584739685, "rewards/margins": 5.833197593688965, "rewards/rejected": -6.704425811767578, "step": 3995 }, { "epoch": 0.62, "learning_rate": 1.1216506042844546e-05, "logits/chosen": -1.9223707914352417, "logits/rejected": -3.220069646835327, "logps/chosen": -16.72154426574707, "logps/rejected": -382.7513122558594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.087075375020504, "rewards/margins": 7.597175598144531, "rewards/rejected": -7.684250831604004, "step": 3996 }, { "epoch": 0.62, "learning_rate": 1.1215772602313398e-05, "logits/chosen": -2.051164388656616, "logits/rejected": -3.0333235263824463, "logps/chosen": -138.68728637695312, "logps/rejected": -325.15411376953125, "loss": 2.764, "rewards/accuracies": 0.5, "rewards/chosen": -4.279770374298096, "rewards/margins": 0.6224789619445801, "rewards/rejected": -4.902249336242676, "step": 3997 }, { "epoch": 0.62, "learning_rate": 1.1215039161782251e-05, "logits/chosen": -2.250136375427246, "logits/rejected": -3.079509973526001, "logps/chosen": -35.26020050048828, "logps/rejected": -128.46533203125, "loss": 0.5082, "rewards/accuracies": 0.5, "rewards/chosen": -2.0662436485290527, "rewards/margins": 0.9643293023109436, "rewards/rejected": -3.0305728912353516, "step": 3998 }, { "epoch": 0.62, "learning_rate": 1.1214305721251103e-05, "logits/chosen": -1.774070382118225, "logits/rejected": -2.513303279876709, "logps/chosen": -164.89874267578125, "logps/rejected": -210.4889678955078, "loss": 1.924, "rewards/accuracies": 0.5, "rewards/chosen": -3.2434544563293457, "rewards/margins": 1.4002063274383545, "rewards/rejected": -4.643660545349121, "step": 3999 }, { "epoch": 0.62, "learning_rate": 1.1213572280719955e-05, "logits/chosen": -1.47332763671875, "logits/rejected": -3.0007879734039307, "logps/chosen": -137.46435546875, "logps/rejected": -232.36419677734375, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -1.8621537685394287, "rewards/margins": 4.1117963790893555, "rewards/rejected": -5.973950386047363, "step": 4000 }, { "epoch": 0.62, "learning_rate": 1.1212838840188807e-05, "logits/chosen": -2.568153142929077, "logits/rejected": -3.2591161727905273, "logps/chosen": -27.097497940063477, "logps/rejected": -272.1467590332031, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1081955432891846, "rewards/margins": 6.701936721801758, "rewards/rejected": -7.810132026672363, "step": 4001 }, { "epoch": 0.62, "learning_rate": 1.1212105399657659e-05, "logits/chosen": -3.0617446899414062, "logits/rejected": -3.3317596912384033, "logps/chosen": -89.55439758300781, "logps/rejected": -259.8100891113281, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 0.03605547547340393, "rewards/margins": 5.656888961791992, "rewards/rejected": -5.620833396911621, "step": 4002 }, { "epoch": 0.62, "learning_rate": 1.121137195912651e-05, "logits/chosen": -2.65350079536438, "logits/rejected": -2.9753575325012207, "logps/chosen": -246.0158233642578, "logps/rejected": -242.39492797851562, "loss": 2.2907, "rewards/accuracies": 0.5, "rewards/chosen": -2.9898064136505127, "rewards/margins": 0.8672294616699219, "rewards/rejected": -3.8570358753204346, "step": 4003 }, { "epoch": 0.62, "learning_rate": 1.1210638518595364e-05, "logits/chosen": -1.0496532917022705, "logits/rejected": -2.588704824447632, "logps/chosen": -183.2197723388672, "logps/rejected": -511.7016906738281, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.4872825145721436, "rewards/margins": 6.651630401611328, "rewards/rejected": -8.13891315460205, "step": 4004 }, { "epoch": 0.62, "learning_rate": 1.1209905078064216e-05, "logits/chosen": -3.197437047958374, "logits/rejected": -2.7119596004486084, "logps/chosen": -270.70062255859375, "logps/rejected": -271.9329528808594, "loss": 1.6029, "rewards/accuracies": 0.5, "rewards/chosen": -2.8443894386291504, "rewards/margins": 2.2310197353363037, "rewards/rejected": -5.075408935546875, "step": 4005 }, { "epoch": 0.62, "learning_rate": 1.1209171637533068e-05, "logits/chosen": -3.087541341781616, "logits/rejected": -3.156486988067627, "logps/chosen": -401.38824462890625, "logps/rejected": -366.4848327636719, "loss": 0.3787, "rewards/accuracies": 0.5, "rewards/chosen": -0.9585174322128296, "rewards/margins": 2.051980495452881, "rewards/rejected": -3.010498046875, "step": 4006 }, { "epoch": 0.62, "learning_rate": 1.120843819700192e-05, "logits/chosen": -2.819312334060669, "logits/rejected": -2.4334261417388916, "logps/chosen": -336.64044189453125, "logps/rejected": -278.2328796386719, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0383539199829102, "rewards/margins": 4.018822193145752, "rewards/rejected": -5.057176113128662, "step": 4007 }, { "epoch": 0.62, "learning_rate": 1.1207704756470772e-05, "logits/chosen": -2.9548656940460205, "logits/rejected": -3.000678062438965, "logps/chosen": -429.4012451171875, "logps/rejected": -519.1054077148438, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.4301589727401733, "rewards/margins": 6.097002983093262, "rewards/rejected": -7.527162551879883, "step": 4008 }, { "epoch": 0.62, "learning_rate": 1.1206971315939624e-05, "logits/chosen": -2.0504276752471924, "logits/rejected": -2.902437925338745, "logps/chosen": -234.1061248779297, "logps/rejected": -461.03277587890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.3920658230781555, "rewards/margins": 7.772737503051758, "rewards/rejected": -7.380671501159668, "step": 4009 }, { "epoch": 0.62, "learning_rate": 1.1206237875408476e-05, "logits/chosen": -2.8985495567321777, "logits/rejected": -1.588416576385498, "logps/chosen": -269.0281982421875, "logps/rejected": -200.86428833007812, "loss": 2.6254, "rewards/accuracies": 0.5, "rewards/chosen": -3.309500217437744, "rewards/margins": 0.7596230506896973, "rewards/rejected": -4.069123268127441, "step": 4010 }, { "epoch": 0.62, "learning_rate": 1.1205504434877327e-05, "logits/chosen": -2.8391835689544678, "logits/rejected": -2.6499619483947754, "logps/chosen": -443.5958251953125, "logps/rejected": -352.25628662109375, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -2.090554714202881, "rewards/margins": 3.717416286468506, "rewards/rejected": -5.807971000671387, "step": 4011 }, { "epoch": 0.62, "learning_rate": 1.120477099434618e-05, "logits/chosen": -2.6660959720611572, "logits/rejected": -2.9466705322265625, "logps/chosen": -130.13027954101562, "logps/rejected": -363.48919677734375, "loss": 0.9833, "rewards/accuracies": 0.5, "rewards/chosen": -2.7264087200164795, "rewards/margins": 2.5585384368896484, "rewards/rejected": -5.284947395324707, "step": 4012 }, { "epoch": 0.62, "learning_rate": 1.1204037553815033e-05, "logits/chosen": -2.6780941486358643, "logits/rejected": -3.0775468349456787, "logps/chosen": -437.536376953125, "logps/rejected": -420.1271057128906, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9057891368865967, "rewards/margins": 7.312963485717773, "rewards/rejected": -9.21875286102295, "step": 4013 }, { "epoch": 0.62, "learning_rate": 1.1203304113283885e-05, "logits/chosen": -2.776514768600464, "logits/rejected": -2.837998867034912, "logps/chosen": -330.4127197265625, "logps/rejected": -311.94659423828125, "loss": 0.5468, "rewards/accuracies": 0.5, "rewards/chosen": -3.8668556213378906, "rewards/margins": 1.6940861940383911, "rewards/rejected": -5.56094217300415, "step": 4014 }, { "epoch": 0.62, "learning_rate": 1.1202570672752737e-05, "logits/chosen": -2.584383249282837, "logits/rejected": -3.0429093837738037, "logps/chosen": -84.38261413574219, "logps/rejected": -217.02066040039062, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.3607618808746338, "rewards/margins": 6.364246368408203, "rewards/rejected": -7.725008010864258, "step": 4015 }, { "epoch": 0.62, "learning_rate": 1.1201837232221588e-05, "logits/chosen": -1.7626162767410278, "logits/rejected": -2.766403913497925, "logps/chosen": -172.48411560058594, "logps/rejected": -330.5720520019531, "loss": 2.9521, "rewards/accuracies": 0.5, "rewards/chosen": -3.681072235107422, "rewards/margins": 0.01972675323486328, "rewards/rejected": -3.700798988342285, "step": 4016 }, { "epoch": 0.62, "learning_rate": 1.120110379169044e-05, "logits/chosen": -2.9075424671173096, "logits/rejected": -2.9173550605773926, "logps/chosen": -156.67335510253906, "logps/rejected": -241.82644653320312, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.0616205930709839, "rewards/margins": 5.20181941986084, "rewards/rejected": -6.263440132141113, "step": 4017 }, { "epoch": 0.62, "learning_rate": 1.1200370351159292e-05, "logits/chosen": -2.5371227264404297, "logits/rejected": -2.8081564903259277, "logps/chosen": -149.13980102539062, "logps/rejected": -234.63645935058594, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.867268443107605, "rewards/margins": 6.293036937713623, "rewards/rejected": -7.160305023193359, "step": 4018 }, { "epoch": 0.63, "learning_rate": 1.1199636910628144e-05, "logits/chosen": -3.0734498500823975, "logits/rejected": -2.74372935295105, "logps/chosen": -128.789794921875, "logps/rejected": -173.3234405517578, "loss": 1.2604, "rewards/accuracies": 0.5, "rewards/chosen": -2.356093645095825, "rewards/margins": 2.1694259643554688, "rewards/rejected": -4.525519847869873, "step": 4019 }, { "epoch": 0.63, "learning_rate": 1.1198903470096996e-05, "logits/chosen": -2.415745973587036, "logits/rejected": -2.7476961612701416, "logps/chosen": -92.86857604980469, "logps/rejected": -112.81359100341797, "loss": 1.8677, "rewards/accuracies": 0.5, "rewards/chosen": -2.456347703933716, "rewards/margins": 1.5545539855957031, "rewards/rejected": -4.01090145111084, "step": 4020 }, { "epoch": 0.63, "learning_rate": 1.1198170029565848e-05, "logits/chosen": -3.0068411827087402, "logits/rejected": -3.212029218673706, "logps/chosen": -63.10327911376953, "logps/rejected": -291.10791015625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.14987096190452576, "rewards/margins": 6.9278974533081055, "rewards/rejected": -6.778026580810547, "step": 4021 }, { "epoch": 0.63, "learning_rate": 1.1197436589034701e-05, "logits/chosen": -2.0434985160827637, "logits/rejected": -3.057756185531616, "logps/chosen": -56.250213623046875, "logps/rejected": -290.370361328125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.4028593897819519, "rewards/margins": 6.605135917663574, "rewards/rejected": -6.202276706695557, "step": 4022 }, { "epoch": 0.63, "learning_rate": 1.1196703148503553e-05, "logits/chosen": -2.077820062637329, "logits/rejected": -3.12861967086792, "logps/chosen": -131.97315979003906, "logps/rejected": -447.67156982421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1721302270889282, "rewards/margins": 8.057175636291504, "rewards/rejected": -9.2293062210083, "step": 4023 }, { "epoch": 0.63, "learning_rate": 1.1195969707972405e-05, "logits/chosen": -3.195063829421997, "logits/rejected": -2.8175277709960938, "logps/chosen": -507.7418212890625, "logps/rejected": -276.88983154296875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.9621261358261108, "rewards/margins": 7.849722862243652, "rewards/rejected": -8.811848640441895, "step": 4024 }, { "epoch": 0.63, "learning_rate": 1.1195236267441257e-05, "logits/chosen": -1.9892158508300781, "logits/rejected": -3.1717453002929688, "logps/chosen": -168.15390014648438, "logps/rejected": -410.5451354980469, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.3153495788574219, "rewards/margins": 8.227011680603027, "rewards/rejected": -8.54236125946045, "step": 4025 }, { "epoch": 0.63, "learning_rate": 1.1194502826910109e-05, "logits/chosen": -3.1776747703552246, "logits/rejected": -1.961702585220337, "logps/chosen": -404.35723876953125, "logps/rejected": -293.04315185546875, "loss": 2.1417, "rewards/accuracies": 0.5, "rewards/chosen": -1.8904038667678833, "rewards/margins": 0.2047116756439209, "rewards/rejected": -2.0951154232025146, "step": 4026 }, { "epoch": 0.63, "learning_rate": 1.119376938637896e-05, "logits/chosen": -2.0185000896453857, "logits/rejected": -3.0111043453216553, "logps/chosen": -131.86009216308594, "logps/rejected": -269.5142822265625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.23603892326355, "rewards/margins": 6.179658889770508, "rewards/rejected": -8.415698051452637, "step": 4027 }, { "epoch": 0.63, "learning_rate": 1.1193035945847813e-05, "logits/chosen": -1.6257752180099487, "logits/rejected": -2.7246170043945312, "logps/chosen": -238.78683471679688, "logps/rejected": -431.7957458496094, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -3.20802640914917, "rewards/margins": 6.059270858764648, "rewards/rejected": -9.26729679107666, "step": 4028 }, { "epoch": 0.63, "learning_rate": 1.1192302505316665e-05, "logits/chosen": -2.916940212249756, "logits/rejected": -3.1801273822784424, "logps/chosen": -60.302284240722656, "logps/rejected": -114.58824157714844, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.4329228401184082, "rewards/margins": 4.35984468460083, "rewards/rejected": -5.792767524719238, "step": 4029 }, { "epoch": 0.63, "learning_rate": 1.1191569064785518e-05, "logits/chosen": -2.941520929336548, "logits/rejected": -3.1064133644104004, "logps/chosen": -63.40502166748047, "logps/rejected": -189.65176391601562, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8562495112419128, "rewards/margins": 7.750720500946045, "rewards/rejected": -8.606969833374023, "step": 4030 }, { "epoch": 0.63, "learning_rate": 1.119083562425437e-05, "logits/chosen": -1.7782496213912964, "logits/rejected": -2.8489596843719482, "logps/chosen": -66.59458923339844, "logps/rejected": -307.228759765625, "loss": 0.1769, "rewards/accuracies": 1.0, "rewards/chosen": -1.7916831970214844, "rewards/margins": 4.453993797302246, "rewards/rejected": -6.2456769943237305, "step": 4031 }, { "epoch": 0.63, "learning_rate": 1.1190102183723224e-05, "logits/chosen": -2.4588918685913086, "logits/rejected": -3.086289644241333, "logps/chosen": -246.28387451171875, "logps/rejected": -334.4169006347656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.44548892974853516, "rewards/margins": 9.519145965576172, "rewards/rejected": -9.964634895324707, "step": 4032 }, { "epoch": 0.63, "learning_rate": 1.1189368743192075e-05, "logits/chosen": -1.7538164854049683, "logits/rejected": -2.9371116161346436, "logps/chosen": -302.9239807128906, "logps/rejected": -523.899169921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.5508644580841064, "rewards/margins": 7.925628185272217, "rewards/rejected": -8.476492881774902, "step": 4033 }, { "epoch": 0.63, "learning_rate": 1.1188635302660927e-05, "logits/chosen": -2.440912961959839, "logits/rejected": -3.075382947921753, "logps/chosen": -53.13308334350586, "logps/rejected": -306.455322265625, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": 0.1119663268327713, "rewards/margins": 4.260817527770996, "rewards/rejected": -4.148850917816162, "step": 4034 }, { "epoch": 0.63, "learning_rate": 1.118790186212978e-05, "logits/chosen": -1.6374759674072266, "logits/rejected": -2.8498167991638184, "logps/chosen": -138.52334594726562, "logps/rejected": -376.037841796875, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -0.032716378569602966, "rewards/margins": 6.996048927307129, "rewards/rejected": -7.0287652015686035, "step": 4035 }, { "epoch": 0.63, "learning_rate": 1.1187168421598631e-05, "logits/chosen": -2.7361578941345215, "logits/rejected": -3.2567780017852783, "logps/chosen": -101.110107421875, "logps/rejected": -257.27703857421875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.2646756172180176, "rewards/margins": 5.065803050994873, "rewards/rejected": -6.330478668212891, "step": 4036 }, { "epoch": 0.63, "learning_rate": 1.1186434981067483e-05, "logits/chosen": -2.519866466522217, "logits/rejected": -2.312514066696167, "logps/chosen": -243.18923950195312, "logps/rejected": -261.03021240234375, "loss": 3.8104, "rewards/accuracies": 0.5, "rewards/chosen": -5.036148548126221, "rewards/margins": -1.654576063156128, "rewards/rejected": -3.3815724849700928, "step": 4037 }, { "epoch": 0.63, "learning_rate": 1.1185701540536335e-05, "logits/chosen": -2.3220431804656982, "logits/rejected": -2.973729372024536, "logps/chosen": -77.84590911865234, "logps/rejected": -239.02557373046875, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": -0.4429807662963867, "rewards/margins": 4.859431743621826, "rewards/rejected": -5.302412509918213, "step": 4038 }, { "epoch": 0.63, "learning_rate": 1.1184968100005187e-05, "logits/chosen": -2.700744867324829, "logits/rejected": -2.9331390857696533, "logps/chosen": -113.21183013916016, "logps/rejected": -144.94944763183594, "loss": 4.2391, "rewards/accuracies": 0.5, "rewards/chosen": -4.377520561218262, "rewards/margins": -1.1178226470947266, "rewards/rejected": -3.259697675704956, "step": 4039 }, { "epoch": 0.63, "learning_rate": 1.118423465947404e-05, "logits/chosen": -0.8099798560142517, "logits/rejected": -2.863450765609741, "logps/chosen": -54.819602966308594, "logps/rejected": -530.2596435546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.188098430633545, "rewards/margins": 7.668238639831543, "rewards/rejected": -8.85633659362793, "step": 4040 }, { "epoch": 0.63, "learning_rate": 1.1183501218942892e-05, "logits/chosen": -2.102360486984253, "logits/rejected": -2.9950592517852783, "logps/chosen": -378.4876403808594, "logps/rejected": -536.103759765625, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -0.4442329704761505, "rewards/margins": 6.965350151062012, "rewards/rejected": -7.40958309173584, "step": 4041 }, { "epoch": 0.63, "learning_rate": 1.1182767778411744e-05, "logits/chosen": -2.6480631828308105, "logits/rejected": -1.7963054180145264, "logps/chosen": -318.0971374511719, "logps/rejected": -232.23800659179688, "loss": 3.0888, "rewards/accuracies": 0.5, "rewards/chosen": -3.9241952896118164, "rewards/margins": 0.09918546676635742, "rewards/rejected": -4.023380756378174, "step": 4042 }, { "epoch": 0.63, "learning_rate": 1.1182034337880596e-05, "logits/chosen": -2.78236985206604, "logits/rejected": -3.017540454864502, "logps/chosen": -352.25634765625, "logps/rejected": -326.44537353515625, "loss": 3.5378, "rewards/accuracies": 0.5, "rewards/chosen": -4.132843017578125, "rewards/margins": 0.1952974796295166, "rewards/rejected": -4.328140735626221, "step": 4043 }, { "epoch": 0.63, "learning_rate": 1.1181300897349448e-05, "logits/chosen": -1.8489041328430176, "logits/rejected": -2.8208088874816895, "logps/chosen": -237.5027618408203, "logps/rejected": -351.7352600097656, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -1.9085514545440674, "rewards/margins": 5.5407257080078125, "rewards/rejected": -7.449277400970459, "step": 4044 }, { "epoch": 0.63, "learning_rate": 1.11805674568183e-05, "logits/chosen": -2.602753162384033, "logits/rejected": -2.7782294750213623, "logps/chosen": -272.4177551269531, "logps/rejected": -395.68450927734375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.6139718294143677, "rewards/margins": 4.865827560424805, "rewards/rejected": -6.479799270629883, "step": 4045 }, { "epoch": 0.63, "learning_rate": 1.1179834016287152e-05, "logits/chosen": -1.9486780166625977, "logits/rejected": -2.8975489139556885, "logps/chosen": -71.3311996459961, "logps/rejected": -318.6303405761719, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -1.1031652688980103, "rewards/margins": 5.525298595428467, "rewards/rejected": -6.6284637451171875, "step": 4046 }, { "epoch": 0.63, "learning_rate": 1.1179100575756003e-05, "logits/chosen": -3.01023268699646, "logits/rejected": -3.1972033977508545, "logps/chosen": -137.0162353515625, "logps/rejected": -220.64956665039062, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.1823160648345947, "rewards/margins": 6.169328212738037, "rewards/rejected": -7.351644515991211, "step": 4047 }, { "epoch": 0.63, "learning_rate": 1.1178367135224855e-05, "logits/chosen": -0.8371648192405701, "logits/rejected": -2.4791481494903564, "logps/chosen": -40.227203369140625, "logps/rejected": -468.89739990234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2957717180252075, "rewards/margins": 9.963855743408203, "rewards/rejected": -10.259626388549805, "step": 4048 }, { "epoch": 0.63, "learning_rate": 1.1177633694693709e-05, "logits/chosen": -2.4776859283447266, "logits/rejected": -2.818284749984741, "logps/chosen": -38.210296630859375, "logps/rejected": -130.5443878173828, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -0.7431662082672119, "rewards/margins": 3.3903369903564453, "rewards/rejected": -4.133503437042236, "step": 4049 }, { "epoch": 0.63, "learning_rate": 1.117690025416256e-05, "logits/chosen": -2.627362012863159, "logits/rejected": -2.845762014389038, "logps/chosen": -60.931427001953125, "logps/rejected": -130.9286346435547, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": 0.2632249891757965, "rewards/margins": 4.548168659210205, "rewards/rejected": -4.284943580627441, "step": 4050 }, { "epoch": 0.63, "learning_rate": 1.1176166813631413e-05, "logits/chosen": -0.8304392099380493, "logits/rejected": -2.3943586349487305, "logps/chosen": -50.34172821044922, "logps/rejected": -290.5740966796875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.4986180067062378, "rewards/margins": 6.068948745727539, "rewards/rejected": -6.567566871643066, "step": 4051 }, { "epoch": 0.63, "learning_rate": 1.1175433373100265e-05, "logits/chosen": -2.6862926483154297, "logits/rejected": -2.8757784366607666, "logps/chosen": -92.23869323730469, "logps/rejected": -238.92982482910156, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.604861855506897, "rewards/margins": 4.902253150939941, "rewards/rejected": -5.507115364074707, "step": 4052 }, { "epoch": 0.63, "learning_rate": 1.1174699932569116e-05, "logits/chosen": -2.7735116481781006, "logits/rejected": -2.844285488128662, "logps/chosen": -25.188270568847656, "logps/rejected": -250.97164916992188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.32618990540504456, "rewards/margins": 8.647045135498047, "rewards/rejected": -8.973234176635742, "step": 4053 }, { "epoch": 0.63, "learning_rate": 1.1173966492037968e-05, "logits/chosen": -2.652833938598633, "logits/rejected": -2.669339418411255, "logps/chosen": -71.1993179321289, "logps/rejected": -188.5843505859375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.18190079927444458, "rewards/margins": 5.123366355895996, "rewards/rejected": -5.305267333984375, "step": 4054 }, { "epoch": 0.63, "learning_rate": 1.117323305150682e-05, "logits/chosen": -2.8922741413116455, "logits/rejected": -3.1971356868743896, "logps/chosen": -146.1833038330078, "logps/rejected": -226.4234619140625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.4199821949005127, "rewards/margins": 6.434332370758057, "rewards/rejected": -7.85431432723999, "step": 4055 }, { "epoch": 0.63, "learning_rate": 1.1172499610975672e-05, "logits/chosen": -2.8103606700897217, "logits/rejected": -2.9847800731658936, "logps/chosen": -118.44725036621094, "logps/rejected": -293.0146484375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.373688280582428, "rewards/margins": 6.428736686706543, "rewards/rejected": -6.802425384521484, "step": 4056 }, { "epoch": 0.63, "learning_rate": 1.1171766170444524e-05, "logits/chosen": -2.8042006492614746, "logits/rejected": -2.1361842155456543, "logps/chosen": -238.48471069335938, "logps/rejected": -192.6933135986328, "loss": 1.8399, "rewards/accuracies": 0.5, "rewards/chosen": -2.960643768310547, "rewards/margins": 0.9454127550125122, "rewards/rejected": -3.9060566425323486, "step": 4057 }, { "epoch": 0.63, "learning_rate": 1.1171032729913378e-05, "logits/chosen": -2.80334210395813, "logits/rejected": -1.8339922428131104, "logps/chosen": -118.51560974121094, "logps/rejected": -64.44872283935547, "loss": 2.6965, "rewards/accuracies": 0.5, "rewards/chosen": -3.24322247505188, "rewards/margins": -1.74712336063385, "rewards/rejected": -1.4960991144180298, "step": 4058 }, { "epoch": 0.63, "learning_rate": 1.117029928938223e-05, "logits/chosen": -3.0059051513671875, "logits/rejected": -2.768798351287842, "logps/chosen": -354.4434814453125, "logps/rejected": -322.8087158203125, "loss": 3.8143, "rewards/accuracies": 0.5, "rewards/chosen": -4.822389602661133, "rewards/margins": 1.1461420059204102, "rewards/rejected": -5.968531131744385, "step": 4059 }, { "epoch": 0.63, "learning_rate": 1.1169565848851081e-05, "logits/chosen": -2.4657647609710693, "logits/rejected": -2.9992363452911377, "logps/chosen": -138.23773193359375, "logps/rejected": -404.6041259765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.38723617792129517, "rewards/margins": 7.574271202087402, "rewards/rejected": -7.9615068435668945, "step": 4060 }, { "epoch": 0.63, "learning_rate": 1.1168832408319933e-05, "logits/chosen": -2.805915355682373, "logits/rejected": -2.6460509300231934, "logps/chosen": -299.471923828125, "logps/rejected": -330.24884033203125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1096959114074707, "rewards/margins": 6.6138763427734375, "rewards/rejected": -7.723572731018066, "step": 4061 }, { "epoch": 0.63, "learning_rate": 1.1168098967788785e-05, "logits/chosen": -1.9334583282470703, "logits/rejected": -2.6756184101104736, "logps/chosen": -134.62635803222656, "logps/rejected": -328.69354248046875, "loss": 0.0813, "rewards/accuracies": 1.0, "rewards/chosen": -0.5404133200645447, "rewards/margins": 5.2844038009643555, "rewards/rejected": -5.824817657470703, "step": 4062 }, { "epoch": 0.63, "learning_rate": 1.1167365527257637e-05, "logits/chosen": -1.9694281816482544, "logits/rejected": -3.137017011642456, "logps/chosen": -45.554012298583984, "logps/rejected": -236.35113525390625, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5414634943008423, "rewards/margins": 4.411101341247559, "rewards/rejected": -4.9525651931762695, "step": 4063 }, { "epoch": 0.63, "learning_rate": 1.116663208672649e-05, "logits/chosen": -2.8835833072662354, "logits/rejected": -2.5964787006378174, "logps/chosen": -103.55026245117188, "logps/rejected": -175.31837463378906, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.35385990142822266, "rewards/margins": 6.257473945617676, "rewards/rejected": -6.611333847045898, "step": 4064 }, { "epoch": 0.63, "learning_rate": 1.1165898646195342e-05, "logits/chosen": -2.1484756469726562, "logits/rejected": -2.2965850830078125, "logps/chosen": -175.98048400878906, "logps/rejected": -383.95086669921875, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -0.6367321014404297, "rewards/margins": 3.4620914459228516, "rewards/rejected": -4.098823547363281, "step": 4065 }, { "epoch": 0.63, "learning_rate": 1.1165165205664194e-05, "logits/chosen": -2.7002511024475098, "logits/rejected": -2.8738343715667725, "logps/chosen": -198.20486450195312, "logps/rejected": -250.09445190429688, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": -0.938205361366272, "rewards/margins": 2.9809062480926514, "rewards/rejected": -3.919111728668213, "step": 4066 }, { "epoch": 0.63, "learning_rate": 1.1164431765133048e-05, "logits/chosen": -2.496975898742676, "logits/rejected": -3.1352016925811768, "logps/chosen": -404.6859130859375, "logps/rejected": -772.604736328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5321707725524902, "rewards/margins": 8.421387672424316, "rewards/rejected": -9.953557968139648, "step": 4067 }, { "epoch": 0.63, "learning_rate": 1.11636983246019e-05, "logits/chosen": -2.846795082092285, "logits/rejected": -3.007871389389038, "logps/chosen": -113.39775848388672, "logps/rejected": -191.7980194091797, "loss": 1.8645, "rewards/accuracies": 0.5, "rewards/chosen": -2.4153270721435547, "rewards/margins": 0.21287882328033447, "rewards/rejected": -2.6282057762145996, "step": 4068 }, { "epoch": 0.63, "learning_rate": 1.1162964884070752e-05, "logits/chosen": -2.897519111633301, "logits/rejected": -3.077685594558716, "logps/chosen": -19.913278579711914, "logps/rejected": -150.470947265625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.029856815934181213, "rewards/margins": 5.703024864196777, "rewards/rejected": -5.673168182373047, "step": 4069 }, { "epoch": 0.63, "learning_rate": 1.1162231443539603e-05, "logits/chosen": -0.7187501192092896, "logits/rejected": -2.0529985427856445, "logps/chosen": -132.94244384765625, "logps/rejected": -368.9200744628906, "loss": 0.0966, "rewards/accuracies": 1.0, "rewards/chosen": -1.5357952117919922, "rewards/margins": 4.903301239013672, "rewards/rejected": -6.439096450805664, "step": 4070 }, { "epoch": 0.63, "learning_rate": 1.1161498003008455e-05, "logits/chosen": -3.095720052719116, "logits/rejected": -2.7961316108703613, "logps/chosen": -331.6069030761719, "logps/rejected": -140.23880004882812, "loss": 5.5192, "rewards/accuracies": 0.5, "rewards/chosen": -5.37183141708374, "rewards/margins": -2.022519111633301, "rewards/rejected": -3.3493123054504395, "step": 4071 }, { "epoch": 0.63, "learning_rate": 1.1160764562477307e-05, "logits/chosen": -3.125671863555908, "logits/rejected": -2.77352237701416, "logps/chosen": -102.39215850830078, "logps/rejected": -174.05250549316406, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.21123582124710083, "rewards/margins": 5.191714286804199, "rewards/rejected": -5.402950286865234, "step": 4072 }, { "epoch": 0.63, "learning_rate": 1.1160031121946159e-05, "logits/chosen": -2.326820135116577, "logits/rejected": -3.3065900802612305, "logps/chosen": -28.075199127197266, "logps/rejected": -293.23333740234375, "loss": 0.1882, "rewards/accuracies": 1.0, "rewards/chosen": -0.9854048490524292, "rewards/margins": 3.4354114532470703, "rewards/rejected": -4.420816421508789, "step": 4073 }, { "epoch": 0.63, "learning_rate": 1.1159297681415011e-05, "logits/chosen": -2.2936508655548096, "logits/rejected": -2.6842293739318848, "logps/chosen": -77.8450698852539, "logps/rejected": -120.26325225830078, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.5502912402153015, "rewards/margins": 4.649905681610107, "rewards/rejected": -5.200197219848633, "step": 4074 }, { "epoch": 0.63, "learning_rate": 1.1158564240883863e-05, "logits/chosen": -2.2350375652313232, "logits/rejected": -3.0239322185516357, "logps/chosen": -95.31707000732422, "logps/rejected": -452.36895751953125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6449702978134155, "rewards/margins": 8.348073959350586, "rewards/rejected": -8.99304485321045, "step": 4075 }, { "epoch": 0.63, "learning_rate": 1.1157830800352716e-05, "logits/chosen": -2.068527936935425, "logits/rejected": -3.223600149154663, "logps/chosen": -38.219886779785156, "logps/rejected": -532.8499755859375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.4765048325061798, "rewards/margins": 5.2092695236206055, "rewards/rejected": -5.685774326324463, "step": 4076 }, { "epoch": 0.63, "learning_rate": 1.1157097359821568e-05, "logits/chosen": -1.9554611444473267, "logits/rejected": -2.949141263961792, "logps/chosen": -134.83587646484375, "logps/rejected": -422.14508056640625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.9205776453018188, "rewards/margins": 5.899015426635742, "rewards/rejected": -6.819592475891113, "step": 4077 }, { "epoch": 0.63, "learning_rate": 1.115636391929042e-05, "logits/chosen": -2.381492853164673, "logits/rejected": -2.7934577465057373, "logps/chosen": -263.5973205566406, "logps/rejected": -386.8379211425781, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.8587226867675781, "rewards/margins": 5.8112616539001465, "rewards/rejected": -7.669984340667725, "step": 4078 }, { "epoch": 0.63, "learning_rate": 1.1155630478759272e-05, "logits/chosen": -2.3260738849639893, "logits/rejected": -3.03668475151062, "logps/chosen": -105.6316146850586, "logps/rejected": -338.2959899902344, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0507179498672485, "rewards/margins": 7.615226745605469, "rewards/rejected": -8.665945053100586, "step": 4079 }, { "epoch": 0.63, "learning_rate": 1.1154897038228124e-05, "logits/chosen": -3.076725959777832, "logits/rejected": -2.1824686527252197, "logps/chosen": -255.95774841308594, "logps/rejected": -198.25514221191406, "loss": 3.1431, "rewards/accuracies": 0.5, "rewards/chosen": -3.8900928497314453, "rewards/margins": 0.5774548053741455, "rewards/rejected": -4.467547416687012, "step": 4080 }, { "epoch": 0.63, "learning_rate": 1.1154163597696976e-05, "logits/chosen": -1.3357449769973755, "logits/rejected": -3.293368339538574, "logps/chosen": -146.42347717285156, "logps/rejected": -543.7642211914062, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.3349231779575348, "rewards/margins": 6.950495719909668, "rewards/rejected": -7.285418510437012, "step": 4081 }, { "epoch": 0.63, "learning_rate": 1.1153430157165828e-05, "logits/chosen": -2.8803935050964355, "logits/rejected": -2.966986656188965, "logps/chosen": -427.6847229003906, "logps/rejected": -291.38555908203125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.07123297452926636, "rewards/margins": 6.34699010848999, "rewards/rejected": -6.418222904205322, "step": 4082 }, { "epoch": 0.63, "learning_rate": 1.115269671663468e-05, "logits/chosen": -2.7299020290374756, "logits/rejected": -2.915123462677002, "logps/chosen": -11.252333641052246, "logps/rejected": -99.10775756835938, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.4704838991165161, "rewards/margins": 5.7823333740234375, "rewards/rejected": -5.311849594116211, "step": 4083 }, { "epoch": 0.64, "learning_rate": 1.1151963276103533e-05, "logits/chosen": -2.5083723068237305, "logits/rejected": -3.0949997901916504, "logps/chosen": -47.49345779418945, "logps/rejected": -315.3095703125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.2766188979148865, "rewards/margins": 6.991899490356445, "rewards/rejected": -7.268518447875977, "step": 4084 }, { "epoch": 0.64, "learning_rate": 1.1151229835572385e-05, "logits/chosen": -2.3188867568969727, "logits/rejected": -2.875063419342041, "logps/chosen": -180.58297729492188, "logps/rejected": -226.61865234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.77214515209198, "rewards/margins": 4.964519500732422, "rewards/rejected": -6.736664772033691, "step": 4085 }, { "epoch": 0.64, "learning_rate": 1.1150496395041237e-05, "logits/chosen": -3.1933071613311768, "logits/rejected": -2.5954487323760986, "logps/chosen": -380.52239990234375, "logps/rejected": -271.26495361328125, "loss": 3.5873, "rewards/accuracies": 0.5, "rewards/chosen": -3.2350242137908936, "rewards/margins": -0.9979772567749023, "rewards/rejected": -2.237046957015991, "step": 4086 }, { "epoch": 0.64, "learning_rate": 1.1149762954510089e-05, "logits/chosen": -2.7737197875976562, "logits/rejected": -3.1247458457946777, "logps/chosen": -104.38764190673828, "logps/rejected": -268.7882080078125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.26763612031936646, "rewards/margins": 6.703024864196777, "rewards/rejected": -6.970661163330078, "step": 4087 }, { "epoch": 0.64, "learning_rate": 1.114902951397894e-05, "logits/chosen": -2.214599132537842, "logits/rejected": -2.881375789642334, "logps/chosen": -76.65540313720703, "logps/rejected": -362.7685852050781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7881591320037842, "rewards/margins": 8.68950080871582, "rewards/rejected": -10.477659225463867, "step": 4088 }, { "epoch": 0.64, "learning_rate": 1.1148296073447793e-05, "logits/chosen": -2.735062599182129, "logits/rejected": -1.532556176185608, "logps/chosen": -228.54315185546875, "logps/rejected": -214.33309936523438, "loss": 0.1377, "rewards/accuracies": 1.0, "rewards/chosen": -1.487884521484375, "rewards/margins": 3.1502504348754883, "rewards/rejected": -4.638134956359863, "step": 4089 }, { "epoch": 0.64, "learning_rate": 1.1147562632916644e-05, "logits/chosen": -0.9137086868286133, "logits/rejected": -2.8132245540618896, "logps/chosen": -69.5811767578125, "logps/rejected": -292.3275451660156, "loss": 0.7142, "rewards/accuracies": 0.5, "rewards/chosen": -2.134092330932617, "rewards/margins": 2.84370756149292, "rewards/rejected": -4.977799892425537, "step": 4090 }, { "epoch": 0.64, "learning_rate": 1.1146829192385496e-05, "logits/chosen": -1.9431471824645996, "logits/rejected": -2.9942970275878906, "logps/chosen": -308.325439453125, "logps/rejected": -400.5850830078125, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -1.0522540807724, "rewards/margins": 4.528411865234375, "rewards/rejected": -5.580665588378906, "step": 4091 }, { "epoch": 0.64, "learning_rate": 1.1146095751854348e-05, "logits/chosen": -2.72894549369812, "logits/rejected": -1.6713093519210815, "logps/chosen": -302.5766906738281, "logps/rejected": -188.4680633544922, "loss": 0.7629, "rewards/accuracies": 0.5, "rewards/chosen": -0.850341796875, "rewards/margins": 2.7760753631591797, "rewards/rejected": -3.6264171600341797, "step": 4092 }, { "epoch": 0.64, "learning_rate": 1.1145362311323202e-05, "logits/chosen": -2.3490681648254395, "logits/rejected": -2.750647783279419, "logps/chosen": -144.8668212890625, "logps/rejected": -328.67657470703125, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.489577293395996, "rewards/margins": 6.196980953216553, "rewards/rejected": -7.686558723449707, "step": 4093 }, { "epoch": 0.64, "learning_rate": 1.1144628870792054e-05, "logits/chosen": -2.8364391326904297, "logits/rejected": -2.769526720046997, "logps/chosen": -169.89602661132812, "logps/rejected": -298.80267333984375, "loss": 2.5075, "rewards/accuracies": 0.5, "rewards/chosen": -5.164608955383301, "rewards/margins": 3.589501142501831, "rewards/rejected": -8.754110336303711, "step": 4094 }, { "epoch": 0.64, "learning_rate": 1.1143895430260906e-05, "logits/chosen": -0.9961658120155334, "logits/rejected": -2.7826156616210938, "logps/chosen": -39.26968765258789, "logps/rejected": -327.6734924316406, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -0.7996681928634644, "rewards/margins": 5.011727809906006, "rewards/rejected": -5.81139612197876, "step": 4095 }, { "epoch": 0.64, "learning_rate": 1.1143161989729757e-05, "logits/chosen": -2.8657076358795166, "logits/rejected": -2.984187602996826, "logps/chosen": -323.9442443847656, "logps/rejected": -409.4239501953125, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -0.4952407777309418, "rewards/margins": 6.54256010055542, "rewards/rejected": -7.0378007888793945, "step": 4096 }, { "epoch": 0.64, "learning_rate": 1.114242854919861e-05, "logits/chosen": -2.806645393371582, "logits/rejected": -2.8083455562591553, "logps/chosen": -291.4927673339844, "logps/rejected": -265.19891357421875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.3903915286064148, "rewards/margins": 5.680044174194336, "rewards/rejected": -6.070435523986816, "step": 4097 }, { "epoch": 0.64, "learning_rate": 1.1141695108667463e-05, "logits/chosen": -2.962790012359619, "logits/rejected": -2.266359806060791, "logps/chosen": -233.99118041992188, "logps/rejected": -311.66070556640625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.3234962224960327, "rewards/margins": 6.323933124542236, "rewards/rejected": -7.647429466247559, "step": 4098 }, { "epoch": 0.64, "learning_rate": 1.1140961668136315e-05, "logits/chosen": -2.8963329792022705, "logits/rejected": -2.6522135734558105, "logps/chosen": -181.42539978027344, "logps/rejected": -170.98709106445312, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.0776869058609009, "rewards/margins": 4.324535369873047, "rewards/rejected": -5.402222156524658, "step": 4099 }, { "epoch": 0.64, "learning_rate": 1.1140228227605167e-05, "logits/chosen": -2.7684919834136963, "logits/rejected": -3.0912439823150635, "logps/chosen": -41.17596435546875, "logps/rejected": -331.9039001464844, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.7434109449386597, "rewards/margins": 8.624637603759766, "rewards/rejected": -9.368048667907715, "step": 4100 }, { "epoch": 0.64, "learning_rate": 1.1139494787074018e-05, "logits/chosen": -2.919959783554077, "logits/rejected": -3.086677312850952, "logps/chosen": -102.56470489501953, "logps/rejected": -211.3595733642578, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.7515588998794556, "rewards/margins": 5.649373531341553, "rewards/rejected": -6.400932312011719, "step": 4101 }, { "epoch": 0.64, "learning_rate": 1.1138761346542872e-05, "logits/chosen": -3.27523136138916, "logits/rejected": -3.397631883621216, "logps/chosen": -249.68792724609375, "logps/rejected": -311.50775146484375, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": -0.18429183959960938, "rewards/margins": 4.214534759521484, "rewards/rejected": -4.398826599121094, "step": 4102 }, { "epoch": 0.64, "learning_rate": 1.1138027906011724e-05, "logits/chosen": -2.5462663173675537, "logits/rejected": -2.804708480834961, "logps/chosen": -72.52930450439453, "logps/rejected": -334.86627197265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.724557876586914, "rewards/margins": 8.226383209228516, "rewards/rejected": -9.95094108581543, "step": 4103 }, { "epoch": 0.64, "learning_rate": 1.1137294465480576e-05, "logits/chosen": -2.6176531314849854, "logits/rejected": -2.932058572769165, "logps/chosen": -12.96773910522461, "logps/rejected": -178.00994873046875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 0.4299042522907257, "rewards/margins": 6.64097785949707, "rewards/rejected": -6.211073398590088, "step": 4104 }, { "epoch": 0.64, "learning_rate": 1.1136561024949428e-05, "logits/chosen": -2.646516799926758, "logits/rejected": -3.291719675064087, "logps/chosen": -402.7289123535156, "logps/rejected": -823.1122436523438, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.3885670900344849, "rewards/margins": 5.268171310424805, "rewards/rejected": -6.65673828125, "step": 4105 }, { "epoch": 0.64, "learning_rate": 1.113582758441828e-05, "logits/chosen": -2.0297691822052, "logits/rejected": -2.7806284427642822, "logps/chosen": -92.50202941894531, "logps/rejected": -184.34156799316406, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": -2.8561575412750244, "rewards/margins": 2.7631168365478516, "rewards/rejected": -5.619274616241455, "step": 4106 }, { "epoch": 0.64, "learning_rate": 1.1135094143887131e-05, "logits/chosen": -2.7044241428375244, "logits/rejected": -3.11257004737854, "logps/chosen": -68.97952270507812, "logps/rejected": -289.0901794433594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.2096674144268036, "rewards/margins": 8.305991172790527, "rewards/rejected": -8.515658378601074, "step": 4107 }, { "epoch": 0.64, "learning_rate": 1.1134360703355983e-05, "logits/chosen": -1.424676775932312, "logits/rejected": -3.148327589035034, "logps/chosen": -52.36412811279297, "logps/rejected": -454.85986328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.039069175720214844, "rewards/margins": 7.153785228729248, "rewards/rejected": -7.192854404449463, "step": 4108 }, { "epoch": 0.64, "learning_rate": 1.1133627262824835e-05, "logits/chosen": -2.9404501914978027, "logits/rejected": -3.0522212982177734, "logps/chosen": -134.449462890625, "logps/rejected": -310.9949951171875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.4165487289428711, "rewards/margins": 6.027334213256836, "rewards/rejected": -5.610785484313965, "step": 4109 }, { "epoch": 0.64, "learning_rate": 1.1132893822293687e-05, "logits/chosen": -2.7112317085266113, "logits/rejected": -2.2046384811401367, "logps/chosen": -95.0064697265625, "logps/rejected": -215.86245727539062, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.27286186814308167, "rewards/margins": 6.705010890960693, "rewards/rejected": -6.977872848510742, "step": 4110 }, { "epoch": 0.64, "learning_rate": 1.113216038176254e-05, "logits/chosen": -2.0183658599853516, "logits/rejected": -2.99289608001709, "logps/chosen": -345.04229736328125, "logps/rejected": -513.58740234375, "loss": 0.2578, "rewards/accuracies": 1.0, "rewards/chosen": -3.2483856678009033, "rewards/margins": 4.5343217849731445, "rewards/rejected": -7.782707214355469, "step": 4111 }, { "epoch": 0.64, "learning_rate": 1.1131426941231393e-05, "logits/chosen": -2.79298734664917, "logits/rejected": -2.4536821842193604, "logps/chosen": -177.1357421875, "logps/rejected": -185.2865753173828, "loss": 1.1577, "rewards/accuracies": 0.5, "rewards/chosen": -2.458094835281372, "rewards/margins": 1.4703811407089233, "rewards/rejected": -3.928475856781006, "step": 4112 }, { "epoch": 0.64, "learning_rate": 1.1130693500700244e-05, "logits/chosen": -2.778324842453003, "logits/rejected": -2.2300310134887695, "logps/chosen": -290.7663879394531, "logps/rejected": -310.90081787109375, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -0.9934753775596619, "rewards/margins": 4.22603178024292, "rewards/rejected": -5.219507217407227, "step": 4113 }, { "epoch": 0.64, "learning_rate": 1.1129960060169096e-05, "logits/chosen": -2.3747594356536865, "logits/rejected": -2.985584020614624, "logps/chosen": -70.49986267089844, "logps/rejected": -438.6409912109375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.1781337857246399, "rewards/margins": 7.00295352935791, "rewards/rejected": -7.181087017059326, "step": 4114 }, { "epoch": 0.64, "learning_rate": 1.1129226619637948e-05, "logits/chosen": -2.5803024768829346, "logits/rejected": -2.8359217643737793, "logps/chosen": -384.8133239746094, "logps/rejected": -382.22088623046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.31218186020851135, "rewards/margins": 8.902364730834961, "rewards/rejected": -9.214546203613281, "step": 4115 }, { "epoch": 0.64, "learning_rate": 1.11284931791068e-05, "logits/chosen": -2.712815999984741, "logits/rejected": -2.7954373359680176, "logps/chosen": -428.88543701171875, "logps/rejected": -490.16046142578125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.0045974254608154, "rewards/margins": 7.170137405395508, "rewards/rejected": -8.174734115600586, "step": 4116 }, { "epoch": 0.64, "learning_rate": 1.1127759738575652e-05, "logits/chosen": -2.562244176864624, "logits/rejected": -3.29406476020813, "logps/chosen": -53.294532775878906, "logps/rejected": -223.76473999023438, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.22613525390625, "rewards/margins": 7.460823059082031, "rewards/rejected": -7.234687805175781, "step": 4117 }, { "epoch": 0.64, "learning_rate": 1.1127026298044504e-05, "logits/chosen": -2.962851047515869, "logits/rejected": -2.4177329540252686, "logps/chosen": -237.7454376220703, "logps/rejected": -204.13262939453125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.4264717102050781, "rewards/margins": 5.501246452331543, "rewards/rejected": -6.927718162536621, "step": 4118 }, { "epoch": 0.64, "learning_rate": 1.1126292857513356e-05, "logits/chosen": -0.9843884706497192, "logits/rejected": -2.1719179153442383, "logps/chosen": -201.4339599609375, "logps/rejected": -534.5145263671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2110741138458252, "rewards/margins": 7.871074676513672, "rewards/rejected": -9.082148551940918, "step": 4119 }, { "epoch": 0.64, "learning_rate": 1.112555941698221e-05, "logits/chosen": -2.873220682144165, "logits/rejected": -2.8435888290405273, "logps/chosen": -141.04843139648438, "logps/rejected": -108.29399871826172, "loss": 2.8465, "rewards/accuracies": 0.5, "rewards/chosen": -2.956061601638794, "rewards/margins": -2.030153751373291, "rewards/rejected": -0.9259080290794373, "step": 4120 }, { "epoch": 0.64, "learning_rate": 1.1124825976451061e-05, "logits/chosen": -2.856449842453003, "logits/rejected": -2.4520211219787598, "logps/chosen": -150.35049438476562, "logps/rejected": -261.4822082519531, "loss": 2.1124, "rewards/accuracies": 0.5, "rewards/chosen": -2.5760374069213867, "rewards/margins": 2.031662940979004, "rewards/rejected": -4.607700347900391, "step": 4121 }, { "epoch": 0.64, "learning_rate": 1.1124092535919913e-05, "logits/chosen": -2.7378509044647217, "logits/rejected": -1.3732874393463135, "logps/chosen": -491.1602478027344, "logps/rejected": -363.80615234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.1224334239959717, "rewards/margins": 7.20206356048584, "rewards/rejected": -9.32449722290039, "step": 4122 }, { "epoch": 0.64, "learning_rate": 1.1123359095388765e-05, "logits/chosen": -2.767024517059326, "logits/rejected": -3.1842405796051025, "logps/chosen": -281.4007873535156, "logps/rejected": -309.24603271484375, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.3299583196640015, "rewards/margins": 5.27610445022583, "rewards/rejected": -6.606062889099121, "step": 4123 }, { "epoch": 0.64, "learning_rate": 1.1122625654857617e-05, "logits/chosen": -2.744034767150879, "logits/rejected": -3.10942006111145, "logps/chosen": -884.0562133789062, "logps/rejected": -704.0316162109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.6032776832580566, "rewards/margins": 7.853848457336426, "rewards/rejected": -10.45712661743164, "step": 4124 }, { "epoch": 0.64, "learning_rate": 1.1121892214326469e-05, "logits/chosen": -2.7883074283599854, "logits/rejected": -2.7071003913879395, "logps/chosen": -128.00717163085938, "logps/rejected": -233.7320556640625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.282270908355713, "rewards/margins": 6.5331621170043945, "rewards/rejected": -7.815433025360107, "step": 4125 }, { "epoch": 0.64, "learning_rate": 1.112115877379532e-05, "logits/chosen": -2.7797138690948486, "logits/rejected": -2.927699089050293, "logps/chosen": -415.915283203125, "logps/rejected": -442.6815185546875, "loss": 1.1332, "rewards/accuracies": 0.5, "rewards/chosen": -0.13271179795265198, "rewards/margins": 2.716627597808838, "rewards/rejected": -2.849339485168457, "step": 4126 }, { "epoch": 0.64, "learning_rate": 1.1120425333264172e-05, "logits/chosen": -2.982710361480713, "logits/rejected": -2.606868028640747, "logps/chosen": -230.10595703125, "logps/rejected": -356.13427734375, "loss": 0.7115, "rewards/accuracies": 0.5, "rewards/chosen": -1.2564048767089844, "rewards/margins": 3.5799920558929443, "rewards/rejected": -4.83639669418335, "step": 4127 }, { "epoch": 0.64, "learning_rate": 1.1119691892733024e-05, "logits/chosen": -2.773757219314575, "logits/rejected": -2.3134562969207764, "logps/chosen": -272.93243408203125, "logps/rejected": -377.7918701171875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3072761297225952, "rewards/margins": 8.601846694946289, "rewards/rejected": -9.909122467041016, "step": 4128 }, { "epoch": 0.64, "learning_rate": 1.1118958452201878e-05, "logits/chosen": -3.06536865234375, "logits/rejected": -3.1709611415863037, "logps/chosen": -323.08807373046875, "logps/rejected": -446.1192626953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9503767490386963, "rewards/margins": 10.641979217529297, "rewards/rejected": -12.59235668182373, "step": 4129 }, { "epoch": 0.64, "learning_rate": 1.111822501167073e-05, "logits/chosen": -2.8668627738952637, "logits/rejected": -2.911066770553589, "logps/chosen": -294.3504638671875, "logps/rejected": -561.4996948242188, "loss": 0.0716, "rewards/accuracies": 1.0, "rewards/chosen": -1.5304611921310425, "rewards/margins": 7.325596809387207, "rewards/rejected": -8.856058120727539, "step": 4130 }, { "epoch": 0.64, "learning_rate": 1.1117491571139582e-05, "logits/chosen": -2.654426097869873, "logits/rejected": -3.1472318172454834, "logps/chosen": -164.7230987548828, "logps/rejected": -189.17501831054688, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.8073680400848389, "rewards/margins": 5.75160026550293, "rewards/rejected": -6.558968544006348, "step": 4131 }, { "epoch": 0.64, "learning_rate": 1.1116758130608435e-05, "logits/chosen": -2.1111080646514893, "logits/rejected": -2.631533145904541, "logps/chosen": -165.60525512695312, "logps/rejected": -305.92950439453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9157236814498901, "rewards/margins": 6.943124771118164, "rewards/rejected": -7.858848571777344, "step": 4132 }, { "epoch": 0.64, "learning_rate": 1.1116024690077287e-05, "logits/chosen": -2.755502939224243, "logits/rejected": -3.3253726959228516, "logps/chosen": -61.225929260253906, "logps/rejected": -229.5544891357422, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.1093196868896484, "rewards/margins": 5.918064117431641, "rewards/rejected": -7.027383804321289, "step": 4133 }, { "epoch": 0.64, "learning_rate": 1.1115291249546139e-05, "logits/chosen": -2.524444341659546, "logits/rejected": -2.788449764251709, "logps/chosen": -354.3285217285156, "logps/rejected": -581.7108154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8768770694732666, "rewards/margins": 11.23902702331543, "rewards/rejected": -12.115903854370117, "step": 4134 }, { "epoch": 0.64, "learning_rate": 1.111455780901499e-05, "logits/chosen": -2.7594001293182373, "logits/rejected": -2.474835157394409, "logps/chosen": -173.17283630371094, "logps/rejected": -216.72943115234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.37559205293655396, "rewards/margins": 7.462464809417725, "rewards/rejected": -7.0868730545043945, "step": 4135 }, { "epoch": 0.64, "learning_rate": 1.1113824368483843e-05, "logits/chosen": -2.6471023559570312, "logits/rejected": -3.366162061691284, "logps/chosen": -64.07866668701172, "logps/rejected": -303.2276611328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.007731556892395, "rewards/margins": 7.154080390930176, "rewards/rejected": -8.161811828613281, "step": 4136 }, { "epoch": 0.64, "learning_rate": 1.1113090927952695e-05, "logits/chosen": -2.2273552417755127, "logits/rejected": -3.040527820587158, "logps/chosen": -590.4490356445312, "logps/rejected": -467.5948486328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.0007431507110596, "rewards/margins": 8.713096618652344, "rewards/rejected": -11.71384048461914, "step": 4137 }, { "epoch": 0.64, "learning_rate": 1.1112357487421548e-05, "logits/chosen": -2.836078643798828, "logits/rejected": -2.499495506286621, "logps/chosen": -465.6219787597656, "logps/rejected": -388.9465637207031, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": -2.6148977279663086, "rewards/margins": 2.9486217498779297, "rewards/rejected": -5.563519477844238, "step": 4138 }, { "epoch": 0.64, "learning_rate": 1.11116240468904e-05, "logits/chosen": -2.84721302986145, "logits/rejected": -3.077401876449585, "logps/chosen": -134.43374633789062, "logps/rejected": -201.54595947265625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.6146726012229919, "rewards/margins": 5.35390567779541, "rewards/rejected": -5.968578338623047, "step": 4139 }, { "epoch": 0.64, "learning_rate": 1.1110890606359252e-05, "logits/chosen": -2.7077553272247314, "logits/rejected": -2.3356521129608154, "logps/chosen": -1187.78759765625, "logps/rejected": -624.3297119140625, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.3967437744140625, "rewards/margins": 5.118639945983887, "rewards/rejected": -6.515383720397949, "step": 4140 }, { "epoch": 0.64, "learning_rate": 1.1110157165828104e-05, "logits/chosen": -2.9976487159729004, "logits/rejected": -3.1791467666625977, "logps/chosen": -149.59326171875, "logps/rejected": -391.45623779296875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.609527587890625, "rewards/margins": 5.986560821533203, "rewards/rejected": -7.596088409423828, "step": 4141 }, { "epoch": 0.64, "learning_rate": 1.1109423725296956e-05, "logits/chosen": -1.6285499334335327, "logits/rejected": -3.165966749191284, "logps/chosen": -165.7859344482422, "logps/rejected": -453.2355041503906, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -0.40430888533592224, "rewards/margins": 3.3103976249694824, "rewards/rejected": -3.7147064208984375, "step": 4142 }, { "epoch": 0.64, "learning_rate": 1.1108690284765808e-05, "logits/chosen": -1.7662951946258545, "logits/rejected": -3.0612614154815674, "logps/chosen": -159.95257568359375, "logps/rejected": -393.0351867675781, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.7422824501991272, "rewards/margins": 6.8424072265625, "rewards/rejected": -7.584689617156982, "step": 4143 }, { "epoch": 0.64, "learning_rate": 1.110795684423466e-05, "logits/chosen": -2.7342729568481445, "logits/rejected": -3.0827131271362305, "logps/chosen": -29.942424774169922, "logps/rejected": -165.184814453125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.0368938446044922, "rewards/margins": 5.752104759216309, "rewards/rejected": -6.788999080657959, "step": 4144 }, { "epoch": 0.64, "learning_rate": 1.1107223403703511e-05, "logits/chosen": -2.411407947540283, "logits/rejected": -2.797560930252075, "logps/chosen": -94.94696044921875, "logps/rejected": -207.0516357421875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.0923285484313965, "rewards/margins": 5.458230018615723, "rewards/rejected": -6.550558567047119, "step": 4145 }, { "epoch": 0.64, "learning_rate": 1.1106489963172363e-05, "logits/chosen": -2.6477715969085693, "logits/rejected": -2.807300090789795, "logps/chosen": -126.53553009033203, "logps/rejected": -208.51382446289062, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -2.105762004852295, "rewards/margins": 5.318073272705078, "rewards/rejected": -7.423835754394531, "step": 4146 }, { "epoch": 0.64, "learning_rate": 1.1105756522641217e-05, "logits/chosen": -1.6754274368286133, "logits/rejected": -2.417508840560913, "logps/chosen": -192.55728149414062, "logps/rejected": -355.8353271484375, "loss": 0.2152, "rewards/accuracies": 1.0, "rewards/chosen": -3.8840689659118652, "rewards/margins": 4.1348795890808105, "rewards/rejected": -8.018948554992676, "step": 4147 }, { "epoch": 0.65, "learning_rate": 1.1105023082110069e-05, "logits/chosen": -2.8128302097320557, "logits/rejected": -3.0433311462402344, "logps/chosen": -28.12234115600586, "logps/rejected": -163.05325317382812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.03535771369934082, "rewards/margins": 9.879066467285156, "rewards/rejected": -9.843708038330078, "step": 4148 }, { "epoch": 0.65, "learning_rate": 1.110428964157892e-05, "logits/chosen": -1.952123761177063, "logits/rejected": -2.8117551803588867, "logps/chosen": -264.89788818359375, "logps/rejected": -571.2308959960938, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.732710361480713, "rewards/margins": 7.611948490142822, "rewards/rejected": -9.344658851623535, "step": 4149 }, { "epoch": 0.65, "learning_rate": 1.1103556201047772e-05, "logits/chosen": -1.73006010055542, "logits/rejected": -2.9897758960723877, "logps/chosen": -198.76315307617188, "logps/rejected": -397.73028564453125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.8112198114395142, "rewards/margins": 7.102259635925293, "rewards/rejected": -7.913479804992676, "step": 4150 }, { "epoch": 0.65, "learning_rate": 1.1102822760516624e-05, "logits/chosen": -2.286881923675537, "logits/rejected": -3.1137118339538574, "logps/chosen": -523.7147827148438, "logps/rejected": -610.92578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7960388660430908, "rewards/margins": 9.018044471740723, "rewards/rejected": -10.81408405303955, "step": 4151 }, { "epoch": 0.65, "learning_rate": 1.1102089319985476e-05, "logits/chosen": -2.043455123901367, "logits/rejected": -2.8195981979370117, "logps/chosen": -419.33819580078125, "logps/rejected": -774.078369140625, "loss": 4.8845, "rewards/accuracies": 0.5, "rewards/chosen": -7.112543106079102, "rewards/margins": 1.3497462272644043, "rewards/rejected": -8.462289810180664, "step": 4152 }, { "epoch": 0.65, "learning_rate": 1.1101355879454328e-05, "logits/chosen": -2.7007060050964355, "logits/rejected": -2.748490571975708, "logps/chosen": -169.34754943847656, "logps/rejected": -169.38778686523438, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": -3.2550034523010254, "rewards/margins": 3.467252731323242, "rewards/rejected": -6.722256183624268, "step": 4153 }, { "epoch": 0.65, "learning_rate": 1.110062243892318e-05, "logits/chosen": -2.7948648929595947, "logits/rejected": -2.150533676147461, "logps/chosen": -811.0606689453125, "logps/rejected": -457.62908935546875, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -2.1729183197021484, "rewards/margins": 5.889345169067383, "rewards/rejected": -8.062263488769531, "step": 4154 }, { "epoch": 0.65, "learning_rate": 1.1099888998392032e-05, "logits/chosen": -1.0846213102340698, "logits/rejected": -2.132662296295166, "logps/chosen": -520.4806518554688, "logps/rejected": -887.0718383789062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2997207641601562, "rewards/margins": 10.317146301269531, "rewards/rejected": -12.616867065429688, "step": 4155 }, { "epoch": 0.65, "learning_rate": 1.1099155557860885e-05, "logits/chosen": -3.0054409503936768, "logits/rejected": -2.3186511993408203, "logps/chosen": -220.0977325439453, "logps/rejected": -180.86032104492188, "loss": 3.244, "rewards/accuracies": 0.5, "rewards/chosen": -4.6511640548706055, "rewards/margins": 1.8282463550567627, "rewards/rejected": -6.479410171508789, "step": 4156 }, { "epoch": 0.65, "learning_rate": 1.1098422117329737e-05, "logits/chosen": -1.992629051208496, "logits/rejected": -2.963085174560547, "logps/chosen": -132.76336669921875, "logps/rejected": -465.153076171875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8901950716972351, "rewards/margins": 6.24193000793457, "rewards/rejected": -7.132124900817871, "step": 4157 }, { "epoch": 0.65, "learning_rate": 1.1097688676798589e-05, "logits/chosen": -2.89984393119812, "logits/rejected": -3.0942044258117676, "logps/chosen": -336.5982971191406, "logps/rejected": -588.93896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.155513048171997, "rewards/margins": 10.68781566619873, "rewards/rejected": -11.843328475952148, "step": 4158 }, { "epoch": 0.65, "learning_rate": 1.1096955236267441e-05, "logits/chosen": -1.7721846103668213, "logits/rejected": -3.2117772102355957, "logps/chosen": -189.75350952148438, "logps/rejected": -541.0867919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.113385796546936, "rewards/margins": 10.563739776611328, "rewards/rejected": -11.677125930786133, "step": 4159 }, { "epoch": 0.65, "learning_rate": 1.1096221795736293e-05, "logits/chosen": -2.7875730991363525, "logits/rejected": -1.4963680505752563, "logps/chosen": -896.9077758789062, "logps/rejected": -441.7802734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0125664472579956, "rewards/margins": 8.416048049926758, "rewards/rejected": -9.42861557006836, "step": 4160 }, { "epoch": 0.65, "learning_rate": 1.1095488355205145e-05, "logits/chosen": -2.4463133811950684, "logits/rejected": -3.1840572357177734, "logps/chosen": -476.89337158203125, "logps/rejected": -716.5010375976562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3424019813537598, "rewards/margins": 9.301170349121094, "rewards/rejected": -10.643571853637695, "step": 4161 }, { "epoch": 0.65, "learning_rate": 1.1094754914673997e-05, "logits/chosen": -2.701231002807617, "logits/rejected": -2.8134846687316895, "logps/chosen": -70.33493041992188, "logps/rejected": -310.87860107421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.402340292930603, "rewards/margins": 8.354751586914062, "rewards/rejected": -9.757092475891113, "step": 4162 }, { "epoch": 0.65, "learning_rate": 1.1094021474142848e-05, "logits/chosen": -2.6221323013305664, "logits/rejected": -2.79909610748291, "logps/chosen": -94.32862091064453, "logps/rejected": -202.70541381835938, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0872044563293457, "rewards/margins": 7.707183361053467, "rewards/rejected": -8.794387817382812, "step": 4163 }, { "epoch": 0.65, "learning_rate": 1.10932880336117e-05, "logits/chosen": -2.695817470550537, "logits/rejected": -3.265787363052368, "logps/chosen": -193.08499145507812, "logps/rejected": -476.0030822753906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.617337167263031, "rewards/margins": 9.389646530151367, "rewards/rejected": -10.006983757019043, "step": 4164 }, { "epoch": 0.65, "learning_rate": 1.1092554593080554e-05, "logits/chosen": -2.790313959121704, "logits/rejected": -2.631194829940796, "logps/chosen": -198.92845153808594, "logps/rejected": -271.0250244140625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.3782878518104553, "rewards/margins": 7.378727436065674, "rewards/rejected": -7.757015228271484, "step": 4165 }, { "epoch": 0.65, "learning_rate": 1.1091821152549407e-05, "logits/chosen": -1.2596920728683472, "logits/rejected": -2.8002076148986816, "logps/chosen": -16.75417709350586, "logps/rejected": -347.42340087890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.1471758931875229, "rewards/margins": 10.883453369140625, "rewards/rejected": -11.030630111694336, "step": 4166 }, { "epoch": 0.65, "learning_rate": 1.109108771201826e-05, "logits/chosen": -2.8801276683807373, "logits/rejected": -1.8177192211151123, "logps/chosen": -228.2958221435547, "logps/rejected": -187.6452178955078, "loss": 1.8055, "rewards/accuracies": 0.5, "rewards/chosen": -2.702493906021118, "rewards/margins": 1.8443944454193115, "rewards/rejected": -4.54688835144043, "step": 4167 }, { "epoch": 0.65, "learning_rate": 1.1090354271487111e-05, "logits/chosen": -2.055105209350586, "logits/rejected": -3.067497730255127, "logps/chosen": -101.7670669555664, "logps/rejected": -391.4620056152344, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.048189640045166016, "rewards/margins": 7.484703540802002, "rewards/rejected": -7.532893180847168, "step": 4168 }, { "epoch": 0.65, "learning_rate": 1.1089620830955963e-05, "logits/chosen": -2.2169809341430664, "logits/rejected": -2.7373149394989014, "logps/chosen": -262.6084289550781, "logps/rejected": -476.1061096191406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2995792627334595, "rewards/margins": 9.736717224121094, "rewards/rejected": -11.036296844482422, "step": 4169 }, { "epoch": 0.65, "learning_rate": 1.1088887390424815e-05, "logits/chosen": -2.8463902473449707, "logits/rejected": -2.2560861110687256, "logps/chosen": -137.89419555664062, "logps/rejected": -145.34805297851562, "loss": 2.4116, "rewards/accuracies": 0.5, "rewards/chosen": -3.316885232925415, "rewards/margins": 2.0244476795196533, "rewards/rejected": -5.341332912445068, "step": 4170 }, { "epoch": 0.65, "learning_rate": 1.1088153949893667e-05, "logits/chosen": -2.6443183422088623, "logits/rejected": -3.1841399669647217, "logps/chosen": -50.259830474853516, "logps/rejected": -315.3826904296875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.268871545791626, "rewards/margins": 7.772885322570801, "rewards/rejected": -9.041757583618164, "step": 4171 }, { "epoch": 0.65, "learning_rate": 1.1087420509362519e-05, "logits/chosen": -2.9060957431793213, "logits/rejected": -1.8850493431091309, "logps/chosen": -374.88507080078125, "logps/rejected": -216.00552368164062, "loss": 4.2975, "rewards/accuracies": 0.5, "rewards/chosen": -4.428150177001953, "rewards/margins": -1.401643991470337, "rewards/rejected": -3.026505947113037, "step": 4172 }, { "epoch": 0.65, "learning_rate": 1.108668706883137e-05, "logits/chosen": -1.8803942203521729, "logits/rejected": -2.8526875972747803, "logps/chosen": -15.694169998168945, "logps/rejected": -234.00584411621094, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.012578770518302917, "rewards/margins": 8.238653182983398, "rewards/rejected": -8.251232147216797, "step": 4173 }, { "epoch": 0.65, "learning_rate": 1.1085953628300224e-05, "logits/chosen": -2.5967836380004883, "logits/rejected": -3.092494249343872, "logps/chosen": -657.0753784179688, "logps/rejected": -536.185302734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.8334825038909912, "rewards/margins": 9.168201446533203, "rewards/rejected": -11.001684188842773, "step": 4174 }, { "epoch": 0.65, "learning_rate": 1.1085220187769076e-05, "logits/chosen": -2.536597490310669, "logits/rejected": -2.767822027206421, "logps/chosen": -186.371826171875, "logps/rejected": -303.0162658691406, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -1.280774712562561, "rewards/margins": 6.341102123260498, "rewards/rejected": -7.6218767166137695, "step": 4175 }, { "epoch": 0.65, "learning_rate": 1.1084486747237928e-05, "logits/chosen": -3.2312915325164795, "logits/rejected": -2.915226697921753, "logps/chosen": -608.2649536132812, "logps/rejected": -451.948974609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8795841336250305, "rewards/margins": 9.90707015991211, "rewards/rejected": -10.78665542602539, "step": 4176 }, { "epoch": 0.65, "learning_rate": 1.108375330670678e-05, "logits/chosen": -3.030113697052002, "logits/rejected": -1.6646666526794434, "logps/chosen": -336.79638671875, "logps/rejected": -72.29763793945312, "loss": 2.3295, "rewards/accuracies": 0.5, "rewards/chosen": -2.8829758167266846, "rewards/margins": -0.1701192855834961, "rewards/rejected": -2.7128565311431885, "step": 4177 }, { "epoch": 0.65, "learning_rate": 1.1083019866175632e-05, "logits/chosen": -2.7576465606689453, "logits/rejected": -2.973446846008301, "logps/chosen": -297.3599853515625, "logps/rejected": -341.6908264160156, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.4572091102600098, "rewards/margins": 5.585392951965332, "rewards/rejected": -7.042602062225342, "step": 4178 }, { "epoch": 0.65, "learning_rate": 1.1082286425644484e-05, "logits/chosen": -2.4876370429992676, "logits/rejected": -3.0049335956573486, "logps/chosen": -107.6075668334961, "logps/rejected": -292.6627502441406, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1277496814727783, "rewards/margins": 7.889410018920898, "rewards/rejected": -9.017159461975098, "step": 4179 }, { "epoch": 0.65, "learning_rate": 1.1081552985113335e-05, "logits/chosen": -2.25296950340271, "logits/rejected": -3.0718939304351807, "logps/chosen": -198.77670288085938, "logps/rejected": -332.445068359375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": -0.41243019700050354, "rewards/margins": 4.5569868087768555, "rewards/rejected": -4.969417095184326, "step": 4180 }, { "epoch": 0.65, "learning_rate": 1.1080819544582187e-05, "logits/chosen": -3.0318078994750977, "logits/rejected": -3.248645782470703, "logps/chosen": -174.55567932128906, "logps/rejected": -175.0787811279297, "loss": 4.0384, "rewards/accuracies": 0.5, "rewards/chosen": -4.74490213394165, "rewards/margins": -2.4521901607513428, "rewards/rejected": -2.2927119731903076, "step": 4181 }, { "epoch": 0.65, "learning_rate": 1.1080086104051041e-05, "logits/chosen": -2.07157826423645, "logits/rejected": -3.0877530574798584, "logps/chosen": -158.76101684570312, "logps/rejected": -405.93206787109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.058915615081787, "rewards/margins": 7.876518249511719, "rewards/rejected": -8.935434341430664, "step": 4182 }, { "epoch": 0.65, "learning_rate": 1.1079352663519893e-05, "logits/chosen": -0.9636585116386414, "logits/rejected": -2.517302989959717, "logps/chosen": -96.18973541259766, "logps/rejected": -504.8050842285156, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.7385040521621704, "rewards/margins": 8.266502380371094, "rewards/rejected": -9.005006790161133, "step": 4183 }, { "epoch": 0.65, "learning_rate": 1.1078619222988745e-05, "logits/chosen": -2.2630069255828857, "logits/rejected": -2.830918312072754, "logps/chosen": -56.276710510253906, "logps/rejected": -319.29522705078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6375504732131958, "rewards/margins": 9.01269245147705, "rewards/rejected": -9.650242805480957, "step": 4184 }, { "epoch": 0.65, "learning_rate": 1.1077885782457597e-05, "logits/chosen": -0.7229667901992798, "logits/rejected": -2.3225386142730713, "logps/chosen": -71.89555358886719, "logps/rejected": -589.0416259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6186670660972595, "rewards/margins": 11.319878578186035, "rewards/rejected": -11.938545227050781, "step": 4185 }, { "epoch": 0.65, "learning_rate": 1.1077152341926448e-05, "logits/chosen": -1.1924102306365967, "logits/rejected": -2.9545018672943115, "logps/chosen": -89.50431060791016, "logps/rejected": -322.3404541015625, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": -1.2226721048355103, "rewards/margins": 4.986195087432861, "rewards/rejected": -6.208867073059082, "step": 4186 }, { "epoch": 0.65, "learning_rate": 1.10764189013953e-05, "logits/chosen": -2.3135459423065186, "logits/rejected": -3.0311872959136963, "logps/chosen": -112.80908203125, "logps/rejected": -226.42051696777344, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.604916572570801, "rewards/margins": 5.953645706176758, "rewards/rejected": -8.558561325073242, "step": 4187 }, { "epoch": 0.65, "learning_rate": 1.1075685460864152e-05, "logits/chosen": -2.8634350299835205, "logits/rejected": -2.916867971420288, "logps/chosen": -268.03271484375, "logps/rejected": -331.44158935546875, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -2.6478378772735596, "rewards/margins": 4.3065185546875, "rewards/rejected": -6.9543561935424805, "step": 4188 }, { "epoch": 0.65, "learning_rate": 1.1074952020333004e-05, "logits/chosen": -1.3855340480804443, "logits/rejected": -2.796071767807007, "logps/chosen": -176.24468994140625, "logps/rejected": -508.3439025878906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.5951884984970093, "rewards/margins": 8.527350425720215, "rewards/rejected": -9.122539520263672, "step": 4189 }, { "epoch": 0.65, "learning_rate": 1.1074218579801856e-05, "logits/chosen": -2.993926763534546, "logits/rejected": -2.812248468399048, "logps/chosen": -463.9814758300781, "logps/rejected": -409.7338562011719, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.902758002281189, "rewards/margins": 6.231559753417969, "rewards/rejected": -7.134317874908447, "step": 4190 }, { "epoch": 0.65, "learning_rate": 1.107348513927071e-05, "logits/chosen": -3.1682262420654297, "logits/rejected": -2.438141107559204, "logps/chosen": -273.20916748046875, "logps/rejected": -224.42221069335938, "loss": 4.5629, "rewards/accuracies": 0.5, "rewards/chosen": -4.497745513916016, "rewards/margins": 0.6764955520629883, "rewards/rejected": -5.174241065979004, "step": 4191 }, { "epoch": 0.65, "learning_rate": 1.1072751698739561e-05, "logits/chosen": -2.8963892459869385, "logits/rejected": -3.0918948650360107, "logps/chosen": -127.9095230102539, "logps/rejected": -304.127197265625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -3.791015148162842, "rewards/margins": 5.445418834686279, "rewards/rejected": -9.236433982849121, "step": 4192 }, { "epoch": 0.65, "learning_rate": 1.1072018258208413e-05, "logits/chosen": -2.2527248859405518, "logits/rejected": -2.6011855602264404, "logps/chosen": -184.93653869628906, "logps/rejected": -295.5039367675781, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.5243759155273438, "rewards/margins": 6.918704032897949, "rewards/rejected": -7.443079948425293, "step": 4193 }, { "epoch": 0.65, "learning_rate": 1.1071284817677265e-05, "logits/chosen": -1.837040662765503, "logits/rejected": -2.773049831390381, "logps/chosen": -136.7357940673828, "logps/rejected": -193.2572021484375, "loss": 0.0949, "rewards/accuracies": 1.0, "rewards/chosen": -0.5351917147636414, "rewards/margins": 3.8379781246185303, "rewards/rejected": -4.373169898986816, "step": 4194 }, { "epoch": 0.65, "learning_rate": 1.1070551377146117e-05, "logits/chosen": -2.8472280502319336, "logits/rejected": -2.4225986003875732, "logps/chosen": -133.08303833007812, "logps/rejected": -169.05264282226562, "loss": 0.5477, "rewards/accuracies": 0.5, "rewards/chosen": -2.0154242515563965, "rewards/margins": 3.289560556411743, "rewards/rejected": -5.304985046386719, "step": 4195 }, { "epoch": 0.65, "learning_rate": 1.1069817936614969e-05, "logits/chosen": -2.837083339691162, "logits/rejected": -2.540081262588501, "logps/chosen": -243.63897705078125, "logps/rejected": -269.5787353515625, "loss": 0.2711, "rewards/accuracies": 1.0, "rewards/chosen": -1.0379371643066406, "rewards/margins": 2.906902313232422, "rewards/rejected": -3.9448394775390625, "step": 4196 }, { "epoch": 0.65, "learning_rate": 1.106908449608382e-05, "logits/chosen": -1.5587959289550781, "logits/rejected": -2.9469733238220215, "logps/chosen": -74.19263458251953, "logps/rejected": -269.1095886230469, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.7846080660820007, "rewards/margins": 5.2534990310668945, "rewards/rejected": -6.038107395172119, "step": 4197 }, { "epoch": 0.65, "learning_rate": 1.1068351055552673e-05, "logits/chosen": -2.8054358959198, "logits/rejected": -3.1589431762695312, "logps/chosen": -327.1454772949219, "logps/rejected": -452.9701843261719, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.7546745538711548, "rewards/margins": 6.7215495109558105, "rewards/rejected": -8.476223945617676, "step": 4198 }, { "epoch": 0.65, "learning_rate": 1.1067617615021526e-05, "logits/chosen": -2.128270149230957, "logits/rejected": -2.3951187133789062, "logps/chosen": -420.7170715332031, "logps/rejected": -428.50067138671875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.9092854261398315, "rewards/margins": 6.362159729003906, "rewards/rejected": -7.271445274353027, "step": 4199 }, { "epoch": 0.65, "learning_rate": 1.1066884174490378e-05, "logits/chosen": -2.919182777404785, "logits/rejected": -2.9133973121643066, "logps/chosen": -600.2432250976562, "logps/rejected": -561.2340087890625, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -1.1353332996368408, "rewards/margins": 4.69281005859375, "rewards/rejected": -5.82814359664917, "step": 4200 }, { "epoch": 0.65, "learning_rate": 1.1066150733959232e-05, "logits/chosen": -2.77083420753479, "logits/rejected": -2.974724292755127, "logps/chosen": -343.9930419921875, "logps/rejected": -427.8536071777344, "loss": 2.5918, "rewards/accuracies": 0.5, "rewards/chosen": -2.9253923892974854, "rewards/margins": 2.4046437740325928, "rewards/rejected": -5.330036163330078, "step": 4201 }, { "epoch": 0.65, "learning_rate": 1.1065417293428084e-05, "logits/chosen": -2.522428035736084, "logits/rejected": -2.9225821495056152, "logps/chosen": -52.09714126586914, "logps/rejected": -204.15707397460938, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.8187311887741089, "rewards/margins": 5.76245641708374, "rewards/rejected": -6.5811872482299805, "step": 4202 }, { "epoch": 0.65, "learning_rate": 1.1064683852896935e-05, "logits/chosen": -0.8511877059936523, "logits/rejected": -2.8100264072418213, "logps/chosen": -37.34449768066406, "logps/rejected": -386.6593322753906, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.182519793510437, "rewards/margins": 7.233067512512207, "rewards/rejected": -8.415587425231934, "step": 4203 }, { "epoch": 0.65, "learning_rate": 1.1063950412365787e-05, "logits/chosen": -3.1190032958984375, "logits/rejected": -3.044217109680176, "logps/chosen": -152.82440185546875, "logps/rejected": -124.3070068359375, "loss": 4.209, "rewards/accuracies": 0.5, "rewards/chosen": -4.4146599769592285, "rewards/margins": -1.0808570384979248, "rewards/rejected": -3.333803176879883, "step": 4204 }, { "epoch": 0.65, "learning_rate": 1.106321697183464e-05, "logits/chosen": -2.9371497631073, "logits/rejected": -1.4895333051681519, "logps/chosen": -282.05352783203125, "logps/rejected": -120.53252410888672, "loss": 0.2876, "rewards/accuracies": 1.0, "rewards/chosen": -1.2154121398925781, "rewards/margins": 3.5660691261291504, "rewards/rejected": -4.7814812660217285, "step": 4205 }, { "epoch": 0.65, "learning_rate": 1.1062483531303491e-05, "logits/chosen": -2.220360517501831, "logits/rejected": -3.2437872886657715, "logps/chosen": -43.115638732910156, "logps/rejected": -293.76336669921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7699939012527466, "rewards/margins": 7.318661689758301, "rewards/rejected": -8.088655471801758, "step": 4206 }, { "epoch": 0.65, "learning_rate": 1.1061750090772343e-05, "logits/chosen": -2.6148126125335693, "logits/rejected": -2.9501283168792725, "logps/chosen": -104.69309997558594, "logps/rejected": -343.3642883300781, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.9491355419158936, "rewards/margins": 6.812113285064697, "rewards/rejected": -8.761249542236328, "step": 4207 }, { "epoch": 0.65, "learning_rate": 1.1061016650241195e-05, "logits/chosen": -2.870687484741211, "logits/rejected": -3.173485517501831, "logps/chosen": -412.7268981933594, "logps/rejected": -518.4462890625, "loss": 5.2718, "rewards/accuracies": 0.5, "rewards/chosen": -6.625575065612793, "rewards/margins": -2.5250182151794434, "rewards/rejected": -4.10055685043335, "step": 4208 }, { "epoch": 0.65, "learning_rate": 1.1060283209710048e-05, "logits/chosen": -1.8169621229171753, "logits/rejected": -3.1724870204925537, "logps/chosen": -89.04910278320312, "logps/rejected": -365.2255554199219, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.24315223097801208, "rewards/margins": 8.194557189941406, "rewards/rejected": -7.951404571533203, "step": 4209 }, { "epoch": 0.65, "learning_rate": 1.10595497691789e-05, "logits/chosen": -3.014310598373413, "logits/rejected": -2.803142786026001, "logps/chosen": -297.5074768066406, "logps/rejected": -128.77891540527344, "loss": 0.311, "rewards/accuracies": 1.0, "rewards/chosen": -1.4117218255996704, "rewards/margins": 1.5832605361938477, "rewards/rejected": -2.9949824810028076, "step": 4210 }, { "epoch": 0.65, "learning_rate": 1.1058816328647752e-05, "logits/chosen": -2.4897375106811523, "logits/rejected": -2.7375729084014893, "logps/chosen": -190.31552124023438, "logps/rejected": -279.590576171875, "loss": 2.1798, "rewards/accuracies": 0.5, "rewards/chosen": -5.051093101501465, "rewards/margins": 1.8079543113708496, "rewards/rejected": -6.859046936035156, "step": 4211 }, { "epoch": 0.66, "learning_rate": 1.1058082888116604e-05, "logits/chosen": -2.912992000579834, "logits/rejected": -3.0017378330230713, "logps/chosen": -146.4886016845703, "logps/rejected": -301.2394714355469, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.16163673996925354, "rewards/margins": 7.157800197601318, "rewards/rejected": -6.996163368225098, "step": 4212 }, { "epoch": 0.66, "learning_rate": 1.1057349447585456e-05, "logits/chosen": -2.9653871059417725, "logits/rejected": -2.4734368324279785, "logps/chosen": -393.8486633300781, "logps/rejected": -384.79205322265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.176252841949463, "rewards/margins": 7.330603122711182, "rewards/rejected": -9.506855964660645, "step": 4213 }, { "epoch": 0.66, "learning_rate": 1.1056616007054308e-05, "logits/chosen": -2.7776284217834473, "logits/rejected": -3.214841365814209, "logps/chosen": -142.37295532226562, "logps/rejected": -269.90081787109375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.7289405465126038, "rewards/margins": 6.227733135223389, "rewards/rejected": -6.956673622131348, "step": 4214 }, { "epoch": 0.66, "learning_rate": 1.105588256652316e-05, "logits/chosen": -2.49204158782959, "logits/rejected": -2.71693754196167, "logps/chosen": -105.6409912109375, "logps/rejected": -392.2699890136719, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.036764144897461, "rewards/margins": 7.488296985626221, "rewards/rejected": -8.525060653686523, "step": 4215 }, { "epoch": 0.66, "learning_rate": 1.1055149125992012e-05, "logits/chosen": -1.122698187828064, "logits/rejected": -2.371763229370117, "logps/chosen": -129.84095764160156, "logps/rejected": -339.8255615234375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.5677334070205688, "rewards/margins": 5.872241497039795, "rewards/rejected": -7.439974784851074, "step": 4216 }, { "epoch": 0.66, "learning_rate": 1.1054415685460863e-05, "logits/chosen": -2.2509665489196777, "logits/rejected": -2.768847703933716, "logps/chosen": -89.72877502441406, "logps/rejected": -150.56826782226562, "loss": 2.1841, "rewards/accuracies": 0.5, "rewards/chosen": -3.5320885181427, "rewards/margins": -0.6498677730560303, "rewards/rejected": -2.88222074508667, "step": 4217 }, { "epoch": 0.66, "learning_rate": 1.1053682244929717e-05, "logits/chosen": -3.1843292713165283, "logits/rejected": -2.9060704708099365, "logps/chosen": -386.24652099609375, "logps/rejected": -288.10528564453125, "loss": 4.2818, "rewards/accuracies": 0.5, "rewards/chosen": -5.248300075531006, "rewards/margins": -1.7452902793884277, "rewards/rejected": -3.503009796142578, "step": 4218 }, { "epoch": 0.66, "learning_rate": 1.1052948804398569e-05, "logits/chosen": -3.024975061416626, "logits/rejected": -2.618269443511963, "logps/chosen": -438.0967102050781, "logps/rejected": -261.6454772949219, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.33026811480522156, "rewards/margins": 5.727670669555664, "rewards/rejected": -6.057939052581787, "step": 4219 }, { "epoch": 0.66, "learning_rate": 1.105221536386742e-05, "logits/chosen": -3.0520007610321045, "logits/rejected": -2.654785633087158, "logps/chosen": -124.05402374267578, "logps/rejected": -97.17152404785156, "loss": 4.1084, "rewards/accuracies": 0.5, "rewards/chosen": -5.2345685958862305, "rewards/margins": -1.7402291297912598, "rewards/rejected": -3.4943394660949707, "step": 4220 }, { "epoch": 0.66, "learning_rate": 1.1051481923336273e-05, "logits/chosen": -1.788855791091919, "logits/rejected": -3.01645565032959, "logps/chosen": -260.3707275390625, "logps/rejected": -319.9927978515625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.10637971013784409, "rewards/margins": 5.074565887451172, "rewards/rejected": -4.968186378479004, "step": 4221 }, { "epoch": 0.66, "learning_rate": 1.1050748482805125e-05, "logits/chosen": -2.3860387802124023, "logits/rejected": -2.950631618499756, "logps/chosen": -374.9276428222656, "logps/rejected": -490.30828857421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.4478946924209595, "rewards/margins": 8.459135055541992, "rewards/rejected": -8.90703010559082, "step": 4222 }, { "epoch": 0.66, "learning_rate": 1.1050015042273976e-05, "logits/chosen": -2.9824178218841553, "logits/rejected": -2.86779522895813, "logps/chosen": -161.05194091796875, "logps/rejected": -269.3504638671875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.4335891008377075, "rewards/margins": 4.983307838439941, "rewards/rejected": -6.416896820068359, "step": 4223 }, { "epoch": 0.66, "learning_rate": 1.1049281601742828e-05, "logits/chosen": -1.7707651853561401, "logits/rejected": -2.797969102859497, "logps/chosen": -486.17950439453125, "logps/rejected": -587.8521728515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.7009824514389038, "rewards/margins": 8.819022178649902, "rewards/rejected": -9.520004272460938, "step": 4224 }, { "epoch": 0.66, "learning_rate": 1.104854816121168e-05, "logits/chosen": -3.0728812217712402, "logits/rejected": -1.966356873512268, "logps/chosen": -350.6363830566406, "logps/rejected": -320.4522705078125, "loss": 1.3704, "rewards/accuracies": 0.5, "rewards/chosen": -3.2667903900146484, "rewards/margins": 0.44534099102020264, "rewards/rejected": -3.7121315002441406, "step": 4225 }, { "epoch": 0.66, "learning_rate": 1.1047814720680532e-05, "logits/chosen": -3.030151844024658, "logits/rejected": -2.963209867477417, "logps/chosen": -172.73666381835938, "logps/rejected": -243.7895050048828, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -1.4887596368789673, "rewards/margins": 4.579536437988281, "rewards/rejected": -6.068295955657959, "step": 4226 }, { "epoch": 0.66, "learning_rate": 1.1047081280149386e-05, "logits/chosen": -2.3318004608154297, "logits/rejected": -2.9627366065979004, "logps/chosen": -383.2569885253906, "logps/rejected": -528.5012817382812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7900263071060181, "rewards/margins": 7.214397430419922, "rewards/rejected": -8.004423141479492, "step": 4227 }, { "epoch": 0.66, "learning_rate": 1.1046347839618238e-05, "logits/chosen": -2.515631914138794, "logits/rejected": -3.0896365642547607, "logps/chosen": -76.76754760742188, "logps/rejected": -242.13433837890625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.6951204538345337, "rewards/margins": 6.289460182189941, "rewards/rejected": -7.9845805168151855, "step": 4228 }, { "epoch": 0.66, "learning_rate": 1.104561439908709e-05, "logits/chosen": -2.903162956237793, "logits/rejected": -2.1567962169647217, "logps/chosen": -240.838623046875, "logps/rejected": -246.73419189453125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.7426154613494873, "rewards/margins": 5.378122329711914, "rewards/rejected": -6.120737552642822, "step": 4229 }, { "epoch": 0.66, "learning_rate": 1.1044880958555941e-05, "logits/chosen": -3.0307798385620117, "logits/rejected": -2.7324209213256836, "logps/chosen": -295.14263916015625, "logps/rejected": -65.65577697753906, "loss": 3.1448, "rewards/accuracies": 0.0, "rewards/chosen": -5.1832780838012695, "rewards/margins": -3.098257064819336, "rewards/rejected": -2.0850210189819336, "step": 4230 }, { "epoch": 0.66, "learning_rate": 1.1044147518024793e-05, "logits/chosen": -1.332872748374939, "logits/rejected": -2.498478412628174, "logps/chosen": -98.98836517333984, "logps/rejected": -275.02191162109375, "loss": 0.1335, "rewards/accuracies": 1.0, "rewards/chosen": -3.7093124389648438, "rewards/margins": 3.6820359230041504, "rewards/rejected": -7.391348838806152, "step": 4231 }, { "epoch": 0.66, "learning_rate": 1.1043414077493645e-05, "logits/chosen": -2.4849934577941895, "logits/rejected": -2.7800967693328857, "logps/chosen": -124.93641662597656, "logps/rejected": -191.50991821289062, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -2.052633285522461, "rewards/margins": 5.083857536315918, "rewards/rejected": -7.136490821838379, "step": 4232 }, { "epoch": 0.66, "learning_rate": 1.1042680636962499e-05, "logits/chosen": -1.3660736083984375, "logits/rejected": -2.903998851776123, "logps/chosen": -102.32561492919922, "logps/rejected": -306.06793212890625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.05506973713636398, "rewards/margins": 5.774299144744873, "rewards/rejected": -5.829368591308594, "step": 4233 }, { "epoch": 0.66, "learning_rate": 1.104194719643135e-05, "logits/chosen": -2.8716113567352295, "logits/rejected": -2.1777567863464355, "logps/chosen": -274.3479309082031, "logps/rejected": -255.8102569580078, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.7736976742744446, "rewards/margins": 8.204879760742188, "rewards/rejected": -7.43118143081665, "step": 4234 }, { "epoch": 0.66, "learning_rate": 1.1041213755900202e-05, "logits/chosen": -3.0432798862457275, "logits/rejected": -3.0302937030792236, "logps/chosen": -256.46514892578125, "logps/rejected": -285.322021484375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.030104875564575, "rewards/margins": 5.748228549957275, "rewards/rejected": -7.77833366394043, "step": 4235 }, { "epoch": 0.66, "learning_rate": 1.1040480315369056e-05, "logits/chosen": -2.6972103118896484, "logits/rejected": -3.109816074371338, "logps/chosen": -188.25604248046875, "logps/rejected": -311.2214660644531, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.4008186459541321, "rewards/margins": 5.115335941314697, "rewards/rejected": -5.5161542892456055, "step": 4236 }, { "epoch": 0.66, "learning_rate": 1.1039746874837908e-05, "logits/chosen": -2.9871881008148193, "logits/rejected": -2.3144965171813965, "logps/chosen": -174.0701904296875, "logps/rejected": -114.94822692871094, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.2839256525039673, "rewards/margins": 5.82990026473999, "rewards/rejected": -5.5459747314453125, "step": 4237 }, { "epoch": 0.66, "learning_rate": 1.103901343430676e-05, "logits/chosen": -1.338104486465454, "logits/rejected": -2.640537738800049, "logps/chosen": -150.75267028808594, "logps/rejected": -497.2746276855469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.833700954914093, "rewards/margins": 9.546406745910645, "rewards/rejected": -10.380107879638672, "step": 4238 }, { "epoch": 0.66, "learning_rate": 1.1038279993775612e-05, "logits/chosen": -2.790452480316162, "logits/rejected": -2.972210645675659, "logps/chosen": -98.45957946777344, "logps/rejected": -267.3681335449219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.529189109802246, "rewards/margins": 8.495527267456055, "rewards/rejected": -10.0247163772583, "step": 4239 }, { "epoch": 0.66, "learning_rate": 1.1037546553244463e-05, "logits/chosen": -2.7846429347991943, "logits/rejected": -2.5274553298950195, "logps/chosen": -90.7793197631836, "logps/rejected": -203.0363311767578, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": -1.8472280502319336, "rewards/margins": 3.306983470916748, "rewards/rejected": -5.154211521148682, "step": 4240 }, { "epoch": 0.66, "learning_rate": 1.1036813112713315e-05, "logits/chosen": -2.647334337234497, "logits/rejected": -3.282693862915039, "logps/chosen": -57.25846862792969, "logps/rejected": -179.0768280029297, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.8529770970344543, "rewards/margins": 5.174965858459473, "rewards/rejected": -6.027942657470703, "step": 4241 }, { "epoch": 0.66, "learning_rate": 1.1036079672182167e-05, "logits/chosen": -1.0835113525390625, "logits/rejected": -2.4844179153442383, "logps/chosen": -185.82034301757812, "logps/rejected": -528.0263061523438, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.4723197817802429, "rewards/margins": 6.066466808319092, "rewards/rejected": -6.5387864112854, "step": 4242 }, { "epoch": 0.66, "learning_rate": 1.1035346231651019e-05, "logits/chosen": -1.861570954322815, "logits/rejected": -3.0579464435577393, "logps/chosen": -113.67150115966797, "logps/rejected": -272.23736572265625, "loss": 3.4248, "rewards/accuracies": 0.5, "rewards/chosen": -4.773563861846924, "rewards/margins": -1.4197452068328857, "rewards/rejected": -3.353818655014038, "step": 4243 }, { "epoch": 0.66, "learning_rate": 1.1034612791119871e-05, "logits/chosen": -2.814594268798828, "logits/rejected": -2.9750125408172607, "logps/chosen": -271.44451904296875, "logps/rejected": -369.38543701171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4796944856643677, "rewards/margins": 7.118534564971924, "rewards/rejected": -8.598228454589844, "step": 4244 }, { "epoch": 0.66, "learning_rate": 1.1033879350588725e-05, "logits/chosen": -2.6714158058166504, "logits/rejected": -2.6722066402435303, "logps/chosen": -153.99288940429688, "logps/rejected": -176.02682495117188, "loss": 1.8264, "rewards/accuracies": 0.5, "rewards/chosen": -2.4510955810546875, "rewards/margins": 0.8061985969543457, "rewards/rejected": -3.257294178009033, "step": 4245 }, { "epoch": 0.66, "learning_rate": 1.1033145910057576e-05, "logits/chosen": -1.125800371170044, "logits/rejected": -3.1510071754455566, "logps/chosen": -95.80989837646484, "logps/rejected": -617.4110107421875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.1460461616516113, "rewards/margins": 5.307675361633301, "rewards/rejected": -7.453721523284912, "step": 4246 }, { "epoch": 0.66, "learning_rate": 1.1032412469526428e-05, "logits/chosen": -2.661803960800171, "logits/rejected": -3.2758803367614746, "logps/chosen": -125.52500915527344, "logps/rejected": -213.29537963867188, "loss": 3.5556, "rewards/accuracies": 0.5, "rewards/chosen": -5.032512187957764, "rewards/margins": 0.024831533432006836, "rewards/rejected": -5.05734395980835, "step": 4247 }, { "epoch": 0.66, "learning_rate": 1.103167902899528e-05, "logits/chosen": -2.879502058029175, "logits/rejected": -2.9662156105041504, "logps/chosen": -93.43562316894531, "logps/rejected": -297.5260925292969, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.5055416822433472, "rewards/margins": 6.376675605773926, "rewards/rejected": -7.8822174072265625, "step": 4248 }, { "epoch": 0.66, "learning_rate": 1.1030945588464132e-05, "logits/chosen": -2.971071720123291, "logits/rejected": -2.8107595443725586, "logps/chosen": -187.02537536621094, "logps/rejected": -154.1244354248047, "loss": 2.7039, "rewards/accuracies": 0.5, "rewards/chosen": -3.4363064765930176, "rewards/margins": 0.42642951011657715, "rewards/rejected": -3.8627359867095947, "step": 4249 }, { "epoch": 0.66, "learning_rate": 1.1030212147932984e-05, "logits/chosen": -2.6240832805633545, "logits/rejected": -3.4428277015686035, "logps/chosen": -59.90652847290039, "logps/rejected": -247.9326934814453, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -1.2562580108642578, "rewards/margins": 4.166484355926514, "rewards/rejected": -5.4227423667907715, "step": 4250 }, { "epoch": 0.66, "learning_rate": 1.1029478707401836e-05, "logits/chosen": -2.7911081314086914, "logits/rejected": -3.2344162464141846, "logps/chosen": -113.25531768798828, "logps/rejected": -282.0512390136719, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.4211413860321045, "rewards/margins": 5.911101341247559, "rewards/rejected": -7.332242965698242, "step": 4251 }, { "epoch": 0.66, "learning_rate": 1.1028745266870688e-05, "logits/chosen": -2.7158865928649902, "logits/rejected": -3.1609323024749756, "logps/chosen": -371.52691650390625, "logps/rejected": -553.1534423828125, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -1.0951331853866577, "rewards/margins": 4.929937362670898, "rewards/rejected": -6.0250701904296875, "step": 4252 }, { "epoch": 0.66, "learning_rate": 1.102801182633954e-05, "logits/chosen": -2.3824961185455322, "logits/rejected": -2.7418453693389893, "logps/chosen": -253.67214965820312, "logps/rejected": -209.53529357910156, "loss": 1.8464, "rewards/accuracies": 0.5, "rewards/chosen": -3.3664865493774414, "rewards/margins": 2.006673812866211, "rewards/rejected": -5.373159885406494, "step": 4253 }, { "epoch": 0.66, "learning_rate": 1.1027278385808393e-05, "logits/chosen": -1.8195850849151611, "logits/rejected": -2.7185590267181396, "logps/chosen": -246.19090270996094, "logps/rejected": -412.16912841796875, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -1.2696189880371094, "rewards/margins": 5.933204650878906, "rewards/rejected": -7.202823638916016, "step": 4254 }, { "epoch": 0.66, "learning_rate": 1.1026544945277245e-05, "logits/chosen": -2.8642284870147705, "logits/rejected": -2.870177745819092, "logps/chosen": -150.9620819091797, "logps/rejected": -289.6904296875, "loss": 2.0401, "rewards/accuracies": 0.5, "rewards/chosen": -2.3429737091064453, "rewards/margins": 2.15352201461792, "rewards/rejected": -4.496496200561523, "step": 4255 }, { "epoch": 0.66, "learning_rate": 1.1025811504746097e-05, "logits/chosen": -2.8939857482910156, "logits/rejected": -2.4973247051239014, "logps/chosen": -458.0144348144531, "logps/rejected": -451.1153259277344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7462909817695618, "rewards/margins": 8.878497123718262, "rewards/rejected": -9.624788284301758, "step": 4256 }, { "epoch": 0.66, "learning_rate": 1.1025078064214949e-05, "logits/chosen": -2.353747844696045, "logits/rejected": -2.9245588779449463, "logps/chosen": -436.81793212890625, "logps/rejected": -704.7115478515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.0485386848449707, "rewards/margins": 7.465418815612793, "rewards/rejected": -9.513957977294922, "step": 4257 }, { "epoch": 0.66, "learning_rate": 1.10243446236838e-05, "logits/chosen": -1.7730987071990967, "logits/rejected": -2.6910147666931152, "logps/chosen": -187.2432861328125, "logps/rejected": -341.5208740234375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.5250520706176758, "rewards/margins": 6.5329670906066895, "rewards/rejected": -8.058018684387207, "step": 4258 }, { "epoch": 0.66, "learning_rate": 1.1023611183152653e-05, "logits/chosen": -2.9818050861358643, "logits/rejected": -2.823106288909912, "logps/chosen": -156.6263427734375, "logps/rejected": -265.1082458496094, "loss": 3.1013, "rewards/accuracies": 0.5, "rewards/chosen": -5.257994651794434, "rewards/margins": -1.2358766794204712, "rewards/rejected": -4.022118091583252, "step": 4259 }, { "epoch": 0.66, "learning_rate": 1.1022877742621504e-05, "logits/chosen": -3.225090980529785, "logits/rejected": -2.8609888553619385, "logps/chosen": -577.3728637695312, "logps/rejected": -351.8614501953125, "loss": 2.0509, "rewards/accuracies": 0.5, "rewards/chosen": -4.050861358642578, "rewards/margins": -0.4764852523803711, "rewards/rejected": -3.574375867843628, "step": 4260 }, { "epoch": 0.66, "learning_rate": 1.1022144302090356e-05, "logits/chosen": -1.4776480197906494, "logits/rejected": -3.007974147796631, "logps/chosen": -153.6279754638672, "logps/rejected": -343.339599609375, "loss": 0.1202, "rewards/accuracies": 1.0, "rewards/chosen": -1.672893762588501, "rewards/margins": 5.578060150146484, "rewards/rejected": -7.250953674316406, "step": 4261 }, { "epoch": 0.66, "learning_rate": 1.1021410861559208e-05, "logits/chosen": -2.7079012393951416, "logits/rejected": -3.165031671524048, "logps/chosen": -284.5738830566406, "logps/rejected": -322.5061340332031, "loss": 3.2112, "rewards/accuracies": 0.5, "rewards/chosen": -4.544859409332275, "rewards/margins": -0.6075272560119629, "rewards/rejected": -3.9373321533203125, "step": 4262 }, { "epoch": 0.66, "learning_rate": 1.1020677421028062e-05, "logits/chosen": -2.889756917953491, "logits/rejected": -2.4190587997436523, "logps/chosen": -480.6611022949219, "logps/rejected": -433.0951232910156, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.9738697409629822, "rewards/margins": 8.043149948120117, "rewards/rejected": -9.017020225524902, "step": 4263 }, { "epoch": 0.66, "learning_rate": 1.1019943980496914e-05, "logits/chosen": -2.8938863277435303, "logits/rejected": -2.4926531314849854, "logps/chosen": -342.9576416015625, "logps/rejected": -511.4237060546875, "loss": 3.0989, "rewards/accuracies": 0.5, "rewards/chosen": -5.491522789001465, "rewards/margins": 2.641845703125, "rewards/rejected": -8.133368492126465, "step": 4264 }, { "epoch": 0.66, "learning_rate": 1.1019210539965765e-05, "logits/chosen": -2.573787212371826, "logits/rejected": -2.765547037124634, "logps/chosen": -50.03236389160156, "logps/rejected": -160.653076171875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.738512396812439, "rewards/margins": 5.117988586425781, "rewards/rejected": -6.85650110244751, "step": 4265 }, { "epoch": 0.66, "learning_rate": 1.1018477099434617e-05, "logits/chosen": -2.705270528793335, "logits/rejected": -1.4414215087890625, "logps/chosen": -188.82846069335938, "logps/rejected": -208.85211181640625, "loss": 0.4758, "rewards/accuracies": 0.5, "rewards/chosen": -3.523406982421875, "rewards/margins": 4.774710178375244, "rewards/rejected": -8.298116683959961, "step": 4266 }, { "epoch": 0.66, "learning_rate": 1.1017743658903471e-05, "logits/chosen": -2.2463576793670654, "logits/rejected": -2.8011929988861084, "logps/chosen": -267.3553466796875, "logps/rejected": -224.688720703125, "loss": 3.965, "rewards/accuracies": 0.0, "rewards/chosen": -5.310721397399902, "rewards/margins": -3.8946175575256348, "rewards/rejected": -1.4161040782928467, "step": 4267 }, { "epoch": 0.66, "learning_rate": 1.1017010218372323e-05, "logits/chosen": -2.9428200721740723, "logits/rejected": -3.0158915519714355, "logps/chosen": -40.94940185546875, "logps/rejected": -156.0616912841797, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.3085293769836426, "rewards/margins": 4.9192938804626465, "rewards/rejected": -5.227823257446289, "step": 4268 }, { "epoch": 0.66, "learning_rate": 1.1016276777841175e-05, "logits/chosen": -3.033663749694824, "logits/rejected": -2.990616798400879, "logps/chosen": -100.7074966430664, "logps/rejected": -198.341796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9253565073013306, "rewards/margins": 7.157500267028809, "rewards/rejected": -8.082857131958008, "step": 4269 }, { "epoch": 0.66, "learning_rate": 1.1015543337310027e-05, "logits/chosen": -2.759032964706421, "logits/rejected": -2.374723196029663, "logps/chosen": -426.33905029296875, "logps/rejected": -432.79132080078125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.891345202922821, "rewards/margins": 5.839077949523926, "rewards/rejected": -6.7304229736328125, "step": 4270 }, { "epoch": 0.66, "learning_rate": 1.101480989677888e-05, "logits/chosen": -2.6176767349243164, "logits/rejected": -2.424990653991699, "logps/chosen": -260.71197509765625, "logps/rejected": -174.84439086914062, "loss": 4.3361, "rewards/accuracies": 0.5, "rewards/chosen": -6.386425495147705, "rewards/margins": -1.211221694946289, "rewards/rejected": -5.175203800201416, "step": 4271 }, { "epoch": 0.66, "learning_rate": 1.1014076456247732e-05, "logits/chosen": -1.8593881130218506, "logits/rejected": -3.1916871070861816, "logps/chosen": -153.02658081054688, "logps/rejected": -546.7748413085938, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.6270678043365479, "rewards/margins": 6.2282609939575195, "rewards/rejected": -7.855328559875488, "step": 4272 }, { "epoch": 0.66, "learning_rate": 1.1013343015716584e-05, "logits/chosen": -2.6197428703308105, "logits/rejected": -2.8061413764953613, "logps/chosen": -499.57049560546875, "logps/rejected": -446.9867858886719, "loss": 0.2837, "rewards/accuracies": 1.0, "rewards/chosen": -2.630697250366211, "rewards/margins": 4.700815200805664, "rewards/rejected": -7.331512451171875, "step": 4273 }, { "epoch": 0.66, "learning_rate": 1.1012609575185436e-05, "logits/chosen": -2.8569014072418213, "logits/rejected": -2.9912140369415283, "logps/chosen": -170.6856689453125, "logps/rejected": -214.88311767578125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.0287137031555176, "rewards/margins": 5.948668956756592, "rewards/rejected": -7.977382659912109, "step": 4274 }, { "epoch": 0.66, "learning_rate": 1.1011876134654288e-05, "logits/chosen": -3.0165505409240723, "logits/rejected": -2.0243470668792725, "logps/chosen": -280.0852966308594, "logps/rejected": -141.83660888671875, "loss": 3.3658, "rewards/accuracies": 0.5, "rewards/chosen": -5.079071044921875, "rewards/margins": -1.3395588397979736, "rewards/rejected": -3.7395122051239014, "step": 4275 }, { "epoch": 0.67, "learning_rate": 1.101114269412314e-05, "logits/chosen": -3.166632890701294, "logits/rejected": -2.9048964977264404, "logps/chosen": -190.57061767578125, "logps/rejected": -142.0824737548828, "loss": 1.8437, "rewards/accuracies": 0.5, "rewards/chosen": -5.381409645080566, "rewards/margins": 0.7037773132324219, "rewards/rejected": -6.085186958312988, "step": 4276 }, { "epoch": 0.67, "learning_rate": 1.1010409253591991e-05, "logits/chosen": -2.605633020401001, "logits/rejected": -3.1402106285095215, "logps/chosen": -847.2099609375, "logps/rejected": -717.6806030273438, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": -3.220803737640381, "rewards/margins": 4.1928863525390625, "rewards/rejected": -7.413690090179443, "step": 4277 }, { "epoch": 0.67, "learning_rate": 1.1009675813060843e-05, "logits/chosen": -3.0189805030822754, "logits/rejected": -3.0481810569763184, "logps/chosen": -322.41949462890625, "logps/rejected": -408.5587158203125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.6546332836151123, "rewards/margins": 5.790438175201416, "rewards/rejected": -7.445071220397949, "step": 4278 }, { "epoch": 0.67, "learning_rate": 1.1008942372529695e-05, "logits/chosen": -3.0714964866638184, "logits/rejected": -2.671302080154419, "logps/chosen": -351.640869140625, "logps/rejected": -459.08990478515625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.3979098796844482, "rewards/margins": 5.154058456420898, "rewards/rejected": -7.551968574523926, "step": 4279 }, { "epoch": 0.67, "learning_rate": 1.1008208931998549e-05, "logits/chosen": -2.571141004562378, "logits/rejected": -2.7750601768493652, "logps/chosen": -341.04229736328125, "logps/rejected": -325.3831787109375, "loss": 6.4572, "rewards/accuracies": 0.0, "rewards/chosen": -8.086007118225098, "rewards/margins": -6.455324172973633, "rewards/rejected": -1.630683183670044, "step": 4280 }, { "epoch": 0.67, "learning_rate": 1.10074754914674e-05, "logits/chosen": -2.142784595489502, "logits/rejected": -2.8523380756378174, "logps/chosen": -319.8361511230469, "logps/rejected": -289.2556457519531, "loss": 2.4929, "rewards/accuracies": 0.5, "rewards/chosen": -5.747703552246094, "rewards/margins": -0.7937748432159424, "rewards/rejected": -4.953928470611572, "step": 4281 }, { "epoch": 0.67, "learning_rate": 1.1006742050936252e-05, "logits/chosen": -2.6663730144500732, "logits/rejected": -3.024608612060547, "logps/chosen": -87.82461547851562, "logps/rejected": -330.67999267578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7381902933120728, "rewards/margins": 7.380345821380615, "rewards/rejected": -9.118535995483398, "step": 4282 }, { "epoch": 0.67, "learning_rate": 1.1006008610405104e-05, "logits/chosen": -1.543705701828003, "logits/rejected": -2.015266180038452, "logps/chosen": -407.43878173828125, "logps/rejected": -387.82183837890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.5796482563018799, "rewards/margins": 6.702579975128174, "rewards/rejected": -8.282228469848633, "step": 4283 }, { "epoch": 0.67, "learning_rate": 1.1005275169873956e-05, "logits/chosen": -2.408268690109253, "logits/rejected": -2.2373170852661133, "logps/chosen": -268.8937072753906, "logps/rejected": -299.8824768066406, "loss": 6.3183, "rewards/accuracies": 0.0, "rewards/chosen": -7.213143348693848, "rewards/margins": -6.316450119018555, "rewards/rejected": -0.8966934680938721, "step": 4284 }, { "epoch": 0.67, "learning_rate": 1.1004541729342808e-05, "logits/chosen": -2.9741594791412354, "logits/rejected": -3.1780354976654053, "logps/chosen": -317.90478515625, "logps/rejected": -376.7655944824219, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": -2.266441822052002, "rewards/margins": 2.6354522705078125, "rewards/rejected": -4.9018940925598145, "step": 4285 }, { "epoch": 0.67, "learning_rate": 1.100380828881166e-05, "logits/chosen": -3.0809473991394043, "logits/rejected": -3.019785165786743, "logps/chosen": -557.5911254882812, "logps/rejected": -318.0013732910156, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.0952248573303223, "rewards/margins": 5.741176605224609, "rewards/rejected": -7.83640193939209, "step": 4286 }, { "epoch": 0.67, "learning_rate": 1.1003074848280512e-05, "logits/chosen": -3.0899271965026855, "logits/rejected": -2.698323965072632, "logps/chosen": -340.37200927734375, "logps/rejected": -281.624267578125, "loss": 2.0503, "rewards/accuracies": 0.5, "rewards/chosen": -3.2747509479522705, "rewards/margins": 2.638516426086426, "rewards/rejected": -5.913267612457275, "step": 4287 }, { "epoch": 0.67, "learning_rate": 1.1002341407749364e-05, "logits/chosen": -1.9139516353607178, "logits/rejected": -2.619145631790161, "logps/chosen": -136.41502380371094, "logps/rejected": -268.06341552734375, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.9343764781951904, "rewards/margins": 4.815821647644043, "rewards/rejected": -6.7501983642578125, "step": 4288 }, { "epoch": 0.67, "learning_rate": 1.1001607967218217e-05, "logits/chosen": -2.9468276500701904, "logits/rejected": -2.8395349979400635, "logps/chosen": -890.28662109375, "logps/rejected": -755.52294921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.889050304889679, "rewards/margins": 8.577972412109375, "rewards/rejected": -9.467022895812988, "step": 4289 }, { "epoch": 0.67, "learning_rate": 1.100087452668707e-05, "logits/chosen": -2.8592703342437744, "logits/rejected": -3.07474684715271, "logps/chosen": -532.1637573242188, "logps/rejected": -519.2330932617188, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -1.5300155878067017, "rewards/margins": 3.754757881164551, "rewards/rejected": -5.284773349761963, "step": 4290 }, { "epoch": 0.67, "learning_rate": 1.1000141086155921e-05, "logits/chosen": -2.8045201301574707, "logits/rejected": -2.951533317565918, "logps/chosen": -147.74346923828125, "logps/rejected": -85.247802734375, "loss": 3.039, "rewards/accuracies": 0.0, "rewards/chosen": -5.7158637046813965, "rewards/margins": -2.9853899478912354, "rewards/rejected": -2.730473756790161, "step": 4291 }, { "epoch": 0.67, "learning_rate": 1.0999407645624773e-05, "logits/chosen": -2.1105570793151855, "logits/rejected": -2.8417038917541504, "logps/chosen": -93.8166732788086, "logps/rejected": -217.8985595703125, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.9301913976669312, "rewards/margins": 5.930871963500977, "rewards/rejected": -6.861063480377197, "step": 4292 }, { "epoch": 0.67, "learning_rate": 1.0998674205093625e-05, "logits/chosen": -2.800894021987915, "logits/rejected": -3.036222219467163, "logps/chosen": -93.69007110595703, "logps/rejected": -140.4061279296875, "loss": 0.0475, "rewards/accuracies": 1.0, "rewards/chosen": -2.363248586654663, "rewards/margins": 3.61106014251709, "rewards/rejected": -5.974308490753174, "step": 4293 }, { "epoch": 0.67, "learning_rate": 1.0997940764562477e-05, "logits/chosen": -2.0590145587921143, "logits/rejected": -2.919774055480957, "logps/chosen": -158.02041625976562, "logps/rejected": -488.2717590332031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2641884088516235, "rewards/margins": 7.592252254486084, "rewards/rejected": -8.856440544128418, "step": 4294 }, { "epoch": 0.67, "learning_rate": 1.0997207324031329e-05, "logits/chosen": -2.918694019317627, "logits/rejected": -3.4537572860717773, "logps/chosen": -145.47647094726562, "logps/rejected": -364.6401672363281, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.662621021270752, "rewards/margins": 5.446905136108398, "rewards/rejected": -7.10952615737915, "step": 4295 }, { "epoch": 0.67, "learning_rate": 1.099647388350018e-05, "logits/chosen": -2.5991008281707764, "logits/rejected": -2.9854543209075928, "logps/chosen": -312.89495849609375, "logps/rejected": -287.10247802734375, "loss": 2.0711, "rewards/accuracies": 0.5, "rewards/chosen": -4.18324613571167, "rewards/margins": 0.49430322647094727, "rewards/rejected": -4.677548885345459, "step": 4296 }, { "epoch": 0.67, "learning_rate": 1.0995740442969032e-05, "logits/chosen": -2.557100772857666, "logits/rejected": -2.7949321269989014, "logps/chosen": -90.04093933105469, "logps/rejected": -264.4051513671875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.5640196800231934, "rewards/margins": 6.31553840637207, "rewards/rejected": -8.879558563232422, "step": 4297 }, { "epoch": 0.67, "learning_rate": 1.0995007002437886e-05, "logits/chosen": -2.8303213119506836, "logits/rejected": -2.5855538845062256, "logps/chosen": -284.48651123046875, "logps/rejected": -271.2545471191406, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.2063968181610107, "rewards/margins": 5.390260696411133, "rewards/rejected": -6.596657752990723, "step": 4298 }, { "epoch": 0.67, "learning_rate": 1.0994273561906738e-05, "logits/chosen": -2.953416585922241, "logits/rejected": -2.8346776962280273, "logps/chosen": -583.4833374023438, "logps/rejected": -655.1429443359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5951035022735596, "rewards/margins": 7.195979118347168, "rewards/rejected": -8.791082382202148, "step": 4299 }, { "epoch": 0.67, "learning_rate": 1.099354012137559e-05, "logits/chosen": -2.726330280303955, "logits/rejected": -2.9457695484161377, "logps/chosen": -600.6611938476562, "logps/rejected": -611.7406005859375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.9945063591003418, "rewards/margins": 5.1554694175720215, "rewards/rejected": -7.149975776672363, "step": 4300 }, { "epoch": 0.67, "learning_rate": 1.0992806680844443e-05, "logits/chosen": -2.3423397541046143, "logits/rejected": -2.88641357421875, "logps/chosen": -165.5892333984375, "logps/rejected": -208.71490478515625, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -1.6136528253555298, "rewards/margins": 4.596115589141846, "rewards/rejected": -6.209768295288086, "step": 4301 }, { "epoch": 0.67, "learning_rate": 1.0992073240313295e-05, "logits/chosen": -2.5975003242492676, "logits/rejected": -3.3369932174682617, "logps/chosen": -71.28477478027344, "logps/rejected": -166.8324737548828, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -1.8680024147033691, "rewards/margins": 3.527517795562744, "rewards/rejected": -5.395520210266113, "step": 4302 }, { "epoch": 0.67, "learning_rate": 1.0991339799782147e-05, "logits/chosen": -2.824347734451294, "logits/rejected": -3.12137770652771, "logps/chosen": -59.0015869140625, "logps/rejected": -135.1510009765625, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -2.4742374420166016, "rewards/margins": 3.1396374702453613, "rewards/rejected": -5.613874912261963, "step": 4303 }, { "epoch": 0.67, "learning_rate": 1.0990606359250999e-05, "logits/chosen": -2.9586355686187744, "logits/rejected": -3.1140143871307373, "logps/chosen": -179.67233276367188, "logps/rejected": -290.655517578125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -1.6840429306030273, "rewards/margins": 5.827396392822266, "rewards/rejected": -7.511439323425293, "step": 4304 }, { "epoch": 0.67, "learning_rate": 1.098987291871985e-05, "logits/chosen": -2.711099624633789, "logits/rejected": -1.669342041015625, "logps/chosen": -201.35511779785156, "logps/rejected": -213.50587463378906, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.5252864360809326, "rewards/margins": 5.524075508117676, "rewards/rejected": -7.0493621826171875, "step": 4305 }, { "epoch": 0.67, "learning_rate": 1.0989139478188703e-05, "logits/chosen": -2.3457558155059814, "logits/rejected": -2.5733156204223633, "logps/chosen": -237.81295776367188, "logps/rejected": -298.04888916015625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.4624649286270142, "rewards/margins": 5.269603729248047, "rewards/rejected": -6.7320685386657715, "step": 4306 }, { "epoch": 0.67, "learning_rate": 1.0988406037657556e-05, "logits/chosen": -1.998552680015564, "logits/rejected": -2.421314001083374, "logps/chosen": -308.5542297363281, "logps/rejected": -558.5732421875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.949853777885437, "rewards/margins": 6.042874336242676, "rewards/rejected": -7.992728233337402, "step": 4307 }, { "epoch": 0.67, "learning_rate": 1.0987672597126408e-05, "logits/chosen": -2.7770750522613525, "logits/rejected": -3.050551414489746, "logps/chosen": -257.9893493652344, "logps/rejected": -277.3243713378906, "loss": 2.4101, "rewards/accuracies": 0.5, "rewards/chosen": -3.7379190921783447, "rewards/margins": 3.0472402572631836, "rewards/rejected": -6.785159111022949, "step": 4308 }, { "epoch": 0.67, "learning_rate": 1.098693915659526e-05, "logits/chosen": -2.9425199031829834, "logits/rejected": -3.009244203567505, "logps/chosen": -98.98737335205078, "logps/rejected": -174.68936157226562, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4908115863800049, "rewards/margins": 5.460995197296143, "rewards/rejected": -6.951807022094727, "step": 4309 }, { "epoch": 0.67, "learning_rate": 1.0986205716064112e-05, "logits/chosen": -1.9559903144836426, "logits/rejected": -2.682572603225708, "logps/chosen": -83.85340881347656, "logps/rejected": -265.8109130859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.494128704071045, "rewards/margins": 6.973785877227783, "rewards/rejected": -8.467914581298828, "step": 4310 }, { "epoch": 0.67, "learning_rate": 1.0985472275532964e-05, "logits/chosen": -2.814382314682007, "logits/rejected": -2.5754337310791016, "logps/chosen": -678.0020751953125, "logps/rejected": -542.4539794921875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4045014381408691, "rewards/margins": 6.138449192047119, "rewards/rejected": -7.542950630187988, "step": 4311 }, { "epoch": 0.67, "learning_rate": 1.0984738835001816e-05, "logits/chosen": -2.395897388458252, "logits/rejected": -3.1878461837768555, "logps/chosen": -164.17648315429688, "logps/rejected": -385.5881652832031, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.9322304725646973, "rewards/margins": 6.655030250549316, "rewards/rejected": -9.587261199951172, "step": 4312 }, { "epoch": 0.67, "learning_rate": 1.0984005394470667e-05, "logits/chosen": -2.79312801361084, "logits/rejected": -2.916400909423828, "logps/chosen": -122.61451721191406, "logps/rejected": -226.44577026367188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2841899394989014, "rewards/margins": 7.396817207336426, "rewards/rejected": -8.681007385253906, "step": 4313 }, { "epoch": 0.67, "learning_rate": 1.098327195393952e-05, "logits/chosen": -2.0849037170410156, "logits/rejected": -3.092000961303711, "logps/chosen": -125.79627990722656, "logps/rejected": -362.73028564453125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.0970566272735596, "rewards/margins": 5.321005344390869, "rewards/rejected": -6.418062210083008, "step": 4314 }, { "epoch": 0.67, "learning_rate": 1.0982538513408371e-05, "logits/chosen": -3.130112409591675, "logits/rejected": -3.1542255878448486, "logps/chosen": -336.2650146484375, "logps/rejected": -302.5835876464844, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.0337764024734497, "rewards/margins": 4.813136100769043, "rewards/rejected": -5.846912384033203, "step": 4315 }, { "epoch": 0.67, "learning_rate": 1.0981805072877225e-05, "logits/chosen": -3.065580368041992, "logits/rejected": -2.4705188274383545, "logps/chosen": -306.9085388183594, "logps/rejected": -266.4962158203125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.8460071682929993, "rewards/margins": 5.7229204177856445, "rewards/rejected": -6.568927764892578, "step": 4316 }, { "epoch": 0.67, "learning_rate": 1.0981071632346077e-05, "logits/chosen": -2.750389337539673, "logits/rejected": -3.0336763858795166, "logps/chosen": -239.43939208984375, "logps/rejected": -341.5347900390625, "loss": 1.3966, "rewards/accuracies": 0.5, "rewards/chosen": -2.7673048973083496, "rewards/margins": 0.561020016670227, "rewards/rejected": -3.328325033187866, "step": 4317 }, { "epoch": 0.67, "learning_rate": 1.0980338191814929e-05, "logits/chosen": -2.691305160522461, "logits/rejected": -1.881623387336731, "logps/chosen": -679.8505249023438, "logps/rejected": -366.5722351074219, "loss": 0.626, "rewards/accuracies": 0.5, "rewards/chosen": -4.165596961975098, "rewards/margins": 1.1574914455413818, "rewards/rejected": -5.323088645935059, "step": 4318 }, { "epoch": 0.67, "learning_rate": 1.097960475128378e-05, "logits/chosen": -2.361793279647827, "logits/rejected": -2.811746835708618, "logps/chosen": -212.93118286132812, "logps/rejected": -347.66046142578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.978105068206787, "rewards/margins": 7.323117256164551, "rewards/rejected": -11.30122184753418, "step": 4319 }, { "epoch": 0.67, "learning_rate": 1.0978871310752632e-05, "logits/chosen": -1.931020736694336, "logits/rejected": -2.9755067825317383, "logps/chosen": -81.25941467285156, "logps/rejected": -260.9184875488281, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -2.5023298263549805, "rewards/margins": 5.0522003173828125, "rewards/rejected": -7.554530143737793, "step": 4320 }, { "epoch": 0.67, "learning_rate": 1.0978137870221484e-05, "logits/chosen": -2.966726064682007, "logits/rejected": -2.550161361694336, "logps/chosen": -359.9989318847656, "logps/rejected": -295.6980895996094, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.2898578643798828, "rewards/margins": 5.105975151062012, "rewards/rejected": -6.3958330154418945, "step": 4321 }, { "epoch": 0.67, "learning_rate": 1.0977404429690336e-05, "logits/chosen": -3.250854730606079, "logits/rejected": -3.368748903274536, "logps/chosen": -388.4165954589844, "logps/rejected": -459.6756286621094, "loss": 0.1165, "rewards/accuracies": 1.0, "rewards/chosen": -0.8362998962402344, "rewards/margins": 5.480161666870117, "rewards/rejected": -6.316461563110352, "step": 4322 }, { "epoch": 0.67, "learning_rate": 1.0976670989159188e-05, "logits/chosen": -2.751507043838501, "logits/rejected": -1.1903183460235596, "logps/chosen": -424.1079406738281, "logps/rejected": -406.85247802734375, "loss": 0.7471, "rewards/accuracies": 0.5, "rewards/chosen": -2.983517646789551, "rewards/margins": 1.8246064186096191, "rewards/rejected": -4.80812406539917, "step": 4323 }, { "epoch": 0.67, "learning_rate": 1.097593754862804e-05, "logits/chosen": -2.906109094619751, "logits/rejected": -2.744654417037964, "logps/chosen": -144.00930786132812, "logps/rejected": -231.70773315429688, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -2.3261749744415283, "rewards/margins": 5.787746429443359, "rewards/rejected": -8.113921165466309, "step": 4324 }, { "epoch": 0.67, "learning_rate": 1.0975204108096893e-05, "logits/chosen": -1.5554248094558716, "logits/rejected": -2.8018171787261963, "logps/chosen": -110.99369812011719, "logps/rejected": -361.8343505859375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.2702407836914062, "rewards/margins": 5.979818344116211, "rewards/rejected": -8.250059127807617, "step": 4325 }, { "epoch": 0.67, "learning_rate": 1.0974470667565745e-05, "logits/chosen": -2.0625102519989014, "logits/rejected": -2.501415729522705, "logps/chosen": -177.43991088867188, "logps/rejected": -154.09967041015625, "loss": 0.1466, "rewards/accuracies": 1.0, "rewards/chosen": -2.681401252746582, "rewards/margins": 3.8377482891082764, "rewards/rejected": -6.5191497802734375, "step": 4326 }, { "epoch": 0.67, "learning_rate": 1.0973737227034597e-05, "logits/chosen": -2.96397066116333, "logits/rejected": -3.21623158454895, "logps/chosen": -360.25732421875, "logps/rejected": -263.77862548828125, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -0.14004211127758026, "rewards/margins": 4.322119235992432, "rewards/rejected": -4.462161540985107, "step": 4327 }, { "epoch": 0.67, "learning_rate": 1.0973003786503449e-05, "logits/chosen": -1.687114953994751, "logits/rejected": -2.9852466583251953, "logps/chosen": -68.3374252319336, "logps/rejected": -382.4246826171875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.8341865539550781, "rewards/margins": 5.958481311798096, "rewards/rejected": -7.792668342590332, "step": 4328 }, { "epoch": 0.67, "learning_rate": 1.0972270345972301e-05, "logits/chosen": -2.62982439994812, "logits/rejected": -2.977513074874878, "logps/chosen": -251.59104919433594, "logps/rejected": -379.1137390136719, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9125744104385376, "rewards/margins": 5.130181312561035, "rewards/rejected": -6.042756080627441, "step": 4329 }, { "epoch": 0.67, "learning_rate": 1.0971536905441153e-05, "logits/chosen": -2.9445157051086426, "logits/rejected": -3.013996124267578, "logps/chosen": -251.15399169921875, "logps/rejected": -117.67859649658203, "loss": 3.6485, "rewards/accuracies": 0.5, "rewards/chosen": -5.700192928314209, "rewards/margins": 0.14410972595214844, "rewards/rejected": -5.844302654266357, "step": 4330 }, { "epoch": 0.67, "learning_rate": 1.0970803464910005e-05, "logits/chosen": -2.903406858444214, "logits/rejected": -3.046945571899414, "logps/chosen": -54.22928237915039, "logps/rejected": -150.5489501953125, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -1.6648492813110352, "rewards/margins": 4.238798141479492, "rewards/rejected": -5.903647422790527, "step": 4331 }, { "epoch": 0.67, "learning_rate": 1.0970070024378857e-05, "logits/chosen": -2.947996139526367, "logits/rejected": -3.1562180519104004, "logps/chosen": -66.63654327392578, "logps/rejected": -185.61965942382812, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -1.8475744724273682, "rewards/margins": 4.5752410888671875, "rewards/rejected": -6.422815799713135, "step": 4332 }, { "epoch": 0.67, "learning_rate": 1.096933658384771e-05, "logits/chosen": -2.9117681980133057, "logits/rejected": -2.091480016708374, "logps/chosen": -314.69683837890625, "logps/rejected": -473.1182861328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.8015594482421875, "rewards/margins": 8.62169361114502, "rewards/rejected": -10.423253059387207, "step": 4333 }, { "epoch": 0.67, "learning_rate": 1.0968603143316562e-05, "logits/chosen": -2.987093925476074, "logits/rejected": -1.970383882522583, "logps/chosen": -619.5376586914062, "logps/rejected": -245.150146484375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.4088242053985596, "rewards/margins": 5.510239601135254, "rewards/rejected": -6.919063568115234, "step": 4334 }, { "epoch": 0.67, "learning_rate": 1.0967869702785416e-05, "logits/chosen": -2.6421115398406982, "logits/rejected": -2.6620142459869385, "logps/chosen": -115.79546356201172, "logps/rejected": -271.4320983886719, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.4936275482177734, "rewards/margins": 5.722130298614502, "rewards/rejected": -7.215757846832275, "step": 4335 }, { "epoch": 0.67, "learning_rate": 1.0967136262254267e-05, "logits/chosen": -2.6755666732788086, "logits/rejected": -2.4162659645080566, "logps/chosen": -117.69415283203125, "logps/rejected": -234.91647338867188, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -1.4586055278778076, "rewards/margins": 5.446033477783203, "rewards/rejected": -6.90463924407959, "step": 4336 }, { "epoch": 0.67, "learning_rate": 1.096640282172312e-05, "logits/chosen": -2.825132131576538, "logits/rejected": -2.905916929244995, "logps/chosen": -356.3596496582031, "logps/rejected": -337.88507080078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6393672823905945, "rewards/margins": 7.313210487365723, "rewards/rejected": -7.952577590942383, "step": 4337 }, { "epoch": 0.67, "learning_rate": 1.0965669381191971e-05, "logits/chosen": -2.0384302139282227, "logits/rejected": -2.5364718437194824, "logps/chosen": -122.75579833984375, "logps/rejected": -250.37844848632812, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.6798557639122009, "rewards/margins": 8.410544395446777, "rewards/rejected": -9.090400695800781, "step": 4338 }, { "epoch": 0.67, "learning_rate": 1.0964935940660823e-05, "logits/chosen": -1.665270209312439, "logits/rejected": -2.6247472763061523, "logps/chosen": -88.9739990234375, "logps/rejected": -194.88819885253906, "loss": 0.9716, "rewards/accuracies": 0.5, "rewards/chosen": -4.014560222625732, "rewards/margins": 2.066786050796509, "rewards/rejected": -6.08134651184082, "step": 4339 }, { "epoch": 0.67, "learning_rate": 1.0964202500129675e-05, "logits/chosen": -2.5166356563568115, "logits/rejected": -2.956962823867798, "logps/chosen": -317.7639465332031, "logps/rejected": -487.71807861328125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.6890265941619873, "rewards/margins": 7.076786041259766, "rewards/rejected": -8.765812873840332, "step": 4340 }, { "epoch": 0.68, "learning_rate": 1.0963469059598527e-05, "logits/chosen": -1.758256196975708, "logits/rejected": -2.8902220726013184, "logps/chosen": -40.16444778442383, "logps/rejected": -328.9727478027344, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.130660057067871, "rewards/margins": 6.729522228240967, "rewards/rejected": -8.86018180847168, "step": 4341 }, { "epoch": 0.68, "learning_rate": 1.0962735619067379e-05, "logits/chosen": -2.1410515308380127, "logits/rejected": -3.053215503692627, "logps/chosen": -661.0493774414062, "logps/rejected": -715.43798828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.244253635406494, "rewards/margins": 6.6648850440979, "rewards/rejected": -8.909138679504395, "step": 4342 }, { "epoch": 0.68, "learning_rate": 1.0962002178536232e-05, "logits/chosen": -2.367767333984375, "logits/rejected": -3.259631633758545, "logps/chosen": -365.65875244140625, "logps/rejected": -423.3648681640625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.1729674339294434, "rewards/margins": 7.8988823890686035, "rewards/rejected": -10.071849822998047, "step": 4343 }, { "epoch": 0.68, "learning_rate": 1.0961268738005084e-05, "logits/chosen": -2.7923097610473633, "logits/rejected": -2.877533435821533, "logps/chosen": -101.72190856933594, "logps/rejected": -212.00137329101562, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.7960842847824097, "rewards/margins": 4.980560302734375, "rewards/rejected": -6.776644706726074, "step": 4344 }, { "epoch": 0.68, "learning_rate": 1.0960535297473936e-05, "logits/chosen": -2.8824920654296875, "logits/rejected": -2.953036069869995, "logps/chosen": -530.4114379882812, "logps/rejected": -702.8284912109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.37808534502983093, "rewards/margins": 9.082727432250977, "rewards/rejected": -9.46081256866455, "step": 4345 }, { "epoch": 0.68, "learning_rate": 1.0959801856942788e-05, "logits/chosen": -2.7511062622070312, "logits/rejected": -2.869412422180176, "logps/chosen": -89.97172546386719, "logps/rejected": -292.1510009765625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3591477870941162, "rewards/margins": 6.958164215087891, "rewards/rejected": -8.317312240600586, "step": 4346 }, { "epoch": 0.68, "learning_rate": 1.095906841641164e-05, "logits/chosen": -2.709639072418213, "logits/rejected": -2.6822738647460938, "logps/chosen": -50.83720016479492, "logps/rejected": -267.4515686035156, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.4414941668510437, "rewards/margins": 5.983173370361328, "rewards/rejected": -6.4246673583984375, "step": 4347 }, { "epoch": 0.68, "learning_rate": 1.0958334975880492e-05, "logits/chosen": -2.8110296726226807, "logits/rejected": -2.6391761302948, "logps/chosen": -198.3732452392578, "logps/rejected": -245.17718505859375, "loss": 2.0003, "rewards/accuracies": 0.5, "rewards/chosen": -3.80635404586792, "rewards/margins": 0.2929922342300415, "rewards/rejected": -4.099346160888672, "step": 4348 }, { "epoch": 0.68, "learning_rate": 1.0957601535349344e-05, "logits/chosen": -2.4928197860717773, "logits/rejected": -2.7667622566223145, "logps/chosen": -179.51535034179688, "logps/rejected": -387.61553955078125, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.4933769404888153, "rewards/margins": 4.256051063537598, "rewards/rejected": -4.749427795410156, "step": 4349 }, { "epoch": 0.68, "learning_rate": 1.0956868094818195e-05, "logits/chosen": -2.618138074874878, "logits/rejected": -2.9763848781585693, "logps/chosen": -181.86131286621094, "logps/rejected": -227.59913635253906, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -2.0860114097595215, "rewards/margins": 4.562015533447266, "rewards/rejected": -6.648027420043945, "step": 4350 }, { "epoch": 0.68, "learning_rate": 1.0956134654287047e-05, "logits/chosen": -2.3423306941986084, "logits/rejected": -3.0029118061065674, "logps/chosen": -122.69659423828125, "logps/rejected": -291.986572265625, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -1.5710039138793945, "rewards/margins": 3.8758459091186523, "rewards/rejected": -5.446849822998047, "step": 4351 }, { "epoch": 0.68, "learning_rate": 1.0955401213755901e-05, "logits/chosen": -1.197726845741272, "logits/rejected": -2.618349075317383, "logps/chosen": -131.67623901367188, "logps/rejected": -336.40814208984375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.058551549911499, "rewards/margins": 6.1076836585998535, "rewards/rejected": -9.166234970092773, "step": 4352 }, { "epoch": 0.68, "learning_rate": 1.0954667773224753e-05, "logits/chosen": -2.8622055053710938, "logits/rejected": -3.3721437454223633, "logps/chosen": -115.79463195800781, "logps/rejected": -391.1826171875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.4075570106506348, "rewards/margins": 5.889142990112305, "rewards/rejected": -7.296699523925781, "step": 4353 }, { "epoch": 0.68, "learning_rate": 1.0953934332693605e-05, "logits/chosen": -2.9653470516204834, "logits/rejected": -2.196079969406128, "logps/chosen": -232.95460510253906, "logps/rejected": -223.39694213867188, "loss": 2.6837, "rewards/accuracies": 0.5, "rewards/chosen": -4.222235679626465, "rewards/margins": 0.40776991844177246, "rewards/rejected": -4.630005836486816, "step": 4354 }, { "epoch": 0.68, "learning_rate": 1.0953200892162457e-05, "logits/chosen": -2.5289571285247803, "logits/rejected": -3.082721471786499, "logps/chosen": -165.58871459960938, "logps/rejected": -284.89300537109375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.664899468421936, "rewards/margins": 5.880403518676758, "rewards/rejected": -7.5453033447265625, "step": 4355 }, { "epoch": 0.68, "learning_rate": 1.0952467451631308e-05, "logits/chosen": -2.178006410598755, "logits/rejected": -2.759449005126953, "logps/chosen": -186.55545043945312, "logps/rejected": -337.908203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7601391077041626, "rewards/margins": 8.009918212890625, "rewards/rejected": -9.77005672454834, "step": 4356 }, { "epoch": 0.68, "learning_rate": 1.095173401110016e-05, "logits/chosen": -1.0067569017410278, "logits/rejected": -2.0766210556030273, "logps/chosen": -96.8958969116211, "logps/rejected": -278.07666015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.286216378211975, "rewards/margins": 7.140376091003418, "rewards/rejected": -8.426591873168945, "step": 4357 }, { "epoch": 0.68, "learning_rate": 1.0951000570569012e-05, "logits/chosen": -2.4128589630126953, "logits/rejected": -2.957683801651001, "logps/chosen": -408.34820556640625, "logps/rejected": -563.826904296875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.662672519683838, "rewards/margins": 5.791325569152832, "rewards/rejected": -7.45399808883667, "step": 4358 }, { "epoch": 0.68, "learning_rate": 1.0950267130037864e-05, "logits/chosen": -2.628307342529297, "logits/rejected": -2.9224655628204346, "logps/chosen": -175.85372924804688, "logps/rejected": -263.78875732421875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.7002149820327759, "rewards/margins": 6.020861625671387, "rewards/rejected": -7.721076011657715, "step": 4359 }, { "epoch": 0.68, "learning_rate": 1.0949533689506716e-05, "logits/chosen": -2.348191499710083, "logits/rejected": -3.077341318130493, "logps/chosen": -102.37876892089844, "logps/rejected": -272.97186279296875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.9912300109863281, "rewards/margins": 6.728720188140869, "rewards/rejected": -8.719949722290039, "step": 4360 }, { "epoch": 0.68, "learning_rate": 1.094880024897557e-05, "logits/chosen": -2.5976734161376953, "logits/rejected": -2.2883758544921875, "logps/chosen": -221.46139526367188, "logps/rejected": -306.8856201171875, "loss": 2.4036, "rewards/accuracies": 0.5, "rewards/chosen": -4.726781845092773, "rewards/margins": -0.3757288455963135, "rewards/rejected": -4.351052761077881, "step": 4361 }, { "epoch": 0.68, "learning_rate": 1.0948066808444421e-05, "logits/chosen": -2.882396936416626, "logits/rejected": -2.8525218963623047, "logps/chosen": -111.83309936523438, "logps/rejected": -239.29275512695312, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.3502460718154907, "rewards/margins": 5.8308539390563965, "rewards/rejected": -7.181099891662598, "step": 4362 }, { "epoch": 0.68, "learning_rate": 1.0947333367913273e-05, "logits/chosen": -2.900254964828491, "logits/rejected": -1.9695061445236206, "logps/chosen": -408.14007568359375, "logps/rejected": -190.31585693359375, "loss": 1.6803, "rewards/accuracies": 0.5, "rewards/chosen": -2.541229486465454, "rewards/margins": 2.9009673595428467, "rewards/rejected": -5.442196846008301, "step": 4363 }, { "epoch": 0.68, "learning_rate": 1.0946599927382125e-05, "logits/chosen": -2.8262109756469727, "logits/rejected": -2.329979419708252, "logps/chosen": -242.7859649658203, "logps/rejected": -319.51458740234375, "loss": 3.3049, "rewards/accuracies": 0.0, "rewards/chosen": -5.072802543640137, "rewards/margins": -3.1080715656280518, "rewards/rejected": -1.9647308588027954, "step": 4364 }, { "epoch": 0.68, "learning_rate": 1.0945866486850977e-05, "logits/chosen": -2.1454861164093018, "logits/rejected": -2.9959981441497803, "logps/chosen": -159.21046447753906, "logps/rejected": -276.4322509765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6819965839385986, "rewards/margins": 7.575708389282227, "rewards/rejected": -9.257704734802246, "step": 4365 }, { "epoch": 0.68, "learning_rate": 1.0945133046319829e-05, "logits/chosen": -2.1611177921295166, "logits/rejected": -2.924621820449829, "logps/chosen": -233.19821166992188, "logps/rejected": -517.962890625, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -2.5774474143981934, "rewards/margins": 5.87360954284668, "rewards/rejected": -8.451057434082031, "step": 4366 }, { "epoch": 0.68, "learning_rate": 1.0944399605788682e-05, "logits/chosen": -2.083425998687744, "logits/rejected": -2.994666337966919, "logps/chosen": -501.28192138671875, "logps/rejected": -629.8563842773438, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.5235443115234375, "rewards/margins": 6.887416362762451, "rewards/rejected": -8.41096019744873, "step": 4367 }, { "epoch": 0.68, "learning_rate": 1.0943666165257534e-05, "logits/chosen": -3.026294469833374, "logits/rejected": -2.637561559677124, "logps/chosen": -505.1465759277344, "logps/rejected": -441.0657958984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.4178043305873871, "rewards/margins": 8.748364448547363, "rewards/rejected": -9.166168212890625, "step": 4368 }, { "epoch": 0.68, "learning_rate": 1.0942932724726388e-05, "logits/chosen": -2.0251142978668213, "logits/rejected": -3.036339521408081, "logps/chosen": -624.81005859375, "logps/rejected": -636.3472900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.397988885641098, "rewards/margins": 9.80249309539795, "rewards/rejected": -10.200482368469238, "step": 4369 }, { "epoch": 0.68, "learning_rate": 1.094219928419524e-05, "logits/chosen": -2.860504388809204, "logits/rejected": -3.0940847396850586, "logps/chosen": -199.80511474609375, "logps/rejected": -398.931640625, "loss": 1.9565, "rewards/accuracies": 0.5, "rewards/chosen": -2.591160774230957, "rewards/margins": 2.7415552139282227, "rewards/rejected": -5.33271598815918, "step": 4370 }, { "epoch": 0.68, "learning_rate": 1.0941465843664092e-05, "logits/chosen": -1.4649344682693481, "logits/rejected": -2.7986650466918945, "logps/chosen": -110.28523254394531, "logps/rejected": -342.4700622558594, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.5975229740142822, "rewards/margins": 7.102208614349365, "rewards/rejected": -8.699731826782227, "step": 4371 }, { "epoch": 0.68, "learning_rate": 1.0940732403132944e-05, "logits/chosen": -2.746842622756958, "logits/rejected": -2.3155710697174072, "logps/chosen": -168.28257751464844, "logps/rejected": -182.74429321289062, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.0634400844573975, "rewards/margins": 6.306825160980225, "rewards/rejected": -8.370265007019043, "step": 4372 }, { "epoch": 0.68, "learning_rate": 1.0939998962601795e-05, "logits/chosen": -2.1889467239379883, "logits/rejected": -2.945948839187622, "logps/chosen": -110.76686096191406, "logps/rejected": -208.81658935546875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.3035271167755127, "rewards/margins": 6.088215351104736, "rewards/rejected": -7.391742706298828, "step": 4373 }, { "epoch": 0.68, "learning_rate": 1.0939265522070647e-05, "logits/chosen": -2.7766027450561523, "logits/rejected": -3.0114364624023438, "logps/chosen": -701.315673828125, "logps/rejected": -536.2294921875, "loss": 3.7591, "rewards/accuracies": 0.5, "rewards/chosen": -5.426227569580078, "rewards/margins": 0.43728113174438477, "rewards/rejected": -5.863508701324463, "step": 4374 }, { "epoch": 0.68, "learning_rate": 1.09385320815395e-05, "logits/chosen": -2.693715810775757, "logits/rejected": -2.5749542713165283, "logps/chosen": -422.2649230957031, "logps/rejected": -391.00994873046875, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -2.052827835083008, "rewards/margins": 4.312918663024902, "rewards/rejected": -6.36574649810791, "step": 4375 }, { "epoch": 0.68, "learning_rate": 1.0937798641008351e-05, "logits/chosen": -2.5310120582580566, "logits/rejected": -2.91479754447937, "logps/chosen": -184.95346069335938, "logps/rejected": -278.58319091796875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.2861175537109375, "rewards/margins": 5.920597076416016, "rewards/rejected": -7.206714630126953, "step": 4376 }, { "epoch": 0.68, "learning_rate": 1.0937065200477203e-05, "logits/chosen": -2.613823652267456, "logits/rejected": -2.911616325378418, "logps/chosen": -203.96102905273438, "logps/rejected": -135.1348114013672, "loss": 3.4637, "rewards/accuracies": 0.5, "rewards/chosen": -4.448415279388428, "rewards/margins": -0.7023193836212158, "rewards/rejected": -3.746096134185791, "step": 4377 }, { "epoch": 0.68, "learning_rate": 1.0936331759946057e-05, "logits/chosen": -2.879916191101074, "logits/rejected": -1.6659899950027466, "logps/chosen": -443.6889953613281, "logps/rejected": -298.68792724609375, "loss": 2.2891, "rewards/accuracies": 0.5, "rewards/chosen": -4.064914226531982, "rewards/margins": 0.5105328559875488, "rewards/rejected": -4.575447082519531, "step": 4378 }, { "epoch": 0.68, "learning_rate": 1.0935598319414908e-05, "logits/chosen": -1.9851462841033936, "logits/rejected": -2.821322202682495, "logps/chosen": -195.4229278564453, "logps/rejected": -337.15185546875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.3007980585098267, "rewards/margins": 6.499665260314941, "rewards/rejected": -7.800463676452637, "step": 4379 }, { "epoch": 0.68, "learning_rate": 1.093486487888376e-05, "logits/chosen": -2.9523050785064697, "logits/rejected": -2.968100070953369, "logps/chosen": -256.9555969238281, "logps/rejected": -290.5361328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5735963582992554, "rewards/margins": 7.888904094696045, "rewards/rejected": -8.46250057220459, "step": 4380 }, { "epoch": 0.68, "learning_rate": 1.0934131438352612e-05, "logits/chosen": -2.656388282775879, "logits/rejected": -2.9786362648010254, "logps/chosen": -63.083106994628906, "logps/rejected": -345.8568115234375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7937637567520142, "rewards/margins": 4.976921081542969, "rewards/rejected": -6.770684719085693, "step": 4381 }, { "epoch": 0.68, "learning_rate": 1.0933397997821464e-05, "logits/chosen": -2.629206657409668, "logits/rejected": -2.9665329456329346, "logps/chosen": -191.30816650390625, "logps/rejected": -180.29335021972656, "loss": 2.3776, "rewards/accuracies": 0.5, "rewards/chosen": -3.063181161880493, "rewards/margins": 1.4076871871948242, "rewards/rejected": -4.470868110656738, "step": 4382 }, { "epoch": 0.68, "learning_rate": 1.0932664557290316e-05, "logits/chosen": -2.22939133644104, "logits/rejected": -2.852121114730835, "logps/chosen": -136.3827667236328, "logps/rejected": -386.47064208984375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.8376133441925049, "rewards/margins": 6.209489822387695, "rewards/rejected": -8.047103881835938, "step": 4383 }, { "epoch": 0.68, "learning_rate": 1.0931931116759168e-05, "logits/chosen": -2.8899753093719482, "logits/rejected": -2.8951733112335205, "logps/chosen": -188.18988037109375, "logps/rejected": -358.70050048828125, "loss": 0.6959, "rewards/accuracies": 0.5, "rewards/chosen": -1.1023986339569092, "rewards/margins": 1.8251824378967285, "rewards/rejected": -2.9275810718536377, "step": 4384 }, { "epoch": 0.68, "learning_rate": 1.093119767622802e-05, "logits/chosen": -2.090487003326416, "logits/rejected": -3.079407215118408, "logps/chosen": -114.78590393066406, "logps/rejected": -301.52740478515625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.510622501373291, "rewards/margins": 6.516687870025635, "rewards/rejected": -9.027310371398926, "step": 4385 }, { "epoch": 0.68, "learning_rate": 1.0930464235696872e-05, "logits/chosen": -2.9139368534088135, "logits/rejected": -2.329030752182007, "logps/chosen": -196.7390594482422, "logps/rejected": -323.5586853027344, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6675695180892944, "rewards/margins": 7.240652084350586, "rewards/rejected": -8.908222198486328, "step": 4386 }, { "epoch": 0.68, "learning_rate": 1.0929730795165725e-05, "logits/chosen": -3.04314923286438, "logits/rejected": -3.1530745029449463, "logps/chosen": -451.7998046875, "logps/rejected": -474.8854675292969, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.5983176231384277, "rewards/margins": 6.982421398162842, "rewards/rejected": -9.58073902130127, "step": 4387 }, { "epoch": 0.68, "learning_rate": 1.0928997354634577e-05, "logits/chosen": -2.0325093269348145, "logits/rejected": -2.9081380367279053, "logps/chosen": -196.01687622070312, "logps/rejected": -427.4500732421875, "loss": 0.0898, "rewards/accuracies": 1.0, "rewards/chosen": -1.2264220714569092, "rewards/margins": 3.5008530616760254, "rewards/rejected": -4.727275371551514, "step": 4388 }, { "epoch": 0.68, "learning_rate": 1.0928263914103429e-05, "logits/chosen": -2.059032917022705, "logits/rejected": -2.8299429416656494, "logps/chosen": -139.57388305664062, "logps/rejected": -163.92503356933594, "loss": 1.8687, "rewards/accuracies": 0.5, "rewards/chosen": -3.6970748901367188, "rewards/margins": 2.0762381553649902, "rewards/rejected": -5.773313045501709, "step": 4389 }, { "epoch": 0.68, "learning_rate": 1.092753047357228e-05, "logits/chosen": -1.7729605436325073, "logits/rejected": -2.816861867904663, "logps/chosen": -46.93666458129883, "logps/rejected": -203.9397430419922, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3851573467254639, "rewards/margins": 8.22726058959961, "rewards/rejected": -9.612419128417969, "step": 4390 }, { "epoch": 0.68, "learning_rate": 1.0926797033041133e-05, "logits/chosen": -0.8884619474411011, "logits/rejected": -1.867261528968811, "logps/chosen": -157.20846557617188, "logps/rejected": -295.1800537109375, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -2.345702886581421, "rewards/margins": 4.666726112365723, "rewards/rejected": -7.012429237365723, "step": 4391 }, { "epoch": 0.68, "learning_rate": 1.0926063592509985e-05, "logits/chosen": -2.860736608505249, "logits/rejected": -3.3865232467651367, "logps/chosen": -66.0019302368164, "logps/rejected": -214.84796142578125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.9155819416046143, "rewards/margins": 5.290783882141113, "rewards/rejected": -6.206366062164307, "step": 4392 }, { "epoch": 0.68, "learning_rate": 1.0925330151978836e-05, "logits/chosen": -1.9881174564361572, "logits/rejected": -3.1570839881896973, "logps/chosen": -613.7693481445312, "logps/rejected": -766.5826416015625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.4313160181045532, "rewards/margins": 6.644833087921143, "rewards/rejected": -8.076148986816406, "step": 4393 }, { "epoch": 0.68, "learning_rate": 1.0924596711447688e-05, "logits/chosen": -2.28415846824646, "logits/rejected": -2.8733391761779785, "logps/chosen": -51.20083999633789, "logps/rejected": -126.49877166748047, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -2.6298861503601074, "rewards/margins": 4.1846208572387695, "rewards/rejected": -6.814507007598877, "step": 4394 }, { "epoch": 0.68, "learning_rate": 1.092386327091654e-05, "logits/chosen": -1.2926164865493774, "logits/rejected": -2.9031081199645996, "logps/chosen": -67.67974090576172, "logps/rejected": -307.51953125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9046182632446289, "rewards/margins": 6.758331775665283, "rewards/rejected": -7.66295051574707, "step": 4395 }, { "epoch": 0.68, "learning_rate": 1.0923129830385394e-05, "logits/chosen": -2.042379140853882, "logits/rejected": -2.514457941055298, "logps/chosen": -128.3444366455078, "logps/rejected": -268.5186767578125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -2.305759906768799, "rewards/margins": 5.871546745300293, "rewards/rejected": -8.177306175231934, "step": 4396 }, { "epoch": 0.68, "learning_rate": 1.0922396389854246e-05, "logits/chosen": -2.9281399250030518, "logits/rejected": -2.904409885406494, "logps/chosen": -111.56364440917969, "logps/rejected": -233.12582397460938, "loss": 0.3259, "rewards/accuracies": 1.0, "rewards/chosen": -2.8899729251861572, "rewards/margins": 2.3670544624328613, "rewards/rejected": -5.257027626037598, "step": 4397 }, { "epoch": 0.68, "learning_rate": 1.0921662949323097e-05, "logits/chosen": -2.4864001274108887, "logits/rejected": -2.686551332473755, "logps/chosen": -578.2544555664062, "logps/rejected": -531.685791015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.7713772058486938, "rewards/margins": 7.845561981201172, "rewards/rejected": -8.616939544677734, "step": 4398 }, { "epoch": 0.68, "learning_rate": 1.092092950879195e-05, "logits/chosen": -2.7479631900787354, "logits/rejected": -2.9195706844329834, "logps/chosen": -126.46112060546875, "logps/rejected": -192.89239501953125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.9431657791137695, "rewards/margins": 5.147908687591553, "rewards/rejected": -8.091073989868164, "step": 4399 }, { "epoch": 0.68, "learning_rate": 1.0920196068260801e-05, "logits/chosen": -1.509218454360962, "logits/rejected": -2.8484537601470947, "logps/chosen": -83.9909896850586, "logps/rejected": -294.74517822265625, "loss": 0.2642, "rewards/accuracies": 1.0, "rewards/chosen": -2.4614322185516357, "rewards/margins": 3.152899742126465, "rewards/rejected": -5.61433219909668, "step": 4400 }, { "epoch": 0.68, "learning_rate": 1.0919462627729655e-05, "logits/chosen": -0.7708377242088318, "logits/rejected": -2.395221710205078, "logps/chosen": -171.1340789794922, "logps/rejected": -439.9609375, "loss": 3.7565, "rewards/accuracies": 0.5, "rewards/chosen": -5.163268089294434, "rewards/margins": 2.4911813735961914, "rewards/rejected": -7.654449462890625, "step": 4401 }, { "epoch": 0.68, "learning_rate": 1.0918729187198507e-05, "logits/chosen": -2.6750876903533936, "logits/rejected": -2.871861219406128, "logps/chosen": -75.73760986328125, "logps/rejected": -213.6415557861328, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": -1.6062500476837158, "rewards/margins": 2.9783074855804443, "rewards/rejected": -4.58455753326416, "step": 4402 }, { "epoch": 0.68, "learning_rate": 1.0917995746667359e-05, "logits/chosen": -2.6929550170898438, "logits/rejected": -1.3223739862442017, "logps/chosen": -264.43072509765625, "logps/rejected": -251.1310577392578, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.356715440750122, "rewards/margins": 6.098688125610352, "rewards/rejected": -8.455404281616211, "step": 4403 }, { "epoch": 0.68, "learning_rate": 1.091726230613621e-05, "logits/chosen": -2.595280885696411, "logits/rejected": -2.7566330432891846, "logps/chosen": -48.49772262573242, "logps/rejected": -178.41726684570312, "loss": 0.1628, "rewards/accuracies": 1.0, "rewards/chosen": -1.3124090433120728, "rewards/margins": 2.671281337738037, "rewards/rejected": -3.9836902618408203, "step": 4404 }, { "epoch": 0.69, "learning_rate": 1.0916528865605064e-05, "logits/chosen": -2.4885029792785645, "logits/rejected": -2.7334256172180176, "logps/chosen": -215.68824768066406, "logps/rejected": -365.22998046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6004421710968018, "rewards/margins": 8.753473281860352, "rewards/rejected": -9.353915214538574, "step": 4405 }, { "epoch": 0.69, "learning_rate": 1.0915795425073916e-05, "logits/chosen": -1.7313470840454102, "logits/rejected": -3.058788537979126, "logps/chosen": -149.33377075195312, "logps/rejected": -457.7637939453125, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -2.675811529159546, "rewards/margins": 4.921872138977051, "rewards/rejected": -7.597683906555176, "step": 4406 }, { "epoch": 0.69, "learning_rate": 1.0915061984542768e-05, "logits/chosen": -2.5271480083465576, "logits/rejected": -2.9537670612335205, "logps/chosen": -196.43008422851562, "logps/rejected": -307.46661376953125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4337189197540283, "rewards/margins": 5.909355163574219, "rewards/rejected": -7.343073844909668, "step": 4407 }, { "epoch": 0.69, "learning_rate": 1.091432854401162e-05, "logits/chosen": -2.233825445175171, "logits/rejected": -2.985631227493286, "logps/chosen": -496.3935852050781, "logps/rejected": -586.1389770507812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7279857993125916, "rewards/margins": 7.713100433349609, "rewards/rejected": -8.441085815429688, "step": 4408 }, { "epoch": 0.69, "learning_rate": 1.0913595103480472e-05, "logits/chosen": -2.0307259559631348, "logits/rejected": -2.7521824836730957, "logps/chosen": -234.22047424316406, "logps/rejected": -409.9844970703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6718649864196777, "rewards/margins": 9.613679885864258, "rewards/rejected": -11.285545349121094, "step": 4409 }, { "epoch": 0.69, "learning_rate": 1.0912861662949323e-05, "logits/chosen": -2.580246686935425, "logits/rejected": -2.62298583984375, "logps/chosen": -210.71090698242188, "logps/rejected": -317.478759765625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.168481469154358, "rewards/margins": 5.196037292480469, "rewards/rejected": -6.364519119262695, "step": 4410 }, { "epoch": 0.69, "learning_rate": 1.0912128222418175e-05, "logits/chosen": -2.7373785972595215, "logits/rejected": -3.127857208251953, "logps/chosen": -186.80374145507812, "logps/rejected": -316.2352294921875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4505623579025269, "rewards/margins": 6.030928611755371, "rewards/rejected": -7.4814910888671875, "step": 4411 }, { "epoch": 0.69, "learning_rate": 1.0911394781887027e-05, "logits/chosen": -1.9959758520126343, "logits/rejected": -2.8755462169647217, "logps/chosen": -38.51640701293945, "logps/rejected": -171.91616821289062, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.2429938316345215, "rewards/margins": 5.7553558349609375, "rewards/rejected": -6.998350143432617, "step": 4412 }, { "epoch": 0.69, "learning_rate": 1.0910661341355879e-05, "logits/chosen": -2.8475255966186523, "logits/rejected": -2.355454683303833, "logps/chosen": -638.4127807617188, "logps/rejected": -426.4122619628906, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.1321208477020264, "rewards/margins": 6.376169681549072, "rewards/rejected": -7.5082902908325195, "step": 4413 }, { "epoch": 0.69, "learning_rate": 1.0909927900824733e-05, "logits/chosen": -3.1327316761016846, "logits/rejected": -2.4290835857391357, "logps/chosen": -409.1719665527344, "logps/rejected": -292.2498779296875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.52916419506073, "rewards/margins": 8.241786003112793, "rewards/rejected": -9.770950317382812, "step": 4414 }, { "epoch": 0.69, "learning_rate": 1.0909194460293584e-05, "logits/chosen": -2.7944672107696533, "logits/rejected": -3.0024473667144775, "logps/chosen": -361.0946960449219, "logps/rejected": -371.1951904296875, "loss": 4.1693, "rewards/accuracies": 0.5, "rewards/chosen": -5.419685363769531, "rewards/margins": -0.38593173027038574, "rewards/rejected": -5.033753871917725, "step": 4415 }, { "epoch": 0.69, "learning_rate": 1.0908461019762436e-05, "logits/chosen": -3.11110520362854, "logits/rejected": -2.9617631435394287, "logps/chosen": -141.56956481933594, "logps/rejected": -146.23719787597656, "loss": 3.8706, "rewards/accuracies": 0.5, "rewards/chosen": -4.641565322875977, "rewards/margins": -1.6861648559570312, "rewards/rejected": -2.955400228500366, "step": 4416 }, { "epoch": 0.69, "learning_rate": 1.0907727579231288e-05, "logits/chosen": -2.9628219604492188, "logits/rejected": -2.82368803024292, "logps/chosen": -146.01377868652344, "logps/rejected": -116.48661804199219, "loss": 1.6502, "rewards/accuracies": 0.5, "rewards/chosen": -2.994999647140503, "rewards/margins": 3.1705832481384277, "rewards/rejected": -6.165582656860352, "step": 4417 }, { "epoch": 0.69, "learning_rate": 1.090699413870014e-05, "logits/chosen": -2.651925802230835, "logits/rejected": -2.440277099609375, "logps/chosen": -148.39083862304688, "logps/rejected": -209.6029815673828, "loss": 4.2946, "rewards/accuracies": 0.5, "rewards/chosen": -5.991346836090088, "rewards/margins": -0.10948419570922852, "rewards/rejected": -5.881862640380859, "step": 4418 }, { "epoch": 0.69, "learning_rate": 1.0906260698168992e-05, "logits/chosen": -2.92829966545105, "logits/rejected": -1.8542776107788086, "logps/chosen": -303.9165954589844, "logps/rejected": -119.97254943847656, "loss": 1.7381, "rewards/accuracies": 0.5, "rewards/chosen": -3.166914224624634, "rewards/margins": 2.148135185241699, "rewards/rejected": -5.315049648284912, "step": 4419 }, { "epoch": 0.69, "learning_rate": 1.0905527257637844e-05, "logits/chosen": -2.916104555130005, "logits/rejected": -2.813891649246216, "logps/chosen": -279.2322082519531, "logps/rejected": -398.5546875, "loss": 2.4416, "rewards/accuracies": 0.5, "rewards/chosen": -5.102384090423584, "rewards/margins": 0.2669687271118164, "rewards/rejected": -5.3693528175354, "step": 4420 }, { "epoch": 0.69, "learning_rate": 1.0904793817106696e-05, "logits/chosen": -2.122100591659546, "logits/rejected": -3.0788793563842773, "logps/chosen": -179.10006713867188, "logps/rejected": -298.66436767578125, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": -0.9924383163452148, "rewards/margins": 3.7997827529907227, "rewards/rejected": -4.7922210693359375, "step": 4421 }, { "epoch": 0.69, "learning_rate": 1.0904060376575548e-05, "logits/chosen": -1.6448196172714233, "logits/rejected": -3.0072033405303955, "logps/chosen": -55.38221740722656, "logps/rejected": -254.99148559570312, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.7286451458930969, "rewards/margins": 5.430903911590576, "rewards/rejected": -6.159548759460449, "step": 4422 }, { "epoch": 0.69, "learning_rate": 1.0903326936044401e-05, "logits/chosen": -2.540952682495117, "logits/rejected": -2.993760108947754, "logps/chosen": -31.450693130493164, "logps/rejected": -149.38616943359375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.9361274838447571, "rewards/margins": 5.837895393371582, "rewards/rejected": -6.774022579193115, "step": 4423 }, { "epoch": 0.69, "learning_rate": 1.0902593495513253e-05, "logits/chosen": -3.415846824645996, "logits/rejected": -3.0044376850128174, "logps/chosen": -132.6978302001953, "logps/rejected": -79.16914367675781, "loss": 0.639, "rewards/accuracies": 0.5, "rewards/chosen": -1.6655110120773315, "rewards/margins": 2.704277276992798, "rewards/rejected": -4.36978816986084, "step": 4424 }, { "epoch": 0.69, "learning_rate": 1.0901860054982105e-05, "logits/chosen": -2.5920889377593994, "logits/rejected": -3.0617570877075195, "logps/chosen": -157.52212524414062, "logps/rejected": -300.4630126953125, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -1.3523304462432861, "rewards/margins": 4.366564750671387, "rewards/rejected": -5.718894958496094, "step": 4425 }, { "epoch": 0.69, "learning_rate": 1.0901126614450957e-05, "logits/chosen": -1.7033137083053589, "logits/rejected": -2.938325881958008, "logps/chosen": -142.85906982421875, "logps/rejected": -311.7662048339844, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -2.689375400543213, "rewards/margins": 4.667937278747559, "rewards/rejected": -7.35731315612793, "step": 4426 }, { "epoch": 0.69, "learning_rate": 1.0900393173919809e-05, "logits/chosen": -2.9520630836486816, "logits/rejected": -3.0758721828460693, "logps/chosen": -177.26194763183594, "logps/rejected": -112.6548080444336, "loss": 2.8995, "rewards/accuracies": 0.5, "rewards/chosen": -3.624235153198242, "rewards/margins": -1.0006963014602661, "rewards/rejected": -2.6235389709472656, "step": 4427 }, { "epoch": 0.69, "learning_rate": 1.089965973338866e-05, "logits/chosen": -2.8271682262420654, "logits/rejected": -2.6366639137268066, "logps/chosen": -198.1642608642578, "logps/rejected": -167.96446228027344, "loss": 1.4214, "rewards/accuracies": 0.5, "rewards/chosen": -3.89876651763916, "rewards/margins": 2.1148526668548584, "rewards/rejected": -6.013619422912598, "step": 4428 }, { "epoch": 0.69, "learning_rate": 1.0898926292857512e-05, "logits/chosen": -1.6506997346878052, "logits/rejected": -2.831825017929077, "logps/chosen": -32.36327362060547, "logps/rejected": -250.75369262695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8932710289955139, "rewards/margins": 6.656705856323242, "rewards/rejected": -7.549976348876953, "step": 4429 }, { "epoch": 0.69, "learning_rate": 1.0898192852326364e-05, "logits/chosen": -3.0058023929595947, "logits/rejected": -2.941265106201172, "logps/chosen": -674.530517578125, "logps/rejected": -363.1963195800781, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.0030033588409424, "rewards/margins": 6.084359169006348, "rewards/rejected": -7.087362766265869, "step": 4430 }, { "epoch": 0.69, "learning_rate": 1.0897459411795216e-05, "logits/chosen": -1.5002498626708984, "logits/rejected": -2.675499677658081, "logps/chosen": -96.04092407226562, "logps/rejected": -337.3748779296875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.0529297590255737, "rewards/margins": 5.3014726638793945, "rewards/rejected": -6.354402542114258, "step": 4431 }, { "epoch": 0.69, "learning_rate": 1.089672597126407e-05, "logits/chosen": -1.7194006443023682, "logits/rejected": -2.773589611053467, "logps/chosen": -110.6202163696289, "logps/rejected": -343.4123229980469, "loss": 1.2504, "rewards/accuracies": 0.5, "rewards/chosen": -3.34136962890625, "rewards/margins": 1.0018868446350098, "rewards/rejected": -4.34325647354126, "step": 4432 }, { "epoch": 0.69, "learning_rate": 1.0895992530732922e-05, "logits/chosen": -2.0125672817230225, "logits/rejected": -3.0778214931488037, "logps/chosen": -469.41339111328125, "logps/rejected": -644.5460205078125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.315525770187378, "rewards/margins": 5.705522060394287, "rewards/rejected": -7.021048069000244, "step": 4433 }, { "epoch": 0.69, "learning_rate": 1.0895259090201774e-05, "logits/chosen": -2.311220645904541, "logits/rejected": -2.9158356189727783, "logps/chosen": -279.9156799316406, "logps/rejected": -386.969970703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6946243643760681, "rewards/margins": 7.0320916175842285, "rewards/rejected": -7.726716041564941, "step": 4434 }, { "epoch": 0.69, "learning_rate": 1.0894525649670627e-05, "logits/chosen": -1.7477078437805176, "logits/rejected": -2.735313653945923, "logps/chosen": -166.07290649414062, "logps/rejected": -269.0169372558594, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -2.518916368484497, "rewards/margins": 4.520132064819336, "rewards/rejected": -7.039048194885254, "step": 4435 }, { "epoch": 0.69, "learning_rate": 1.0893792209139479e-05, "logits/chosen": -2.673617362976074, "logits/rejected": -1.8916561603546143, "logps/chosen": -579.9371948242188, "logps/rejected": -399.2646179199219, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.4970619678497314, "rewards/margins": 4.44866943359375, "rewards/rejected": -5.945731163024902, "step": 4436 }, { "epoch": 0.69, "learning_rate": 1.0893058768608331e-05, "logits/chosen": -2.6262729167938232, "logits/rejected": -1.7434968948364258, "logps/chosen": -94.21849822998047, "logps/rejected": -298.1563720703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9671615362167358, "rewards/margins": 7.352971076965332, "rewards/rejected": -8.320133209228516, "step": 4437 }, { "epoch": 0.69, "learning_rate": 1.0892325328077183e-05, "logits/chosen": -3.1150498390197754, "logits/rejected": -2.049574136734009, "logps/chosen": -657.8831787109375, "logps/rejected": -453.96630859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.915313720703125, "rewards/margins": 7.982914924621582, "rewards/rejected": -9.898228645324707, "step": 4438 }, { "epoch": 0.69, "learning_rate": 1.0891591887546035e-05, "logits/chosen": -3.0539493560791016, "logits/rejected": -2.8503613471984863, "logps/chosen": -421.6688232421875, "logps/rejected": -460.9792785644531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5064041018486023, "rewards/margins": 9.151087760925293, "rewards/rejected": -9.657491683959961, "step": 4439 }, { "epoch": 0.69, "learning_rate": 1.0890858447014887e-05, "logits/chosen": -2.073784828186035, "logits/rejected": -2.8326759338378906, "logps/chosen": -653.9453125, "logps/rejected": -735.0384521484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.6046676635742188, "rewards/margins": 7.483027458190918, "rewards/rejected": -9.087695121765137, "step": 4440 }, { "epoch": 0.69, "learning_rate": 1.089012500648374e-05, "logits/chosen": -1.3392333984375, "logits/rejected": -2.576303243637085, "logps/chosen": -127.80796813964844, "logps/rejected": -260.377685546875, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -2.995763063430786, "rewards/margins": 4.221931457519531, "rewards/rejected": -7.217694282531738, "step": 4441 }, { "epoch": 0.69, "learning_rate": 1.0889391565952592e-05, "logits/chosen": -2.2054548263549805, "logits/rejected": -3.0552618503570557, "logps/chosen": -329.0524597167969, "logps/rejected": -462.02716064453125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.048062324523926, "rewards/margins": 6.6293745040893555, "rewards/rejected": -9.677436828613281, "step": 4442 }, { "epoch": 0.69, "learning_rate": 1.0888658125421444e-05, "logits/chosen": -2.5314834117889404, "logits/rejected": -3.0918798446655273, "logps/chosen": -139.38369750976562, "logps/rejected": -162.8009033203125, "loss": 2.992, "rewards/accuracies": 0.5, "rewards/chosen": -5.369046211242676, "rewards/margins": -2.4634475708007812, "rewards/rejected": -2.9055988788604736, "step": 4443 }, { "epoch": 0.69, "learning_rate": 1.0887924684890296e-05, "logits/chosen": -2.498363733291626, "logits/rejected": -3.2175452709198, "logps/chosen": -250.3031768798828, "logps/rejected": -391.7378845214844, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.5786302089691162, "rewards/margins": 6.450085639953613, "rewards/rejected": -8.028716087341309, "step": 4444 }, { "epoch": 0.69, "learning_rate": 1.0887191244359148e-05, "logits/chosen": -2.444209337234497, "logits/rejected": -3.066696882247925, "logps/chosen": -187.8900146484375, "logps/rejected": -411.9892272949219, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4324662685394287, "rewards/margins": 6.641099452972412, "rewards/rejected": -8.073565483093262, "step": 4445 }, { "epoch": 0.69, "learning_rate": 1.0886457803828e-05, "logits/chosen": -1.8326537609100342, "logits/rejected": -3.0273985862731934, "logps/chosen": -211.76202392578125, "logps/rejected": -475.1774597167969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0479881763458252, "rewards/margins": 8.161871910095215, "rewards/rejected": -9.209859848022461, "step": 4446 }, { "epoch": 0.69, "learning_rate": 1.0885724363296851e-05, "logits/chosen": -1.2513129711151123, "logits/rejected": -2.776507616043091, "logps/chosen": -134.70555114746094, "logps/rejected": -452.07940673828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.033524990081787, "rewards/margins": 9.143043518066406, "rewards/rejected": -10.176568984985352, "step": 4447 }, { "epoch": 0.69, "learning_rate": 1.0884990922765703e-05, "logits/chosen": -1.3393455743789673, "logits/rejected": -2.770630359649658, "logps/chosen": -192.54148864746094, "logps/rejected": -368.21160888671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9188510775566101, "rewards/margins": 7.248301029205322, "rewards/rejected": -8.167152404785156, "step": 4448 }, { "epoch": 0.69, "learning_rate": 1.0884257482234555e-05, "logits/chosen": -1.6077080965042114, "logits/rejected": -2.3823659420013428, "logps/chosen": -152.1192169189453, "logps/rejected": -455.3538513183594, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.731250286102295, "rewards/margins": 10.441082000732422, "rewards/rejected": -12.172332763671875, "step": 4449 }, { "epoch": 0.69, "learning_rate": 1.0883524041703409e-05, "logits/chosen": -2.7917604446411133, "logits/rejected": -2.9260318279266357, "logps/chosen": -68.83812713623047, "logps/rejected": -226.64456176757812, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.4062060117721558, "rewards/margins": 5.0793962478637695, "rewards/rejected": -6.485602378845215, "step": 4450 }, { "epoch": 0.69, "learning_rate": 1.088279060117226e-05, "logits/chosen": -1.7242798805236816, "logits/rejected": -3.0551233291625977, "logps/chosen": -220.02175903320312, "logps/rejected": -339.99664306640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.796858310699463, "rewards/margins": 6.885924339294434, "rewards/rejected": -8.682782173156738, "step": 4451 }, { "epoch": 0.69, "learning_rate": 1.0882057160641112e-05, "logits/chosen": -1.5421844720840454, "logits/rejected": -2.8190815448760986, "logps/chosen": -104.59695434570312, "logps/rejected": -358.7690734863281, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0601449012756348, "rewards/margins": 7.636984825134277, "rewards/rejected": -8.69713020324707, "step": 4452 }, { "epoch": 0.69, "learning_rate": 1.0881323720109964e-05, "logits/chosen": -2.6463677883148193, "logits/rejected": -2.6489126682281494, "logps/chosen": -148.7978057861328, "logps/rejected": -187.11146545410156, "loss": 2.4298, "rewards/accuracies": 0.5, "rewards/chosen": -3.95648193359375, "rewards/margins": 0.4526829719543457, "rewards/rejected": -4.409164905548096, "step": 4453 }, { "epoch": 0.69, "learning_rate": 1.0880590279578816e-05, "logits/chosen": -2.216520071029663, "logits/rejected": -2.755918264389038, "logps/chosen": -108.90997314453125, "logps/rejected": -324.848388671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.4560619592666626, "rewards/margins": 7.203114032745361, "rewards/rejected": -8.659175872802734, "step": 4454 }, { "epoch": 0.69, "learning_rate": 1.0879856839047668e-05, "logits/chosen": -2.680819034576416, "logits/rejected": -1.8679847717285156, "logps/chosen": -499.53021240234375, "logps/rejected": -338.832275390625, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -2.7932281494140625, "rewards/margins": 4.429271221160889, "rewards/rejected": -7.222499370574951, "step": 4455 }, { "epoch": 0.69, "learning_rate": 1.087912339851652e-05, "logits/chosen": -1.2070561647415161, "logits/rejected": -2.696363687515259, "logps/chosen": -86.26434326171875, "logps/rejected": -394.7401428222656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0743234157562256, "rewards/margins": 9.480642318725586, "rewards/rejected": -11.55496597290039, "step": 4456 }, { "epoch": 0.69, "learning_rate": 1.0878389957985372e-05, "logits/chosen": -2.1088311672210693, "logits/rejected": -3.166393756866455, "logps/chosen": -108.13529968261719, "logps/rejected": -391.4676818847656, "loss": 0.7505, "rewards/accuracies": 0.5, "rewards/chosen": -3.3242568969726562, "rewards/margins": 3.6145572662353516, "rewards/rejected": -6.938814163208008, "step": 4457 }, { "epoch": 0.69, "learning_rate": 1.0877656517454225e-05, "logits/chosen": -2.887561798095703, "logits/rejected": -2.6622302532196045, "logps/chosen": -258.7065124511719, "logps/rejected": -168.73846435546875, "loss": 4.5067, "rewards/accuracies": 0.5, "rewards/chosen": -6.122796535491943, "rewards/margins": -2.1992380619049072, "rewards/rejected": -3.923558235168457, "step": 4458 }, { "epoch": 0.69, "learning_rate": 1.0876923076923077e-05, "logits/chosen": -1.9320305585861206, "logits/rejected": -2.8257758617401123, "logps/chosen": -79.19267272949219, "logps/rejected": -213.5181121826172, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.837127923965454, "rewards/margins": 5.617592811584473, "rewards/rejected": -7.454720497131348, "step": 4459 }, { "epoch": 0.69, "learning_rate": 1.087618963639193e-05, "logits/chosen": -3.1080238819122314, "logits/rejected": -3.1607470512390137, "logps/chosen": -194.18487548828125, "logps/rejected": -229.7399139404297, "loss": 2.7419, "rewards/accuracies": 0.5, "rewards/chosen": -4.194212913513184, "rewards/margins": 1.7951440811157227, "rewards/rejected": -5.989356994628906, "step": 4460 }, { "epoch": 0.69, "learning_rate": 1.0875456195860781e-05, "logits/chosen": -2.6591594219207764, "logits/rejected": -2.9524893760681152, "logps/chosen": -275.1082458496094, "logps/rejected": -642.3025512695312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5243141651153564, "rewards/margins": 10.672435760498047, "rewards/rejected": -12.19675064086914, "step": 4461 }, { "epoch": 0.69, "learning_rate": 1.0874722755329633e-05, "logits/chosen": -2.399075984954834, "logits/rejected": -3.0802371501922607, "logps/chosen": -143.0972900390625, "logps/rejected": -186.6699676513672, "loss": 0.9289, "rewards/accuracies": 0.5, "rewards/chosen": -2.106952667236328, "rewards/margins": 3.4609274864196777, "rewards/rejected": -5.567880153656006, "step": 4462 }, { "epoch": 0.69, "learning_rate": 1.0873989314798485e-05, "logits/chosen": -3.01737642288208, "logits/rejected": -2.196462631225586, "logps/chosen": -257.89111328125, "logps/rejected": -204.61888122558594, "loss": 3.6235, "rewards/accuracies": 0.5, "rewards/chosen": -5.355198383331299, "rewards/margins": -1.3790371417999268, "rewards/rejected": -3.976161479949951, "step": 4463 }, { "epoch": 0.69, "learning_rate": 1.0873255874267337e-05, "logits/chosen": -2.7978932857513428, "logits/rejected": -3.0398688316345215, "logps/chosen": -269.27435302734375, "logps/rejected": -427.451171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8399518728256226, "rewards/margins": 6.922563552856445, "rewards/rejected": -8.762516021728516, "step": 4464 }, { "epoch": 0.69, "learning_rate": 1.0872522433736189e-05, "logits/chosen": -3.0676114559173584, "logits/rejected": -2.9885878562927246, "logps/chosen": -143.04849243164062, "logps/rejected": -113.63063049316406, "loss": 2.792, "rewards/accuracies": 0.5, "rewards/chosen": -3.7473971843719482, "rewards/margins": 0.144852876663208, "rewards/rejected": -3.8922500610351562, "step": 4465 }, { "epoch": 0.69, "learning_rate": 1.087178899320504e-05, "logits/chosen": -2.02884578704834, "logits/rejected": -2.7757699489593506, "logps/chosen": -199.46856689453125, "logps/rejected": -460.511474609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.4855594635009766, "rewards/margins": 7.959156513214111, "rewards/rejected": -9.44471549987793, "step": 4466 }, { "epoch": 0.69, "learning_rate": 1.0871055552673894e-05, "logits/chosen": -1.7145392894744873, "logits/rejected": -2.902533531188965, "logps/chosen": -214.94322204589844, "logps/rejected": -560.5245971679688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0221961736679077, "rewards/margins": 9.715632438659668, "rewards/rejected": -10.737828254699707, "step": 4467 }, { "epoch": 0.69, "learning_rate": 1.0870322112142746e-05, "logits/chosen": -2.4041714668273926, "logits/rejected": -2.720844030380249, "logps/chosen": -114.7077407836914, "logps/rejected": -229.71527099609375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.5225932598114014, "rewards/margins": 6.4139723777771, "rewards/rejected": -7.936565399169922, "step": 4468 }, { "epoch": 0.7, "learning_rate": 1.08695886716116e-05, "logits/chosen": -2.905355453491211, "logits/rejected": -2.994262218475342, "logps/chosen": -34.138755798339844, "logps/rejected": -144.42942810058594, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.9227523803710938, "rewards/margins": 5.8981757164001465, "rewards/rejected": -6.820928573608398, "step": 4469 }, { "epoch": 0.7, "learning_rate": 1.0868855231080451e-05, "logits/chosen": -2.833336591720581, "logits/rejected": -2.6219594478607178, "logps/chosen": -397.53045654296875, "logps/rejected": -473.6186828613281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.0295143127441406, "rewards/margins": 8.644089698791504, "rewards/rejected": -11.673604011535645, "step": 4470 }, { "epoch": 0.7, "learning_rate": 1.0868121790549303e-05, "logits/chosen": -2.558350086212158, "logits/rejected": -2.9419944286346436, "logps/chosen": -654.8177490234375, "logps/rejected": -683.28857421875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -3.66461181640625, "rewards/margins": 6.412544250488281, "rewards/rejected": -10.077156066894531, "step": 4471 }, { "epoch": 0.7, "learning_rate": 1.0867388350018155e-05, "logits/chosen": -2.8605635166168213, "logits/rejected": -2.160548210144043, "logps/chosen": -480.64202880859375, "logps/rejected": -345.33447265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1089675426483154, "rewards/margins": 8.304473876953125, "rewards/rejected": -9.41344165802002, "step": 4472 }, { "epoch": 0.7, "learning_rate": 1.0866654909487007e-05, "logits/chosen": -1.974698543548584, "logits/rejected": -2.930898904800415, "logps/chosen": -117.20502471923828, "logps/rejected": -291.1432800292969, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -1.5532853603363037, "rewards/margins": 6.696820259094238, "rewards/rejected": -8.250104904174805, "step": 4473 }, { "epoch": 0.7, "learning_rate": 1.0865921468955859e-05, "logits/chosen": -2.5207395553588867, "logits/rejected": -3.2186226844787598, "logps/chosen": -48.4858283996582, "logps/rejected": -240.71786499023438, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.6238107681274414, "rewards/margins": 6.135607719421387, "rewards/rejected": -7.759418487548828, "step": 4474 }, { "epoch": 0.7, "learning_rate": 1.086518802842471e-05, "logits/chosen": -3.114658832550049, "logits/rejected": -2.3678269386291504, "logps/chosen": -335.52203369140625, "logps/rejected": -207.70712280273438, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.2313339710235596, "rewards/margins": 6.079815864562988, "rewards/rejected": -8.311149597167969, "step": 4475 }, { "epoch": 0.7, "learning_rate": 1.0864454587893564e-05, "logits/chosen": -2.8163998126983643, "logits/rejected": -2.86122727394104, "logps/chosen": -174.32778930664062, "logps/rejected": -188.96951293945312, "loss": 1.6291, "rewards/accuracies": 0.5, "rewards/chosen": -3.3532462120056152, "rewards/margins": 1.563186764717102, "rewards/rejected": -4.916432857513428, "step": 4476 }, { "epoch": 0.7, "learning_rate": 1.0863721147362416e-05, "logits/chosen": -2.9307644367218018, "logits/rejected": -2.1495935916900635, "logps/chosen": -406.22064208984375, "logps/rejected": -233.183349609375, "loss": 2.5951, "rewards/accuracies": 0.5, "rewards/chosen": -6.181466102600098, "rewards/margins": -0.3608212471008301, "rewards/rejected": -5.820644855499268, "step": 4477 }, { "epoch": 0.7, "learning_rate": 1.0862987706831268e-05, "logits/chosen": -2.7194788455963135, "logits/rejected": -2.9698078632354736, "logps/chosen": -244.125244140625, "logps/rejected": -203.5529022216797, "loss": 3.2496, "rewards/accuracies": 0.5, "rewards/chosen": -5.4826979637146, "rewards/margins": 0.1371138095855713, "rewards/rejected": -5.61981201171875, "step": 4478 }, { "epoch": 0.7, "learning_rate": 1.086225426630012e-05, "logits/chosen": -2.51820707321167, "logits/rejected": -2.861664056777954, "logps/chosen": -638.9674072265625, "logps/rejected": -559.9960327148438, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.9902374744415283, "rewards/margins": 6.270461082458496, "rewards/rejected": -9.260698318481445, "step": 4479 }, { "epoch": 0.7, "learning_rate": 1.0861520825768972e-05, "logits/chosen": -2.935037851333618, "logits/rejected": -3.31451678276062, "logps/chosen": -594.6793212890625, "logps/rejected": -1027.623291015625, "loss": 4.9476, "rewards/accuracies": 0.5, "rewards/chosen": -6.454921245574951, "rewards/margins": -2.128077983856201, "rewards/rejected": -4.32684326171875, "step": 4480 }, { "epoch": 0.7, "learning_rate": 1.0860787385237824e-05, "logits/chosen": -2.9650847911834717, "logits/rejected": -3.274606466293335, "logps/chosen": -77.97527313232422, "logps/rejected": -209.74951171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.128249168395996, "rewards/margins": 6.138874053955078, "rewards/rejected": -7.267123222351074, "step": 4481 }, { "epoch": 0.7, "learning_rate": 1.0860053944706676e-05, "logits/chosen": -2.308579683303833, "logits/rejected": -2.942469835281372, "logps/chosen": -385.8885192871094, "logps/rejected": -623.4019775390625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -1.525158405303955, "rewards/margins": 9.11270523071289, "rewards/rejected": -10.637863159179688, "step": 4482 }, { "epoch": 0.7, "learning_rate": 1.0859320504175527e-05, "logits/chosen": -2.4321587085723877, "logits/rejected": -3.2360265254974365, "logps/chosen": -162.79409790039062, "logps/rejected": -291.1781005859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.28444594144821167, "rewards/margins": 7.495943069458008, "rewards/rejected": -7.780389308929443, "step": 4483 }, { "epoch": 0.7, "learning_rate": 1.085858706364438e-05, "logits/chosen": -2.723700761795044, "logits/rejected": -3.0107245445251465, "logps/chosen": -241.27328491210938, "logps/rejected": -274.30419921875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.9954384565353394, "rewards/margins": 5.163736820220947, "rewards/rejected": -7.159174919128418, "step": 4484 }, { "epoch": 0.7, "learning_rate": 1.0857853623113233e-05, "logits/chosen": -2.9823544025421143, "logits/rejected": -3.236255168914795, "logps/chosen": -141.13653564453125, "logps/rejected": -279.815673828125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.5926663875579834, "rewards/margins": 6.7386064529418945, "rewards/rejected": -8.331273078918457, "step": 4485 }, { "epoch": 0.7, "learning_rate": 1.0857120182582085e-05, "logits/chosen": -2.547422170639038, "logits/rejected": -2.9324285984039307, "logps/chosen": -216.25537109375, "logps/rejected": -225.34544372558594, "loss": 3.9433, "rewards/accuracies": 0.5, "rewards/chosen": -4.925843715667725, "rewards/margins": -0.2738645076751709, "rewards/rejected": -4.651978969573975, "step": 4486 }, { "epoch": 0.7, "learning_rate": 1.0856386742050937e-05, "logits/chosen": -1.6555835008621216, "logits/rejected": -1.9773675203323364, "logps/chosen": -120.54962158203125, "logps/rejected": -162.79244995117188, "loss": 1.4723, "rewards/accuracies": 0.5, "rewards/chosen": -2.6239235401153564, "rewards/margins": 2.7324881553649902, "rewards/rejected": -5.356411457061768, "step": 4487 }, { "epoch": 0.7, "learning_rate": 1.0855653301519789e-05, "logits/chosen": -2.7838094234466553, "logits/rejected": -2.915921926498413, "logps/chosen": -445.2430725097656, "logps/rejected": -493.9790344238281, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.477133274078369, "rewards/margins": 6.993365287780762, "rewards/rejected": -9.470499038696289, "step": 4488 }, { "epoch": 0.7, "learning_rate": 1.085491986098864e-05, "logits/chosen": -2.522371768951416, "logits/rejected": -3.086854934692383, "logps/chosen": -205.55381774902344, "logps/rejected": -440.89544677734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.1041641235351562, "rewards/margins": 7.546213150024414, "rewards/rejected": -9.65037727355957, "step": 4489 }, { "epoch": 0.7, "learning_rate": 1.0854186420457492e-05, "logits/chosen": -2.7550175189971924, "logits/rejected": -2.1221790313720703, "logps/chosen": -782.8545532226562, "logps/rejected": -593.1392211914062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5950608253479004, "rewards/margins": 9.921201705932617, "rewards/rejected": -12.516263008117676, "step": 4490 }, { "epoch": 0.7, "learning_rate": 1.0853452979926344e-05, "logits/chosen": -0.6835674047470093, "logits/rejected": -2.7701361179351807, "logps/chosen": -149.5586395263672, "logps/rejected": -515.0498046875, "loss": 2.6303, "rewards/accuracies": 0.5, "rewards/chosen": -3.5482609272003174, "rewards/margins": 1.5730400085449219, "rewards/rejected": -5.12130069732666, "step": 4491 }, { "epoch": 0.7, "learning_rate": 1.0852719539395196e-05, "logits/chosen": -2.987006902694702, "logits/rejected": -1.9492921829223633, "logps/chosen": -346.07928466796875, "logps/rejected": -296.4914855957031, "loss": 1.2393, "rewards/accuracies": 0.5, "rewards/chosen": -2.838165283203125, "rewards/margins": 2.9391236305236816, "rewards/rejected": -5.777288913726807, "step": 4492 }, { "epoch": 0.7, "learning_rate": 1.0851986098864048e-05, "logits/chosen": -1.9185466766357422, "logits/rejected": -3.18620228767395, "logps/chosen": -95.7347412109375, "logps/rejected": -401.4117431640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0985331535339355, "rewards/margins": 7.031008720397949, "rewards/rejected": -8.129541397094727, "step": 4493 }, { "epoch": 0.7, "learning_rate": 1.0851252658332902e-05, "logits/chosen": -2.698317766189575, "logits/rejected": -2.8489789962768555, "logps/chosen": -177.1100616455078, "logps/rejected": -144.61514282226562, "loss": 2.8365, "rewards/accuracies": 0.5, "rewards/chosen": -4.594862461090088, "rewards/margins": -1.7451908588409424, "rewards/rejected": -2.8496716022491455, "step": 4494 }, { "epoch": 0.7, "learning_rate": 1.0850519217801753e-05, "logits/chosen": -2.823784112930298, "logits/rejected": -1.8742809295654297, "logps/chosen": -256.76544189453125, "logps/rejected": -208.14749145507812, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.43693163990974426, "rewards/margins": 5.054781436920166, "rewards/rejected": -5.491713523864746, "step": 4495 }, { "epoch": 0.7, "learning_rate": 1.0849785777270605e-05, "logits/chosen": -0.7871297001838684, "logits/rejected": -1.4801183938980103, "logps/chosen": -136.86839294433594, "logps/rejected": -213.462158203125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.4704911708831787, "rewards/margins": 5.317820072174072, "rewards/rejected": -6.788311004638672, "step": 4496 }, { "epoch": 0.7, "learning_rate": 1.0849052336739457e-05, "logits/chosen": -2.7643558979034424, "logits/rejected": -2.7932982444763184, "logps/chosen": -101.85205841064453, "logps/rejected": -164.0834197998047, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.21786919236183167, "rewards/margins": 5.883708953857422, "rewards/rejected": -6.1015777587890625, "step": 4497 }, { "epoch": 0.7, "learning_rate": 1.0848318896208309e-05, "logits/chosen": -2.4964189529418945, "logits/rejected": -2.384167194366455, "logps/chosen": -559.269287109375, "logps/rejected": -393.9602355957031, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.9389023780822754, "rewards/margins": 6.037254810333252, "rewards/rejected": -7.976157188415527, "step": 4498 }, { "epoch": 0.7, "learning_rate": 1.0847585455677161e-05, "logits/chosen": -2.9242217540740967, "logits/rejected": -2.545051097869873, "logps/chosen": -198.23660278320312, "logps/rejected": -41.30061340332031, "loss": 1.2212, "rewards/accuracies": 0.5, "rewards/chosen": -3.003511905670166, "rewards/margins": -0.38307100534439087, "rewards/rejected": -2.62044095993042, "step": 4499 }, { "epoch": 0.7, "learning_rate": 1.0846852015146013e-05, "logits/chosen": -2.7587902545928955, "logits/rejected": -2.9514780044555664, "logps/chosen": -82.74838256835938, "logps/rejected": -133.8867950439453, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0390026569366455, "rewards/margins": 3.9224395751953125, "rewards/rejected": -4.961441993713379, "step": 4500 }, { "epoch": 0.7, "learning_rate": 1.0846118574614866e-05, "logits/chosen": -2.5594019889831543, "logits/rejected": -3.016226291656494, "logps/chosen": -82.14521789550781, "logps/rejected": -255.87933349609375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.8870269060134888, "rewards/margins": 6.206450462341309, "rewards/rejected": -7.093477249145508, "step": 4501 }, { "epoch": 0.7, "learning_rate": 1.0845385134083718e-05, "logits/chosen": -2.4567577838897705, "logits/rejected": -3.147540330886841, "logps/chosen": -113.83940887451172, "logps/rejected": -287.94415283203125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.2310419082641602, "rewards/margins": 5.484060287475586, "rewards/rejected": -6.715102195739746, "step": 4502 }, { "epoch": 0.7, "learning_rate": 1.0844651693552572e-05, "logits/chosen": -2.9095351696014404, "logits/rejected": -2.4258434772491455, "logps/chosen": -164.5320281982422, "logps/rejected": -144.72268676757812, "loss": 2.3908, "rewards/accuracies": 0.5, "rewards/chosen": -3.7393600940704346, "rewards/margins": 0.9655141830444336, "rewards/rejected": -4.704874038696289, "step": 4503 }, { "epoch": 0.7, "learning_rate": 1.0843918253021424e-05, "logits/chosen": -1.7411298751831055, "logits/rejected": -3.084402561187744, "logps/chosen": -95.9185791015625, "logps/rejected": -246.10794067382812, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.211819052696228, "rewards/margins": 4.809823989868164, "rewards/rejected": -6.021643161773682, "step": 4504 }, { "epoch": 0.7, "learning_rate": 1.0843184812490276e-05, "logits/chosen": -2.830627202987671, "logits/rejected": -1.776984691619873, "logps/chosen": -203.92385864257812, "logps/rejected": -228.45339965820312, "loss": 2.6535, "rewards/accuracies": 0.5, "rewards/chosen": -2.8573296070098877, "rewards/margins": 0.04151344299316406, "rewards/rejected": -2.8988430500030518, "step": 4505 }, { "epoch": 0.7, "learning_rate": 1.0842451371959127e-05, "logits/chosen": -2.3969616889953613, "logits/rejected": -3.278770685195923, "logps/chosen": -138.68621826171875, "logps/rejected": -329.947265625, "loss": 0.0878, "rewards/accuracies": 1.0, "rewards/chosen": -0.9583786129951477, "rewards/margins": 4.428651809692383, "rewards/rejected": -5.387030124664307, "step": 4506 }, { "epoch": 0.7, "learning_rate": 1.084171793142798e-05, "logits/chosen": -3.1363704204559326, "logits/rejected": -2.1923820972442627, "logps/chosen": -884.3333129882812, "logps/rejected": -371.5137939453125, "loss": 2.3821, "rewards/accuracies": 0.5, "rewards/chosen": -2.9365599155426025, "rewards/margins": 1.3264470100402832, "rewards/rejected": -4.263007164001465, "step": 4507 }, { "epoch": 0.7, "learning_rate": 1.0840984490896831e-05, "logits/chosen": -2.752887487411499, "logits/rejected": -2.183443546295166, "logps/chosen": -454.5141296386719, "logps/rejected": -457.5367736816406, "loss": 2.3672, "rewards/accuracies": 0.5, "rewards/chosen": -5.25638484954834, "rewards/margins": 0.3919641971588135, "rewards/rejected": -5.648348808288574, "step": 4508 }, { "epoch": 0.7, "learning_rate": 1.0840251050365683e-05, "logits/chosen": -1.5025607347488403, "logits/rejected": -2.4497625827789307, "logps/chosen": -170.60702514648438, "logps/rejected": -465.56109619140625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1581790447235107, "rewards/margins": 6.957972526550293, "rewards/rejected": -8.116151809692383, "step": 4509 }, { "epoch": 0.7, "learning_rate": 1.0839517609834535e-05, "logits/chosen": -2.329817533493042, "logits/rejected": -2.676079273223877, "logps/chosen": -308.90191650390625, "logps/rejected": -494.1558837890625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.3695130348205566, "rewards/margins": 6.396925449371338, "rewards/rejected": -8.766438484191895, "step": 4510 }, { "epoch": 0.7, "learning_rate": 1.0838784169303387e-05, "logits/chosen": -2.7143423557281494, "logits/rejected": -1.9731543064117432, "logps/chosen": -211.76181030273438, "logps/rejected": -226.79373168945312, "loss": 2.9823, "rewards/accuracies": 0.5, "rewards/chosen": -3.272631883621216, "rewards/margins": 0.9854929447174072, "rewards/rejected": -4.258124828338623, "step": 4511 }, { "epoch": 0.7, "learning_rate": 1.083805072877224e-05, "logits/chosen": -2.9622278213500977, "logits/rejected": -2.7548036575317383, "logps/chosen": -129.54037475585938, "logps/rejected": -90.02796173095703, "loss": 0.623, "rewards/accuracies": 0.5, "rewards/chosen": -2.137099027633667, "rewards/margins": 1.3840388059616089, "rewards/rejected": -3.5211377143859863, "step": 4512 }, { "epoch": 0.7, "learning_rate": 1.0837317288241092e-05, "logits/chosen": -2.55842924118042, "logits/rejected": -2.918825626373291, "logps/chosen": -54.02054214477539, "logps/rejected": -248.79336547851562, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.0286636352539062, "rewards/margins": 5.584156036376953, "rewards/rejected": -6.612819671630859, "step": 4513 }, { "epoch": 0.7, "learning_rate": 1.0836583847709944e-05, "logits/chosen": -2.2410497665405273, "logits/rejected": -3.0195531845092773, "logps/chosen": -86.20938110351562, "logps/rejected": -349.552490234375, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -0.9144452214241028, "rewards/margins": 5.231738090515137, "rewards/rejected": -6.146183013916016, "step": 4514 }, { "epoch": 0.7, "learning_rate": 1.0835850407178796e-05, "logits/chosen": -1.3745644092559814, "logits/rejected": -2.350184440612793, "logps/chosen": -168.82803344726562, "logps/rejected": -333.966552734375, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -1.141334891319275, "rewards/margins": 7.265101432800293, "rewards/rejected": -8.4064359664917, "step": 4515 }, { "epoch": 0.7, "learning_rate": 1.0835116966647648e-05, "logits/chosen": -2.719139814376831, "logits/rejected": -2.0287067890167236, "logps/chosen": -333.8206481933594, "logps/rejected": -368.20355224609375, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.0670870542526245, "rewards/margins": 5.080230712890625, "rewards/rejected": -6.147317886352539, "step": 4516 }, { "epoch": 0.7, "learning_rate": 1.08343835261165e-05, "logits/chosen": -1.8605778217315674, "logits/rejected": -2.9439175128936768, "logps/chosen": -87.81340026855469, "logps/rejected": -358.63995361328125, "loss": 0.1184, "rewards/accuracies": 1.0, "rewards/chosen": -2.308746337890625, "rewards/margins": 2.363177537918091, "rewards/rejected": -4.671923637390137, "step": 4517 }, { "epoch": 0.7, "learning_rate": 1.0833650085585352e-05, "logits/chosen": -2.8801794052124023, "logits/rejected": -2.5192949771881104, "logps/chosen": -463.9971923828125, "logps/rejected": -237.7522430419922, "loss": 3.0877, "rewards/accuracies": 0.5, "rewards/chosen": -3.966299533843994, "rewards/margins": -1.9689757823944092, "rewards/rejected": -1.9973236322402954, "step": 4518 }, { "epoch": 0.7, "learning_rate": 1.0832916645054204e-05, "logits/chosen": -1.8526228666305542, "logits/rejected": -2.5068771839141846, "logps/chosen": -79.83460998535156, "logps/rejected": -190.016845703125, "loss": 1.0148, "rewards/accuracies": 0.5, "rewards/chosen": -2.7754762172698975, "rewards/margins": 2.2863306999206543, "rewards/rejected": -5.061806678771973, "step": 4519 }, { "epoch": 0.7, "learning_rate": 1.0832183204523055e-05, "logits/chosen": -0.9332488775253296, "logits/rejected": -2.8011634349823, "logps/chosen": -75.93911743164062, "logps/rejected": -353.4394226074219, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -2.4804000854492188, "rewards/margins": 3.731525421142578, "rewards/rejected": -6.211925506591797, "step": 4520 }, { "epoch": 0.7, "learning_rate": 1.0831449763991909e-05, "logits/chosen": -3.3153722286224365, "logits/rejected": -3.5403621196746826, "logps/chosen": -61.005958557128906, "logps/rejected": -199.20602416992188, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.672973394393921, "rewards/margins": 4.093343734741211, "rewards/rejected": -5.766317367553711, "step": 4521 }, { "epoch": 0.7, "learning_rate": 1.0830716323460761e-05, "logits/chosen": -2.819020986557007, "logits/rejected": -1.8312361240386963, "logps/chosen": -198.2528076171875, "logps/rejected": -61.112308502197266, "loss": 4.5915, "rewards/accuracies": 0.0, "rewards/chosen": -6.067348957061768, "rewards/margins": -4.574149131774902, "rewards/rejected": -1.4931995868682861, "step": 4522 }, { "epoch": 0.7, "learning_rate": 1.0829982882929613e-05, "logits/chosen": -2.5617547035217285, "logits/rejected": -3.2525458335876465, "logps/chosen": -243.40086364746094, "logps/rejected": -437.5462646484375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.5807716846466064, "rewards/margins": 5.436397552490234, "rewards/rejected": -7.01716947555542, "step": 4523 }, { "epoch": 0.7, "learning_rate": 1.0829249442398465e-05, "logits/chosen": -2.6012232303619385, "logits/rejected": -2.629124879837036, "logps/chosen": -99.9540786743164, "logps/rejected": -194.93048095703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1498794555664062, "rewards/margins": 7.2986555099487305, "rewards/rejected": -8.44853401184082, "step": 4524 }, { "epoch": 0.7, "learning_rate": 1.0828516001867317e-05, "logits/chosen": -2.1394612789154053, "logits/rejected": -1.8799649477005005, "logps/chosen": -513.8471069335938, "logps/rejected": -476.53289794921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.2092010974884033, "rewards/margins": 8.552510261535645, "rewards/rejected": -9.761711120605469, "step": 4525 }, { "epoch": 0.7, "learning_rate": 1.0827782561336168e-05, "logits/chosen": -0.6806510090827942, "logits/rejected": -2.101900339126587, "logps/chosen": -143.63426208496094, "logps/rejected": -360.5128479003906, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -1.1530048847198486, "rewards/margins": 4.569146156311035, "rewards/rejected": -5.722151279449463, "step": 4526 }, { "epoch": 0.7, "learning_rate": 1.082704912080502e-05, "logits/chosen": -2.5328011512756348, "logits/rejected": -2.8424875736236572, "logps/chosen": -120.05882263183594, "logps/rejected": -168.28370666503906, "loss": 1.0502, "rewards/accuracies": 0.5, "rewards/chosen": -2.787975311279297, "rewards/margins": 1.838761329650879, "rewards/rejected": -4.626736640930176, "step": 4527 }, { "epoch": 0.7, "learning_rate": 1.0826315680273872e-05, "logits/chosen": -3.043389320373535, "logits/rejected": -3.2284014225006104, "logps/chosen": -129.95352172851562, "logps/rejected": -172.83709716796875, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -0.8602081537246704, "rewards/margins": 4.746416091918945, "rewards/rejected": -5.606624603271484, "step": 4528 }, { "epoch": 0.7, "learning_rate": 1.0825582239742724e-05, "logits/chosen": -2.339883804321289, "logits/rejected": -3.0406265258789062, "logps/chosen": -48.050697326660156, "logps/rejected": -199.62734985351562, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.6900203227996826, "rewards/margins": 4.948752403259277, "rewards/rejected": -5.638772487640381, "step": 4529 }, { "epoch": 0.7, "learning_rate": 1.0824848799211578e-05, "logits/chosen": -2.714010715484619, "logits/rejected": -2.86897873878479, "logps/chosen": -399.0675354003906, "logps/rejected": -478.15826416015625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.7265625, "rewards/margins": 5.263470649719238, "rewards/rejected": -6.990033149719238, "step": 4530 }, { "epoch": 0.7, "learning_rate": 1.082411535868043e-05, "logits/chosen": -3.0653748512268066, "logits/rejected": -3.128225564956665, "logps/chosen": -410.075439453125, "logps/rejected": -398.03045654296875, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -1.5833771228790283, "rewards/margins": 4.435895919799805, "rewards/rejected": -6.019273281097412, "step": 4531 }, { "epoch": 0.7, "learning_rate": 1.0823381918149281e-05, "logits/chosen": -2.3843154907226562, "logits/rejected": -1.7964528799057007, "logps/chosen": -388.3183288574219, "logps/rejected": -273.4531555175781, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.21215972304344177, "rewards/margins": 8.117765426635742, "rewards/rejected": -7.905606269836426, "step": 4532 }, { "epoch": 0.7, "learning_rate": 1.0822648477618133e-05, "logits/chosen": -2.6723554134368896, "logits/rejected": -2.5523898601531982, "logps/chosen": -395.5564270019531, "logps/rejected": -560.4489135742188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2717725038528442, "rewards/margins": 8.840946197509766, "rewards/rejected": -10.11271858215332, "step": 4533 }, { "epoch": 0.71, "learning_rate": 1.0821915037086985e-05, "logits/chosen": -1.3501776456832886, "logits/rejected": -2.837820291519165, "logps/chosen": -187.28016662597656, "logps/rejected": -674.2785034179688, "loss": 0.1363, "rewards/accuracies": 1.0, "rewards/chosen": -0.7377948760986328, "rewards/margins": 3.4035730361938477, "rewards/rejected": -4.1413679122924805, "step": 4534 }, { "epoch": 0.71, "learning_rate": 1.0821181596555839e-05, "logits/chosen": -1.9263070821762085, "logits/rejected": -3.101090669631958, "logps/chosen": -131.6162567138672, "logps/rejected": -330.296875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -1.412022352218628, "rewards/margins": 5.808836936950684, "rewards/rejected": -7.220859527587891, "step": 4535 }, { "epoch": 0.71, "learning_rate": 1.082044815602469e-05, "logits/chosen": -2.8234615325927734, "logits/rejected": -2.0290684700012207, "logps/chosen": -487.39959716796875, "logps/rejected": -364.223876953125, "loss": 3.1005, "rewards/accuracies": 0.5, "rewards/chosen": -4.180367946624756, "rewards/margins": 0.6841423511505127, "rewards/rejected": -4.8645100593566895, "step": 4536 }, { "epoch": 0.71, "learning_rate": 1.0819714715493542e-05, "logits/chosen": -2.7965526580810547, "logits/rejected": -2.030881404876709, "logps/chosen": -227.93167114257812, "logps/rejected": -150.35202026367188, "loss": 0.9434, "rewards/accuracies": 0.5, "rewards/chosen": -1.1589813232421875, "rewards/margins": 2.204547882080078, "rewards/rejected": -3.3635292053222656, "step": 4537 }, { "epoch": 0.71, "learning_rate": 1.0818981274962394e-05, "logits/chosen": -2.5648186206817627, "logits/rejected": -2.8106114864349365, "logps/chosen": -334.4091796875, "logps/rejected": -358.44873046875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.743940830230713, "rewards/margins": 4.84076452255249, "rewards/rejected": -6.584705352783203, "step": 4538 }, { "epoch": 0.71, "learning_rate": 1.0818247834431248e-05, "logits/chosen": -3.247298240661621, "logits/rejected": -2.5203499794006348, "logps/chosen": -295.67529296875, "logps/rejected": -88.62125396728516, "loss": 2.879, "rewards/accuracies": 0.5, "rewards/chosen": -4.513904571533203, "rewards/margins": -2.51401424407959, "rewards/rejected": -1.9998905658721924, "step": 4539 }, { "epoch": 0.71, "learning_rate": 1.08175143939001e-05, "logits/chosen": -2.2174551486968994, "logits/rejected": -2.82399320602417, "logps/chosen": -164.37591552734375, "logps/rejected": -228.85435485839844, "loss": 2.0387, "rewards/accuracies": 0.5, "rewards/chosen": -2.791247606277466, "rewards/margins": 2.202268123626709, "rewards/rejected": -4.993515968322754, "step": 4540 }, { "epoch": 0.71, "learning_rate": 1.0816780953368952e-05, "logits/chosen": -2.667771816253662, "logits/rejected": -3.2677602767944336, "logps/chosen": -345.498291015625, "logps/rejected": -403.0986328125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.1580489873886108, "rewards/margins": 5.6073455810546875, "rewards/rejected": -6.765394687652588, "step": 4541 }, { "epoch": 0.71, "learning_rate": 1.0816047512837804e-05, "logits/chosen": -2.7618589401245117, "logits/rejected": -2.8682658672332764, "logps/chosen": -99.40904235839844, "logps/rejected": -344.9071350097656, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -0.8768978714942932, "rewards/margins": 6.46518087387085, "rewards/rejected": -7.342078685760498, "step": 4542 }, { "epoch": 0.71, "learning_rate": 1.0815314072306655e-05, "logits/chosen": -1.9680150747299194, "logits/rejected": -2.447819709777832, "logps/chosen": -111.7581787109375, "logps/rejected": -155.04530334472656, "loss": 0.1466, "rewards/accuracies": 1.0, "rewards/chosen": -1.2086353302001953, "rewards/margins": 1.9519343376159668, "rewards/rejected": -3.160569667816162, "step": 4543 }, { "epoch": 0.71, "learning_rate": 1.0814580631775507e-05, "logits/chosen": -2.940871477127075, "logits/rejected": -2.8777778148651123, "logps/chosen": -207.29153442382812, "logps/rejected": -255.49330139160156, "loss": 2.387, "rewards/accuracies": 0.5, "rewards/chosen": -3.1209919452667236, "rewards/margins": 0.6081795692443848, "rewards/rejected": -3.7291715145111084, "step": 4544 }, { "epoch": 0.71, "learning_rate": 1.081384719124436e-05, "logits/chosen": -2.7941620349884033, "logits/rejected": -2.2748467922210693, "logps/chosen": -380.84490966796875, "logps/rejected": -273.4772033691406, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.0992625951766968, "rewards/margins": 4.800082683563232, "rewards/rejected": -5.899345397949219, "step": 4545 }, { "epoch": 0.71, "learning_rate": 1.0813113750713211e-05, "logits/chosen": -2.9576117992401123, "logits/rejected": -2.8133010864257812, "logps/chosen": -237.07208251953125, "logps/rejected": -245.956787109375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.6628189086914062, "rewards/margins": 5.798783302307129, "rewards/rejected": -6.461602210998535, "step": 4546 }, { "epoch": 0.71, "learning_rate": 1.0812380310182063e-05, "logits/chosen": -2.7438507080078125, "logits/rejected": -1.9085127115249634, "logps/chosen": -249.70138549804688, "logps/rejected": -210.50497436523438, "loss": 1.9185, "rewards/accuracies": 0.5, "rewards/chosen": -2.7083396911621094, "rewards/margins": 0.9810791015625, "rewards/rejected": -3.6894187927246094, "step": 4547 }, { "epoch": 0.71, "learning_rate": 1.0811646869650916e-05, "logits/chosen": -1.6736303567886353, "logits/rejected": -2.873335599899292, "logps/chosen": -171.07598876953125, "logps/rejected": -276.04620361328125, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -1.022718071937561, "rewards/margins": 4.210552215576172, "rewards/rejected": -5.233270645141602, "step": 4548 }, { "epoch": 0.71, "learning_rate": 1.0810913429119768e-05, "logits/chosen": -2.7593743801116943, "logits/rejected": -2.0042436122894287, "logps/chosen": -387.6422119140625, "logps/rejected": -308.2872314453125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.19610595703125, "rewards/margins": 5.5810627937316895, "rewards/rejected": -6.7771687507629395, "step": 4549 }, { "epoch": 0.71, "learning_rate": 1.081017998858862e-05, "logits/chosen": -2.964810609817505, "logits/rejected": -3.0893993377685547, "logps/chosen": -155.9778289794922, "logps/rejected": -204.12338256835938, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.5787907838821411, "rewards/margins": 5.519403457641602, "rewards/rejected": -7.098194599151611, "step": 4550 }, { "epoch": 0.71, "learning_rate": 1.0809446548057472e-05, "logits/chosen": -2.5816009044647217, "logits/rejected": -2.878490686416626, "logps/chosen": -115.69805908203125, "logps/rejected": -239.61505126953125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.3405940532684326, "rewards/margins": 5.4492974281311035, "rewards/rejected": -6.789891719818115, "step": 4551 }, { "epoch": 0.71, "learning_rate": 1.0808713107526324e-05, "logits/chosen": -2.383363723754883, "logits/rejected": -3.2643070220947266, "logps/chosen": -70.79994201660156, "logps/rejected": -267.51629638671875, "loss": 0.2244, "rewards/accuracies": 1.0, "rewards/chosen": -1.6473829746246338, "rewards/margins": 2.6553821563720703, "rewards/rejected": -4.302764892578125, "step": 4552 }, { "epoch": 0.71, "learning_rate": 1.0807979666995176e-05, "logits/chosen": -2.6567740440368652, "logits/rejected": -2.835078239440918, "logps/chosen": -139.73863220214844, "logps/rejected": -168.7746124267578, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -0.8009136915206909, "rewards/margins": 3.4964776039123535, "rewards/rejected": -4.297391414642334, "step": 4553 }, { "epoch": 0.71, "learning_rate": 1.0807246226464028e-05, "logits/chosen": -2.859950542449951, "logits/rejected": -2.007686138153076, "logps/chosen": -200.62030029296875, "logps/rejected": -86.55340576171875, "loss": 2.0042, "rewards/accuracies": 0.5, "rewards/chosen": -4.264326095581055, "rewards/margins": 0.6018695831298828, "rewards/rejected": -4.8661956787109375, "step": 4554 }, { "epoch": 0.71, "learning_rate": 1.080651278593288e-05, "logits/chosen": -1.9149194955825806, "logits/rejected": -2.8902828693389893, "logps/chosen": -126.12604522705078, "logps/rejected": -411.05975341796875, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -0.5323501825332642, "rewards/margins": 5.563182353973389, "rewards/rejected": -6.095532417297363, "step": 4555 }, { "epoch": 0.71, "learning_rate": 1.0805779345401733e-05, "logits/chosen": -1.7225605249404907, "logits/rejected": -3.000988006591797, "logps/chosen": -169.53192138671875, "logps/rejected": -473.00628662109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7575882077217102, "rewards/margins": 7.047203063964844, "rewards/rejected": -7.804791450500488, "step": 4556 }, { "epoch": 0.71, "learning_rate": 1.0805045904870585e-05, "logits/chosen": -2.8936891555786133, "logits/rejected": -3.0404179096221924, "logps/chosen": -207.5906219482422, "logps/rejected": -284.27337646484375, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -0.571789562702179, "rewards/margins": 3.162174940109253, "rewards/rejected": -3.733964443206787, "step": 4557 }, { "epoch": 0.71, "learning_rate": 1.0804312464339437e-05, "logits/chosen": -3.275578022003174, "logits/rejected": -2.808506488800049, "logps/chosen": -451.0443115234375, "logps/rejected": -237.84423828125, "loss": 2.9798, "rewards/accuracies": 0.5, "rewards/chosen": -3.1130125522613525, "rewards/margins": 1.212214708328247, "rewards/rejected": -4.325227737426758, "step": 4558 }, { "epoch": 0.71, "learning_rate": 1.0803579023808289e-05, "logits/chosen": -2.2763752937316895, "logits/rejected": -3.1202890872955322, "logps/chosen": -168.9390869140625, "logps/rejected": -350.8570861816406, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -0.919482409954071, "rewards/margins": 4.681097507476807, "rewards/rejected": -5.600580215454102, "step": 4559 }, { "epoch": 0.71, "learning_rate": 1.080284558327714e-05, "logits/chosen": -1.758094072341919, "logits/rejected": -2.8812332153320312, "logps/chosen": -57.76491928100586, "logps/rejected": -398.9479675292969, "loss": 0.1661, "rewards/accuracies": 1.0, "rewards/chosen": -1.338470220565796, "rewards/margins": 3.9080049991607666, "rewards/rejected": -5.2464752197265625, "step": 4560 }, { "epoch": 0.71, "learning_rate": 1.0802112142745993e-05, "logits/chosen": -2.741389274597168, "logits/rejected": -1.55133056640625, "logps/chosen": -409.7523193359375, "logps/rejected": -229.45045471191406, "loss": 3.8195, "rewards/accuracies": 0.0, "rewards/chosen": -4.999266624450684, "rewards/margins": -3.7847466468811035, "rewards/rejected": -1.2145202159881592, "step": 4561 }, { "epoch": 0.71, "learning_rate": 1.0801378702214844e-05, "logits/chosen": -2.3374898433685303, "logits/rejected": -2.984995126724243, "logps/chosen": -166.05845642089844, "logps/rejected": -233.15426635742188, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.4561736583709717, "rewards/margins": 5.610052108764648, "rewards/rejected": -7.066225528717041, "step": 4562 }, { "epoch": 0.71, "learning_rate": 1.0800645261683696e-05, "logits/chosen": -3.031191825866699, "logits/rejected": -2.2180745601654053, "logps/chosen": -332.9156799316406, "logps/rejected": -201.37591552734375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.7346642017364502, "rewards/margins": 6.208148956298828, "rewards/rejected": -6.942812919616699, "step": 4563 }, { "epoch": 0.71, "learning_rate": 1.0799911821152548e-05, "logits/chosen": -1.6466511487960815, "logits/rejected": -3.009176015853882, "logps/chosen": -79.39009094238281, "logps/rejected": -366.60943603515625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.3770843744277954, "rewards/margins": 5.937379837036133, "rewards/rejected": -7.3144636154174805, "step": 4564 }, { "epoch": 0.71, "learning_rate": 1.0799178380621402e-05, "logits/chosen": -2.4263110160827637, "logits/rejected": -2.9621033668518066, "logps/chosen": -39.95069122314453, "logps/rejected": -153.42324829101562, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.858738899230957, "rewards/margins": 6.060356140136719, "rewards/rejected": -7.919095039367676, "step": 4565 }, { "epoch": 0.71, "learning_rate": 1.0798444940090254e-05, "logits/chosen": -2.263866662979126, "logits/rejected": -3.126164674758911, "logps/chosen": -94.82809448242188, "logps/rejected": -327.409912109375, "loss": 0.2569, "rewards/accuracies": 1.0, "rewards/chosen": -2.2785794734954834, "rewards/margins": 2.1458256244659424, "rewards/rejected": -4.424405097961426, "step": 4566 }, { "epoch": 0.71, "learning_rate": 1.0797711499559106e-05, "logits/chosen": -2.52229905128479, "logits/rejected": -3.1526269912719727, "logps/chosen": -255.7845916748047, "logps/rejected": -362.91790771484375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.5788803100585938, "rewards/margins": 5.648351669311523, "rewards/rejected": -5.06947135925293, "step": 4567 }, { "epoch": 0.71, "learning_rate": 1.0796978059027957e-05, "logits/chosen": -2.9561920166015625, "logits/rejected": -3.0171236991882324, "logps/chosen": -340.97418212890625, "logps/rejected": -405.167724609375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.4370555877685547, "rewards/margins": 5.0074543952941895, "rewards/rejected": -7.444509983062744, "step": 4568 }, { "epoch": 0.71, "learning_rate": 1.079624461849681e-05, "logits/chosen": -2.0981054306030273, "logits/rejected": -2.935788631439209, "logps/chosen": -171.38735961914062, "logps/rejected": -347.49749755859375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.3828415870666504, "rewards/margins": 6.3960676193237305, "rewards/rejected": -7.778908729553223, "step": 4569 }, { "epoch": 0.71, "learning_rate": 1.0795511177965663e-05, "logits/chosen": -1.1186386346817017, "logits/rejected": -2.7543303966522217, "logps/chosen": -86.33113861083984, "logps/rejected": -318.3515319824219, "loss": 0.171, "rewards/accuracies": 1.0, "rewards/chosen": -3.071028232574463, "rewards/margins": 2.888468027114868, "rewards/rejected": -5.95949649810791, "step": 4570 }, { "epoch": 0.71, "learning_rate": 1.0794777737434515e-05, "logits/chosen": -2.6075220108032227, "logits/rejected": -2.920004367828369, "logps/chosen": -315.0709228515625, "logps/rejected": -316.95782470703125, "loss": 1.5078, "rewards/accuracies": 0.5, "rewards/chosen": -1.6184608936309814, "rewards/margins": 2.2382712364196777, "rewards/rejected": -3.856732130050659, "step": 4571 }, { "epoch": 0.71, "learning_rate": 1.0794044296903367e-05, "logits/chosen": -2.4560725688934326, "logits/rejected": -2.893669843673706, "logps/chosen": -209.32647705078125, "logps/rejected": -174.76431274414062, "loss": 2.6823, "rewards/accuracies": 0.5, "rewards/chosen": -3.96884822845459, "rewards/margins": -0.0270383358001709, "rewards/rejected": -3.941809892654419, "step": 4572 }, { "epoch": 0.71, "learning_rate": 1.0793310856372219e-05, "logits/chosen": -2.409130096435547, "logits/rejected": -2.3988847732543945, "logps/chosen": -90.15990447998047, "logps/rejected": -73.38783264160156, "loss": 1.5521, "rewards/accuracies": 0.5, "rewards/chosen": -3.7776660919189453, "rewards/margins": 0.38731932640075684, "rewards/rejected": -4.164985656738281, "step": 4573 }, { "epoch": 0.71, "learning_rate": 1.0792577415841072e-05, "logits/chosen": -2.8304696083068848, "logits/rejected": -3.089355230331421, "logps/chosen": -92.19905090332031, "logps/rejected": -84.14725494384766, "loss": 2.3281, "rewards/accuracies": 0.5, "rewards/chosen": -4.592289924621582, "rewards/margins": 0.13834214210510254, "rewards/rejected": -4.730632305145264, "step": 4574 }, { "epoch": 0.71, "learning_rate": 1.0791843975309924e-05, "logits/chosen": -2.4713776111602783, "logits/rejected": -2.976034164428711, "logps/chosen": -208.73587036132812, "logps/rejected": -368.306640625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.3289406299591064, "rewards/margins": 6.225431442260742, "rewards/rejected": -7.5543718338012695, "step": 4575 }, { "epoch": 0.71, "learning_rate": 1.0791110534778776e-05, "logits/chosen": -2.523688793182373, "logits/rejected": -2.869532346725464, "logps/chosen": -375.97265625, "logps/rejected": -231.60809326171875, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -1.2794510126113892, "rewards/margins": 4.844731330871582, "rewards/rejected": -6.124182224273682, "step": 4576 }, { "epoch": 0.71, "learning_rate": 1.0790377094247628e-05, "logits/chosen": -1.8435478210449219, "logits/rejected": -2.9674792289733887, "logps/chosen": -79.45423889160156, "logps/rejected": -224.91424560546875, "loss": 0.0651, "rewards/accuracies": 1.0, "rewards/chosen": -2.7440502643585205, "rewards/margins": 4.584800720214844, "rewards/rejected": -7.328850746154785, "step": 4577 }, { "epoch": 0.71, "learning_rate": 1.078964365371648e-05, "logits/chosen": -3.0546016693115234, "logits/rejected": -2.501115560531616, "logps/chosen": -122.67632293701172, "logps/rejected": -84.82887268066406, "loss": 3.3952, "rewards/accuracies": 0.5, "rewards/chosen": -5.217702865600586, "rewards/margins": -0.785585880279541, "rewards/rejected": -4.432116508483887, "step": 4578 }, { "epoch": 0.71, "learning_rate": 1.0788910213185331e-05, "logits/chosen": -2.743037462234497, "logits/rejected": -3.269066333770752, "logps/chosen": -59.661773681640625, "logps/rejected": -226.38677978515625, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -1.8224101066589355, "rewards/margins": 3.1901044845581055, "rewards/rejected": -5.012514591217041, "step": 4579 }, { "epoch": 0.71, "learning_rate": 1.0788176772654183e-05, "logits/chosen": -2.1648826599121094, "logits/rejected": -3.1181602478027344, "logps/chosen": -227.79507446289062, "logps/rejected": -436.3941345214844, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -2.0396697521209717, "rewards/margins": 6.820930480957031, "rewards/rejected": -8.860600471496582, "step": 4580 }, { "epoch": 0.71, "learning_rate": 1.0787443332123035e-05, "logits/chosen": -2.553687572479248, "logits/rejected": -3.247817039489746, "logps/chosen": -193.12261962890625, "logps/rejected": -298.870361328125, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": -2.087242841720581, "rewards/margins": 3.9752750396728516, "rewards/rejected": -6.062518119812012, "step": 4581 }, { "epoch": 0.71, "learning_rate": 1.0786709891591887e-05, "logits/chosen": -2.339704990386963, "logits/rejected": -2.9515397548675537, "logps/chosen": -79.8958740234375, "logps/rejected": -164.77687072753906, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -1.9691263437271118, "rewards/margins": 3.2933695316314697, "rewards/rejected": -5.262495994567871, "step": 4582 }, { "epoch": 0.71, "learning_rate": 1.078597645106074e-05, "logits/chosen": -2.2896945476531982, "logits/rejected": -3.101412296295166, "logps/chosen": -123.25955963134766, "logps/rejected": -497.3002624511719, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.9114654064178467, "rewards/margins": 5.274131774902344, "rewards/rejected": -7.1855974197387695, "step": 4583 }, { "epoch": 0.71, "learning_rate": 1.0785243010529593e-05, "logits/chosen": -3.240170478820801, "logits/rejected": -3.3101091384887695, "logps/chosen": -378.4278259277344, "logps/rejected": -242.25828552246094, "loss": 2.0959, "rewards/accuracies": 0.5, "rewards/chosen": -4.778453826904297, "rewards/margins": 1.6142489910125732, "rewards/rejected": -6.392703056335449, "step": 4584 }, { "epoch": 0.71, "learning_rate": 1.0784509569998444e-05, "logits/chosen": -3.076533317565918, "logits/rejected": -2.8204078674316406, "logps/chosen": -448.03564453125, "logps/rejected": -259.23004150390625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.387559413909912, "rewards/margins": 5.671609401702881, "rewards/rejected": -8.059168815612793, "step": 4585 }, { "epoch": 0.71, "learning_rate": 1.0783776129467296e-05, "logits/chosen": -2.5065927505493164, "logits/rejected": -2.7064461708068848, "logps/chosen": -184.47247314453125, "logps/rejected": -164.69961547851562, "loss": 1.2669, "rewards/accuracies": 0.5, "rewards/chosen": -4.322218894958496, "rewards/margins": -0.36362969875335693, "rewards/rejected": -3.9585893154144287, "step": 4586 }, { "epoch": 0.71, "learning_rate": 1.0783042688936148e-05, "logits/chosen": -2.4558489322662354, "logits/rejected": -2.796982765197754, "logps/chosen": -612.68359375, "logps/rejected": -526.3152465820312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.4307304322719574, "rewards/margins": 6.576066970825195, "rewards/rejected": -7.006797790527344, "step": 4587 }, { "epoch": 0.71, "learning_rate": 1.0782309248405e-05, "logits/chosen": -0.7550813555717468, "logits/rejected": -2.0863020420074463, "logps/chosen": -235.63084411621094, "logps/rejected": -617.404052734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.44094085693359375, "rewards/margins": 9.091764450073242, "rewards/rejected": -9.53270435333252, "step": 4588 }, { "epoch": 0.71, "learning_rate": 1.0781575807873852e-05, "logits/chosen": -1.1318413019180298, "logits/rejected": -2.646796464920044, "logps/chosen": -68.42890930175781, "logps/rejected": -223.87576293945312, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -1.5664268732070923, "rewards/margins": 4.163590908050537, "rewards/rejected": -5.73001766204834, "step": 4589 }, { "epoch": 0.71, "learning_rate": 1.0780842367342704e-05, "logits/chosen": -2.911918878555298, "logits/rejected": -2.9181840419769287, "logps/chosen": -126.48072814941406, "logps/rejected": -216.00259399414062, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.0816024541854858, "rewards/margins": 5.927852630615234, "rewards/rejected": -7.00945520401001, "step": 4590 }, { "epoch": 0.71, "learning_rate": 1.0780108926811556e-05, "logits/chosen": -3.007214069366455, "logits/rejected": -2.9681942462921143, "logps/chosen": -229.71820068359375, "logps/rejected": -367.1309814453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.816892147064209, "rewards/margins": 7.497522354125977, "rewards/rejected": -9.314414978027344, "step": 4591 }, { "epoch": 0.71, "learning_rate": 1.077937548628041e-05, "logits/chosen": -1.8846200704574585, "logits/rejected": -2.8078083992004395, "logps/chosen": -193.57400512695312, "logps/rejected": -518.0192260742188, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -2.8502254486083984, "rewards/margins": 5.225315093994141, "rewards/rejected": -8.075540542602539, "step": 4592 }, { "epoch": 0.71, "learning_rate": 1.0778642045749261e-05, "logits/chosen": -3.164000988006592, "logits/rejected": -2.626371383666992, "logps/chosen": -265.25653076171875, "logps/rejected": -271.97576904296875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.7396111488342285, "rewards/margins": 5.260095119476318, "rewards/rejected": -6.999706268310547, "step": 4593 }, { "epoch": 0.71, "learning_rate": 1.0777908605218113e-05, "logits/chosen": -2.740490674972534, "logits/rejected": -3.146533250808716, "logps/chosen": -57.9087028503418, "logps/rejected": -220.056640625, "loss": 0.496, "rewards/accuracies": 0.5, "rewards/chosen": -3.1927270889282227, "rewards/margins": 2.5795373916625977, "rewards/rejected": -5.77226448059082, "step": 4594 }, { "epoch": 0.71, "learning_rate": 1.0777175164686965e-05, "logits/chosen": -1.7711559534072876, "logits/rejected": -2.317343235015869, "logps/chosen": -86.65623474121094, "logps/rejected": -237.84361267089844, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.1516368389129639, "rewards/margins": 6.083080291748047, "rewards/rejected": -7.23471736907959, "step": 4595 }, { "epoch": 0.71, "learning_rate": 1.0776441724155817e-05, "logits/chosen": -2.955341100692749, "logits/rejected": -2.2765755653381348, "logps/chosen": -503.13067626953125, "logps/rejected": -251.2887420654297, "loss": 3.0176, "rewards/accuracies": 0.5, "rewards/chosen": -4.101069927215576, "rewards/margins": 1.7030854225158691, "rewards/rejected": -5.804155349731445, "step": 4596 }, { "epoch": 0.71, "learning_rate": 1.0775708283624669e-05, "logits/chosen": -2.7341501712799072, "logits/rejected": -3.1737070083618164, "logps/chosen": -223.68115234375, "logps/rejected": -389.02001953125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.400637149810791, "rewards/margins": 5.024538516998291, "rewards/rejected": -7.425175666809082, "step": 4597 }, { "epoch": 0.72, "learning_rate": 1.077497484309352e-05, "logits/chosen": -1.4399936199188232, "logits/rejected": -2.7220957279205322, "logps/chosen": -83.90565490722656, "logps/rejected": -242.676513671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.3515777587890625, "rewards/margins": 8.936704635620117, "rewards/rejected": -11.28828239440918, "step": 4598 }, { "epoch": 0.72, "learning_rate": 1.0774241402562372e-05, "logits/chosen": -2.8880505561828613, "logits/rejected": -2.2797157764434814, "logps/chosen": -327.9091796875, "logps/rejected": -190.02490234375, "loss": 3.5251, "rewards/accuracies": 0.5, "rewards/chosen": -4.435428619384766, "rewards/margins": -0.40963101387023926, "rewards/rejected": -4.0257978439331055, "step": 4599 }, { "epoch": 0.72, "learning_rate": 1.0773507962031224e-05, "logits/chosen": -2.1005516052246094, "logits/rejected": -3.2830443382263184, "logps/chosen": -198.521240234375, "logps/rejected": -401.5440673828125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.175011157989502, "rewards/margins": 6.384322643280029, "rewards/rejected": -7.559333801269531, "step": 4600 }, { "epoch": 0.72, "learning_rate": 1.0772774521500078e-05, "logits/chosen": -2.772252082824707, "logits/rejected": -2.8184258937835693, "logps/chosen": -138.46038818359375, "logps/rejected": -297.95941162109375, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -1.4342033863067627, "rewards/margins": 4.116819381713867, "rewards/rejected": -5.551022529602051, "step": 4601 }, { "epoch": 0.72, "learning_rate": 1.077204108096893e-05, "logits/chosen": -2.660147190093994, "logits/rejected": -3.0190820693969727, "logps/chosen": -259.3599853515625, "logps/rejected": -438.6483154296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.1754422187805176, "rewards/margins": 7.47231388092041, "rewards/rejected": -9.64775562286377, "step": 4602 }, { "epoch": 0.72, "learning_rate": 1.0771307640437782e-05, "logits/chosen": -2.2151401042938232, "logits/rejected": -2.7861366271972656, "logps/chosen": -232.35841369628906, "logps/rejected": -522.978515625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.7106682062149048, "rewards/margins": 5.456690311431885, "rewards/rejected": -7.1673583984375, "step": 4603 }, { "epoch": 0.72, "learning_rate": 1.0770574199906635e-05, "logits/chosen": -2.891247272491455, "logits/rejected": -3.0269813537597656, "logps/chosen": -114.85350036621094, "logps/rejected": -194.77496337890625, "loss": 0.1505, "rewards/accuracies": 1.0, "rewards/chosen": -2.7383384704589844, "rewards/margins": 4.268865585327148, "rewards/rejected": -7.007204055786133, "step": 4604 }, { "epoch": 0.72, "learning_rate": 1.0769840759375487e-05, "logits/chosen": -2.4696884155273438, "logits/rejected": -2.999896764755249, "logps/chosen": -166.3785400390625, "logps/rejected": -361.61920166015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.453869342803955, "rewards/margins": 9.370135307312012, "rewards/rejected": -10.824005126953125, "step": 4605 }, { "epoch": 0.72, "learning_rate": 1.0769107318844339e-05, "logits/chosen": -2.647394895553589, "logits/rejected": -2.974560260772705, "logps/chosen": -500.2285461425781, "logps/rejected": -458.1148681640625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -3.4653472900390625, "rewards/margins": 4.934288024902344, "rewards/rejected": -8.399635314941406, "step": 4606 }, { "epoch": 0.72, "learning_rate": 1.0768373878313191e-05, "logits/chosen": -2.119246482849121, "logits/rejected": -3.2025070190429688, "logps/chosen": -477.33367919921875, "logps/rejected": -640.1215209960938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6494224667549133, "rewards/margins": 8.988791465759277, "rewards/rejected": -9.638214111328125, "step": 4607 }, { "epoch": 0.72, "learning_rate": 1.0767640437782043e-05, "logits/chosen": -2.2557106018066406, "logits/rejected": -2.643705368041992, "logps/chosen": -342.13287353515625, "logps/rejected": -468.77825927734375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.0125205516815186, "rewards/margins": 6.635904312133789, "rewards/rejected": -8.648425102233887, "step": 4608 }, { "epoch": 0.72, "learning_rate": 1.0766906997250895e-05, "logits/chosen": -1.967248558998108, "logits/rejected": -2.8216421604156494, "logps/chosen": -100.18408203125, "logps/rejected": -412.70941162109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1093777418136597, "rewards/margins": 10.463823318481445, "rewards/rejected": -11.573201179504395, "step": 4609 }, { "epoch": 0.72, "learning_rate": 1.0766173556719748e-05, "logits/chosen": -3.040353775024414, "logits/rejected": -2.6507461071014404, "logps/chosen": -130.375244140625, "logps/rejected": -158.1441192626953, "loss": 3.4435, "rewards/accuracies": 0.5, "rewards/chosen": -6.03721284866333, "rewards/margins": -0.0553746223449707, "rewards/rejected": -5.981838226318359, "step": 4610 }, { "epoch": 0.72, "learning_rate": 1.07654401161886e-05, "logits/chosen": -3.161512851715088, "logits/rejected": -2.6019845008850098, "logps/chosen": -320.1363525390625, "logps/rejected": -70.00285339355469, "loss": 5.6481, "rewards/accuracies": 0.0, "rewards/chosen": -7.500372886657715, "rewards/margins": -5.644537448883057, "rewards/rejected": -1.8558350801467896, "step": 4611 }, { "epoch": 0.72, "learning_rate": 1.0764706675657452e-05, "logits/chosen": -2.8757593631744385, "logits/rejected": -2.9187464714050293, "logps/chosen": -228.03857421875, "logps/rejected": -238.7061767578125, "loss": 3.4483, "rewards/accuracies": 0.5, "rewards/chosen": -3.860635280609131, "rewards/margins": 0.10076761245727539, "rewards/rejected": -3.9614028930664062, "step": 4612 }, { "epoch": 0.72, "learning_rate": 1.0763973235126304e-05, "logits/chosen": -2.796478509902954, "logits/rejected": -2.2951626777648926, "logps/chosen": -136.0182647705078, "logps/rejected": -127.64373016357422, "loss": 3.3195, "rewards/accuracies": 0.5, "rewards/chosen": -5.148421764373779, "rewards/margins": -0.4771111011505127, "rewards/rejected": -4.671310901641846, "step": 4613 }, { "epoch": 0.72, "learning_rate": 1.0763239794595156e-05, "logits/chosen": -2.845620632171631, "logits/rejected": -1.8642549514770508, "logps/chosen": -345.3525695800781, "logps/rejected": -268.6448669433594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1359765529632568, "rewards/margins": 8.825946807861328, "rewards/rejected": -9.961923599243164, "step": 4614 }, { "epoch": 0.72, "learning_rate": 1.0762506354064008e-05, "logits/chosen": -2.9854977130889893, "logits/rejected": -3.0321786403656006, "logps/chosen": -128.3881072998047, "logps/rejected": -265.55303955078125, "loss": 1.4267, "rewards/accuracies": 0.5, "rewards/chosen": -3.0972774028778076, "rewards/margins": 1.853613257408142, "rewards/rejected": -4.95089054107666, "step": 4615 }, { "epoch": 0.72, "learning_rate": 1.076177291353286e-05, "logits/chosen": -1.9761464595794678, "logits/rejected": -2.9153172969818115, "logps/chosen": -112.75576782226562, "logps/rejected": -290.9170227050781, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.7532639503479004, "rewards/margins": 7.71199893951416, "rewards/rejected": -9.465263366699219, "step": 4616 }, { "epoch": 0.72, "learning_rate": 1.0761039473001711e-05, "logits/chosen": -2.8794593811035156, "logits/rejected": -3.0722856521606445, "logps/chosen": -142.74717712402344, "logps/rejected": -257.9809265136719, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.4867732524871826, "rewards/margins": 5.416040420532227, "rewards/rejected": -6.902813911437988, "step": 4617 }, { "epoch": 0.72, "learning_rate": 1.0760306032470563e-05, "logits/chosen": -1.1972894668579102, "logits/rejected": -2.462101697921753, "logps/chosen": -67.93527221679688, "logps/rejected": -245.5516357421875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -2.8788695335388184, "rewards/margins": 4.938910484313965, "rewards/rejected": -7.817779541015625, "step": 4618 }, { "epoch": 0.72, "learning_rate": 1.0759572591939417e-05, "logits/chosen": -2.8521909713745117, "logits/rejected": -2.672220230102539, "logps/chosen": -589.46044921875, "logps/rejected": -487.75653076171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.4621994495391846, "rewards/margins": 7.467921257019043, "rewards/rejected": -8.930120468139648, "step": 4619 }, { "epoch": 0.72, "learning_rate": 1.0758839151408269e-05, "logits/chosen": -2.5037572383880615, "logits/rejected": -2.918391466140747, "logps/chosen": -78.34359741210938, "logps/rejected": -184.6473388671875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.3515241146087646, "rewards/margins": 4.834033966064453, "rewards/rejected": -7.185558319091797, "step": 4620 }, { "epoch": 0.72, "learning_rate": 1.075810571087712e-05, "logits/chosen": -3.0049350261688232, "logits/rejected": -3.2748098373413086, "logps/chosen": -119.18946838378906, "logps/rejected": -233.830078125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.836052417755127, "rewards/margins": 5.406155586242676, "rewards/rejected": -7.242208003997803, "step": 4621 }, { "epoch": 0.72, "learning_rate": 1.0757372270345972e-05, "logits/chosen": -2.488007068634033, "logits/rejected": -3.0074074268341064, "logps/chosen": -208.36386108398438, "logps/rejected": -304.1317443847656, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.213597297668457, "rewards/margins": 6.5993757247924805, "rewards/rejected": -7.8129730224609375, "step": 4622 }, { "epoch": 0.72, "learning_rate": 1.0756638829814824e-05, "logits/chosen": -2.93656849861145, "logits/rejected": -2.4568064212799072, "logps/chosen": -316.048095703125, "logps/rejected": -408.4908752441406, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -1.839881181716919, "rewards/margins": 5.708285808563232, "rewards/rejected": -7.5481672286987305, "step": 4623 }, { "epoch": 0.72, "learning_rate": 1.0755905389283676e-05, "logits/chosen": -1.0612338781356812, "logits/rejected": -2.7512433528900146, "logps/chosen": -103.44841766357422, "logps/rejected": -316.459228515625, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.799109935760498, "rewards/margins": 4.506405830383301, "rewards/rejected": -6.305516242980957, "step": 4624 }, { "epoch": 0.72, "learning_rate": 1.0755171948752528e-05, "logits/chosen": -2.665083408355713, "logits/rejected": -3.2255730628967285, "logps/chosen": -61.52477264404297, "logps/rejected": -448.01416015625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.2381439208984375, "rewards/margins": 6.519748687744141, "rewards/rejected": -8.757892608642578, "step": 4625 }, { "epoch": 0.72, "learning_rate": 1.075443850822138e-05, "logits/chosen": -1.4952665567398071, "logits/rejected": -2.563155174255371, "logps/chosen": -166.00323486328125, "logps/rejected": -457.9330139160156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7212094068527222, "rewards/margins": 9.815763473510742, "rewards/rejected": -11.536972045898438, "step": 4626 }, { "epoch": 0.72, "learning_rate": 1.0753705067690232e-05, "logits/chosen": -1.3929857015609741, "logits/rejected": -2.579007387161255, "logps/chosen": -220.77565002441406, "logps/rejected": -446.90911865234375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.6691734790802002, "rewards/margins": 6.286062240600586, "rewards/rejected": -7.955235481262207, "step": 4627 }, { "epoch": 0.72, "learning_rate": 1.0752971627159085e-05, "logits/chosen": -1.54002046585083, "logits/rejected": -2.9576637744903564, "logps/chosen": -107.82730102539062, "logps/rejected": -227.84165954589844, "loss": 1.9926, "rewards/accuracies": 0.5, "rewards/chosen": -4.503273010253906, "rewards/margins": -0.29055142402648926, "rewards/rejected": -4.212721824645996, "step": 4628 }, { "epoch": 0.72, "learning_rate": 1.0752238186627937e-05, "logits/chosen": -2.4106409549713135, "logits/rejected": -3.035759210586548, "logps/chosen": -59.953330993652344, "logps/rejected": -352.1063232421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5453861951828003, "rewards/margins": 7.68319034576416, "rewards/rejected": -9.22857666015625, "step": 4629 }, { "epoch": 0.72, "learning_rate": 1.0751504746096789e-05, "logits/chosen": -1.6981583833694458, "logits/rejected": -2.7332701683044434, "logps/chosen": -196.38897705078125, "logps/rejected": -375.2880859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.149265766143799, "rewards/margins": 6.7669901847839355, "rewards/rejected": -8.916255950927734, "step": 4630 }, { "epoch": 0.72, "learning_rate": 1.0750771305565641e-05, "logits/chosen": -2.633697748184204, "logits/rejected": -2.9262824058532715, "logps/chosen": -386.2154541015625, "logps/rejected": -355.46514892578125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.2753548622131348, "rewards/margins": 4.815126419067383, "rewards/rejected": -6.090481758117676, "step": 4631 }, { "epoch": 0.72, "learning_rate": 1.0750037865034493e-05, "logits/chosen": -2.2982866764068604, "logits/rejected": -3.1190848350524902, "logps/chosen": -111.624755859375, "logps/rejected": -332.850341796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2902878522872925, "rewards/margins": 6.737229347229004, "rewards/rejected": -8.027517318725586, "step": 4632 }, { "epoch": 0.72, "learning_rate": 1.0749304424503345e-05, "logits/chosen": -2.325456380844116, "logits/rejected": -2.775768518447876, "logps/chosen": -128.42266845703125, "logps/rejected": -493.6688232421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5655040740966797, "rewards/margins": 7.097990989685059, "rewards/rejected": -8.663495063781738, "step": 4633 }, { "epoch": 0.72, "learning_rate": 1.0748570983972197e-05, "logits/chosen": -2.7136971950531006, "logits/rejected": -2.8056955337524414, "logps/chosen": -76.37342834472656, "logps/rejected": -168.8807373046875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.6499369144439697, "rewards/margins": 5.240972995758057, "rewards/rejected": -6.890909671783447, "step": 4634 }, { "epoch": 0.72, "learning_rate": 1.0747837543441049e-05, "logits/chosen": -1.313910722732544, "logits/rejected": -2.5305817127227783, "logps/chosen": -101.00993347167969, "logps/rejected": -211.46017456054688, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -1.9729266166687012, "rewards/margins": 3.441082715988159, "rewards/rejected": -5.414009094238281, "step": 4635 }, { "epoch": 0.72, "learning_rate": 1.0747104102909902e-05, "logits/chosen": -1.9585784673690796, "logits/rejected": -3.0411038398742676, "logps/chosen": -135.4481658935547, "logps/rejected": -451.02117919921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8792179226875305, "rewards/margins": 7.4796857833862305, "rewards/rejected": -8.358903884887695, "step": 4636 }, { "epoch": 0.72, "learning_rate": 1.0746370662378754e-05, "logits/chosen": -2.5499792098999023, "logits/rejected": -3.0922625064849854, "logps/chosen": -175.8772430419922, "logps/rejected": -356.80950927734375, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.6448862552642822, "rewards/margins": 4.386185169219971, "rewards/rejected": -6.031071662902832, "step": 4637 }, { "epoch": 0.72, "learning_rate": 1.0745637221847608e-05, "logits/chosen": -1.5369198322296143, "logits/rejected": -2.6770706176757812, "logps/chosen": -172.9470977783203, "logps/rejected": -337.8547058105469, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0726852416992188, "rewards/margins": 6.848095893859863, "rewards/rejected": -7.920781135559082, "step": 4638 }, { "epoch": 0.72, "learning_rate": 1.074490378131646e-05, "logits/chosen": -2.639697790145874, "logits/rejected": -2.658780574798584, "logps/chosen": -418.7164611816406, "logps/rejected": -481.19183349609375, "loss": 0.1023, "rewards/accuracies": 1.0, "rewards/chosen": -3.290306806564331, "rewards/margins": 6.591799259185791, "rewards/rejected": -9.88210678100586, "step": 4639 }, { "epoch": 0.72, "learning_rate": 1.0744170340785311e-05, "logits/chosen": -2.5610859394073486, "logits/rejected": -3.147101640701294, "logps/chosen": -112.84944152832031, "logps/rejected": -308.7972106933594, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8341484069824219, "rewards/margins": 7.059065818786621, "rewards/rejected": -7.893214225769043, "step": 4640 }, { "epoch": 0.72, "learning_rate": 1.0743436900254163e-05, "logits/chosen": -1.9818693399429321, "logits/rejected": -2.7657525539398193, "logps/chosen": -248.3700408935547, "logps/rejected": -300.75921630859375, "loss": 5.6658, "rewards/accuracies": 0.5, "rewards/chosen": -7.246403694152832, "rewards/margins": -2.79219913482666, "rewards/rejected": -4.454204559326172, "step": 4641 }, { "epoch": 0.72, "learning_rate": 1.0742703459723015e-05, "logits/chosen": -1.7190468311309814, "logits/rejected": -2.697754144668579, "logps/chosen": -145.58297729492188, "logps/rejected": -305.4561767578125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.353844165802002, "rewards/margins": 6.754095077514648, "rewards/rejected": -8.107938766479492, "step": 4642 }, { "epoch": 0.72, "learning_rate": 1.0741970019191867e-05, "logits/chosen": -2.2192420959472656, "logits/rejected": -2.599933385848999, "logps/chosen": -97.15995788574219, "logps/rejected": -269.3238525390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2429218292236328, "rewards/margins": 9.302362442016602, "rewards/rejected": -10.545284271240234, "step": 4643 }, { "epoch": 0.72, "learning_rate": 1.0741236578660719e-05, "logits/chosen": -2.6171154975891113, "logits/rejected": -2.87597393989563, "logps/chosen": -457.96124267578125, "logps/rejected": -641.413330078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0084424018859863, "rewards/margins": 9.14134693145752, "rewards/rejected": -11.149789810180664, "step": 4644 }, { "epoch": 0.72, "learning_rate": 1.0740503138129572e-05, "logits/chosen": -1.6970187425613403, "logits/rejected": -2.7604100704193115, "logps/chosen": -44.538368225097656, "logps/rejected": -203.44711303710938, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546154975891113, "rewards/margins": 4.9761152267456055, "rewards/rejected": -6.530730247497559, "step": 4645 }, { "epoch": 0.72, "learning_rate": 1.0739769697598424e-05, "logits/chosen": -2.636160373687744, "logits/rejected": -2.9166669845581055, "logps/chosen": -691.49853515625, "logps/rejected": -589.1600341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.3954048156738281, "rewards/margins": 10.430338859558105, "rewards/rejected": -10.825743675231934, "step": 4646 }, { "epoch": 0.72, "learning_rate": 1.0739036257067276e-05, "logits/chosen": -2.1038763523101807, "logits/rejected": -2.798764944076538, "logps/chosen": -124.24723815917969, "logps/rejected": -300.1214599609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.4267700910568237, "rewards/margins": 7.3995890617370605, "rewards/rejected": -8.826358795166016, "step": 4647 }, { "epoch": 0.72, "learning_rate": 1.0738302816536128e-05, "logits/chosen": -0.9255183339118958, "logits/rejected": -1.529591679573059, "logps/chosen": -296.3898620605469, "logps/rejected": -389.70526123046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.127109169960022, "rewards/margins": 7.460533142089844, "rewards/rejected": -8.587641716003418, "step": 4648 }, { "epoch": 0.72, "learning_rate": 1.073756937600498e-05, "logits/chosen": -3.317767381668091, "logits/rejected": -3.021656036376953, "logps/chosen": -77.83964538574219, "logps/rejected": -136.6387939453125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.6280828714370728, "rewards/margins": 4.812057971954346, "rewards/rejected": -6.440140724182129, "step": 4649 }, { "epoch": 0.72, "learning_rate": 1.0736835935473832e-05, "logits/chosen": -1.1783394813537598, "logits/rejected": -2.505157947540283, "logps/chosen": -45.04008483886719, "logps/rejected": -288.71044921875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.6625776290893555, "rewards/margins": 7.723924160003662, "rewards/rejected": -8.386502265930176, "step": 4650 }, { "epoch": 0.72, "learning_rate": 1.0736102494942684e-05, "logits/chosen": -1.8645206689834595, "logits/rejected": -2.6731722354888916, "logps/chosen": -132.88323974609375, "logps/rejected": -433.9683837890625, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -1.7085888385772705, "rewards/margins": 6.718398094177246, "rewards/rejected": -8.426986694335938, "step": 4651 }, { "epoch": 0.72, "learning_rate": 1.0735369054411536e-05, "logits/chosen": -2.8266494274139404, "logits/rejected": -2.8400375843048096, "logps/chosen": -126.63932037353516, "logps/rejected": -213.61578369140625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.455621361732483, "rewards/margins": 5.923086643218994, "rewards/rejected": -7.3787078857421875, "step": 4652 }, { "epoch": 0.72, "learning_rate": 1.0734635613880387e-05, "logits/chosen": -1.4551647901535034, "logits/rejected": -2.917341470718384, "logps/chosen": -82.16220092773438, "logps/rejected": -352.21990966796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5522525310516357, "rewards/margins": 7.941522121429443, "rewards/rejected": -9.4937744140625, "step": 4653 }, { "epoch": 0.72, "learning_rate": 1.0733902173349241e-05, "logits/chosen": -2.327113389968872, "logits/rejected": -1.6075252294540405, "logps/chosen": -248.7210235595703, "logps/rejected": -171.1862335205078, "loss": 7.3388, "rewards/accuracies": 0.0, "rewards/chosen": -7.813908576965332, "rewards/margins": -7.337120056152344, "rewards/rejected": -0.47678834199905396, "step": 4654 }, { "epoch": 0.72, "learning_rate": 1.0733168732818093e-05, "logits/chosen": -2.5647835731506348, "logits/rejected": -2.2448503971099854, "logps/chosen": -521.3863525390625, "logps/rejected": -558.4161376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5995804071426392, "rewards/margins": 10.028465270996094, "rewards/rejected": -10.628046035766602, "step": 4655 }, { "epoch": 0.72, "learning_rate": 1.0732435292286945e-05, "logits/chosen": -2.122908592224121, "logits/rejected": -2.8832991123199463, "logps/chosen": -111.34928131103516, "logps/rejected": -239.40176391601562, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0022777318954468, "rewards/margins": 6.177016258239746, "rewards/rejected": -7.179294586181641, "step": 4656 }, { "epoch": 0.72, "learning_rate": 1.0731701851755797e-05, "logits/chosen": -1.451904535293579, "logits/rejected": -2.4032974243164062, "logps/chosen": -144.67922973632812, "logps/rejected": -331.716552734375, "loss": 3.9237, "rewards/accuracies": 0.5, "rewards/chosen": -5.607975006103516, "rewards/margins": -0.30107831954956055, "rewards/rejected": -5.306896686553955, "step": 4657 }, { "epoch": 0.72, "learning_rate": 1.0730968411224649e-05, "logits/chosen": -2.712667942047119, "logits/rejected": -2.905379295349121, "logps/chosen": -85.18756866455078, "logps/rejected": -176.750244140625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.8735463619232178, "rewards/margins": 5.871683597564697, "rewards/rejected": -7.745229721069336, "step": 4658 }, { "epoch": 0.72, "learning_rate": 1.07302349706935e-05, "logits/chosen": -2.6911067962646484, "logits/rejected": -1.4211010932922363, "logps/chosen": -318.0531921386719, "logps/rejected": -282.6206970214844, "loss": 4.4406, "rewards/accuracies": 0.5, "rewards/chosen": -5.419686317443848, "rewards/margins": -0.5030813217163086, "rewards/rejected": -4.916604995727539, "step": 4659 }, { "epoch": 0.72, "learning_rate": 1.0729501530162352e-05, "logits/chosen": -2.234785318374634, "logits/rejected": -2.62550950050354, "logps/chosen": -180.60614013671875, "logps/rejected": -157.287109375, "loss": 3.3766, "rewards/accuracies": 0.5, "rewards/chosen": -3.6300835609436035, "rewards/margins": -0.07899188995361328, "rewards/rejected": -3.5510916709899902, "step": 4660 }, { "epoch": 0.72, "learning_rate": 1.0728768089631204e-05, "logits/chosen": -0.8120670914649963, "logits/rejected": -2.2586851119995117, "logps/chosen": -174.62905883789062, "logps/rejected": -594.7830810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9308594465255737, "rewards/margins": 9.328134536743164, "rewards/rejected": -11.258993148803711, "step": 4661 }, { "epoch": 0.73, "learning_rate": 1.0728034649100056e-05, "logits/chosen": -3.1220648288726807, "logits/rejected": -2.1098103523254395, "logps/chosen": -362.49456787109375, "logps/rejected": -296.52197265625, "loss": 2.7613, "rewards/accuracies": 0.5, "rewards/chosen": -3.644956350326538, "rewards/margins": 1.846466302871704, "rewards/rejected": -5.491422653198242, "step": 4662 }, { "epoch": 0.73, "learning_rate": 1.072730120856891e-05, "logits/chosen": -2.1763906478881836, "logits/rejected": -2.8501105308532715, "logps/chosen": -66.1409912109375, "logps/rejected": -291.7130432128906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5939918756484985, "rewards/margins": 8.200510025024414, "rewards/rejected": -8.794502258300781, "step": 4663 }, { "epoch": 0.73, "learning_rate": 1.0726567768037761e-05, "logits/chosen": -2.987448215484619, "logits/rejected": -2.710049867630005, "logps/chosen": -303.75396728515625, "logps/rejected": -257.4931640625, "loss": 3.8705, "rewards/accuracies": 0.5, "rewards/chosen": -4.514632701873779, "rewards/margins": -0.8970062732696533, "rewards/rejected": -3.617626190185547, "step": 4664 }, { "epoch": 0.73, "learning_rate": 1.0725834327506613e-05, "logits/chosen": -2.920584201812744, "logits/rejected": -3.0252630710601807, "logps/chosen": -113.14779663085938, "logps/rejected": -209.89517211914062, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.21722069382667542, "rewards/margins": 7.186726093292236, "rewards/rejected": -7.403946876525879, "step": 4665 }, { "epoch": 0.73, "learning_rate": 1.0725100886975465e-05, "logits/chosen": -2.0223045349121094, "logits/rejected": -2.711804151535034, "logps/chosen": -90.41799926757812, "logps/rejected": -251.6199951171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.2178332805633545, "rewards/margins": 7.8615336418151855, "rewards/rejected": -9.079366683959961, "step": 4666 }, { "epoch": 0.73, "learning_rate": 1.0724367446444317e-05, "logits/chosen": -2.824800968170166, "logits/rejected": -2.969954013824463, "logps/chosen": -552.93359375, "logps/rejected": -595.8251953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.4387497007846832, "rewards/margins": 8.584526062011719, "rewards/rejected": -9.023275375366211, "step": 4667 }, { "epoch": 0.73, "learning_rate": 1.0723634005913169e-05, "logits/chosen": -2.606599807739258, "logits/rejected": -2.812439203262329, "logps/chosen": -633.90771484375, "logps/rejected": -508.59820556640625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.2010726928710938, "rewards/margins": 5.579576015472412, "rewards/rejected": -6.780649185180664, "step": 4668 }, { "epoch": 0.73, "learning_rate": 1.0722900565382021e-05, "logits/chosen": -1.9883655309677124, "logits/rejected": -2.979099750518799, "logps/chosen": -114.75921630859375, "logps/rejected": -268.03173828125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.1790080964565277, "rewards/margins": 6.342893600463867, "rewards/rejected": -6.5219011306762695, "step": 4669 }, { "epoch": 0.73, "learning_rate": 1.0722167124850874e-05, "logits/chosen": -1.6277917623519897, "logits/rejected": -2.7009081840515137, "logps/chosen": -170.0212860107422, "logps/rejected": -220.83274841308594, "loss": 2.8606, "rewards/accuracies": 0.5, "rewards/chosen": -3.792356252670288, "rewards/margins": 0.4176321029663086, "rewards/rejected": -4.209988594055176, "step": 4670 }, { "epoch": 0.73, "learning_rate": 1.0721433684319726e-05, "logits/chosen": -1.2255525588989258, "logits/rejected": -2.7103145122528076, "logps/chosen": -85.73180389404297, "logps/rejected": -424.4573669433594, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8681620359420776, "rewards/margins": 5.4454755783081055, "rewards/rejected": -6.313637733459473, "step": 4671 }, { "epoch": 0.73, "learning_rate": 1.072070024378858e-05, "logits/chosen": -2.0887463092803955, "logits/rejected": -3.129019260406494, "logps/chosen": -79.76437377929688, "logps/rejected": -263.95843505859375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.3922576904296875, "rewards/margins": 5.909694194793701, "rewards/rejected": -8.301952362060547, "step": 4672 }, { "epoch": 0.73, "learning_rate": 1.0719966803257432e-05, "logits/chosen": -2.9264774322509766, "logits/rejected": -2.3148908615112305, "logps/chosen": -337.305908203125, "logps/rejected": -363.0663757324219, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.4047333002090454, "rewards/margins": 6.343531608581543, "rewards/rejected": -6.748265266418457, "step": 4673 }, { "epoch": 0.73, "learning_rate": 1.0719233362726284e-05, "logits/chosen": -2.685279369354248, "logits/rejected": -2.7412655353546143, "logps/chosen": -121.74479675292969, "logps/rejected": -134.80247497558594, "loss": 2.7677, "rewards/accuracies": 0.5, "rewards/chosen": -3.7596914768218994, "rewards/margins": -0.28691768646240234, "rewards/rejected": -3.472773790359497, "step": 4674 }, { "epoch": 0.73, "learning_rate": 1.0718499922195136e-05, "logits/chosen": -2.309072732925415, "logits/rejected": -2.781536340713501, "logps/chosen": -567.0478515625, "logps/rejected": -576.2645263671875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.5950515866279602, "rewards/margins": 5.718225479125977, "rewards/rejected": -6.313276767730713, "step": 4675 }, { "epoch": 0.73, "learning_rate": 1.0717766481663987e-05, "logits/chosen": -1.97934091091156, "logits/rejected": -2.7054624557495117, "logps/chosen": -78.495361328125, "logps/rejected": -393.32342529296875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.0856633186340332, "rewards/margins": 5.751552581787109, "rewards/rejected": -6.837216377258301, "step": 4676 }, { "epoch": 0.73, "learning_rate": 1.071703304113284e-05, "logits/chosen": -3.0405948162078857, "logits/rejected": -3.2773404121398926, "logps/chosen": -104.80733489990234, "logps/rejected": -234.43031311035156, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.896648108959198, "rewards/margins": 5.9724860191345215, "rewards/rejected": -6.869133949279785, "step": 4677 }, { "epoch": 0.73, "learning_rate": 1.0716299600601691e-05, "logits/chosen": -2.5822339057922363, "logits/rejected": -2.9017772674560547, "logps/chosen": -21.27637481689453, "logps/rejected": -329.2076721191406, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.23475094139575958, "rewards/margins": 6.586069107055664, "rewards/rejected": -6.820820331573486, "step": 4678 }, { "epoch": 0.73, "learning_rate": 1.0715566160070543e-05, "logits/chosen": -2.550124406814575, "logits/rejected": -3.0088751316070557, "logps/chosen": -146.60031127929688, "logps/rejected": -306.00628662109375, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.0177974700927734, "rewards/margins": 5.429523468017578, "rewards/rejected": -6.447320938110352, "step": 4679 }, { "epoch": 0.73, "learning_rate": 1.0714832719539395e-05, "logits/chosen": -2.730616807937622, "logits/rejected": -3.23799467086792, "logps/chosen": -348.9397277832031, "logps/rejected": -379.706787109375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3180886209011078, "rewards/margins": 6.766665458679199, "rewards/rejected": -7.08475399017334, "step": 4680 }, { "epoch": 0.73, "learning_rate": 1.0714099279008248e-05, "logits/chosen": -2.780951499938965, "logits/rejected": -2.389301061630249, "logps/chosen": -245.907958984375, "logps/rejected": -349.95599365234375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.0478569269180298, "rewards/margins": 6.580732345581055, "rewards/rejected": -7.628588676452637, "step": 4681 }, { "epoch": 0.73, "learning_rate": 1.07133658384771e-05, "logits/chosen": -2.754155397415161, "logits/rejected": -2.8658814430236816, "logps/chosen": -138.27630615234375, "logps/rejected": -204.91766357421875, "loss": 2.8493, "rewards/accuracies": 0.5, "rewards/chosen": -4.331711769104004, "rewards/margins": -0.7564356327056885, "rewards/rejected": -3.5752761363983154, "step": 4682 }, { "epoch": 0.73, "learning_rate": 1.0712632397945952e-05, "logits/chosen": -2.6305811405181885, "logits/rejected": -2.9121456146240234, "logps/chosen": -112.84159851074219, "logps/rejected": -145.36634826660156, "loss": 1.7226, "rewards/accuracies": 0.5, "rewards/chosen": -3.843447685241699, "rewards/margins": 1.9946640729904175, "rewards/rejected": -5.838111877441406, "step": 4683 }, { "epoch": 0.73, "learning_rate": 1.0711898957414804e-05, "logits/chosen": -1.8687329292297363, "logits/rejected": -3.1570699214935303, "logps/chosen": -241.28231811523438, "logps/rejected": -429.7325744628906, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.7370954751968384, "rewards/margins": 5.9218339920043945, "rewards/rejected": -7.658929347991943, "step": 4684 }, { "epoch": 0.73, "learning_rate": 1.0711165516883656e-05, "logits/chosen": -2.7606685161590576, "logits/rejected": -3.205897331237793, "logps/chosen": -603.7847900390625, "logps/rejected": -886.0096435546875, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.40338900685310364, "rewards/margins": 6.982478141784668, "rewards/rejected": -7.385867118835449, "step": 4685 }, { "epoch": 0.73, "learning_rate": 1.0710432076352508e-05, "logits/chosen": -2.213632822036743, "logits/rejected": -1.8986926078796387, "logps/chosen": -288.31298828125, "logps/rejected": -216.28890991210938, "loss": 3.2549, "rewards/accuracies": 0.5, "rewards/chosen": -3.286917209625244, "rewards/margins": 1.4008491039276123, "rewards/rejected": -4.687766075134277, "step": 4686 }, { "epoch": 0.73, "learning_rate": 1.070969863582136e-05, "logits/chosen": -2.8658525943756104, "logits/rejected": -2.899150848388672, "logps/chosen": -198.423583984375, "logps/rejected": -424.00897216796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.888568639755249, "rewards/margins": 6.922791481018066, "rewards/rejected": -7.8113603591918945, "step": 4687 }, { "epoch": 0.73, "learning_rate": 1.0708965195290212e-05, "logits/chosen": -2.9787566661834717, "logits/rejected": -2.070312976837158, "logps/chosen": -480.8570251464844, "logps/rejected": -393.48046875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9568237662315369, "rewards/margins": 5.704373359680176, "rewards/rejected": -6.661196708679199, "step": 4688 }, { "epoch": 0.73, "learning_rate": 1.0708231754759064e-05, "logits/chosen": -1.8045364618301392, "logits/rejected": -3.0868265628814697, "logps/chosen": -143.1815185546875, "logps/rejected": -258.6390380859375, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.32249680161476135, "rewards/margins": 4.697243690490723, "rewards/rejected": -5.019740581512451, "step": 4689 }, { "epoch": 0.73, "learning_rate": 1.0707498314227917e-05, "logits/chosen": -2.8544623851776123, "logits/rejected": -3.2269625663757324, "logps/chosen": -166.27572631835938, "logps/rejected": -322.5068359375, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.8879638910293579, "rewards/margins": 4.793670654296875, "rewards/rejected": -5.681634902954102, "step": 4690 }, { "epoch": 0.73, "learning_rate": 1.0706764873696769e-05, "logits/chosen": -2.8825290203094482, "logits/rejected": -2.9481239318847656, "logps/chosen": -97.534423828125, "logps/rejected": -169.65264892578125, "loss": 1.7133, "rewards/accuracies": 0.5, "rewards/chosen": -3.5343732833862305, "rewards/margins": 0.9922924041748047, "rewards/rejected": -4.526665687561035, "step": 4691 }, { "epoch": 0.73, "learning_rate": 1.070603143316562e-05, "logits/chosen": -3.1565568447113037, "logits/rejected": -3.455582618713379, "logps/chosen": -534.1351318359375, "logps/rejected": -444.04931640625, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 0.6658828854560852, "rewards/margins": 6.095512866973877, "rewards/rejected": -5.429630279541016, "step": 4692 }, { "epoch": 0.73, "learning_rate": 1.0705297992634473e-05, "logits/chosen": -2.8014001846313477, "logits/rejected": -2.531989336013794, "logps/chosen": -102.18400573730469, "logps/rejected": -207.7376708984375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.5836223363876343, "rewards/margins": 5.383088111877441, "rewards/rejected": -6.966710090637207, "step": 4693 }, { "epoch": 0.73, "learning_rate": 1.0704564552103325e-05, "logits/chosen": -2.3876476287841797, "logits/rejected": -3.1678519248962402, "logps/chosen": -69.97947692871094, "logps/rejected": -223.82403564453125, "loss": 1.3255, "rewards/accuracies": 0.5, "rewards/chosen": -2.453155279159546, "rewards/margins": 0.4798593521118164, "rewards/rejected": -2.9330146312713623, "step": 4694 }, { "epoch": 0.73, "learning_rate": 1.0703831111572176e-05, "logits/chosen": -2.8065152168273926, "logits/rejected": -3.2527835369110107, "logps/chosen": -242.35838317871094, "logps/rejected": -408.3908386230469, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.0316063165664673, "rewards/margins": 6.553056716918945, "rewards/rejected": -7.584663391113281, "step": 4695 }, { "epoch": 0.73, "learning_rate": 1.0703097671041028e-05, "logits/chosen": -2.2249557971954346, "logits/rejected": -2.837031841278076, "logps/chosen": -98.10205841064453, "logps/rejected": -301.3145446777344, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.7038682103157043, "rewards/margins": 6.454057693481445, "rewards/rejected": -7.157925605773926, "step": 4696 }, { "epoch": 0.73, "learning_rate": 1.070236423050988e-05, "logits/chosen": -2.9788520336151123, "logits/rejected": -1.9063563346862793, "logps/chosen": -402.4168701171875, "logps/rejected": -125.43122863769531, "loss": 3.3678, "rewards/accuracies": 0.5, "rewards/chosen": -3.8639886379241943, "rewards/margins": -0.22667598724365234, "rewards/rejected": -3.637312650680542, "step": 4697 }, { "epoch": 0.73, "learning_rate": 1.0701630789978732e-05, "logits/chosen": -2.6869611740112305, "logits/rejected": -2.4781436920166016, "logps/chosen": -564.9033813476562, "logps/rejected": -376.8436584472656, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.09829969704151154, "rewards/margins": 5.195303440093994, "rewards/rejected": -5.29360294342041, "step": 4698 }, { "epoch": 0.73, "learning_rate": 1.0700897349447586e-05, "logits/chosen": -2.984825611114502, "logits/rejected": -2.2736992835998535, "logps/chosen": -432.70147705078125, "logps/rejected": -341.7796630859375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.6511244773864746, "rewards/margins": 6.338425636291504, "rewards/rejected": -6.9895501136779785, "step": 4699 }, { "epoch": 0.73, "learning_rate": 1.0700163908916438e-05, "logits/chosen": -2.6030185222625732, "logits/rejected": -2.8157992362976074, "logps/chosen": -105.52328491210938, "logps/rejected": -277.2007141113281, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.8065853118896484, "rewards/margins": 5.802375793457031, "rewards/rejected": -6.60896110534668, "step": 4700 }, { "epoch": 0.73, "learning_rate": 1.069943046838529e-05, "logits/chosen": -3.3303840160369873, "logits/rejected": -2.695565700531006, "logps/chosen": -424.4417724609375, "logps/rejected": -285.4189147949219, "loss": 3.0725, "rewards/accuracies": 0.5, "rewards/chosen": -3.35331654548645, "rewards/margins": 1.5249536037445068, "rewards/rejected": -4.878270149230957, "step": 4701 }, { "epoch": 0.73, "learning_rate": 1.0698697027854141e-05, "logits/chosen": -2.9354114532470703, "logits/rejected": -2.5015857219696045, "logps/chosen": -230.95230102539062, "logps/rejected": -448.81005859375, "loss": 1.2495, "rewards/accuracies": 0.5, "rewards/chosen": -2.597602128982544, "rewards/margins": 3.1181695461273193, "rewards/rejected": -5.715771675109863, "step": 4702 }, { "epoch": 0.73, "learning_rate": 1.0697963587322993e-05, "logits/chosen": -2.8672337532043457, "logits/rejected": -2.701845407485962, "logps/chosen": -274.34051513671875, "logps/rejected": -487.4862060546875, "loss": 2.62, "rewards/accuracies": 0.5, "rewards/chosen": -3.272716522216797, "rewards/margins": -0.03679299354553223, "rewards/rejected": -3.2359235286712646, "step": 4703 }, { "epoch": 0.73, "learning_rate": 1.0697230146791847e-05, "logits/chosen": -2.8360867500305176, "logits/rejected": -2.6998019218444824, "logps/chosen": -206.09524536132812, "logps/rejected": -117.55877685546875, "loss": 2.9591, "rewards/accuracies": 0.5, "rewards/chosen": -5.002598762512207, "rewards/margins": -2.083582878112793, "rewards/rejected": -2.919015645980835, "step": 4704 }, { "epoch": 0.73, "learning_rate": 1.0696496706260699e-05, "logits/chosen": -1.7537336349487305, "logits/rejected": -2.487989664077759, "logps/chosen": -213.7457275390625, "logps/rejected": -323.4158630371094, "loss": 3.3079, "rewards/accuracies": 0.5, "rewards/chosen": -4.062817096710205, "rewards/margins": 1.477935791015625, "rewards/rejected": -5.54075288772583, "step": 4705 }, { "epoch": 0.73, "learning_rate": 1.069576326572955e-05, "logits/chosen": -2.727404832839966, "logits/rejected": -2.559330940246582, "logps/chosen": -453.00726318359375, "logps/rejected": -473.0638122558594, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.9104660749435425, "rewards/margins": 9.039594650268555, "rewards/rejected": -9.950061798095703, "step": 4706 }, { "epoch": 0.73, "learning_rate": 1.0695029825198402e-05, "logits/chosen": -2.4763920307159424, "logits/rejected": -2.932689666748047, "logps/chosen": -111.29745483398438, "logps/rejected": -179.0289306640625, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": 0.4270033538341522, "rewards/margins": 3.713822841644287, "rewards/rejected": -3.2868194580078125, "step": 4707 }, { "epoch": 0.73, "learning_rate": 1.0694296384667256e-05, "logits/chosen": -2.1735265254974365, "logits/rejected": -2.9912075996398926, "logps/chosen": -115.52168273925781, "logps/rejected": -330.2579345703125, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -0.7399952411651611, "rewards/margins": 4.995471000671387, "rewards/rejected": -5.735466003417969, "step": 4708 }, { "epoch": 0.73, "learning_rate": 1.0693562944136108e-05, "logits/chosen": -2.2668206691741943, "logits/rejected": -2.7797319889068604, "logps/chosen": -122.93507385253906, "logps/rejected": -274.70709228515625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.6223446130752563, "rewards/margins": 7.178775787353516, "rewards/rejected": -7.801119804382324, "step": 4709 }, { "epoch": 0.73, "learning_rate": 1.069282950360496e-05, "logits/chosen": -3.0863912105560303, "logits/rejected": -2.165306568145752, "logps/chosen": -500.66973876953125, "logps/rejected": -137.95974731445312, "loss": 2.7616, "rewards/accuracies": 0.5, "rewards/chosen": -4.198890686035156, "rewards/margins": -0.17313814163208008, "rewards/rejected": -4.025752544403076, "step": 4710 }, { "epoch": 0.73, "learning_rate": 1.0692096063073812e-05, "logits/chosen": -2.869276285171509, "logits/rejected": -3.434645175933838, "logps/chosen": -155.30677795410156, "logps/rejected": -199.67367553710938, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": -0.5683884620666504, "rewards/margins": 3.563478469848633, "rewards/rejected": -4.131866931915283, "step": 4711 }, { "epoch": 0.73, "learning_rate": 1.0691362622542663e-05, "logits/chosen": -1.6922401189804077, "logits/rejected": -3.106393814086914, "logps/chosen": -156.86367797851562, "logps/rejected": -468.760009765625, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": -1.0585845708847046, "rewards/margins": 3.5116546154022217, "rewards/rejected": -4.570239067077637, "step": 4712 }, { "epoch": 0.73, "learning_rate": 1.0690629182011515e-05, "logits/chosen": -2.424226760864258, "logits/rejected": -2.963994026184082, "logps/chosen": -324.5909729003906, "logps/rejected": -473.09967041015625, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -1.5427392721176147, "rewards/margins": 4.532111644744873, "rewards/rejected": -6.074851036071777, "step": 4713 }, { "epoch": 0.73, "learning_rate": 1.0689895741480367e-05, "logits/chosen": -1.6059716939926147, "logits/rejected": -2.8226118087768555, "logps/chosen": -39.533199310302734, "logps/rejected": -322.67132568359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.3158207833766937, "rewards/margins": 7.4786553382873535, "rewards/rejected": -7.794476509094238, "step": 4714 }, { "epoch": 0.73, "learning_rate": 1.0689162300949219e-05, "logits/chosen": -2.8490326404571533, "logits/rejected": -3.2132070064544678, "logps/chosen": -28.465160369873047, "logps/rejected": -155.62350463867188, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -1.139572024345398, "rewards/margins": 4.814692497253418, "rewards/rejected": -5.9542646408081055, "step": 4715 }, { "epoch": 0.73, "learning_rate": 1.0688428860418071e-05, "logits/chosen": -2.6440954208374023, "logits/rejected": -2.7430667877197266, "logps/chosen": -98.3271255493164, "logps/rejected": -244.50027465820312, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -1.2201515436172485, "rewards/margins": 4.763134002685547, "rewards/rejected": -5.983285427093506, "step": 4716 }, { "epoch": 0.73, "learning_rate": 1.0687695419886925e-05, "logits/chosen": -1.8333098888397217, "logits/rejected": -2.9624733924865723, "logps/chosen": -53.89683151245117, "logps/rejected": -240.5491943359375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.59090656042099, "rewards/margins": 5.080682277679443, "rewards/rejected": -5.671588897705078, "step": 4717 }, { "epoch": 0.73, "learning_rate": 1.0686961979355776e-05, "logits/chosen": -1.0296651124954224, "logits/rejected": -2.4018006324768066, "logps/chosen": -116.54458618164062, "logps/rejected": -435.31390380859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3317711353302002, "rewards/margins": 7.909043312072754, "rewards/rejected": -9.240814208984375, "step": 4718 }, { "epoch": 0.73, "learning_rate": 1.0686228538824628e-05, "logits/chosen": -1.981271743774414, "logits/rejected": -3.1583566665649414, "logps/chosen": -73.21068572998047, "logps/rejected": -260.4349365234375, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -1.3578075170516968, "rewards/margins": 3.5695886611938477, "rewards/rejected": -4.927396297454834, "step": 4719 }, { "epoch": 0.73, "learning_rate": 1.068549509829348e-05, "logits/chosen": -2.5311965942382812, "logits/rejected": -2.9662623405456543, "logps/chosen": -352.08245849609375, "logps/rejected": -478.6850280761719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7672195434570312, "rewards/margins": 8.463380813598633, "rewards/rejected": -9.230600357055664, "step": 4720 }, { "epoch": 0.73, "learning_rate": 1.0684761657762332e-05, "logits/chosen": -2.436054229736328, "logits/rejected": -3.246154308319092, "logps/chosen": -238.44590759277344, "logps/rejected": -608.8873291015625, "loss": 2.7102, "rewards/accuracies": 0.5, "rewards/chosen": -3.785679578781128, "rewards/margins": 1.956883430480957, "rewards/rejected": -5.742562770843506, "step": 4721 }, { "epoch": 0.73, "learning_rate": 1.0684028217231184e-05, "logits/chosen": -3.0642952919006348, "logits/rejected": -3.317556619644165, "logps/chosen": -419.0020751953125, "logps/rejected": -363.8254089355469, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.617060661315918, "rewards/margins": 4.163839340209961, "rewards/rejected": -4.780900001525879, "step": 4722 }, { "epoch": 0.73, "learning_rate": 1.0683294776700036e-05, "logits/chosen": -1.981101393699646, "logits/rejected": -3.1211702823638916, "logps/chosen": -234.16700744628906, "logps/rejected": -445.429931640625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.44371262192726135, "rewards/margins": 5.834478378295898, "rewards/rejected": -6.278190612792969, "step": 4723 }, { "epoch": 0.73, "learning_rate": 1.0682561336168888e-05, "logits/chosen": -2.76090407371521, "logits/rejected": -2.669206380844116, "logps/chosen": -96.6912841796875, "logps/rejected": -205.68772888183594, "loss": 1.9109, "rewards/accuracies": 0.5, "rewards/chosen": -2.299449920654297, "rewards/margins": 1.6043968200683594, "rewards/rejected": -3.903846502304077, "step": 4724 }, { "epoch": 0.73, "learning_rate": 1.068182789563774e-05, "logits/chosen": -3.0101709365844727, "logits/rejected": -2.5950074195861816, "logps/chosen": -414.59442138671875, "logps/rejected": -357.5951232910156, "loss": 2.6045, "rewards/accuracies": 0.5, "rewards/chosen": -4.358558654785156, "rewards/margins": -0.06979608535766602, "rewards/rejected": -4.28876256942749, "step": 4725 }, { "epoch": 0.73, "learning_rate": 1.0681094455106593e-05, "logits/chosen": -1.6276568174362183, "logits/rejected": -3.2851552963256836, "logps/chosen": -103.15013885498047, "logps/rejected": -356.26800537109375, "loss": 0.0472, "rewards/accuracies": 1.0, "rewards/chosen": -0.6056812405586243, "rewards/margins": 3.3159847259521484, "rewards/rejected": -3.921665906906128, "step": 4726 }, { "epoch": 0.74, "learning_rate": 1.0680361014575445e-05, "logits/chosen": -3.0019421577453613, "logits/rejected": -3.115790367126465, "logps/chosen": -430.4581604003906, "logps/rejected": -649.3028564453125, "loss": 1.7315, "rewards/accuracies": 0.5, "rewards/chosen": -3.1462440490722656, "rewards/margins": 0.08628273010253906, "rewards/rejected": -3.2325267791748047, "step": 4727 }, { "epoch": 0.74, "learning_rate": 1.0679627574044297e-05, "logits/chosen": -2.257200241088867, "logits/rejected": -2.7479395866394043, "logps/chosen": -190.05799865722656, "logps/rejected": -401.3970642089844, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1847370862960815, "rewards/margins": 7.164430618286133, "rewards/rejected": -8.349167823791504, "step": 4728 }, { "epoch": 0.74, "learning_rate": 1.0678894133513149e-05, "logits/chosen": -1.8138561248779297, "logits/rejected": -2.9594709873199463, "logps/chosen": -104.80430603027344, "logps/rejected": -327.0440673828125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.0786033570766449, "rewards/margins": 5.326305389404297, "rewards/rejected": -5.404908657073975, "step": 4729 }, { "epoch": 0.74, "learning_rate": 1.0678160692982e-05, "logits/chosen": -2.8019821643829346, "logits/rejected": -3.0218207836151123, "logps/chosen": -75.4136962890625, "logps/rejected": -235.3466339111328, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.5894449353218079, "rewards/margins": 4.650716304779053, "rewards/rejected": -5.240160942077637, "step": 4730 }, { "epoch": 0.74, "learning_rate": 1.0677427252450853e-05, "logits/chosen": -1.720688819885254, "logits/rejected": -3.240288734436035, "logps/chosen": -164.59951782226562, "logps/rejected": -534.4401245117188, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -0.7553806304931641, "rewards/margins": 6.075645446777344, "rewards/rejected": -6.831026077270508, "step": 4731 }, { "epoch": 0.74, "learning_rate": 1.0676693811919704e-05, "logits/chosen": -2.720673084259033, "logits/rejected": -2.85552716255188, "logps/chosen": -269.33294677734375, "logps/rejected": -211.33242797851562, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": -1.427459716796875, "rewards/margins": 3.2900819778442383, "rewards/rejected": -4.717541694641113, "step": 4732 }, { "epoch": 0.74, "learning_rate": 1.0675960371388556e-05, "logits/chosen": -2.7800121307373047, "logits/rejected": -2.4394290447235107, "logps/chosen": -504.15399169921875, "logps/rejected": -431.8328857421875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.015679121017456, "rewards/margins": 6.886109352111816, "rewards/rejected": -7.901788711547852, "step": 4733 }, { "epoch": 0.74, "learning_rate": 1.0675226930857408e-05, "logits/chosen": -2.213823080062866, "logits/rejected": -2.895803213119507, "logps/chosen": -146.36024475097656, "logps/rejected": -338.04876708984375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.9966263175010681, "rewards/margins": 7.309495449066162, "rewards/rejected": -8.306121826171875, "step": 4734 }, { "epoch": 0.74, "learning_rate": 1.0674493490326262e-05, "logits/chosen": -2.633042812347412, "logits/rejected": -2.7894997596740723, "logps/chosen": -47.87225341796875, "logps/rejected": -268.33251953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.5346355438232422, "rewards/margins": 6.1239118576049805, "rewards/rejected": -6.658547401428223, "step": 4735 }, { "epoch": 0.74, "learning_rate": 1.0673760049795114e-05, "logits/chosen": -2.873685359954834, "logits/rejected": -3.1521570682525635, "logps/chosen": -184.4244384765625, "logps/rejected": -198.76666259765625, "loss": 3.4198, "rewards/accuracies": 0.5, "rewards/chosen": -3.9112982749938965, "rewards/margins": 0.17689967155456543, "rewards/rejected": -4.088197708129883, "step": 4736 }, { "epoch": 0.74, "learning_rate": 1.0673026609263966e-05, "logits/chosen": -2.7314372062683105, "logits/rejected": -1.1856167316436768, "logps/chosen": -399.62255859375, "logps/rejected": -237.01507568359375, "loss": 4.1465, "rewards/accuracies": 0.5, "rewards/chosen": -5.288762092590332, "rewards/margins": -0.7819948196411133, "rewards/rejected": -4.506767272949219, "step": 4737 }, { "epoch": 0.74, "learning_rate": 1.0672293168732819e-05, "logits/chosen": -2.6224021911621094, "logits/rejected": -2.9982473850250244, "logps/chosen": -145.266845703125, "logps/rejected": -239.25460815429688, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -0.932499885559082, "rewards/margins": 4.569047451019287, "rewards/rejected": -5.501547336578369, "step": 4738 }, { "epoch": 0.74, "learning_rate": 1.0671559728201671e-05, "logits/chosen": -2.720003366470337, "logits/rejected": -2.6477558612823486, "logps/chosen": -169.72463989257812, "logps/rejected": -62.74749755859375, "loss": 6.6169, "rewards/accuracies": 0.0, "rewards/chosen": -8.024130821228027, "rewards/margins": -6.614591598510742, "rewards/rejected": -1.4095393419265747, "step": 4739 }, { "epoch": 0.74, "learning_rate": 1.0670826287670523e-05, "logits/chosen": -2.7626659870147705, "logits/rejected": -2.342315673828125, "logps/chosen": -438.9263000488281, "logps/rejected": -341.7679748535156, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -1.8355064392089844, "rewards/margins": 4.942264556884766, "rewards/rejected": -6.77777099609375, "step": 4740 }, { "epoch": 0.74, "learning_rate": 1.0670092847139375e-05, "logits/chosen": -2.919487237930298, "logits/rejected": -2.3886868953704834, "logps/chosen": -319.84796142578125, "logps/rejected": -234.37437438964844, "loss": 3.6556, "rewards/accuracies": 0.5, "rewards/chosen": -4.627455234527588, "rewards/margins": 0.11527514457702637, "rewards/rejected": -4.742730140686035, "step": 4741 }, { "epoch": 0.74, "learning_rate": 1.0669359406608227e-05, "logits/chosen": -1.0399925708770752, "logits/rejected": -2.405888080596924, "logps/chosen": -288.0374755859375, "logps/rejected": -681.4390258789062, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.2231736183166504, "rewards/margins": 8.225746154785156, "rewards/rejected": -9.448919296264648, "step": 4742 }, { "epoch": 0.74, "learning_rate": 1.066862596607708e-05, "logits/chosen": -3.038360595703125, "logits/rejected": -2.797921657562256, "logps/chosen": -511.69189453125, "logps/rejected": -207.08192443847656, "loss": 4.3132, "rewards/accuracies": 0.5, "rewards/chosen": -5.087490081787109, "rewards/margins": -1.009216547012329, "rewards/rejected": -4.078273296356201, "step": 4743 }, { "epoch": 0.74, "learning_rate": 1.0667892525545932e-05, "logits/chosen": -2.9501445293426514, "logits/rejected": -2.6401515007019043, "logps/chosen": -519.4052734375, "logps/rejected": -402.4339599609375, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -2.7895524501800537, "rewards/margins": 4.177330017089844, "rewards/rejected": -6.966882705688477, "step": 4744 }, { "epoch": 0.74, "learning_rate": 1.0667159085014784e-05, "logits/chosen": -2.713682174682617, "logits/rejected": -2.1229360103607178, "logps/chosen": -370.6827087402344, "logps/rejected": -351.78045654296875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.2089874744415283, "rewards/margins": 5.279680252075195, "rewards/rejected": -6.4886674880981445, "step": 4745 }, { "epoch": 0.74, "learning_rate": 1.0666425644483636e-05, "logits/chosen": -1.482714295387268, "logits/rejected": -3.0729446411132812, "logps/chosen": -33.272098541259766, "logps/rejected": -316.39556884765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.1578071117401123, "rewards/margins": 7.925823211669922, "rewards/rejected": -8.083630561828613, "step": 4746 }, { "epoch": 0.74, "learning_rate": 1.0665692203952488e-05, "logits/chosen": -3.0419132709503174, "logits/rejected": -2.5747480392456055, "logps/chosen": -284.4219970703125, "logps/rejected": -313.85577392578125, "loss": 2.0078, "rewards/accuracies": 0.5, "rewards/chosen": -3.326444149017334, "rewards/margins": 0.6035635471343994, "rewards/rejected": -3.9300079345703125, "step": 4747 }, { "epoch": 0.74, "learning_rate": 1.066495876342134e-05, "logits/chosen": -1.9688814878463745, "logits/rejected": -2.8226163387298584, "logps/chosen": -146.12586975097656, "logps/rejected": -367.52703857421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.47152405977249146, "rewards/margins": 7.965578079223633, "rewards/rejected": -8.437102317810059, "step": 4748 }, { "epoch": 0.74, "learning_rate": 1.0664225322890191e-05, "logits/chosen": -2.9509949684143066, "logits/rejected": -2.957728385925293, "logps/chosen": -215.296142578125, "logps/rejected": -195.69554138183594, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.6027226448059082, "rewards/margins": 5.8862104415893555, "rewards/rejected": -7.4889326095581055, "step": 4749 }, { "epoch": 0.74, "learning_rate": 1.0663491882359043e-05, "logits/chosen": -2.4420173168182373, "logits/rejected": -2.8221960067749023, "logps/chosen": -285.7850036621094, "logps/rejected": -408.753662109375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.5128525495529175, "rewards/margins": 6.618426322937012, "rewards/rejected": -8.131278991699219, "step": 4750 }, { "epoch": 0.74, "learning_rate": 1.0662758441827895e-05, "logits/chosen": -2.021493673324585, "logits/rejected": -3.118004083633423, "logps/chosen": -351.87677001953125, "logps/rejected": -602.9806518554688, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7906730771064758, "rewards/margins": 8.112300872802734, "rewards/rejected": -8.902974128723145, "step": 4751 }, { "epoch": 0.74, "learning_rate": 1.0662025001296749e-05, "logits/chosen": -2.670865774154663, "logits/rejected": -2.941514015197754, "logps/chosen": -252.43724060058594, "logps/rejected": -181.33938598632812, "loss": 1.3232, "rewards/accuracies": 0.5, "rewards/chosen": -2.374943971633911, "rewards/margins": 1.7196948528289795, "rewards/rejected": -4.094638824462891, "step": 4752 }, { "epoch": 0.74, "learning_rate": 1.06612915607656e-05, "logits/chosen": -3.1222429275512695, "logits/rejected": -2.2053422927856445, "logps/chosen": -202.59701538085938, "logps/rejected": -112.74662780761719, "loss": 0.6977, "rewards/accuracies": 0.5, "rewards/chosen": -2.7200958728790283, "rewards/margins": 2.301567792892456, "rewards/rejected": -5.021663665771484, "step": 4753 }, { "epoch": 0.74, "learning_rate": 1.0660558120234453e-05, "logits/chosen": -2.184741735458374, "logits/rejected": -2.9661996364593506, "logps/chosen": -440.9833984375, "logps/rejected": -593.8806762695312, "loss": 1.1966, "rewards/accuracies": 0.5, "rewards/chosen": -2.1779494285583496, "rewards/margins": 1.8470873832702637, "rewards/rejected": -4.025036811828613, "step": 4754 }, { "epoch": 0.74, "learning_rate": 1.0659824679703304e-05, "logits/chosen": -2.4776434898376465, "logits/rejected": -2.7146289348602295, "logps/chosen": -135.71664428710938, "logps/rejected": -175.87832641601562, "loss": 1.3407, "rewards/accuracies": 0.5, "rewards/chosen": -3.8699848651885986, "rewards/margins": 1.5017385482788086, "rewards/rejected": -5.371723175048828, "step": 4755 }, { "epoch": 0.74, "learning_rate": 1.0659091239172156e-05, "logits/chosen": -2.8894567489624023, "logits/rejected": -2.8319149017333984, "logps/chosen": -138.88023376464844, "logps/rejected": -152.8410186767578, "loss": 2.2853, "rewards/accuracies": 0.5, "rewards/chosen": -3.9653353691101074, "rewards/margins": 0.22555756568908691, "rewards/rejected": -4.190893173217773, "step": 4756 }, { "epoch": 0.74, "learning_rate": 1.0658357798641008e-05, "logits/chosen": -2.1686174869537354, "logits/rejected": -2.8061463832855225, "logps/chosen": -254.9416046142578, "logps/rejected": -380.536865234375, "loss": 0.0674, "rewards/accuracies": 1.0, "rewards/chosen": -0.4810466766357422, "rewards/margins": 4.496789932250977, "rewards/rejected": -4.977836608886719, "step": 4757 }, { "epoch": 0.74, "learning_rate": 1.065762435810986e-05, "logits/chosen": -2.067345142364502, "logits/rejected": -3.103745222091675, "logps/chosen": -102.35587310791016, "logps/rejected": -401.16357421875, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.9976738095283508, "rewards/margins": 6.015050888061523, "rewards/rejected": -7.012724876403809, "step": 4758 }, { "epoch": 0.74, "learning_rate": 1.0656890917578712e-05, "logits/chosen": -2.9865219593048096, "logits/rejected": -1.973697304725647, "logps/chosen": -482.14520263671875, "logps/rejected": -345.383056640625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.2418079376220703, "rewards/margins": 5.548614978790283, "rewards/rejected": -6.7904229164123535, "step": 4759 }, { "epoch": 0.74, "learning_rate": 1.0656157477047564e-05, "logits/chosen": -2.4511804580688477, "logits/rejected": -2.842252016067505, "logps/chosen": -151.14430236816406, "logps/rejected": -331.432373046875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.3728141784667969, "rewards/margins": 6.490361213684082, "rewards/rejected": -7.863175392150879, "step": 4760 }, { "epoch": 0.74, "learning_rate": 1.0655424036516417e-05, "logits/chosen": -2.6834139823913574, "logits/rejected": -2.4878427982330322, "logps/chosen": -263.2078857421875, "logps/rejected": -323.331787109375, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -2.02331280708313, "rewards/margins": 4.076258182525635, "rewards/rejected": -6.0995707511901855, "step": 4761 }, { "epoch": 0.74, "learning_rate": 1.065469059598527e-05, "logits/chosen": -2.15085768699646, "logits/rejected": -2.91373348236084, "logps/chosen": -168.57086181640625, "logps/rejected": -268.794189453125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.716144323348999, "rewards/margins": 4.711938858032227, "rewards/rejected": -6.428083419799805, "step": 4762 }, { "epoch": 0.74, "learning_rate": 1.0653957155454121e-05, "logits/chosen": -2.070690393447876, "logits/rejected": -2.7916829586029053, "logps/chosen": -222.74234008789062, "logps/rejected": -239.5978546142578, "loss": 1.96, "rewards/accuracies": 0.5, "rewards/chosen": -2.3630893230438232, "rewards/margins": 1.8258051872253418, "rewards/rejected": -4.188894748687744, "step": 4763 }, { "epoch": 0.74, "learning_rate": 1.0653223714922973e-05, "logits/chosen": -3.023890733718872, "logits/rejected": -2.603118419647217, "logps/chosen": -298.8326416015625, "logps/rejected": -186.5836181640625, "loss": 2.6097, "rewards/accuracies": 0.5, "rewards/chosen": -4.485553741455078, "rewards/margins": 0.07066893577575684, "rewards/rejected": -4.556222915649414, "step": 4764 }, { "epoch": 0.74, "learning_rate": 1.0652490274391825e-05, "logits/chosen": -3.0780704021453857, "logits/rejected": -3.0830490589141846, "logps/chosen": -590.9154052734375, "logps/rejected": -521.4089965820312, "loss": 0.0895, "rewards/accuracies": 1.0, "rewards/chosen": -1.505963921546936, "rewards/margins": 3.8307836055755615, "rewards/rejected": -5.336747646331787, "step": 4765 }, { "epoch": 0.74, "learning_rate": 1.0651756833860677e-05, "logits/chosen": -3.0484061241149902, "logits/rejected": -2.115103006362915, "logps/chosen": -396.2267150878906, "logps/rejected": -294.8409423828125, "loss": 0.2018, "rewards/accuracies": 1.0, "rewards/chosen": -3.242053270339966, "rewards/margins": 4.007964134216309, "rewards/rejected": -7.250017166137695, "step": 4766 }, { "epoch": 0.74, "learning_rate": 1.0651023393329529e-05, "logits/chosen": -3.0048272609710693, "logits/rejected": -2.9910295009613037, "logps/chosen": -134.01095581054688, "logps/rejected": -200.11639404296875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.8901570439338684, "rewards/margins": 6.080967903137207, "rewards/rejected": -6.97112512588501, "step": 4767 }, { "epoch": 0.74, "learning_rate": 1.065028995279838e-05, "logits/chosen": -2.126203775405884, "logits/rejected": -2.902108669281006, "logps/chosen": -152.67068481445312, "logps/rejected": -360.4693908691406, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -2.1802546977996826, "rewards/margins": 4.734236717224121, "rewards/rejected": -6.914491653442383, "step": 4768 }, { "epoch": 0.74, "learning_rate": 1.0649556512267232e-05, "logits/chosen": -2.8304994106292725, "logits/rejected": -2.98335337638855, "logps/chosen": -38.32395935058594, "logps/rejected": -125.7059097290039, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -2.2310237884521484, "rewards/margins": 3.3082990646362305, "rewards/rejected": -5.539322853088379, "step": 4769 }, { "epoch": 0.74, "learning_rate": 1.0648823071736086e-05, "logits/chosen": -2.8049771785736084, "logits/rejected": -2.1953368186950684, "logps/chosen": -216.2403106689453, "logps/rejected": -148.73565673828125, "loss": 1.3613, "rewards/accuracies": 0.5, "rewards/chosen": -2.6816136837005615, "rewards/margins": 0.33659088611602783, "rewards/rejected": -3.0182044506073, "step": 4770 }, { "epoch": 0.74, "learning_rate": 1.0648089631204938e-05, "logits/chosen": -2.620239496231079, "logits/rejected": -2.0030221939086914, "logps/chosen": -210.24966430664062, "logps/rejected": -183.6201934814453, "loss": 3.3399, "rewards/accuracies": 0.5, "rewards/chosen": -5.28073787689209, "rewards/margins": 0.1652355194091797, "rewards/rejected": -5.4459733963012695, "step": 4771 }, { "epoch": 0.74, "learning_rate": 1.0647356190673791e-05, "logits/chosen": -2.905794620513916, "logits/rejected": -3.0210464000701904, "logps/chosen": -276.99395751953125, "logps/rejected": -346.60296630859375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.5505748987197876, "rewards/margins": 5.854326248168945, "rewards/rejected": -7.404901504516602, "step": 4772 }, { "epoch": 0.74, "learning_rate": 1.0646622750142643e-05, "logits/chosen": -3.0766966342926025, "logits/rejected": -2.5916571617126465, "logps/chosen": -195.00148010253906, "logps/rejected": -179.6797637939453, "loss": 2.2921, "rewards/accuracies": 0.5, "rewards/chosen": -3.8796496391296387, "rewards/margins": 0.5261385440826416, "rewards/rejected": -4.405787944793701, "step": 4773 }, { "epoch": 0.74, "learning_rate": 1.0645889309611495e-05, "logits/chosen": -2.786810874938965, "logits/rejected": -2.75776743888855, "logps/chosen": -170.43792724609375, "logps/rejected": -235.75454711914062, "loss": 2.4221, "rewards/accuracies": 0.5, "rewards/chosen": -3.7568609714508057, "rewards/margins": 0.7672195434570312, "rewards/rejected": -4.524080753326416, "step": 4774 }, { "epoch": 0.74, "learning_rate": 1.0645155869080347e-05, "logits/chosen": -2.418757677078247, "logits/rejected": -2.8082377910614014, "logps/chosen": -123.69343566894531, "logps/rejected": -249.87208557128906, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/chosen": -1.9674222469329834, "rewards/margins": 3.911954879760742, "rewards/rejected": -5.8793768882751465, "step": 4775 }, { "epoch": 0.74, "learning_rate": 1.0644422428549199e-05, "logits/chosen": -3.152865409851074, "logits/rejected": -3.246288299560547, "logps/chosen": -146.2564239501953, "logps/rejected": -204.94583129882812, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": -2.552809953689575, "rewards/margins": 3.7907726764678955, "rewards/rejected": -6.343582630157471, "step": 4776 }, { "epoch": 0.74, "learning_rate": 1.064368898801805e-05, "logits/chosen": -1.219446063041687, "logits/rejected": -2.8245954513549805, "logps/chosen": -100.58686065673828, "logps/rejected": -359.03912353515625, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -2.2685017585754395, "rewards/margins": 3.7459561824798584, "rewards/rejected": -6.014457702636719, "step": 4777 }, { "epoch": 0.74, "learning_rate": 1.0642955547486903e-05, "logits/chosen": -2.0680103302001953, "logits/rejected": -2.8752071857452393, "logps/chosen": -48.26641845703125, "logps/rejected": -233.39804077148438, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -1.8608729839324951, "rewards/margins": 5.10175085067749, "rewards/rejected": -6.962623596191406, "step": 4778 }, { "epoch": 0.74, "learning_rate": 1.0642222106955756e-05, "logits/chosen": -3.0627927780151367, "logits/rejected": -1.8917529582977295, "logps/chosen": -565.630126953125, "logps/rejected": -403.0597229003906, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.2919721603393555, "rewards/margins": 4.562260627746582, "rewards/rejected": -6.8542327880859375, "step": 4779 }, { "epoch": 0.74, "learning_rate": 1.0641488666424608e-05, "logits/chosen": -2.9317619800567627, "logits/rejected": -1.4753574132919312, "logps/chosen": -258.2074890136719, "logps/rejected": -79.08773803710938, "loss": 2.319, "rewards/accuracies": 0.5, "rewards/chosen": -3.9157333374023438, "rewards/margins": -0.27728271484375, "rewards/rejected": -3.6384506225585938, "step": 4780 }, { "epoch": 0.74, "learning_rate": 1.064075522589346e-05, "logits/chosen": -2.875411033630371, "logits/rejected": -2.756370782852173, "logps/chosen": -136.83651733398438, "logps/rejected": -191.627197265625, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": -1.4703174829483032, "rewards/margins": 3.2017946243286133, "rewards/rejected": -4.672112464904785, "step": 4781 }, { "epoch": 0.74, "learning_rate": 1.0640021785362312e-05, "logits/chosen": -2.846809148788452, "logits/rejected": -2.785358190536499, "logps/chosen": -185.2638702392578, "logps/rejected": -158.38536071777344, "loss": 3.012, "rewards/accuracies": 0.5, "rewards/chosen": -3.6093807220458984, "rewards/margins": 0.2843291759490967, "rewards/rejected": -3.893709897994995, "step": 4782 }, { "epoch": 0.74, "learning_rate": 1.0639288344831164e-05, "logits/chosen": -2.946255683898926, "logits/rejected": -3.0685837268829346, "logps/chosen": -34.11187744140625, "logps/rejected": -196.90213012695312, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.5470396280288696, "rewards/margins": 5.066225051879883, "rewards/rejected": -6.613265037536621, "step": 4783 }, { "epoch": 0.74, "learning_rate": 1.0638554904300016e-05, "logits/chosen": -2.193974256515503, "logits/rejected": -2.8299386501312256, "logps/chosen": -130.15689086914062, "logps/rejected": -248.94912719726562, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.643781304359436, "rewards/margins": 5.396120071411133, "rewards/rejected": -7.039901256561279, "step": 4784 }, { "epoch": 0.74, "learning_rate": 1.0637821463768868e-05, "logits/chosen": -2.2381751537323, "logits/rejected": -3.212648868560791, "logps/chosen": -155.66688537597656, "logps/rejected": -316.3419189453125, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.1032605171203613, "rewards/margins": 4.865541458129883, "rewards/rejected": -5.968801975250244, "step": 4785 }, { "epoch": 0.74, "learning_rate": 1.063708802323772e-05, "logits/chosen": -1.283583402633667, "logits/rejected": -3.1802189350128174, "logps/chosen": -85.90141296386719, "logps/rejected": -397.19586181640625, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.7904563546180725, "rewards/margins": 3.9404571056365967, "rewards/rejected": -4.7309136390686035, "step": 4786 }, { "epoch": 0.74, "learning_rate": 1.0636354582706571e-05, "logits/chosen": -1.395662784576416, "logits/rejected": -1.9213321208953857, "logps/chosen": -158.55789184570312, "logps/rejected": -133.83419799804688, "loss": 2.6228, "rewards/accuracies": 0.5, "rewards/chosen": -3.856478691101074, "rewards/margins": 0.623584508895874, "rewards/rejected": -4.480063438415527, "step": 4787 }, { "epoch": 0.74, "learning_rate": 1.0635621142175425e-05, "logits/chosen": -1.4724011421203613, "logits/rejected": -2.9595322608947754, "logps/chosen": -208.4574432373047, "logps/rejected": -442.9925537109375, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.0823653936386108, "rewards/margins": 5.155754089355469, "rewards/rejected": -6.238119602203369, "step": 4788 }, { "epoch": 0.74, "learning_rate": 1.0634887701644277e-05, "logits/chosen": -2.6503078937530518, "logits/rejected": -2.8162801265716553, "logps/chosen": -445.3951416015625, "logps/rejected": -353.48577880859375, "loss": 4.5665, "rewards/accuracies": 0.5, "rewards/chosen": -5.656982421875, "rewards/margins": -2.1989500522613525, "rewards/rejected": -3.4580323696136475, "step": 4789 }, { "epoch": 0.74, "learning_rate": 1.0634154261113129e-05, "logits/chosen": -2.6309049129486084, "logits/rejected": -2.910940647125244, "logps/chosen": -52.941524505615234, "logps/rejected": -145.94863891601562, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": -2.4351673126220703, "rewards/margins": 2.850391387939453, "rewards/rejected": -5.285558700561523, "step": 4790 }, { "epoch": 0.75, "learning_rate": 1.063342082058198e-05, "logits/chosen": -2.648280143737793, "logits/rejected": -2.803007125854492, "logps/chosen": -462.85662841796875, "logps/rejected": -586.3151245117188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.1829181909561157, "rewards/margins": 6.954946517944336, "rewards/rejected": -8.13786506652832, "step": 4791 }, { "epoch": 0.75, "learning_rate": 1.0632687380050832e-05, "logits/chosen": -2.6347315311431885, "logits/rejected": -3.2137153148651123, "logps/chosen": -94.1275863647461, "logps/rejected": -233.4575653076172, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.9682028293609619, "rewards/margins": 4.793566703796387, "rewards/rejected": -5.7617692947387695, "step": 4792 }, { "epoch": 0.75, "learning_rate": 1.0631953939519684e-05, "logits/chosen": -2.7798285484313965, "logits/rejected": -2.1750640869140625, "logps/chosen": -458.999267578125, "logps/rejected": -491.41436767578125, "loss": 1.4925, "rewards/accuracies": 0.5, "rewards/chosen": -3.43376088142395, "rewards/margins": 0.893918514251709, "rewards/rejected": -4.327679634094238, "step": 4793 }, { "epoch": 0.75, "learning_rate": 1.0631220498988536e-05, "logits/chosen": -2.883549690246582, "logits/rejected": -2.7930006980895996, "logps/chosen": -127.73628997802734, "logps/rejected": -291.9282531738281, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.7149480581283569, "rewards/margins": 5.666467666625977, "rewards/rejected": -6.381415843963623, "step": 4794 }, { "epoch": 0.75, "learning_rate": 1.0630487058457388e-05, "logits/chosen": -2.00345778465271, "logits/rejected": -2.321918487548828, "logps/chosen": -1012.2325439453125, "logps/rejected": -752.7752075195312, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.875644326210022, "rewards/margins": 6.7028961181640625, "rewards/rejected": -8.578540802001953, "step": 4795 }, { "epoch": 0.75, "learning_rate": 1.062975361792624e-05, "logits/chosen": -2.8653130531311035, "logits/rejected": -3.0035691261291504, "logps/chosen": -112.92333221435547, "logps/rejected": -291.4118347167969, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.2372963428497314, "rewards/margins": 6.313438415527344, "rewards/rejected": -7.550734519958496, "step": 4796 }, { "epoch": 0.75, "learning_rate": 1.0629020177395093e-05, "logits/chosen": -2.8316757678985596, "logits/rejected": -2.5526344776153564, "logps/chosen": -724.3519897460938, "logps/rejected": -610.6658935546875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7690418362617493, "rewards/margins": 6.320278644561768, "rewards/rejected": -7.089320182800293, "step": 4797 }, { "epoch": 0.75, "learning_rate": 1.0628286736863945e-05, "logits/chosen": -2.8016250133514404, "logits/rejected": -3.1847176551818848, "logps/chosen": -76.3521728515625, "logps/rejected": -327.2569885253906, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6512186527252197, "rewards/margins": 7.49265718460083, "rewards/rejected": -9.143876075744629, "step": 4798 }, { "epoch": 0.75, "learning_rate": 1.0627553296332797e-05, "logits/chosen": -3.2398972511291504, "logits/rejected": -2.9972236156463623, "logps/chosen": -530.5126953125, "logps/rejected": -415.1856689453125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.2899589538574219, "rewards/margins": 4.600425720214844, "rewards/rejected": -5.890384674072266, "step": 4799 }, { "epoch": 0.75, "learning_rate": 1.0626819855801649e-05, "logits/chosen": -2.8714070320129395, "logits/rejected": -2.178412675857544, "logps/chosen": -481.4088134765625, "logps/rejected": -467.565185546875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.6047852039337158, "rewards/margins": 7.459871292114258, "rewards/rejected": -9.064656257629395, "step": 4800 }, { "epoch": 0.75, "learning_rate": 1.0626086415270501e-05, "logits/chosen": -2.670989751815796, "logits/rejected": -3.175922393798828, "logps/chosen": -131.24307250976562, "logps/rejected": -363.2438049316406, "loss": 0.1978, "rewards/accuracies": 1.0, "rewards/chosen": -1.5340150594711304, "rewards/margins": 2.617764949798584, "rewards/rejected": -4.151780128479004, "step": 4801 }, { "epoch": 0.75, "learning_rate": 1.0625352974739353e-05, "logits/chosen": -2.928549289703369, "logits/rejected": -2.823737382888794, "logps/chosen": -297.82330322265625, "logps/rejected": -606.1771240234375, "loss": 2.4772, "rewards/accuracies": 0.5, "rewards/chosen": -3.685351610183716, "rewards/margins": -1.4982900619506836, "rewards/rejected": -2.187061309814453, "step": 4802 }, { "epoch": 0.75, "learning_rate": 1.0624619534208205e-05, "logits/chosen": -1.8307995796203613, "logits/rejected": -2.611072063446045, "logps/chosen": -238.5249481201172, "logps/rejected": -261.377197265625, "loss": 1.5187, "rewards/accuracies": 0.5, "rewards/chosen": -1.662826657295227, "rewards/margins": 2.867708206176758, "rewards/rejected": -4.530534744262695, "step": 4803 }, { "epoch": 0.75, "learning_rate": 1.0623886093677058e-05, "logits/chosen": -2.867518663406372, "logits/rejected": -2.5877015590667725, "logps/chosen": -156.67898559570312, "logps/rejected": -159.59085083007812, "loss": 1.4923, "rewards/accuracies": 0.5, "rewards/chosen": -2.8248069286346436, "rewards/margins": 1.5853952169418335, "rewards/rejected": -4.4102020263671875, "step": 4804 }, { "epoch": 0.75, "learning_rate": 1.062315265314591e-05, "logits/chosen": -2.3388774394989014, "logits/rejected": -2.9714410305023193, "logps/chosen": -140.86656188964844, "logps/rejected": -275.6712646484375, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.0745553970336914, "rewards/margins": 4.4405999183654785, "rewards/rejected": -5.51515531539917, "step": 4805 }, { "epoch": 0.75, "learning_rate": 1.0622419212614764e-05, "logits/chosen": -2.74853515625, "logits/rejected": -2.065385580062866, "logps/chosen": -158.13241577148438, "logps/rejected": -98.50175476074219, "loss": 2.4476, "rewards/accuracies": 0.5, "rewards/chosen": -3.7395684719085693, "rewards/margins": -0.9005937576293945, "rewards/rejected": -2.838974714279175, "step": 4806 }, { "epoch": 0.75, "learning_rate": 1.0621685772083616e-05, "logits/chosen": -2.788455009460449, "logits/rejected": -1.5231621265411377, "logps/chosen": -786.0626831054688, "logps/rejected": -313.15606689453125, "loss": 2.3987, "rewards/accuracies": 0.5, "rewards/chosen": -3.2808678150177, "rewards/margins": -0.9640841484069824, "rewards/rejected": -2.3167836666107178, "step": 4807 }, { "epoch": 0.75, "learning_rate": 1.0620952331552468e-05, "logits/chosen": -2.8766121864318848, "logits/rejected": -2.000143527984619, "logps/chosen": -216.54736328125, "logps/rejected": -149.29872131347656, "loss": 2.9694, "rewards/accuracies": 0.5, "rewards/chosen": -4.493577003479004, "rewards/margins": -1.028436303138733, "rewards/rejected": -3.4651408195495605, "step": 4808 }, { "epoch": 0.75, "learning_rate": 1.062021889102132e-05, "logits/chosen": -3.0860607624053955, "logits/rejected": -3.1699278354644775, "logps/chosen": -220.59347534179688, "logps/rejected": -311.906982421875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0909340381622314, "rewards/margins": 6.389041900634766, "rewards/rejected": -7.479975700378418, "step": 4809 }, { "epoch": 0.75, "learning_rate": 1.0619485450490171e-05, "logits/chosen": -1.4349571466445923, "logits/rejected": -2.9726922512054443, "logps/chosen": -140.33786010742188, "logps/rejected": -322.06451416015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.322241872549057, "rewards/margins": 6.581089019775391, "rewards/rejected": -6.9033308029174805, "step": 4810 }, { "epoch": 0.75, "learning_rate": 1.0618752009959023e-05, "logits/chosen": -2.245558023452759, "logits/rejected": -2.8900394439697266, "logps/chosen": -177.0755615234375, "logps/rejected": -254.94546508789062, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": 0.7316910028457642, "rewards/margins": 4.277519702911377, "rewards/rejected": -3.5458285808563232, "step": 4811 }, { "epoch": 0.75, "learning_rate": 1.0618018569427875e-05, "logits/chosen": -2.4143893718719482, "logits/rejected": -3.0349349975585938, "logps/chosen": -119.44352722167969, "logps/rejected": -429.20367431640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3548176288604736, "rewards/margins": 7.749656677246094, "rewards/rejected": -9.104475021362305, "step": 4812 }, { "epoch": 0.75, "learning_rate": 1.0617285128896727e-05, "logits/chosen": -3.1164491176605225, "logits/rejected": -2.932802677154541, "logps/chosen": -189.04324340820312, "logps/rejected": -270.17498779296875, "loss": 3.4759, "rewards/accuracies": 0.5, "rewards/chosen": -4.212579727172852, "rewards/margins": -0.46686220169067383, "rewards/rejected": -3.7457172870635986, "step": 4813 }, { "epoch": 0.75, "learning_rate": 1.0616551688365579e-05, "logits/chosen": -2.0681307315826416, "logits/rejected": -3.1553196907043457, "logps/chosen": -227.79989624023438, "logps/rejected": -609.9931640625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -1.2009042501449585, "rewards/margins": 5.2890119552612305, "rewards/rejected": -6.4899163246154785, "step": 4814 }, { "epoch": 0.75, "learning_rate": 1.0615818247834432e-05, "logits/chosen": -2.5050628185272217, "logits/rejected": -3.125805139541626, "logps/chosen": -185.1466827392578, "logps/rejected": -309.5269470214844, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.1496039479970932, "rewards/margins": 4.87952184677124, "rewards/rejected": -5.029126167297363, "step": 4815 }, { "epoch": 0.75, "learning_rate": 1.0615084807303284e-05, "logits/chosen": -3.0734283924102783, "logits/rejected": -2.8802101612091064, "logps/chosen": -401.5382995605469, "logps/rejected": -390.88897705078125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.056256055831909, "rewards/margins": 5.025618553161621, "rewards/rejected": -7.081874370574951, "step": 4816 }, { "epoch": 0.75, "learning_rate": 1.0614351366772136e-05, "logits/chosen": -2.9945621490478516, "logits/rejected": -2.473287343978882, "logps/chosen": -172.10093688964844, "logps/rejected": -164.7119598388672, "loss": 0.2422, "rewards/accuracies": 1.0, "rewards/chosen": -2.6290245056152344, "rewards/margins": 1.49844229221344, "rewards/rejected": -4.127467155456543, "step": 4817 }, { "epoch": 0.75, "learning_rate": 1.0613617926240988e-05, "logits/chosen": -2.70890736579895, "logits/rejected": -2.0122807025909424, "logps/chosen": -1052.763427734375, "logps/rejected": -786.1170654296875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.083803653717041, "rewards/margins": 5.19327449798584, "rewards/rejected": -6.277078628540039, "step": 4818 }, { "epoch": 0.75, "learning_rate": 1.061288448570984e-05, "logits/chosen": -2.846167802810669, "logits/rejected": -2.400282382965088, "logps/chosen": -817.042724609375, "logps/rejected": -615.5064697265625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.443598210811615, "rewards/margins": 6.074871063232422, "rewards/rejected": -6.518469333648682, "step": 4819 }, { "epoch": 0.75, "learning_rate": 1.0612151045178692e-05, "logits/chosen": -3.1645593643188477, "logits/rejected": -2.5837900638580322, "logps/chosen": -104.42308044433594, "logps/rejected": -184.60130310058594, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.9275994300842285, "rewards/margins": 4.7295026779174805, "rewards/rejected": -5.657102584838867, "step": 4820 }, { "epoch": 0.75, "learning_rate": 1.0611417604647544e-05, "logits/chosen": -3.1389403343200684, "logits/rejected": -2.7021732330322266, "logps/chosen": -353.79058837890625, "logps/rejected": -206.76878356933594, "loss": 2.1691, "rewards/accuracies": 0.5, "rewards/chosen": -1.9955430030822754, "rewards/margins": -0.15747034549713135, "rewards/rejected": -1.8380727767944336, "step": 4821 }, { "epoch": 0.75, "learning_rate": 1.0610684164116396e-05, "logits/chosen": -2.84871244430542, "logits/rejected": -3.0197362899780273, "logps/chosen": -85.37660217285156, "logps/rejected": -244.21401977539062, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.6067241430282593, "rewards/margins": 5.0212321281433105, "rewards/rejected": -5.627956390380859, "step": 4822 }, { "epoch": 0.75, "learning_rate": 1.0609950723585247e-05, "logits/chosen": -2.9968760013580322, "logits/rejected": -3.0461132526397705, "logps/chosen": -198.8475341796875, "logps/rejected": -186.76101684570312, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.09624022245407104, "rewards/margins": 4.444938659667969, "rewards/rejected": -4.5411787033081055, "step": 4823 }, { "epoch": 0.75, "learning_rate": 1.0609217283054101e-05, "logits/chosen": -2.8656697273254395, "logits/rejected": -1.9839211702346802, "logps/chosen": -219.09228515625, "logps/rejected": -151.28286743164062, "loss": 2.0009, "rewards/accuracies": 0.5, "rewards/chosen": -1.9887752532958984, "rewards/margins": -0.04273724555969238, "rewards/rejected": -1.946038007736206, "step": 4824 }, { "epoch": 0.75, "learning_rate": 1.0608483842522953e-05, "logits/chosen": -2.8613102436065674, "logits/rejected": -2.428082227706909, "logps/chosen": -415.5994873046875, "logps/rejected": -314.6532287597656, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.4533889889717102, "rewards/margins": 5.273704528808594, "rewards/rejected": -5.727093696594238, "step": 4825 }, { "epoch": 0.75, "learning_rate": 1.0607750401991805e-05, "logits/chosen": -2.859867811203003, "logits/rejected": -2.7999892234802246, "logps/chosen": -349.87310791015625, "logps/rejected": -188.06549072265625, "loss": 2.0623, "rewards/accuracies": 0.5, "rewards/chosen": -3.44802188873291, "rewards/margins": 0.34101200103759766, "rewards/rejected": -3.789033889770508, "step": 4826 }, { "epoch": 0.75, "learning_rate": 1.0607016961460657e-05, "logits/chosen": -1.3037275075912476, "logits/rejected": -2.5327093601226807, "logps/chosen": -87.37810516357422, "logps/rejected": -313.560546875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.8141247034072876, "rewards/margins": 5.747443199157715, "rewards/rejected": -6.561567783355713, "step": 4827 }, { "epoch": 0.75, "learning_rate": 1.0606283520929508e-05, "logits/chosen": -2.8744056224823, "logits/rejected": -1.990994930267334, "logps/chosen": -200.8850860595703, "logps/rejected": -171.5186767578125, "loss": 1.9442, "rewards/accuracies": 0.5, "rewards/chosen": -3.192081928253174, "rewards/margins": 0.37278056144714355, "rewards/rejected": -3.5648624897003174, "step": 4828 }, { "epoch": 0.75, "learning_rate": 1.060555008039836e-05, "logits/chosen": -2.898695707321167, "logits/rejected": -2.016967535018921, "logps/chosen": -287.917724609375, "logps/rejected": -175.5722198486328, "loss": 1.8169, "rewards/accuracies": 0.5, "rewards/chosen": -3.127156972885132, "rewards/margins": 1.6188509464263916, "rewards/rejected": -4.746007919311523, "step": 4829 }, { "epoch": 0.75, "learning_rate": 1.0604816639867212e-05, "logits/chosen": -2.6513402462005615, "logits/rejected": -1.8497651815414429, "logps/chosen": -176.8037872314453, "logps/rejected": -85.15837860107422, "loss": 2.3944, "rewards/accuracies": 0.5, "rewards/chosen": -3.2971928119659424, "rewards/margins": -0.6036398410797119, "rewards/rejected": -2.6935529708862305, "step": 4830 }, { "epoch": 0.75, "learning_rate": 1.0604083199336064e-05, "logits/chosen": -2.0472381114959717, "logits/rejected": -2.821362257003784, "logps/chosen": -225.8072967529297, "logps/rejected": -500.19390869140625, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.7461280822753906, "rewards/margins": 7.959939002990723, "rewards/rejected": -8.706067085266113, "step": 4831 }, { "epoch": 0.75, "learning_rate": 1.0603349758804918e-05, "logits/chosen": -1.8128539323806763, "logits/rejected": -2.7986111640930176, "logps/chosen": -54.10975646972656, "logps/rejected": -294.9986572265625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.7716532945632935, "rewards/margins": 5.275491237640381, "rewards/rejected": -6.047144412994385, "step": 4832 }, { "epoch": 0.75, "learning_rate": 1.060261631827377e-05, "logits/chosen": -2.3979673385620117, "logits/rejected": -3.0621721744537354, "logps/chosen": -383.54986572265625, "logps/rejected": -560.719482421875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.6348930597305298, "rewards/margins": 6.452202796936035, "rewards/rejected": -7.087095737457275, "step": 4833 }, { "epoch": 0.75, "learning_rate": 1.0601882877742621e-05, "logits/chosen": -1.5863933563232422, "logits/rejected": -2.9943549633026123, "logps/chosen": -298.51239013671875, "logps/rejected": -672.1728515625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.2548246383666992, "rewards/margins": 5.544827461242676, "rewards/rejected": -6.799652099609375, "step": 4834 }, { "epoch": 0.75, "learning_rate": 1.0601149437211473e-05, "logits/chosen": -2.8122940063476562, "logits/rejected": -3.0618069171905518, "logps/chosen": -330.2679443359375, "logps/rejected": -395.0128479003906, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.1757103055715561, "rewards/margins": 5.384507179260254, "rewards/rejected": -5.560217380523682, "step": 4835 }, { "epoch": 0.75, "learning_rate": 1.0600415996680325e-05, "logits/chosen": -1.601595163345337, "logits/rejected": -2.6966800689697266, "logps/chosen": -594.3397216796875, "logps/rejected": -738.0653686523438, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.689013659954071, "rewards/margins": 7.1380157470703125, "rewards/rejected": -7.827029228210449, "step": 4836 }, { "epoch": 0.75, "learning_rate": 1.0599682556149177e-05, "logits/chosen": -2.884855270385742, "logits/rejected": -1.9564193487167358, "logps/chosen": -223.51124572753906, "logps/rejected": -49.42932891845703, "loss": 4.409, "rewards/accuracies": 0.0, "rewards/chosen": -5.502880096435547, "rewards/margins": -4.390759468078613, "rewards/rejected": -1.1121206283569336, "step": 4837 }, { "epoch": 0.75, "learning_rate": 1.059894911561803e-05, "logits/chosen": -3.0383777618408203, "logits/rejected": -1.9667534828186035, "logps/chosen": -627.1328125, "logps/rejected": -383.59228515625, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -1.6359679698944092, "rewards/margins": 4.309879302978516, "rewards/rejected": -5.945847511291504, "step": 4838 }, { "epoch": 0.75, "learning_rate": 1.0598215675086883e-05, "logits/chosen": -3.0566961765289307, "logits/rejected": -2.1758370399475098, "logps/chosen": -225.246826171875, "logps/rejected": -112.535888671875, "loss": 2.7987, "rewards/accuracies": 0.0, "rewards/chosen": -4.267168045043945, "rewards/margins": -2.7354931831359863, "rewards/rejected": -1.5316746234893799, "step": 4839 }, { "epoch": 0.75, "learning_rate": 1.0597482234555734e-05, "logits/chosen": -3.211169719696045, "logits/rejected": -2.9524381160736084, "logps/chosen": -160.01242065429688, "logps/rejected": -132.59295654296875, "loss": 2.4784, "rewards/accuracies": 0.5, "rewards/chosen": -3.178975820541382, "rewards/margins": 0.2550666332244873, "rewards/rejected": -3.434042453765869, "step": 4840 }, { "epoch": 0.75, "learning_rate": 1.0596748794024588e-05, "logits/chosen": -2.645235300064087, "logits/rejected": -3.2453110218048096, "logps/chosen": -101.07364654541016, "logps/rejected": -278.26837158203125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.1661028861999512, "rewards/margins": 6.622222900390625, "rewards/rejected": -7.788325309753418, "step": 4841 }, { "epoch": 0.75, "learning_rate": 1.059601535349344e-05, "logits/chosen": -2.4769983291625977, "logits/rejected": -2.968961715698242, "logps/chosen": -110.324462890625, "logps/rejected": -332.64154052734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.334385633468628, "rewards/margins": 6.607446670532227, "rewards/rejected": -7.941832065582275, "step": 4842 }, { "epoch": 0.75, "learning_rate": 1.0595281912962292e-05, "logits/chosen": -3.075488328933716, "logits/rejected": -2.650155782699585, "logps/chosen": -445.0552062988281, "logps/rejected": -463.1236572265625, "loss": 4.6866, "rewards/accuracies": 0.0, "rewards/chosen": -5.610757827758789, "rewards/margins": -4.6551690101623535, "rewards/rejected": -0.9555885195732117, "step": 4843 }, { "epoch": 0.75, "learning_rate": 1.0594548472431144e-05, "logits/chosen": -2.836444139480591, "logits/rejected": -3.0853843688964844, "logps/chosen": -241.9245147705078, "logps/rejected": -389.6703796386719, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -1.1933348178863525, "rewards/margins": 4.082389831542969, "rewards/rejected": -5.2757248878479, "step": 4844 }, { "epoch": 0.75, "learning_rate": 1.0593815031899995e-05, "logits/chosen": -2.5455124378204346, "logits/rejected": -2.1173534393310547, "logps/chosen": -290.5467224121094, "logps/rejected": -321.625244140625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.38602447509765625, "rewards/margins": 5.783763885498047, "rewards/rejected": -6.169788360595703, "step": 4845 }, { "epoch": 0.75, "learning_rate": 1.0593081591368847e-05, "logits/chosen": -2.862152099609375, "logits/rejected": -2.787538528442383, "logps/chosen": -200.8211669921875, "logps/rejected": -319.6077880859375, "loss": 0.9691, "rewards/accuracies": 0.5, "rewards/chosen": -1.7934013605117798, "rewards/margins": 1.457597255706787, "rewards/rejected": -3.2509987354278564, "step": 4846 }, { "epoch": 0.75, "learning_rate": 1.05923481508377e-05, "logits/chosen": -2.564631462097168, "logits/rejected": -2.9415054321289062, "logps/chosen": -55.45551300048828, "logps/rejected": -225.35841369628906, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.6998071074485779, "rewards/margins": 5.599440574645996, "rewards/rejected": -6.2992472648620605, "step": 4847 }, { "epoch": 0.75, "learning_rate": 1.0591614710306551e-05, "logits/chosen": -2.150237798690796, "logits/rejected": -2.824737787246704, "logps/chosen": -88.89860534667969, "logps/rejected": -248.10501098632812, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": -1.482970952987671, "rewards/margins": 2.7789721488952637, "rewards/rejected": -4.2619428634643555, "step": 4848 }, { "epoch": 0.75, "learning_rate": 1.0590881269775403e-05, "logits/chosen": -1.4530620574951172, "logits/rejected": -2.7294442653656006, "logps/chosen": -94.69766998291016, "logps/rejected": -455.5103759765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0962612628936768, "rewards/margins": 7.271979331970215, "rewards/rejected": -8.368240356445312, "step": 4849 }, { "epoch": 0.75, "learning_rate": 1.0590147829244257e-05, "logits/chosen": -2.6794683933258057, "logits/rejected": -2.832638740539551, "logps/chosen": -128.58883666992188, "logps/rejected": -431.5053405761719, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6847694516181946, "rewards/margins": 6.175722599029541, "rewards/rejected": -6.860491752624512, "step": 4850 }, { "epoch": 0.75, "learning_rate": 1.0589414388713108e-05, "logits/chosen": -1.7879632711410522, "logits/rejected": -2.9537675380706787, "logps/chosen": -125.1888198852539, "logps/rejected": -384.4158935546875, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -0.7568525075912476, "rewards/margins": 5.384155750274658, "rewards/rejected": -6.141008377075195, "step": 4851 }, { "epoch": 0.75, "learning_rate": 1.058868094818196e-05, "logits/chosen": -2.44319748878479, "logits/rejected": -2.892902374267578, "logps/chosen": -74.72364807128906, "logps/rejected": -223.29547119140625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.5545096397399902, "rewards/margins": 5.136471748352051, "rewards/rejected": -6.690980911254883, "step": 4852 }, { "epoch": 0.75, "learning_rate": 1.0587947507650812e-05, "logits/chosen": -2.3038485050201416, "logits/rejected": -3.0035741329193115, "logps/chosen": -252.99923706054688, "logps/rejected": -324.252685546875, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -1.2538352012634277, "rewards/margins": 4.525433540344238, "rewards/rejected": -5.779268741607666, "step": 4853 }, { "epoch": 0.75, "learning_rate": 1.0587214067119664e-05, "logits/chosen": -2.6932144165039062, "logits/rejected": -2.9199135303497314, "logps/chosen": -126.37654113769531, "logps/rejected": -304.8311767578125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.2358803749084473, "rewards/margins": 6.238338470458984, "rewards/rejected": -7.47421932220459, "step": 4854 }, { "epoch": 0.76, "learning_rate": 1.0586480626588516e-05, "logits/chosen": -3.1262927055358887, "logits/rejected": -3.3618900775909424, "logps/chosen": -45.11016082763672, "logps/rejected": -150.1019744873047, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -2.2469141483306885, "rewards/margins": 3.777730941772461, "rewards/rejected": -6.02464485168457, "step": 4855 }, { "epoch": 0.76, "learning_rate": 1.0585747186057368e-05, "logits/chosen": -2.531378984451294, "logits/rejected": -2.8407747745513916, "logps/chosen": -45.41065216064453, "logps/rejected": -105.3128890991211, "loss": 0.3503, "rewards/accuracies": 1.0, "rewards/chosen": -1.9857614040374756, "rewards/margins": 1.2473845481872559, "rewards/rejected": -3.2331459522247314, "step": 4856 }, { "epoch": 0.76, "learning_rate": 1.058501374552622e-05, "logits/chosen": -2.7899014949798584, "logits/rejected": -2.8217387199401855, "logps/chosen": -80.51712036132812, "logps/rejected": -182.87380981445312, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": -1.8902921676635742, "rewards/margins": 4.14725399017334, "rewards/rejected": -6.037546157836914, "step": 4857 }, { "epoch": 0.76, "learning_rate": 1.0584280304995072e-05, "logits/chosen": -2.3598270416259766, "logits/rejected": -2.871561050415039, "logps/chosen": -148.4346923828125, "logps/rejected": -260.64532470703125, "loss": 2.6901, "rewards/accuracies": 0.5, "rewards/chosen": -3.2248451709747314, "rewards/margins": -0.48513102531433105, "rewards/rejected": -2.7397141456604004, "step": 4858 }, { "epoch": 0.76, "learning_rate": 1.0583546864463925e-05, "logits/chosen": -2.1384596824645996, "logits/rejected": -2.8922202587127686, "logps/chosen": -345.9746398925781, "logps/rejected": -358.76934814453125, "loss": 1.3012, "rewards/accuracies": 0.5, "rewards/chosen": -2.5847060680389404, "rewards/margins": 0.09864437580108643, "rewards/rejected": -2.6833505630493164, "step": 4859 }, { "epoch": 0.76, "learning_rate": 1.0582813423932777e-05, "logits/chosen": -2.977588415145874, "logits/rejected": -3.006909132003784, "logps/chosen": -355.5802307128906, "logps/rejected": -402.8787536621094, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1112710237503052, "rewards/margins": 6.255099296569824, "rewards/rejected": -7.36637020111084, "step": 4860 }, { "epoch": 0.76, "learning_rate": 1.0582079983401629e-05, "logits/chosen": -2.121187686920166, "logits/rejected": -2.809083938598633, "logps/chosen": -122.6220474243164, "logps/rejected": -434.9450988769531, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.1236884593963623, "rewards/margins": 7.132112979888916, "rewards/rejected": -8.2558012008667, "step": 4861 }, { "epoch": 0.76, "learning_rate": 1.058134654287048e-05, "logits/chosen": -2.502920627593994, "logits/rejected": -2.5409371852874756, "logps/chosen": -444.76116943359375, "logps/rejected": -473.24530029296875, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.3156967163085938, "rewards/margins": 4.6214189529418945, "rewards/rejected": -5.937115669250488, "step": 4862 }, { "epoch": 0.76, "learning_rate": 1.0580613102339333e-05, "logits/chosen": -2.7724359035491943, "logits/rejected": -2.91670560836792, "logps/chosen": -41.324520111083984, "logps/rejected": -130.14776611328125, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.2585880756378174, "rewards/margins": 4.7183837890625, "rewards/rejected": -5.9769721031188965, "step": 4863 }, { "epoch": 0.76, "learning_rate": 1.0579879661808185e-05, "logits/chosen": -2.552370548248291, "logits/rejected": -3.021019697189331, "logps/chosen": -379.6216735839844, "logps/rejected": -461.25970458984375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.6007591485977173, "rewards/margins": 6.358570575714111, "rewards/rejected": -7.959329605102539, "step": 4864 }, { "epoch": 0.76, "learning_rate": 1.0579146221277036e-05, "logits/chosen": -3.2442095279693604, "logits/rejected": -2.861421823501587, "logps/chosen": -278.7286376953125, "logps/rejected": -265.7025451660156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.269921898841858, "rewards/margins": 6.690637588500977, "rewards/rejected": -7.960559368133545, "step": 4865 }, { "epoch": 0.76, "learning_rate": 1.0578412780745888e-05, "logits/chosen": -2.152344226837158, "logits/rejected": -2.825984001159668, "logps/chosen": -50.52244567871094, "logps/rejected": -133.30540466308594, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -1.0062158107757568, "rewards/margins": 4.01470422744751, "rewards/rejected": -5.0209197998046875, "step": 4866 }, { "epoch": 0.76, "learning_rate": 1.057767934021474e-05, "logits/chosen": -1.8747352361679077, "logits/rejected": -2.294123888015747, "logps/chosen": -182.79269409179688, "logps/rejected": -251.8778076171875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.063786268234253, "rewards/margins": 5.042527198791504, "rewards/rejected": -6.106313705444336, "step": 4867 }, { "epoch": 0.76, "learning_rate": 1.0576945899683594e-05, "logits/chosen": -1.4348158836364746, "logits/rejected": -2.9309403896331787, "logps/chosen": -55.4102897644043, "logps/rejected": -340.786865234375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.2762035131454468, "rewards/margins": 6.253482818603516, "rewards/rejected": -7.529685974121094, "step": 4868 }, { "epoch": 0.76, "learning_rate": 1.0576212459152446e-05, "logits/chosen": -1.9564955234527588, "logits/rejected": -2.768714427947998, "logps/chosen": -144.93881225585938, "logps/rejected": -262.927734375, "loss": 2.5503, "rewards/accuracies": 0.5, "rewards/chosen": -3.5962915420532227, "rewards/margins": 0.5184783935546875, "rewards/rejected": -4.11476993560791, "step": 4869 }, { "epoch": 0.76, "learning_rate": 1.0575479018621298e-05, "logits/chosen": -2.6816840171813965, "logits/rejected": -2.8120501041412354, "logps/chosen": -160.26596069335938, "logps/rejected": -286.9737548828125, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.8490452170372009, "rewards/margins": 5.782449722290039, "rewards/rejected": -6.631495475769043, "step": 4870 }, { "epoch": 0.76, "learning_rate": 1.057474557809015e-05, "logits/chosen": -2.2583000659942627, "logits/rejected": -2.4320061206817627, "logps/chosen": -246.66995239257812, "logps/rejected": -242.76068115234375, "loss": 1.2181, "rewards/accuracies": 0.5, "rewards/chosen": -3.3380837440490723, "rewards/margins": 2.522548198699951, "rewards/rejected": -5.860631942749023, "step": 4871 }, { "epoch": 0.76, "learning_rate": 1.0574012137559003e-05, "logits/chosen": -2.711705207824707, "logits/rejected": -2.920412540435791, "logps/chosen": -215.0499267578125, "logps/rejected": -487.0972595214844, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.4257851839065552, "rewards/margins": 6.340419769287109, "rewards/rejected": -7.766204833984375, "step": 4872 }, { "epoch": 0.76, "learning_rate": 1.0573278697027855e-05, "logits/chosen": -1.8809996843338013, "logits/rejected": -3.0191502571105957, "logps/chosen": -234.74026489257812, "logps/rejected": -540.4884033203125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.4900177717208862, "rewards/margins": 7.200691223144531, "rewards/rejected": -8.690709114074707, "step": 4873 }, { "epoch": 0.76, "learning_rate": 1.0572545256496707e-05, "logits/chosen": -1.962632179260254, "logits/rejected": -3.2731661796569824, "logps/chosen": -238.4755859375, "logps/rejected": -429.9714050292969, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": -3.6261332035064697, "rewards/margins": 3.37554669380188, "rewards/rejected": -7.00167989730835, "step": 4874 }, { "epoch": 0.76, "learning_rate": 1.0571811815965559e-05, "logits/chosen": -2.523036003112793, "logits/rejected": -3.01739239692688, "logps/chosen": -85.97090911865234, "logps/rejected": -323.0854187011719, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8451638221740723, "rewards/margins": 6.660913944244385, "rewards/rejected": -7.506077766418457, "step": 4875 }, { "epoch": 0.76, "learning_rate": 1.057107837543441e-05, "logits/chosen": -2.743838310241699, "logits/rejected": -2.9573521614074707, "logps/chosen": -237.28318786621094, "logps/rejected": -222.54672241210938, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -1.6561157703399658, "rewards/margins": 3.6949071884155273, "rewards/rejected": -5.351023197174072, "step": 4876 }, { "epoch": 0.76, "learning_rate": 1.0570344934903264e-05, "logits/chosen": -2.848583698272705, "logits/rejected": -2.674962043762207, "logps/chosen": -161.8657989501953, "logps/rejected": -237.004638671875, "loss": 1.0824, "rewards/accuracies": 0.5, "rewards/chosen": -1.6068722009658813, "rewards/margins": 2.6334195137023926, "rewards/rejected": -4.240291595458984, "step": 4877 }, { "epoch": 0.76, "learning_rate": 1.0569611494372116e-05, "logits/chosen": -2.2837045192718506, "logits/rejected": -3.0839171409606934, "logps/chosen": -242.40252685546875, "logps/rejected": -476.8001708984375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6967739462852478, "rewards/margins": 6.222116470336914, "rewards/rejected": -6.918890476226807, "step": 4878 }, { "epoch": 0.76, "learning_rate": 1.0568878053840968e-05, "logits/chosen": -1.8378856182098389, "logits/rejected": -2.8166091442108154, "logps/chosen": -106.63030242919922, "logps/rejected": -260.56231689453125, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -1.0714114904403687, "rewards/margins": 4.422565937042236, "rewards/rejected": -5.4939775466918945, "step": 4879 }, { "epoch": 0.76, "learning_rate": 1.056814461330982e-05, "logits/chosen": -2.3238110542297363, "logits/rejected": -2.9292259216308594, "logps/chosen": -238.81678771972656, "logps/rejected": -417.327880859375, "loss": 0.3274, "rewards/accuracies": 1.0, "rewards/chosen": -0.4932922422885895, "rewards/margins": 3.252450704574585, "rewards/rejected": -3.7457427978515625, "step": 4880 }, { "epoch": 0.76, "learning_rate": 1.0567411172778672e-05, "logits/chosen": -2.5906431674957275, "logits/rejected": -2.874037265777588, "logps/chosen": -193.23562622070312, "logps/rejected": -441.38043212890625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.9991428852081299, "rewards/margins": 7.419273376464844, "rewards/rejected": -8.418416023254395, "step": 4881 }, { "epoch": 0.76, "learning_rate": 1.0566677732247523e-05, "logits/chosen": -2.353285789489746, "logits/rejected": -2.7687699794769287, "logps/chosen": -191.9571990966797, "logps/rejected": -220.70184326171875, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -2.255261182785034, "rewards/margins": 3.200667381286621, "rewards/rejected": -5.455928802490234, "step": 4882 }, { "epoch": 0.76, "learning_rate": 1.0565944291716375e-05, "logits/chosen": -3.1974639892578125, "logits/rejected": -2.880666971206665, "logps/chosen": -241.91787719726562, "logps/rejected": -100.56797790527344, "loss": 1.4359, "rewards/accuracies": 0.5, "rewards/chosen": -3.740845203399658, "rewards/margins": 0.09042346477508545, "rewards/rejected": -3.831268787384033, "step": 4883 }, { "epoch": 0.76, "learning_rate": 1.0565210851185227e-05, "logits/chosen": -2.0539586544036865, "logits/rejected": -2.7885217666625977, "logps/chosen": -156.88143920898438, "logps/rejected": -175.39166259765625, "loss": 1.4961, "rewards/accuracies": 0.5, "rewards/chosen": -2.605910301208496, "rewards/margins": 1.1757452487945557, "rewards/rejected": -3.781655788421631, "step": 4884 }, { "epoch": 0.76, "learning_rate": 1.0564477410654079e-05, "logits/chosen": -2.1479837894439697, "logits/rejected": -3.115722417831421, "logps/chosen": -77.4024658203125, "logps/rejected": -199.35955810546875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.0167728662490845, "rewards/margins": 4.3782429695129395, "rewards/rejected": -5.395015716552734, "step": 4885 }, { "epoch": 0.76, "learning_rate": 1.0563743970122933e-05, "logits/chosen": -1.932570457458496, "logits/rejected": -2.6573610305786133, "logps/chosen": -221.724365234375, "logps/rejected": -484.1791687011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4819778501987457, "rewards/margins": 10.259684562683105, "rewards/rejected": -10.741662979125977, "step": 4886 }, { "epoch": 0.76, "learning_rate": 1.0563010529591785e-05, "logits/chosen": -2.8352084159851074, "logits/rejected": -2.7948343753814697, "logps/chosen": -144.44580078125, "logps/rejected": -178.96762084960938, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -0.481697678565979, "rewards/margins": 4.679001808166504, "rewards/rejected": -5.160699367523193, "step": 4887 }, { "epoch": 0.76, "learning_rate": 1.0562277089060636e-05, "logits/chosen": -2.1699836254119873, "logits/rejected": -3.0504050254821777, "logps/chosen": -51.98246765136719, "logps/rejected": -207.89303588867188, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -1.327202320098877, "rewards/margins": 3.809587001800537, "rewards/rejected": -5.136789321899414, "step": 4888 }, { "epoch": 0.76, "learning_rate": 1.0561543648529488e-05, "logits/chosen": -2.739882230758667, "logits/rejected": -2.9996211528778076, "logps/chosen": -75.20645141601562, "logps/rejected": -157.107666015625, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.899870753288269, "rewards/margins": 4.178898811340332, "rewards/rejected": -5.078769683837891, "step": 4889 }, { "epoch": 0.76, "learning_rate": 1.056081020799834e-05, "logits/chosen": -2.704406499862671, "logits/rejected": -3.0185775756835938, "logps/chosen": -95.42143249511719, "logps/rejected": -225.4809112548828, "loss": 1.7433, "rewards/accuracies": 0.5, "rewards/chosen": -2.096534490585327, "rewards/margins": 0.7073472738265991, "rewards/rejected": -2.803881883621216, "step": 4890 }, { "epoch": 0.76, "learning_rate": 1.0560076767467192e-05, "logits/chosen": -2.869983434677124, "logits/rejected": -2.5467185974121094, "logps/chosen": -160.70697021484375, "logps/rejected": -166.4504852294922, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.6674206256866455, "rewards/margins": 5.2879462242126465, "rewards/rejected": -5.955367088317871, "step": 4891 }, { "epoch": 0.76, "learning_rate": 1.0559343326936044e-05, "logits/chosen": -2.7437920570373535, "logits/rejected": -3.363724946975708, "logps/chosen": -476.24041748046875, "logps/rejected": -650.3642578125, "loss": 0.1039, "rewards/accuracies": 1.0, "rewards/chosen": -1.4303982257843018, "rewards/margins": 4.12761926651001, "rewards/rejected": -5.558017253875732, "step": 4892 }, { "epoch": 0.76, "learning_rate": 1.0558609886404896e-05, "logits/chosen": -3.4028570652008057, "logits/rejected": -3.120459794998169, "logps/chosen": -330.1612243652344, "logps/rejected": -126.18855285644531, "loss": 0.2873, "rewards/accuracies": 1.0, "rewards/chosen": -0.12504348158836365, "rewards/margins": 2.9770054817199707, "rewards/rejected": -3.102048873901367, "step": 4893 }, { "epoch": 0.76, "learning_rate": 1.0557876445873748e-05, "logits/chosen": -3.077777147293091, "logits/rejected": -2.9247889518737793, "logps/chosen": -180.17050170898438, "logps/rejected": -210.3582763671875, "loss": 2.1615, "rewards/accuracies": 0.5, "rewards/chosen": -3.283952236175537, "rewards/margins": 0.40641260147094727, "rewards/rejected": -3.6903648376464844, "step": 4894 }, { "epoch": 0.76, "learning_rate": 1.0557143005342601e-05, "logits/chosen": -1.1102663278579712, "logits/rejected": -2.9996097087860107, "logps/chosen": -36.02750015258789, "logps/rejected": -381.6548156738281, "loss": 0.275, "rewards/accuracies": 1.0, "rewards/chosen": -1.990627408027649, "rewards/margins": 3.3999853134155273, "rewards/rejected": -5.390612602233887, "step": 4895 }, { "epoch": 0.76, "learning_rate": 1.0556409564811453e-05, "logits/chosen": -2.6808979511260986, "logits/rejected": -2.689323663711548, "logps/chosen": -77.16463470458984, "logps/rejected": -74.37408447265625, "loss": 1.0693, "rewards/accuracies": 0.5, "rewards/chosen": -1.9588096141815186, "rewards/margins": 1.327749252319336, "rewards/rejected": -3.2865588665008545, "step": 4896 }, { "epoch": 0.76, "learning_rate": 1.0555676124280305e-05, "logits/chosen": -3.2247490882873535, "logits/rejected": -2.360880136489868, "logps/chosen": -1222.726806640625, "logps/rejected": -680.365234375, "loss": 2.4201, "rewards/accuracies": 0.5, "rewards/chosen": -3.5120606422424316, "rewards/margins": 0.6424422264099121, "rewards/rejected": -4.154502868652344, "step": 4897 }, { "epoch": 0.76, "learning_rate": 1.0554942683749157e-05, "logits/chosen": -2.3717710971832275, "logits/rejected": -3.0260720252990723, "logps/chosen": -98.77118682861328, "logps/rejected": -195.4152069091797, "loss": 2.1564, "rewards/accuracies": 0.5, "rewards/chosen": -2.7487940788269043, "rewards/margins": 0.7042737007141113, "rewards/rejected": -3.4530677795410156, "step": 4898 }, { "epoch": 0.76, "learning_rate": 1.0554209243218009e-05, "logits/chosen": -2.496616840362549, "logits/rejected": -2.9291257858276367, "logps/chosen": -28.533512115478516, "logps/rejected": -267.09423828125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.5833853483200073, "rewards/margins": 5.461819648742676, "rewards/rejected": -6.045205116271973, "step": 4899 }, { "epoch": 0.76, "learning_rate": 1.055347580268686e-05, "logits/chosen": -1.6644352674484253, "logits/rejected": -2.8820157051086426, "logps/chosen": -205.96591186523438, "logps/rejected": -222.4742431640625, "loss": 2.3086, "rewards/accuracies": 0.5, "rewards/chosen": -3.946594476699829, "rewards/margins": -0.2790095806121826, "rewards/rejected": -3.6675848960876465, "step": 4900 }, { "epoch": 0.76, "learning_rate": 1.0552742362155713e-05, "logits/chosen": -2.876373529434204, "logits/rejected": -2.4104490280151367, "logps/chosen": -446.4394226074219, "logps/rejected": -187.05838012695312, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.8910312652587891, "rewards/margins": 4.804636478424072, "rewards/rejected": -5.695667743682861, "step": 4901 }, { "epoch": 0.76, "learning_rate": 1.0552008921624564e-05, "logits/chosen": -2.115226984024048, "logits/rejected": -3.0277647972106934, "logps/chosen": -169.7402801513672, "logps/rejected": -404.4939270019531, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.08819806575775146, "rewards/margins": 7.222664833068848, "rewards/rejected": -7.310863018035889, "step": 4902 }, { "epoch": 0.76, "learning_rate": 1.0551275481093416e-05, "logits/chosen": -2.8896334171295166, "logits/rejected": -2.5351340770721436, "logps/chosen": -602.2579345703125, "logps/rejected": -455.96734619140625, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.4534988403320312, "rewards/margins": 5.1860270500183105, "rewards/rejected": -6.639525890350342, "step": 4903 }, { "epoch": 0.76, "learning_rate": 1.055054204056227e-05, "logits/chosen": -2.612882375717163, "logits/rejected": -2.7920868396759033, "logps/chosen": -63.549591064453125, "logps/rejected": -160.84902954101562, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -0.9782984256744385, "rewards/margins": 3.9071953296661377, "rewards/rejected": -4.885493755340576, "step": 4904 }, { "epoch": 0.76, "learning_rate": 1.0549808600031122e-05, "logits/chosen": -2.7774064540863037, "logits/rejected": -2.3690803050994873, "logps/chosen": -107.08275604248047, "logps/rejected": -75.12507629394531, "loss": 2.5817, "rewards/accuracies": 0.5, "rewards/chosen": -3.9925336837768555, "rewards/margins": -0.59508216381073, "rewards/rejected": -3.397451400756836, "step": 4905 }, { "epoch": 0.76, "learning_rate": 1.0549075159499975e-05, "logits/chosen": -2.9176602363586426, "logits/rejected": -2.911478042602539, "logps/chosen": -141.97427368164062, "logps/rejected": -128.23223876953125, "loss": 1.2131, "rewards/accuracies": 0.5, "rewards/chosen": -3.2151451110839844, "rewards/margins": 0.898770809173584, "rewards/rejected": -4.113916397094727, "step": 4906 }, { "epoch": 0.76, "learning_rate": 1.0548341718968827e-05, "logits/chosen": -3.0676515102386475, "logits/rejected": -2.9965901374816895, "logps/chosen": -177.2935791015625, "logps/rejected": -121.42438507080078, "loss": 1.6732, "rewards/accuracies": 0.5, "rewards/chosen": -2.9148972034454346, "rewards/margins": -0.32370781898498535, "rewards/rejected": -2.591189384460449, "step": 4907 }, { "epoch": 0.76, "learning_rate": 1.0547608278437679e-05, "logits/chosen": -3.110032081604004, "logits/rejected": -3.0471670627593994, "logps/chosen": -143.76754760742188, "logps/rejected": -176.7823944091797, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.1946488469839096, "rewards/margins": 4.997749328613281, "rewards/rejected": -4.8031005859375, "step": 4908 }, { "epoch": 0.76, "learning_rate": 1.0546874837906531e-05, "logits/chosen": -1.685585379600525, "logits/rejected": -2.6051013469696045, "logps/chosen": -234.9891815185547, "logps/rejected": -375.4552307128906, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -1.6144969463348389, "rewards/margins": 4.410923004150391, "rewards/rejected": -6.025420188903809, "step": 4909 }, { "epoch": 0.76, "learning_rate": 1.0546141397375383e-05, "logits/chosen": -2.5821797847747803, "logits/rejected": -2.692662239074707, "logps/chosen": -308.69989013671875, "logps/rejected": -398.18035888671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.052911639213562, "rewards/margins": 7.173557758331299, "rewards/rejected": -8.226469039916992, "step": 4910 }, { "epoch": 0.76, "learning_rate": 1.0545407956844235e-05, "logits/chosen": -2.3105666637420654, "logits/rejected": -2.9074478149414062, "logps/chosen": -66.7100601196289, "logps/rejected": -357.49639892578125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.9885149002075195, "rewards/margins": 5.9868059158325195, "rewards/rejected": -6.975320816040039, "step": 4911 }, { "epoch": 0.76, "learning_rate": 1.0544674516313087e-05, "logits/chosen": -2.8985414505004883, "logits/rejected": -3.142462968826294, "logps/chosen": -162.07843017578125, "logps/rejected": -305.441162109375, "loss": 3.0486, "rewards/accuracies": 0.5, "rewards/chosen": -4.0768022537231445, "rewards/margins": 0.6297547817230225, "rewards/rejected": -4.706557273864746, "step": 4912 }, { "epoch": 0.76, "learning_rate": 1.054394107578194e-05, "logits/chosen": -2.1853291988372803, "logits/rejected": -2.8775620460510254, "logps/chosen": -124.07352447509766, "logps/rejected": -380.86285400390625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.4267083406448364, "rewards/margins": 7.538691520690918, "rewards/rejected": -8.965399742126465, "step": 4913 }, { "epoch": 0.76, "learning_rate": 1.0543207635250792e-05, "logits/chosen": -2.7292351722717285, "logits/rejected": -1.7973302602767944, "logps/chosen": -181.9772491455078, "logps/rejected": -231.83250427246094, "loss": 1.6264, "rewards/accuracies": 0.5, "rewards/chosen": -2.3399460315704346, "rewards/margins": 2.1270017623901367, "rewards/rejected": -4.46694803237915, "step": 4914 }, { "epoch": 0.76, "learning_rate": 1.0542474194719644e-05, "logits/chosen": -2.5967330932617188, "logits/rejected": -2.9659855365753174, "logps/chosen": -68.22651672363281, "logps/rejected": -173.71591186523438, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -0.5660903453826904, "rewards/margins": 3.0245046615600586, "rewards/rejected": -3.590595006942749, "step": 4915 }, { "epoch": 0.76, "learning_rate": 1.0541740754188496e-05, "logits/chosen": -2.629481077194214, "logits/rejected": -3.0428245067596436, "logps/chosen": -89.9981918334961, "logps/rejected": -203.58026123046875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.4486844837665558, "rewards/margins": 5.516106605529785, "rewards/rejected": -5.964791297912598, "step": 4916 }, { "epoch": 0.76, "learning_rate": 1.0541007313657348e-05, "logits/chosen": -2.7597711086273193, "logits/rejected": -2.8309450149536133, "logps/chosen": -267.032958984375, "logps/rejected": -157.84698486328125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.7817811965942383, "rewards/margins": 5.254602432250977, "rewards/rejected": -6.036383628845215, "step": 4917 }, { "epoch": 0.76, "learning_rate": 1.05402738731262e-05, "logits/chosen": -2.808506488800049, "logits/rejected": -2.3288087844848633, "logps/chosen": -133.00820922851562, "logps/rejected": -248.13427734375, "loss": 0.0769, "rewards/accuracies": 1.0, "rewards/chosen": -2.141000747680664, "rewards/margins": 2.581852674484253, "rewards/rejected": -4.722853660583496, "step": 4918 }, { "epoch": 0.77, "learning_rate": 1.0539540432595051e-05, "logits/chosen": -2.3040006160736084, "logits/rejected": -3.089653253555298, "logps/chosen": -148.57644653320312, "logps/rejected": -350.5751037597656, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.0906528234481812, "rewards/margins": 4.255451679229736, "rewards/rejected": -5.346104621887207, "step": 4919 }, { "epoch": 0.77, "learning_rate": 1.0538806992063903e-05, "logits/chosen": -2.6759531497955322, "logits/rejected": -3.3140389919281006, "logps/chosen": -33.906471252441406, "logps/rejected": -339.101806640625, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -1.3890964984893799, "rewards/margins": 7.41441535949707, "rewards/rejected": -8.803512573242188, "step": 4920 }, { "epoch": 0.77, "learning_rate": 1.0538073551532755e-05, "logits/chosen": -2.359616756439209, "logits/rejected": -2.9892966747283936, "logps/chosen": -51.98600769042969, "logps/rejected": -227.71456909179688, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.4957876205444336, "rewards/margins": 4.906305313110352, "rewards/rejected": -6.402092933654785, "step": 4921 }, { "epoch": 0.77, "learning_rate": 1.0537340111001609e-05, "logits/chosen": -1.1594038009643555, "logits/rejected": -2.521556854248047, "logps/chosen": -69.213134765625, "logps/rejected": -333.9714050292969, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.1833158731460571, "rewards/margins": 5.237098693847656, "rewards/rejected": -6.420414447784424, "step": 4922 }, { "epoch": 0.77, "learning_rate": 1.053660667047046e-05, "logits/chosen": -2.9179279804229736, "logits/rejected": -2.748356819152832, "logps/chosen": -238.3729248046875, "logps/rejected": -209.84640502929688, "loss": 1.7476, "rewards/accuracies": 0.5, "rewards/chosen": -2.175107717514038, "rewards/margins": 1.7106406688690186, "rewards/rejected": -3.8857483863830566, "step": 4923 }, { "epoch": 0.77, "learning_rate": 1.0535873229939313e-05, "logits/chosen": -2.666447877883911, "logits/rejected": -2.818155288696289, "logps/chosen": -426.00201416015625, "logps/rejected": -262.64031982421875, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -1.7442642450332642, "rewards/margins": 4.1153106689453125, "rewards/rejected": -5.859575271606445, "step": 4924 }, { "epoch": 0.77, "learning_rate": 1.0535139789408164e-05, "logits/chosen": -2.4443745613098145, "logits/rejected": -3.0377111434936523, "logps/chosen": -144.792236328125, "logps/rejected": -217.0664825439453, "loss": 2.5519, "rewards/accuracies": 0.5, "rewards/chosen": -4.044585227966309, "rewards/margins": -0.40364694595336914, "rewards/rejected": -3.6409387588500977, "step": 4925 }, { "epoch": 0.77, "learning_rate": 1.0534406348877016e-05, "logits/chosen": -2.7692673206329346, "logits/rejected": -2.6713597774505615, "logps/chosen": -268.3637390136719, "logps/rejected": -314.0838623046875, "loss": 2.8141, "rewards/accuracies": 0.0, "rewards/chosen": -3.9369301795959473, "rewards/margins": -2.7500617504119873, "rewards/rejected": -1.1868683099746704, "step": 4926 }, { "epoch": 0.77, "learning_rate": 1.0533672908345868e-05, "logits/chosen": -2.775099992752075, "logits/rejected": -2.273167371749878, "logps/chosen": -236.6861114501953, "logps/rejected": -331.52752685546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.2115837335586548, "rewards/margins": 6.552742958068848, "rewards/rejected": -7.764327049255371, "step": 4927 }, { "epoch": 0.77, "learning_rate": 1.053293946781472e-05, "logits/chosen": -3.2804079055786133, "logits/rejected": -2.987271308898926, "logps/chosen": -289.5043640136719, "logps/rejected": -243.78199768066406, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -0.05274353176355362, "rewards/margins": 3.3582091331481934, "rewards/rejected": -3.4109528064727783, "step": 4928 }, { "epoch": 0.77, "learning_rate": 1.0532206027283572e-05, "logits/chosen": -2.953176259994507, "logits/rejected": -3.083584785461426, "logps/chosen": -167.28146362304688, "logps/rejected": -256.4768981933594, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.7254371643066406, "rewards/margins": 4.6404337882995605, "rewards/rejected": -5.365870475769043, "step": 4929 }, { "epoch": 0.77, "learning_rate": 1.0531472586752425e-05, "logits/chosen": -1.472983479499817, "logits/rejected": -2.8230504989624023, "logps/chosen": -49.43017578125, "logps/rejected": -244.9185028076172, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.4741274118423462, "rewards/margins": 5.8655853271484375, "rewards/rejected": -6.339712619781494, "step": 4930 }, { "epoch": 0.77, "learning_rate": 1.0530739146221277e-05, "logits/chosen": -1.325169324874878, "logits/rejected": -2.695286989212036, "logps/chosen": -225.59950256347656, "logps/rejected": -532.5802001953125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.008233666419983, "rewards/margins": 6.00676155090332, "rewards/rejected": -7.0149946212768555, "step": 4931 }, { "epoch": 0.77, "learning_rate": 1.053000570569013e-05, "logits/chosen": -2.7902307510375977, "logits/rejected": -2.555521011352539, "logps/chosen": -438.31475830078125, "logps/rejected": -411.0728759765625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.31476059556007385, "rewards/margins": 5.51138973236084, "rewards/rejected": -5.826150417327881, "step": 4932 }, { "epoch": 0.77, "learning_rate": 1.0529272265158981e-05, "logits/chosen": -3.0522143840789795, "logits/rejected": -2.3338000774383545, "logps/chosen": -372.9947814941406, "logps/rejected": -163.5645294189453, "loss": 1.4017, "rewards/accuracies": 0.5, "rewards/chosen": -3.530010938644409, "rewards/margins": 2.3973779678344727, "rewards/rejected": -5.927389144897461, "step": 4933 }, { "epoch": 0.77, "learning_rate": 1.0528538824627833e-05, "logits/chosen": -2.2962253093719482, "logits/rejected": -2.6880533695220947, "logps/chosen": -133.38128662109375, "logps/rejected": -328.232666015625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.9029393196105957, "rewards/margins": 5.884979724884033, "rewards/rejected": -7.787919044494629, "step": 4934 }, { "epoch": 0.77, "learning_rate": 1.0527805384096685e-05, "logits/chosen": -1.477381706237793, "logits/rejected": -2.838935375213623, "logps/chosen": -321.4832763671875, "logps/rejected": -463.1546630859375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.391719102859497, "rewards/margins": 5.652897834777832, "rewards/rejected": -7.04461669921875, "step": 4935 }, { "epoch": 0.77, "learning_rate": 1.0527071943565537e-05, "logits/chosen": -2.8478314876556396, "logits/rejected": -2.884293794631958, "logps/chosen": -315.14404296875, "logps/rejected": -248.05682373046875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.4567919969558716, "rewards/margins": 5.3141279220581055, "rewards/rejected": -6.770920276641846, "step": 4936 }, { "epoch": 0.77, "learning_rate": 1.0526338503034389e-05, "logits/chosen": -2.7452807426452637, "logits/rejected": -2.099956512451172, "logps/chosen": -458.4127197265625, "logps/rejected": -370.258544921875, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -2.0734078884124756, "rewards/margins": 5.141143798828125, "rewards/rejected": -7.2145514488220215, "step": 4937 }, { "epoch": 0.77, "learning_rate": 1.052560506250324e-05, "logits/chosen": -3.0379388332366943, "logits/rejected": -2.0167932510375977, "logps/chosen": -250.53396606445312, "logps/rejected": -127.1747817993164, "loss": 1.7848, "rewards/accuracies": 0.5, "rewards/chosen": -2.970860242843628, "rewards/margins": 0.7612317800521851, "rewards/rejected": -3.7320921421051025, "step": 4938 }, { "epoch": 0.77, "learning_rate": 1.0524871621972094e-05, "logits/chosen": -1.8686193227767944, "logits/rejected": -3.0703341960906982, "logps/chosen": -64.98350524902344, "logps/rejected": -255.9472198486328, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -1.3680434226989746, "rewards/margins": 4.5567522048950195, "rewards/rejected": -5.924795627593994, "step": 4939 }, { "epoch": 0.77, "learning_rate": 1.0524138181440946e-05, "logits/chosen": -1.69948148727417, "logits/rejected": -2.5840301513671875, "logps/chosen": -164.4969940185547, "logps/rejected": -455.8678894042969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.10060232877731323, "rewards/margins": 8.02072811126709, "rewards/rejected": -8.121330261230469, "step": 4940 }, { "epoch": 0.77, "learning_rate": 1.05234047409098e-05, "logits/chosen": -2.791177749633789, "logits/rejected": -2.8536007404327393, "logps/chosen": -107.41600799560547, "logps/rejected": -174.6145477294922, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.9382585287094116, "rewards/margins": 5.2511138916015625, "rewards/rejected": -6.1893720626831055, "step": 4941 }, { "epoch": 0.77, "learning_rate": 1.0522671300378651e-05, "logits/chosen": -1.7020668983459473, "logits/rejected": -2.8070878982543945, "logps/chosen": -174.07046508789062, "logps/rejected": -365.31158447265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.540850043296814, "rewards/margins": 7.594061374664307, "rewards/rejected": -8.13491153717041, "step": 4942 }, { "epoch": 0.77, "learning_rate": 1.0521937859847503e-05, "logits/chosen": -2.1761701107025146, "logits/rejected": -2.5082855224609375, "logps/chosen": -192.86647033691406, "logps/rejected": -303.145751953125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.6413764953613281, "rewards/margins": 8.359684944152832, "rewards/rejected": -9.00106143951416, "step": 4943 }, { "epoch": 0.77, "learning_rate": 1.0521204419316355e-05, "logits/chosen": -2.5978333950042725, "logits/rejected": -2.5806429386138916, "logps/chosen": -268.77606201171875, "logps/rejected": -142.4842071533203, "loss": 1.9679, "rewards/accuracies": 0.5, "rewards/chosen": -3.1363322734832764, "rewards/margins": 0.3420143127441406, "rewards/rejected": -3.478346586227417, "step": 4944 }, { "epoch": 0.77, "learning_rate": 1.0520470978785207e-05, "logits/chosen": -3.030404806137085, "logits/rejected": -2.371239185333252, "logps/chosen": -265.29473876953125, "logps/rejected": -147.37571716308594, "loss": 1.4645, "rewards/accuracies": 0.5, "rewards/chosen": -3.3923463821411133, "rewards/margins": 0.8361440896987915, "rewards/rejected": -4.228490352630615, "step": 4945 }, { "epoch": 0.77, "learning_rate": 1.0519737538254059e-05, "logits/chosen": -2.8241353034973145, "logits/rejected": -2.872728109359741, "logps/chosen": -571.5916748046875, "logps/rejected": -546.8367919921875, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/chosen": -1.285578966140747, "rewards/margins": 2.6813530921936035, "rewards/rejected": -3.9669318199157715, "step": 4946 }, { "epoch": 0.77, "learning_rate": 1.051900409772291e-05, "logits/chosen": -2.696040630340576, "logits/rejected": -1.807963252067566, "logps/chosen": -618.779541015625, "logps/rejected": -283.17291259765625, "loss": 1.3145, "rewards/accuracies": 0.5, "rewards/chosen": -2.490708351135254, "rewards/margins": 1.4322919845581055, "rewards/rejected": -3.9230003356933594, "step": 4947 }, { "epoch": 0.77, "learning_rate": 1.0518270657191764e-05, "logits/chosen": -2.280036449432373, "logits/rejected": -1.8178716897964478, "logps/chosen": -160.02874755859375, "logps/rejected": -197.51426696777344, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.15870971977710724, "rewards/margins": 3.995945930480957, "rewards/rejected": -4.154655456542969, "step": 4948 }, { "epoch": 0.77, "learning_rate": 1.0517537216660616e-05, "logits/chosen": -1.313597559928894, "logits/rejected": -3.0235908031463623, "logps/chosen": -49.463600158691406, "logps/rejected": -484.1341857910156, "loss": 0.3141, "rewards/accuracies": 1.0, "rewards/chosen": -1.9208967685699463, "rewards/margins": 4.721477031707764, "rewards/rejected": -6.642373561859131, "step": 4949 }, { "epoch": 0.77, "learning_rate": 1.0516803776129468e-05, "logits/chosen": -2.2280797958374023, "logits/rejected": -3.0213515758514404, "logps/chosen": -63.68141174316406, "logps/rejected": -309.2021484375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.6478583812713623, "rewards/margins": 7.0486273765563965, "rewards/rejected": -8.69648551940918, "step": 4950 }, { "epoch": 0.77, "learning_rate": 1.051607033559832e-05, "logits/chosen": -2.4300427436828613, "logits/rejected": -2.9856081008911133, "logps/chosen": -224.31776428222656, "logps/rejected": -336.8165283203125, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.10462914407253265, "rewards/margins": 4.159181594848633, "rewards/rejected": -4.263811111450195, "step": 4951 }, { "epoch": 0.77, "learning_rate": 1.0515336895067172e-05, "logits/chosen": -2.820152997970581, "logits/rejected": -1.8272569179534912, "logps/chosen": -427.3266296386719, "logps/rejected": -421.414306640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4228932857513428, "rewards/margins": 7.092952728271484, "rewards/rejected": -9.515846252441406, "step": 4952 }, { "epoch": 0.77, "learning_rate": 1.0514603454536024e-05, "logits/chosen": -1.2885870933532715, "logits/rejected": -2.615065813064575, "logps/chosen": -49.135398864746094, "logps/rejected": -228.60092163085938, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -1.3704514503479004, "rewards/margins": 3.9494524002075195, "rewards/rejected": -5.31990385055542, "step": 4953 }, { "epoch": 0.77, "learning_rate": 1.0513870014004876e-05, "logits/chosen": -2.572068214416504, "logits/rejected": -3.0279433727264404, "logps/chosen": -417.14697265625, "logps/rejected": -418.6905822753906, "loss": 0.085, "rewards/accuracies": 1.0, "rewards/chosen": -0.5003929138183594, "rewards/margins": 3.402423858642578, "rewards/rejected": -3.9028167724609375, "step": 4954 }, { "epoch": 0.77, "learning_rate": 1.0513136573473728e-05, "logits/chosen": -2.1263554096221924, "logits/rejected": -2.799698829650879, "logps/chosen": -218.72190856933594, "logps/rejected": -490.5599670410156, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.1033681631088257, "rewards/margins": 9.346637725830078, "rewards/rejected": -10.450006484985352, "step": 4955 }, { "epoch": 0.77, "learning_rate": 1.051240313294258e-05, "logits/chosen": -2.7233684062957764, "logits/rejected": -2.8157122135162354, "logps/chosen": -211.06455993652344, "logps/rejected": -180.21725463867188, "loss": 1.0148, "rewards/accuracies": 0.5, "rewards/chosen": -1.7316864728927612, "rewards/margins": 2.832803249359131, "rewards/rejected": -4.564489841461182, "step": 4956 }, { "epoch": 0.77, "learning_rate": 1.0511669692411433e-05, "logits/chosen": -2.4781746864318848, "logits/rejected": -3.041893482208252, "logps/chosen": -118.76648712158203, "logps/rejected": -276.029296875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.0251386165618896, "rewards/margins": 6.031131744384766, "rewards/rejected": -7.056270122528076, "step": 4957 }, { "epoch": 0.77, "learning_rate": 1.0510936251880285e-05, "logits/chosen": -2.7952451705932617, "logits/rejected": -2.0387017726898193, "logps/chosen": -207.6942596435547, "logps/rejected": -127.27427673339844, "loss": 1.7849, "rewards/accuracies": 0.5, "rewards/chosen": -3.2059547901153564, "rewards/margins": 0.007270097732543945, "rewards/rejected": -3.2132248878479004, "step": 4958 }, { "epoch": 0.77, "learning_rate": 1.0510202811349137e-05, "logits/chosen": -2.795325994491577, "logits/rejected": -3.2124335765838623, "logps/chosen": -85.03694152832031, "logps/rejected": -207.8712921142578, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.2721766233444214, "rewards/margins": 4.969632148742676, "rewards/rejected": -6.241808891296387, "step": 4959 }, { "epoch": 0.77, "learning_rate": 1.0509469370817989e-05, "logits/chosen": -2.93088960647583, "logits/rejected": -2.699476957321167, "logps/chosen": -1073.292236328125, "logps/rejected": -636.491943359375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3302154541015625, "rewards/margins": 5.317318916320801, "rewards/rejected": -6.647534370422363, "step": 4960 }, { "epoch": 0.77, "learning_rate": 1.050873593028684e-05, "logits/chosen": -2.9678025245666504, "logits/rejected": -1.9174524545669556, "logps/chosen": -235.61300659179688, "logps/rejected": -88.80257415771484, "loss": 2.3363, "rewards/accuracies": 0.5, "rewards/chosen": -3.388946056365967, "rewards/margins": -0.28598904609680176, "rewards/rejected": -3.102957010269165, "step": 4961 }, { "epoch": 0.77, "learning_rate": 1.0508002489755692e-05, "logits/chosen": -2.506861686706543, "logits/rejected": -3.2671189308166504, "logps/chosen": -128.18905639648438, "logps/rejected": -294.0823059082031, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.702284812927246, "rewards/margins": 4.634697437286377, "rewards/rejected": -6.336981773376465, "step": 4962 }, { "epoch": 0.77, "learning_rate": 1.0507269049224544e-05, "logits/chosen": -1.9526283740997314, "logits/rejected": -2.998561382293701, "logps/chosen": -70.61396789550781, "logps/rejected": -299.70843505859375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.6191561222076416, "rewards/margins": 7.115715026855469, "rewards/rejected": -7.734870910644531, "step": 4963 }, { "epoch": 0.77, "learning_rate": 1.0506535608693396e-05, "logits/chosen": -1.9368563890457153, "logits/rejected": -2.490321636199951, "logps/chosen": -138.0130615234375, "logps/rejected": -248.74305725097656, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.09439773857593536, "rewards/margins": 5.847997665405273, "rewards/rejected": -5.753599643707275, "step": 4964 }, { "epoch": 0.77, "learning_rate": 1.0505802168162248e-05, "logits/chosen": -2.8394534587860107, "logits/rejected": -2.4294817447662354, "logps/chosen": -574.241943359375, "logps/rejected": -464.9662780761719, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.35792043805122375, "rewards/margins": 5.498739242553711, "rewards/rejected": -5.856659889221191, "step": 4965 }, { "epoch": 0.77, "learning_rate": 1.0505068727631102e-05, "logits/chosen": -2.6349399089813232, "logits/rejected": -1.8054471015930176, "logps/chosen": -153.84881591796875, "logps/rejected": -202.0946044921875, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -1.0853393077850342, "rewards/margins": 4.239852428436279, "rewards/rejected": -5.325191497802734, "step": 4966 }, { "epoch": 0.77, "learning_rate": 1.0504335287099953e-05, "logits/chosen": -0.9213013648986816, "logits/rejected": -1.7002134323120117, "logps/chosen": -208.860595703125, "logps/rejected": -312.6893005371094, "loss": 1.1493, "rewards/accuracies": 0.5, "rewards/chosen": -1.4912086725234985, "rewards/margins": 2.640471935272217, "rewards/rejected": -4.131680488586426, "step": 4967 }, { "epoch": 0.77, "learning_rate": 1.0503601846568805e-05, "logits/chosen": -2.8150384426116943, "logits/rejected": -2.292719841003418, "logps/chosen": -219.89169311523438, "logps/rejected": -254.1065216064453, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.10407543182373047, "rewards/margins": 6.530852317810059, "rewards/rejected": -6.426776885986328, "step": 4968 }, { "epoch": 0.77, "learning_rate": 1.0502868406037657e-05, "logits/chosen": -2.430164337158203, "logits/rejected": -3.0372941493988037, "logps/chosen": -381.22100830078125, "logps/rejected": -362.1868591308594, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.1152244582772255, "rewards/margins": 5.01318883895874, "rewards/rejected": -5.128413200378418, "step": 4969 }, { "epoch": 0.77, "learning_rate": 1.0502134965506509e-05, "logits/chosen": -1.9411039352416992, "logits/rejected": -2.738413095474243, "logps/chosen": -254.4575653076172, "logps/rejected": -237.98629760742188, "loss": 1.8272, "rewards/accuracies": 0.5, "rewards/chosen": -3.2769217491149902, "rewards/margins": 1.4696600437164307, "rewards/rejected": -4.74658203125, "step": 4970 }, { "epoch": 0.77, "learning_rate": 1.0501401524975361e-05, "logits/chosen": -2.459803819656372, "logits/rejected": -2.901608943939209, "logps/chosen": -162.28436279296875, "logps/rejected": -228.0294952392578, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -1.5626118183135986, "rewards/margins": 4.441892623901367, "rewards/rejected": -6.004504203796387, "step": 4971 }, { "epoch": 0.77, "learning_rate": 1.0500668084444213e-05, "logits/chosen": -2.218618392944336, "logits/rejected": -2.860170602798462, "logps/chosen": -152.15252685546875, "logps/rejected": -373.7530212402344, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5011544227600098, "rewards/margins": 9.952977180480957, "rewards/rejected": -11.454132080078125, "step": 4972 }, { "epoch": 0.77, "learning_rate": 1.0499934643913066e-05, "logits/chosen": -1.9005718231201172, "logits/rejected": -2.6466240882873535, "logps/chosen": -72.46211242675781, "logps/rejected": -193.17709350585938, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -1.496763825416565, "rewards/margins": 4.521775722503662, "rewards/rejected": -6.0185394287109375, "step": 4973 }, { "epoch": 0.77, "learning_rate": 1.0499201203381918e-05, "logits/chosen": -2.825190782546997, "logits/rejected": -3.4386162757873535, "logps/chosen": -226.45004272460938, "logps/rejected": -383.45458984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3766807317733765, "rewards/margins": 6.809236526489258, "rewards/rejected": -8.185917854309082, "step": 4974 }, { "epoch": 0.77, "learning_rate": 1.0498467762850772e-05, "logits/chosen": -2.7943146228790283, "logits/rejected": -3.0317842960357666, "logps/chosen": -165.55819702148438, "logps/rejected": -241.7767333984375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.7113788723945618, "rewards/margins": 5.26114559173584, "rewards/rejected": -5.972524642944336, "step": 4975 }, { "epoch": 0.77, "learning_rate": 1.0497734322319624e-05, "logits/chosen": -2.7536940574645996, "logits/rejected": -1.8802274465560913, "logps/chosen": -205.48129272460938, "logps/rejected": -248.10267639160156, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5175613164901733, "rewards/margins": 6.332437515258789, "rewards/rejected": -6.849998474121094, "step": 4976 }, { "epoch": 0.77, "learning_rate": 1.0497000881788476e-05, "logits/chosen": -1.6783289909362793, "logits/rejected": -2.8753788471221924, "logps/chosen": -95.90753936767578, "logps/rejected": -413.35101318359375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.6527725458145142, "rewards/margins": 7.598414421081543, "rewards/rejected": -8.251187324523926, "step": 4977 }, { "epoch": 0.77, "learning_rate": 1.0496267441257327e-05, "logits/chosen": -2.2431156635284424, "logits/rejected": -2.671048879623413, "logps/chosen": -102.71495819091797, "logps/rejected": -304.7001953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.5290054082870483, "rewards/margins": 9.214174270629883, "rewards/rejected": -9.743179321289062, "step": 4978 }, { "epoch": 0.77, "learning_rate": 1.049553400072618e-05, "logits/chosen": -2.8310468196868896, "logits/rejected": -2.874950885772705, "logps/chosen": -264.81622314453125, "logps/rejected": -279.3257751464844, "loss": 2.2227, "rewards/accuracies": 0.5, "rewards/chosen": -3.203207492828369, "rewards/margins": 2.063032865524292, "rewards/rejected": -5.266240119934082, "step": 4979 }, { "epoch": 0.77, "learning_rate": 1.0494800560195031e-05, "logits/chosen": -2.231560468673706, "logits/rejected": -3.1569063663482666, "logps/chosen": -143.80026245117188, "logps/rejected": -347.15496826171875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.2233634889125824, "rewards/margins": 5.460427284240723, "rewards/rejected": -5.683790683746338, "step": 4980 }, { "epoch": 0.77, "learning_rate": 1.0494067119663883e-05, "logits/chosen": -2.1003012657165527, "logits/rejected": -2.778618097305298, "logps/chosen": -60.03074645996094, "logps/rejected": -209.75244140625, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -1.338057518005371, "rewards/margins": 5.377403259277344, "rewards/rejected": -6.715460777282715, "step": 4981 }, { "epoch": 0.77, "learning_rate": 1.0493333679132735e-05, "logits/chosen": -2.810455560684204, "logits/rejected": -2.2318224906921387, "logps/chosen": -660.8170776367188, "logps/rejected": -483.7362060546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6790871024131775, "rewards/margins": 9.52779483795166, "rewards/rejected": -10.20688247680664, "step": 4982 }, { "epoch": 0.77, "learning_rate": 1.0492600238601587e-05, "logits/chosen": -2.3062126636505127, "logits/rejected": -2.8747615814208984, "logps/chosen": -114.83586120605469, "logps/rejected": -186.3460235595703, "loss": 1.2917, "rewards/accuracies": 0.5, "rewards/chosen": -2.602184534072876, "rewards/margins": 3.823666572570801, "rewards/rejected": -6.425851345062256, "step": 4983 }, { "epoch": 0.78, "learning_rate": 1.049186679807044e-05, "logits/chosen": -2.858060359954834, "logits/rejected": -3.0493226051330566, "logps/chosen": -206.64132690429688, "logps/rejected": -174.51492309570312, "loss": 3.2519, "rewards/accuracies": 0.5, "rewards/chosen": -3.9734506607055664, "rewards/margins": -0.9292492866516113, "rewards/rejected": -3.044201374053955, "step": 4984 }, { "epoch": 0.78, "learning_rate": 1.0491133357539292e-05, "logits/chosen": -2.5533649921417236, "logits/rejected": -3.005985736846924, "logps/chosen": -100.77583312988281, "logps/rejected": -456.7760009765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.36988183856010437, "rewards/margins": 8.263968467712402, "rewards/rejected": -8.63385009765625, "step": 4985 }, { "epoch": 0.78, "learning_rate": 1.0490399917008144e-05, "logits/chosen": -2.5629334449768066, "logits/rejected": -3.239074468612671, "logps/chosen": -86.14089965820312, "logps/rejected": -406.76690673828125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5295456051826477, "rewards/margins": 6.460120677947998, "rewards/rejected": -6.98966646194458, "step": 4986 }, { "epoch": 0.78, "learning_rate": 1.0489666476476996e-05, "logits/chosen": -2.856285810470581, "logits/rejected": -2.667228937149048, "logps/chosen": -121.77035522460938, "logps/rejected": -174.86822509765625, "loss": 3.4194, "rewards/accuracies": 0.5, "rewards/chosen": -4.7646803855896, "rewards/margins": -1.3256700038909912, "rewards/rejected": -3.4390106201171875, "step": 4987 }, { "epoch": 0.78, "learning_rate": 1.0488933035945848e-05, "logits/chosen": -2.8409605026245117, "logits/rejected": -3.0833218097686768, "logps/chosen": -143.91085815429688, "logps/rejected": -306.29864501953125, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.6315262317657471, "rewards/margins": 4.406538963317871, "rewards/rejected": -5.038065433502197, "step": 4988 }, { "epoch": 0.78, "learning_rate": 1.04881995954147e-05, "logits/chosen": -2.997832775115967, "logits/rejected": -2.4809582233428955, "logps/chosen": -234.0032501220703, "logps/rejected": -277.3438720703125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.563483476638794, "rewards/margins": 4.722765922546387, "rewards/rejected": -5.28624963760376, "step": 4989 }, { "epoch": 0.78, "learning_rate": 1.0487466154883552e-05, "logits/chosen": -2.8220407962799072, "logits/rejected": -2.6787667274475098, "logps/chosen": -193.86203002929688, "logps/rejected": -184.40487670898438, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.4932358264923096, "rewards/margins": 6.4116411209106445, "rewards/rejected": -7.904876708984375, "step": 4990 }, { "epoch": 0.78, "learning_rate": 1.0486732714352404e-05, "logits/chosen": -2.9061150550842285, "logits/rejected": -2.5710110664367676, "logps/chosen": -295.654052734375, "logps/rejected": -161.4433135986328, "loss": 3.4435, "rewards/accuracies": 0.5, "rewards/chosen": -3.992568254470825, "rewards/margins": -1.2137908935546875, "rewards/rejected": -2.778777599334717, "step": 4991 }, { "epoch": 0.78, "learning_rate": 1.0485999273821255e-05, "logits/chosen": -2.9133808612823486, "logits/rejected": -2.9920873641967773, "logps/chosen": -144.92640686035156, "logps/rejected": -242.138671875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.2712254524230957, "rewards/margins": 5.357785224914551, "rewards/rejected": -6.629011154174805, "step": 4992 }, { "epoch": 0.78, "learning_rate": 1.0485265833290109e-05, "logits/chosen": -1.796809434890747, "logits/rejected": -2.571188449859619, "logps/chosen": -114.11715698242188, "logps/rejected": -237.37026977539062, "loss": 1.0612, "rewards/accuracies": 0.5, "rewards/chosen": -2.727715492248535, "rewards/margins": 0.8323116302490234, "rewards/rejected": -3.5600271224975586, "step": 4993 }, { "epoch": 0.78, "learning_rate": 1.0484532392758961e-05, "logits/chosen": -1.3722697496414185, "logits/rejected": -2.840148448944092, "logps/chosen": -61.50946807861328, "logps/rejected": -358.353271484375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.9676759243011475, "rewards/margins": 5.817135810852051, "rewards/rejected": -7.784811973571777, "step": 4994 }, { "epoch": 0.78, "learning_rate": 1.0483798952227813e-05, "logits/chosen": -2.0187082290649414, "logits/rejected": -2.7204980850219727, "logps/chosen": -166.89297485351562, "logps/rejected": -429.8115234375, "loss": 2.8032, "rewards/accuracies": 0.5, "rewards/chosen": -3.3643174171447754, "rewards/margins": -0.6752626895904541, "rewards/rejected": -2.6890547275543213, "step": 4995 }, { "epoch": 0.78, "learning_rate": 1.0483065511696665e-05, "logits/chosen": -2.640226125717163, "logits/rejected": -2.6760315895080566, "logps/chosen": -511.9011535644531, "logps/rejected": -252.68643188476562, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.7128921747207642, "rewards/margins": 4.330288887023926, "rewards/rejected": -5.0431809425354, "step": 4996 }, { "epoch": 0.78, "learning_rate": 1.0482332071165517e-05, "logits/chosen": -2.542546272277832, "logits/rejected": -3.2902684211730957, "logps/chosen": -49.104026794433594, "logps/rejected": -393.2518615722656, "loss": 0.6871, "rewards/accuracies": 0.5, "rewards/chosen": -1.7689728736877441, "rewards/margins": 0.3911193013191223, "rewards/rejected": -2.1600921154022217, "step": 4997 }, { "epoch": 0.78, "learning_rate": 1.0481598630634368e-05, "logits/chosen": -2.8760013580322266, "logits/rejected": -2.651500701904297, "logps/chosen": -130.23272705078125, "logps/rejected": -155.57064819335938, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": -0.4382225275039673, "rewards/margins": 3.2631571292877197, "rewards/rejected": -3.7013797760009766, "step": 4998 }, { "epoch": 0.78, "learning_rate": 1.048086519010322e-05, "logits/chosen": -3.0277600288391113, "logits/rejected": -2.538193941116333, "logps/chosen": -227.34576416015625, "logps/rejected": -295.5308837890625, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -2.5592098236083984, "rewards/margins": 3.9855904579162598, "rewards/rejected": -6.5447998046875, "step": 4999 }, { "epoch": 0.78, "learning_rate": 1.0480131749572072e-05, "logits/chosen": -1.5686097145080566, "logits/rejected": -3.005248546600342, "logps/chosen": -236.7603302001953, "logps/rejected": -445.78887939453125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.30432817339897156, "rewards/margins": 6.261289119720459, "rewards/rejected": -5.956961154937744, "step": 5000 }, { "epoch": 0.78, "learning_rate": 1.0479398309040924e-05, "logits/chosen": -2.8561108112335205, "logits/rejected": -3.06807541847229, "logps/chosen": -309.7750244140625, "logps/rejected": -232.00009155273438, "loss": 3.7591, "rewards/accuracies": 0.5, "rewards/chosen": -4.046746730804443, "rewards/margins": -0.17029404640197754, "rewards/rejected": -3.876452684402466, "step": 5001 }, { "epoch": 0.78, "learning_rate": 1.0478664868509778e-05, "logits/chosen": -2.9284138679504395, "logits/rejected": -2.508347749710083, "logps/chosen": -145.6199493408203, "logps/rejected": -223.0767364501953, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.3897891938686371, "rewards/margins": 7.937771797180176, "rewards/rejected": -8.327560424804688, "step": 5002 }, { "epoch": 0.78, "learning_rate": 1.047793142797863e-05, "logits/chosen": -1.6268084049224854, "logits/rejected": -2.639014482498169, "logps/chosen": -66.2666244506836, "logps/rejected": -180.59657287597656, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.8364676237106323, "rewards/margins": 5.116436958312988, "rewards/rejected": -6.952904224395752, "step": 5003 }, { "epoch": 0.78, "learning_rate": 1.0477197987447481e-05, "logits/chosen": -2.2611284255981445, "logits/rejected": -2.8958516120910645, "logps/chosen": -314.3775634765625, "logps/rejected": -290.8726806640625, "loss": 4.8355, "rewards/accuracies": 0.5, "rewards/chosen": -5.652593612670898, "rewards/margins": -1.794917345046997, "rewards/rejected": -3.8576767444610596, "step": 5004 }, { "epoch": 0.78, "learning_rate": 1.0476464546916333e-05, "logits/chosen": -3.011598825454712, "logits/rejected": -1.628035545349121, "logps/chosen": -290.0805358886719, "logps/rejected": -121.98011779785156, "loss": 0.3575, "rewards/accuracies": 0.5, "rewards/chosen": -1.8141074180603027, "rewards/margins": 3.222167730331421, "rewards/rejected": -5.036275386810303, "step": 5005 }, { "epoch": 0.78, "learning_rate": 1.0475731106385185e-05, "logits/chosen": -1.6635791063308716, "logits/rejected": -3.050152540206909, "logps/chosen": -111.75284576416016, "logps/rejected": -442.97314453125, "loss": 3.8947, "rewards/accuracies": 0.5, "rewards/chosen": -4.086717128753662, "rewards/margins": -2.074470281600952, "rewards/rejected": -2.01224684715271, "step": 5006 }, { "epoch": 0.78, "learning_rate": 1.0474997665854039e-05, "logits/chosen": -1.3992516994476318, "logits/rejected": -2.971327066421509, "logps/chosen": -126.67169952392578, "logps/rejected": -486.3930969238281, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9168258905410767, "rewards/margins": 7.281129360198975, "rewards/rejected": -9.197955131530762, "step": 5007 }, { "epoch": 0.78, "learning_rate": 1.047426422532289e-05, "logits/chosen": -1.7580060958862305, "logits/rejected": -2.561143636703491, "logps/chosen": -102.46037292480469, "logps/rejected": -417.0404052734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0105507373809814, "rewards/margins": 8.195745468139648, "rewards/rejected": -9.20629596710205, "step": 5008 }, { "epoch": 0.78, "learning_rate": 1.0473530784791742e-05, "logits/chosen": -1.8030673265457153, "logits/rejected": -2.7749202251434326, "logps/chosen": -118.92628479003906, "logps/rejected": -313.16058349609375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.9688589572906494, "rewards/margins": 4.527635097503662, "rewards/rejected": -5.496494293212891, "step": 5009 }, { "epoch": 0.78, "learning_rate": 1.0472797344260594e-05, "logits/chosen": -1.7931398153305054, "logits/rejected": -3.1144707202911377, "logps/chosen": -160.9743194580078, "logps/rejected": -515.5336303710938, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.49663087725639343, "rewards/margins": 6.521815299987793, "rewards/rejected": -7.018446445465088, "step": 5010 }, { "epoch": 0.78, "learning_rate": 1.0472063903729448e-05, "logits/chosen": -1.8391144275665283, "logits/rejected": -2.780423164367676, "logps/chosen": -65.35102844238281, "logps/rejected": -239.37452697753906, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.20190143585205078, "rewards/margins": 4.748208999633789, "rewards/rejected": -4.546307563781738, "step": 5011 }, { "epoch": 0.78, "learning_rate": 1.04713304631983e-05, "logits/chosen": -1.6197501420974731, "logits/rejected": -2.8222250938415527, "logps/chosen": -702.616455078125, "logps/rejected": -688.185546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9661988019943237, "rewards/margins": 8.443172454833984, "rewards/rejected": -9.409372329711914, "step": 5012 }, { "epoch": 0.78, "learning_rate": 1.0470597022667152e-05, "logits/chosen": -1.996065616607666, "logits/rejected": -3.063401699066162, "logps/chosen": -116.90786743164062, "logps/rejected": -320.5372314453125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.043389901518821716, "rewards/margins": 5.618788719177246, "rewards/rejected": -5.662178993225098, "step": 5013 }, { "epoch": 0.78, "learning_rate": 1.0469863582136004e-05, "logits/chosen": -2.8385283946990967, "logits/rejected": -3.322397232055664, "logps/chosen": -122.54315948486328, "logps/rejected": -342.326416015625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.552682101726532, "rewards/margins": 5.440212249755859, "rewards/rejected": -5.992894649505615, "step": 5014 }, { "epoch": 0.78, "learning_rate": 1.0469130141604855e-05, "logits/chosen": -2.9901795387268066, "logits/rejected": -3.0498173236846924, "logps/chosen": -111.39900207519531, "logps/rejected": -177.2592315673828, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -1.4181468486785889, "rewards/margins": 3.819074869155884, "rewards/rejected": -5.237221717834473, "step": 5015 }, { "epoch": 0.78, "learning_rate": 1.0468396701073707e-05, "logits/chosen": -2.900003433227539, "logits/rejected": -2.5777134895324707, "logps/chosen": -480.8193664550781, "logps/rejected": -372.1892395019531, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -0.9032641649246216, "rewards/margins": 5.700229644775391, "rewards/rejected": -6.603493690490723, "step": 5016 }, { "epoch": 0.78, "learning_rate": 1.046766326054256e-05, "logits/chosen": -1.9876397848129272, "logits/rejected": -2.965683937072754, "logps/chosen": -61.25479507446289, "logps/rejected": -264.9866943359375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.7316735982894897, "rewards/margins": 4.531010627746582, "rewards/rejected": -5.262683868408203, "step": 5017 }, { "epoch": 0.78, "learning_rate": 1.0466929820011411e-05, "logits/chosen": -3.209613800048828, "logits/rejected": -2.845339059829712, "logps/chosen": -296.7369384765625, "logps/rejected": -186.3291015625, "loss": 4.648, "rewards/accuracies": 0.0, "rewards/chosen": -5.295389175415039, "rewards/margins": -4.637612819671631, "rewards/rejected": -0.65777587890625, "step": 5018 }, { "epoch": 0.78, "learning_rate": 1.0466196379480265e-05, "logits/chosen": -1.7932307720184326, "logits/rejected": -2.5248756408691406, "logps/chosen": -172.8396759033203, "logps/rejected": -385.1707458496094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5767738223075867, "rewards/margins": 6.5705718994140625, "rewards/rejected": -7.147345542907715, "step": 5019 }, { "epoch": 0.78, "learning_rate": 1.0465462938949117e-05, "logits/chosen": -3.00766921043396, "logits/rejected": -2.897514581680298, "logps/chosen": -215.16867065429688, "logps/rejected": -134.1209716796875, "loss": 1.9836, "rewards/accuracies": 0.5, "rewards/chosen": -2.5511314868927, "rewards/margins": 1.050654411315918, "rewards/rejected": -3.601785898208618, "step": 5020 }, { "epoch": 0.78, "learning_rate": 1.0464729498417968e-05, "logits/chosen": -2.0404772758483887, "logits/rejected": -3.0825655460357666, "logps/chosen": -71.28327941894531, "logps/rejected": -372.3501892089844, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.0703847408294678, "rewards/margins": 5.612981796264648, "rewards/rejected": -6.683366775512695, "step": 5021 }, { "epoch": 0.78, "learning_rate": 1.046399605788682e-05, "logits/chosen": -2.5918614864349365, "logits/rejected": -2.791055679321289, "logps/chosen": -37.325355529785156, "logps/rejected": -132.70330810546875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.1382269859313965, "rewards/margins": 4.986971855163574, "rewards/rejected": -6.125199317932129, "step": 5022 }, { "epoch": 0.78, "learning_rate": 1.0463262617355672e-05, "logits/chosen": -2.3842952251434326, "logits/rejected": -2.286538600921631, "logps/chosen": -253.7444610595703, "logps/rejected": -243.607666015625, "loss": 3.5388, "rewards/accuracies": 0.5, "rewards/chosen": -3.61175537109375, "rewards/margins": -0.43148350715637207, "rewards/rejected": -3.180271863937378, "step": 5023 }, { "epoch": 0.78, "learning_rate": 1.0462529176824524e-05, "logits/chosen": -1.9334371089935303, "logits/rejected": -2.9795889854431152, "logps/chosen": -345.68939208984375, "logps/rejected": -504.8932189941406, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.2502487301826477, "rewards/margins": 5.944520950317383, "rewards/rejected": -6.194769382476807, "step": 5024 }, { "epoch": 0.78, "learning_rate": 1.0461795736293376e-05, "logits/chosen": -3.2911994457244873, "logits/rejected": -3.2710888385772705, "logps/chosen": -209.10641479492188, "logps/rejected": -173.52992248535156, "loss": 2.2791, "rewards/accuracies": 0.5, "rewards/chosen": -3.874875783920288, "rewards/margins": -1.2478655576705933, "rewards/rejected": -2.6270103454589844, "step": 5025 }, { "epoch": 0.78, "learning_rate": 1.0461062295762228e-05, "logits/chosen": -2.554809808731079, "logits/rejected": -2.910968065261841, "logps/chosen": -153.45327758789062, "logps/rejected": -216.73861694335938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6415199637413025, "rewards/margins": 6.51127815246582, "rewards/rejected": -7.152797698974609, "step": 5026 }, { "epoch": 0.78, "learning_rate": 1.046032885523108e-05, "logits/chosen": -3.0421156883239746, "logits/rejected": -2.3404476642608643, "logps/chosen": -198.48928833007812, "logps/rejected": -33.72412872314453, "loss": 3.3728, "rewards/accuracies": 0.0, "rewards/chosen": -5.169012546539307, "rewards/margins": -3.319538116455078, "rewards/rejected": -1.8494744300842285, "step": 5027 }, { "epoch": 0.78, "learning_rate": 1.0459595414699933e-05, "logits/chosen": -3.12473726272583, "logits/rejected": -3.3452718257904053, "logps/chosen": -162.07553100585938, "logps/rejected": -291.7971496582031, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.34475937485694885, "rewards/margins": 4.717598915100098, "rewards/rejected": -5.0623579025268555, "step": 5028 }, { "epoch": 0.78, "learning_rate": 1.0458861974168785e-05, "logits/chosen": -2.335481643676758, "logits/rejected": -2.844510078430176, "logps/chosen": -130.37689208984375, "logps/rejected": -209.6023712158203, "loss": 2.0533, "rewards/accuracies": 0.5, "rewards/chosen": -2.4554152488708496, "rewards/margins": -1.6196240186691284, "rewards/rejected": -0.8357914090156555, "step": 5029 }, { "epoch": 0.78, "learning_rate": 1.0458128533637637e-05, "logits/chosen": -1.5561593770980835, "logits/rejected": -2.4394278526306152, "logps/chosen": -206.33335876464844, "logps/rejected": -289.9417419433594, "loss": 1.9335, "rewards/accuracies": 0.5, "rewards/chosen": -1.9940766096115112, "rewards/margins": 2.7694032192230225, "rewards/rejected": -4.763479709625244, "step": 5030 }, { "epoch": 0.78, "learning_rate": 1.0457395093106489e-05, "logits/chosen": -2.785890579223633, "logits/rejected": -2.8015055656433105, "logps/chosen": -98.2754135131836, "logps/rejected": -215.88800048828125, "loss": 0.1704, "rewards/accuracies": 1.0, "rewards/chosen": -1.0008665323257446, "rewards/margins": 3.2620651721954346, "rewards/rejected": -4.262931823730469, "step": 5031 }, { "epoch": 0.78, "learning_rate": 1.045666165257534e-05, "logits/chosen": -2.0786690711975098, "logits/rejected": -3.2058534622192383, "logps/chosen": -213.4061737060547, "logps/rejected": -226.73544311523438, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": -0.6362345218658447, "rewards/margins": 4.552576065063477, "rewards/rejected": -5.1888108253479, "step": 5032 }, { "epoch": 0.78, "learning_rate": 1.0455928212044193e-05, "logits/chosen": -3.006258964538574, "logits/rejected": -3.0089633464813232, "logps/chosen": -211.4372100830078, "logps/rejected": -246.93478393554688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6478937268257141, "rewards/margins": 6.461111545562744, "rewards/rejected": -7.109004974365234, "step": 5033 }, { "epoch": 0.78, "learning_rate": 1.0455194771513045e-05, "logits/chosen": -1.5359734296798706, "logits/rejected": -2.6292264461517334, "logps/chosen": -86.7381591796875, "logps/rejected": -305.692626953125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.8063358068466187, "rewards/margins": 5.118752479553223, "rewards/rejected": -5.925088405609131, "step": 5034 }, { "epoch": 0.78, "learning_rate": 1.0454461330981896e-05, "logits/chosen": -3.0651540756225586, "logits/rejected": -2.795889377593994, "logps/chosen": -342.3954162597656, "logps/rejected": -120.33018493652344, "loss": 2.5909, "rewards/accuracies": 0.5, "rewards/chosen": -2.6437981128692627, "rewards/margins": -1.302114486694336, "rewards/rejected": -1.3416836261749268, "step": 5035 }, { "epoch": 0.78, "learning_rate": 1.0453727890450748e-05, "logits/chosen": -2.089054584503174, "logits/rejected": -2.849853515625, "logps/chosen": -111.53937530517578, "logps/rejected": -266.902587890625, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.7442855834960938, "rewards/margins": 5.782345294952393, "rewards/rejected": -6.5266313552856445, "step": 5036 }, { "epoch": 0.78, "learning_rate": 1.0452994449919602e-05, "logits/chosen": -2.679730176925659, "logits/rejected": -3.3076329231262207, "logps/chosen": -671.6680297851562, "logps/rejected": -651.2049560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.052032470703125, "rewards/margins": 9.544479370117188, "rewards/rejected": -9.492446899414062, "step": 5037 }, { "epoch": 0.78, "learning_rate": 1.0452261009388454e-05, "logits/chosen": -2.7400193214416504, "logits/rejected": -2.9697353839874268, "logps/chosen": -139.1236572265625, "logps/rejected": -404.708740234375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": 0.4929310083389282, "rewards/margins": 6.232934951782227, "rewards/rejected": -5.740004062652588, "step": 5038 }, { "epoch": 0.78, "learning_rate": 1.0451527568857306e-05, "logits/chosen": -1.4222174882888794, "logits/rejected": -2.789477825164795, "logps/chosen": -139.1902313232422, "logps/rejected": -372.8231506347656, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.5251230001449585, "rewards/margins": 5.758042812347412, "rewards/rejected": -6.28316593170166, "step": 5039 }, { "epoch": 0.78, "learning_rate": 1.0450794128326157e-05, "logits/chosen": -2.2975354194641113, "logits/rejected": -2.7504308223724365, "logps/chosen": -165.83163452148438, "logps/rejected": -273.2633972167969, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.11986884474754333, "rewards/margins": 5.8950042724609375, "rewards/rejected": -5.775135040283203, "step": 5040 }, { "epoch": 0.78, "learning_rate": 1.0450060687795011e-05, "logits/chosen": -2.7821483612060547, "logits/rejected": -2.898777961730957, "logps/chosen": -36.199920654296875, "logps/rejected": -157.359619140625, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -0.7148216366767883, "rewards/margins": 3.9895005226135254, "rewards/rejected": -4.70432186126709, "step": 5041 }, { "epoch": 0.78, "learning_rate": 1.0449327247263863e-05, "logits/chosen": -2.924969434738159, "logits/rejected": -2.8461451530456543, "logps/chosen": -615.9609375, "logps/rejected": -397.5198669433594, "loss": 2.2577, "rewards/accuracies": 0.5, "rewards/chosen": -3.218859910964966, "rewards/margins": -0.9355819225311279, "rewards/rejected": -2.283277750015259, "step": 5042 }, { "epoch": 0.78, "learning_rate": 1.0448593806732715e-05, "logits/chosen": -0.4285956919193268, "logits/rejected": -2.851257085800171, "logps/chosen": -80.043212890625, "logps/rejected": -426.34051513671875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.23165342211723328, "rewards/margins": 5.994838237762451, "rewards/rejected": -6.226491928100586, "step": 5043 }, { "epoch": 0.78, "learning_rate": 1.0447860366201567e-05, "logits/chosen": -1.9620474576950073, "logits/rejected": -2.7534995079040527, "logps/chosen": -146.94097900390625, "logps/rejected": -470.4815368652344, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.28910523653030396, "rewards/margins": 6.301985740661621, "rewards/rejected": -6.591091156005859, "step": 5044 }, { "epoch": 0.78, "learning_rate": 1.0447126925670419e-05, "logits/chosen": -2.642242908477783, "logits/rejected": -2.040907859802246, "logps/chosen": -401.283203125, "logps/rejected": -319.14837646484375, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.4072418212890625, "rewards/margins": 4.573489189147949, "rewards/rejected": -4.980731010437012, "step": 5045 }, { "epoch": 0.78, "learning_rate": 1.0446393485139272e-05, "logits/chosen": -2.1304545402526855, "logits/rejected": -2.40645432472229, "logps/chosen": -40.39228439331055, "logps/rejected": -130.99600219726562, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -0.15623456239700317, "rewards/margins": 3.645688056945801, "rewards/rejected": -3.8019227981567383, "step": 5046 }, { "epoch": 0.78, "learning_rate": 1.0445660044608124e-05, "logits/chosen": -2.6389427185058594, "logits/rejected": -2.7281947135925293, "logps/chosen": -146.31179809570312, "logps/rejected": -93.1084976196289, "loss": 0.2008, "rewards/accuracies": 1.0, "rewards/chosen": -1.4067779779434204, "rewards/margins": 1.7340726852416992, "rewards/rejected": -3.14085054397583, "step": 5047 }, { "epoch": 0.79, "learning_rate": 1.0444926604076976e-05, "logits/chosen": -2.8045246601104736, "logits/rejected": -2.9802870750427246, "logps/chosen": -101.15948486328125, "logps/rejected": -118.26010131835938, "loss": 2.9474, "rewards/accuracies": 0.5, "rewards/chosen": -3.632155418395996, "rewards/margins": 0.5336439609527588, "rewards/rejected": -4.165799140930176, "step": 5048 }, { "epoch": 0.79, "learning_rate": 1.0444193163545828e-05, "logits/chosen": -1.9580219984054565, "logits/rejected": -3.0611445903778076, "logps/chosen": -87.72743225097656, "logps/rejected": -338.6813049316406, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.1966512203216553, "rewards/margins": 5.21657657623291, "rewards/rejected": -6.4132280349731445, "step": 5049 }, { "epoch": 0.79, "learning_rate": 1.044345972301468e-05, "logits/chosen": -2.2438504695892334, "logits/rejected": -3.1633338928222656, "logps/chosen": -31.532146453857422, "logps/rejected": -181.38446044921875, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -0.20350289344787598, "rewards/margins": 4.220750331878662, "rewards/rejected": -4.424252986907959, "step": 5050 }, { "epoch": 0.79, "learning_rate": 1.0442726282483532e-05, "logits/chosen": -3.0464847087860107, "logits/rejected": -2.4182400703430176, "logps/chosen": -316.3277893066406, "logps/rejected": -311.24273681640625, "loss": 6.649, "rewards/accuracies": 0.0, "rewards/chosen": -6.964064121246338, "rewards/margins": -6.645112991333008, "rewards/rejected": -0.3189513087272644, "step": 5051 }, { "epoch": 0.79, "learning_rate": 1.0441992841952383e-05, "logits/chosen": -2.8601551055908203, "logits/rejected": -2.9985430240631104, "logps/chosen": -70.44973754882812, "logps/rejected": -235.25418090820312, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.4840127229690552, "rewards/margins": 5.51102876663208, "rewards/rejected": -5.995041370391846, "step": 5052 }, { "epoch": 0.79, "learning_rate": 1.0441259401421235e-05, "logits/chosen": -2.109853982925415, "logits/rejected": -2.7789759635925293, "logps/chosen": -161.3695831298828, "logps/rejected": -327.7020263671875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.13830547034740448, "rewards/margins": 4.276472091674805, "rewards/rejected": -4.4147772789001465, "step": 5053 }, { "epoch": 0.79, "learning_rate": 1.0440525960890087e-05, "logits/chosen": -2.9622020721435547, "logits/rejected": -3.1397764682769775, "logps/chosen": -528.4957275390625, "logps/rejected": -453.27874755859375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7665483355522156, "rewards/margins": 6.034848690032959, "rewards/rejected": -6.801397323608398, "step": 5054 }, { "epoch": 0.79, "learning_rate": 1.043979252035894e-05, "logits/chosen": -2.765687942504883, "logits/rejected": -1.1417516469955444, "logps/chosen": -310.8870849609375, "logps/rejected": -148.47225952148438, "loss": 0.4923, "rewards/accuracies": 0.5, "rewards/chosen": -2.155010223388672, "rewards/margins": 0.6588510870933533, "rewards/rejected": -2.81386137008667, "step": 5055 }, { "epoch": 0.79, "learning_rate": 1.0439059079827793e-05, "logits/chosen": -1.9258092641830444, "logits/rejected": -2.5723485946655273, "logps/chosen": -131.09237670898438, "logps/rejected": -300.7449951171875, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -1.159708857536316, "rewards/margins": 5.721999168395996, "rewards/rejected": -6.881708145141602, "step": 5056 }, { "epoch": 0.79, "learning_rate": 1.0438325639296645e-05, "logits/chosen": -1.9661448001861572, "logits/rejected": -2.9038290977478027, "logps/chosen": -134.93118286132812, "logps/rejected": -265.6524658203125, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": 0.3042541742324829, "rewards/margins": 4.0468645095825195, "rewards/rejected": -3.742609977722168, "step": 5057 }, { "epoch": 0.79, "learning_rate": 1.0437592198765496e-05, "logits/chosen": -2.1279537677764893, "logits/rejected": -2.825676202774048, "logps/chosen": -103.11371612548828, "logps/rejected": -151.76930236816406, "loss": 1.767, "rewards/accuracies": 0.5, "rewards/chosen": -1.9982246160507202, "rewards/margins": 1.418102741241455, "rewards/rejected": -3.416327476501465, "step": 5058 }, { "epoch": 0.79, "learning_rate": 1.0436858758234348e-05, "logits/chosen": -2.711840867996216, "logits/rejected": -2.9349560737609863, "logps/chosen": -46.19706344604492, "logps/rejected": -194.113525390625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.1437559723854065, "rewards/margins": 6.09511661529541, "rewards/rejected": -6.238872528076172, "step": 5059 }, { "epoch": 0.79, "learning_rate": 1.04361253177032e-05, "logits/chosen": -2.8350813388824463, "logits/rejected": -2.952443838119507, "logps/chosen": -206.05264282226562, "logps/rejected": -197.09019470214844, "loss": 3.7399, "rewards/accuracies": 0.5, "rewards/chosen": -4.547093391418457, "rewards/margins": -0.8171865940093994, "rewards/rejected": -3.7299070358276367, "step": 5060 }, { "epoch": 0.79, "learning_rate": 1.0435391877172052e-05, "logits/chosen": -2.7209596633911133, "logits/rejected": -2.8127424716949463, "logps/chosen": -106.73503875732422, "logps/rejected": -206.20062255859375, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -0.10344313085079193, "rewards/margins": 4.445112705230713, "rewards/rejected": -4.548555850982666, "step": 5061 }, { "epoch": 0.79, "learning_rate": 1.0434658436640904e-05, "logits/chosen": -0.2719419002532959, "logits/rejected": -2.625089168548584, "logps/chosen": -60.393470764160156, "logps/rejected": -225.7015380859375, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.0957483053207397, "rewards/margins": 5.040175914764404, "rewards/rejected": -6.135924339294434, "step": 5062 }, { "epoch": 0.79, "learning_rate": 1.0433924996109756e-05, "logits/chosen": -2.409278392791748, "logits/rejected": -3.127453088760376, "logps/chosen": -75.6750259399414, "logps/rejected": -210.93299865722656, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -0.6452288627624512, "rewards/margins": 4.294973850250244, "rewards/rejected": -4.940202713012695, "step": 5063 }, { "epoch": 0.79, "learning_rate": 1.043319155557861e-05, "logits/chosen": -0.44152677059173584, "logits/rejected": -1.0737758874893188, "logps/chosen": -113.33531188964844, "logps/rejected": -286.33197021484375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.7365154027938843, "rewards/margins": 5.227449417114258, "rewards/rejected": -5.963964939117432, "step": 5064 }, { "epoch": 0.79, "learning_rate": 1.0432458115047461e-05, "logits/chosen": -2.6713736057281494, "logits/rejected": -2.538111925125122, "logps/chosen": -227.66912841796875, "logps/rejected": -229.7831268310547, "loss": 1.3137, "rewards/accuracies": 0.5, "rewards/chosen": -1.327667236328125, "rewards/margins": 1.7770622968673706, "rewards/rejected": -3.104729413986206, "step": 5065 }, { "epoch": 0.79, "learning_rate": 1.0431724674516313e-05, "logits/chosen": -2.367893695831299, "logits/rejected": -2.7897214889526367, "logps/chosen": -65.19143676757812, "logps/rejected": -277.4990234375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.2157286405563354, "rewards/margins": 5.575382232666016, "rewards/rejected": -6.791110992431641, "step": 5066 }, { "epoch": 0.79, "learning_rate": 1.0430991233985165e-05, "logits/chosen": -2.4539542198181152, "logits/rejected": -2.930082321166992, "logps/chosen": -366.67742919921875, "logps/rejected": -376.479736328125, "loss": 2.783, "rewards/accuracies": 0.5, "rewards/chosen": -3.266563653945923, "rewards/margins": -0.6027071475982666, "rewards/rejected": -2.6638565063476562, "step": 5067 }, { "epoch": 0.79, "learning_rate": 1.0430257793454017e-05, "logits/chosen": -1.0593668222427368, "logits/rejected": -2.5330045223236084, "logps/chosen": -219.62953186035156, "logps/rejected": -361.89434814453125, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.009694285690784454, "rewards/margins": 4.5777482986450195, "rewards/rejected": -4.587442874908447, "step": 5068 }, { "epoch": 0.79, "learning_rate": 1.0429524352922869e-05, "logits/chosen": -2.8555376529693604, "logits/rejected": -1.886038064956665, "logps/chosen": -257.24810791015625, "logps/rejected": -101.47120666503906, "loss": 2.9872, "rewards/accuracies": 0.5, "rewards/chosen": -2.763245105743408, "rewards/margins": -1.5963784456253052, "rewards/rejected": -1.1668667793273926, "step": 5069 }, { "epoch": 0.79, "learning_rate": 1.042879091239172e-05, "logits/chosen": -2.7763779163360596, "logits/rejected": -3.136535882949829, "logps/chosen": -168.75555419921875, "logps/rejected": -286.1910095214844, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -0.026729583740234375, "rewards/margins": 5.621939182281494, "rewards/rejected": -5.6486687660217285, "step": 5070 }, { "epoch": 0.79, "learning_rate": 1.0428057471860572e-05, "logits/chosen": -2.668408155441284, "logits/rejected": -2.74117374420166, "logps/chosen": -155.22557067871094, "logps/rejected": -168.3833465576172, "loss": 2.0363, "rewards/accuracies": 0.5, "rewards/chosen": -1.9070101976394653, "rewards/margins": 0.01891160011291504, "rewards/rejected": -1.9259216785430908, "step": 5071 }, { "epoch": 0.79, "learning_rate": 1.0427324031329424e-05, "logits/chosen": -1.4656161069869995, "logits/rejected": -2.7118752002716064, "logps/chosen": -67.82437133789062, "logps/rejected": -225.52389526367188, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": 0.28309622406959534, "rewards/margins": 5.447195529937744, "rewards/rejected": -5.164099216461182, "step": 5072 }, { "epoch": 0.79, "learning_rate": 1.0426590590798278e-05, "logits/chosen": -2.283618211746216, "logits/rejected": -3.049800157546997, "logps/chosen": -60.87206268310547, "logps/rejected": -245.29623413085938, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 0.024297526106238365, "rewards/margins": 5.4856767654418945, "rewards/rejected": -5.461379051208496, "step": 5073 }, { "epoch": 0.79, "learning_rate": 1.042585715026713e-05, "logits/chosen": -2.411736488342285, "logits/rejected": -2.6752569675445557, "logps/chosen": -142.58815002441406, "logps/rejected": -218.15939331054688, "loss": 0.0687, "rewards/accuracies": 1.0, "rewards/chosen": -0.34095364809036255, "rewards/margins": 4.027541160583496, "rewards/rejected": -4.368494987487793, "step": 5074 }, { "epoch": 0.79, "learning_rate": 1.0425123709735983e-05, "logits/chosen": -2.921327829360962, "logits/rejected": -2.940464973449707, "logps/chosen": -26.804765701293945, "logps/rejected": -162.64089965820312, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.3422524333000183, "rewards/margins": 5.659067153930664, "rewards/rejected": -5.31681489944458, "step": 5075 }, { "epoch": 0.79, "learning_rate": 1.0424390269204835e-05, "logits/chosen": -2.4511637687683105, "logits/rejected": -2.9795520305633545, "logps/chosen": -43.138816833496094, "logps/rejected": -148.86404418945312, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -0.6500623822212219, "rewards/margins": 4.411202430725098, "rewards/rejected": -5.061264514923096, "step": 5076 }, { "epoch": 0.79, "learning_rate": 1.0423656828673687e-05, "logits/chosen": -2.6233668327331543, "logits/rejected": -2.9033420085906982, "logps/chosen": -108.82247161865234, "logps/rejected": -309.2364196777344, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.9077122211456299, "rewards/margins": 5.433170318603516, "rewards/rejected": -6.340882301330566, "step": 5077 }, { "epoch": 0.79, "learning_rate": 1.0422923388142539e-05, "logits/chosen": -2.597961664199829, "logits/rejected": -2.7946407794952393, "logps/chosen": -141.993408203125, "logps/rejected": -108.8006362915039, "loss": 2.39, "rewards/accuracies": 0.5, "rewards/chosen": -3.863865375518799, "rewards/margins": -0.307361364364624, "rewards/rejected": -3.556504011154175, "step": 5078 }, { "epoch": 0.79, "learning_rate": 1.0422189947611391e-05, "logits/chosen": -2.1992275714874268, "logits/rejected": -3.252589225769043, "logps/chosen": -329.3723449707031, "logps/rejected": -576.5222778320312, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.4850030839443207, "rewards/margins": 6.912753105163574, "rewards/rejected": -6.4277496337890625, "step": 5079 }, { "epoch": 0.79, "learning_rate": 1.0421456507080243e-05, "logits/chosen": -2.0520474910736084, "logits/rejected": -2.537619113922119, "logps/chosen": -127.13677215576172, "logps/rejected": -154.7450714111328, "loss": 1.6672, "rewards/accuracies": 0.5, "rewards/chosen": -2.522087335586548, "rewards/margins": 1.4154281616210938, "rewards/rejected": -3.9375154972076416, "step": 5080 }, { "epoch": 0.79, "learning_rate": 1.0420723066549095e-05, "logits/chosen": -2.753774404525757, "logits/rejected": -3.117764711380005, "logps/chosen": -328.16448974609375, "logps/rejected": -350.5016174316406, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.2986656129360199, "rewards/margins": 3.9321320056915283, "rewards/rejected": -4.23079776763916, "step": 5081 }, { "epoch": 0.79, "learning_rate": 1.0419989626017948e-05, "logits/chosen": -2.906242847442627, "logits/rejected": -2.03930401802063, "logps/chosen": -835.513671875, "logps/rejected": -470.4547424316406, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.2225532829761505, "rewards/margins": 6.814814567565918, "rewards/rejected": -6.59226131439209, "step": 5082 }, { "epoch": 0.79, "learning_rate": 1.04192561854868e-05, "logits/chosen": -1.2111836671829224, "logits/rejected": -2.81243634223938, "logps/chosen": -122.4538803100586, "logps/rejected": -504.34564208984375, "loss": 0.1995, "rewards/accuracies": 1.0, "rewards/chosen": -1.2117972373962402, "rewards/margins": 2.710618019104004, "rewards/rejected": -3.922415256500244, "step": 5083 }, { "epoch": 0.79, "learning_rate": 1.0418522744955652e-05, "logits/chosen": -2.8831677436828613, "logits/rejected": -3.0486180782318115, "logps/chosen": -230.52523803710938, "logps/rejected": -190.5750274658203, "loss": 4.0064, "rewards/accuracies": 0.5, "rewards/chosen": -4.19963264465332, "rewards/margins": -1.7696211338043213, "rewards/rejected": -2.430011034011841, "step": 5084 }, { "epoch": 0.79, "learning_rate": 1.0417789304424504e-05, "logits/chosen": -1.6792856454849243, "logits/rejected": -2.7714638710021973, "logps/chosen": -93.86140441894531, "logps/rejected": -218.99484252929688, "loss": 0.0879, "rewards/accuracies": 1.0, "rewards/chosen": -1.244175910949707, "rewards/margins": 2.4105823040008545, "rewards/rejected": -3.6547579765319824, "step": 5085 }, { "epoch": 0.79, "learning_rate": 1.0417055863893356e-05, "logits/chosen": -2.8176779747009277, "logits/rejected": -2.532890558242798, "logps/chosen": -101.55815124511719, "logps/rejected": -203.05551147460938, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.14549733698368073, "rewards/margins": 6.83161735534668, "rewards/rejected": -6.977114677429199, "step": 5086 }, { "epoch": 0.79, "learning_rate": 1.0416322423362208e-05, "logits/chosen": -3.078327178955078, "logits/rejected": -2.591143846511841, "logps/chosen": -136.93527221679688, "logps/rejected": -157.80010986328125, "loss": 1.8014, "rewards/accuracies": 0.5, "rewards/chosen": -2.3696489334106445, "rewards/margins": 1.0624563694000244, "rewards/rejected": -3.432105302810669, "step": 5087 }, { "epoch": 0.79, "learning_rate": 1.041558898283106e-05, "logits/chosen": -1.7675273418426514, "logits/rejected": -3.0091230869293213, "logps/chosen": -19.950115203857422, "logps/rejected": -210.4353485107422, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -0.46869999170303345, "rewards/margins": 4.703073501586914, "rewards/rejected": -5.171773910522461, "step": 5088 }, { "epoch": 0.79, "learning_rate": 1.0414855542299911e-05, "logits/chosen": -2.5556461811065674, "logits/rejected": -3.2826526165008545, "logps/chosen": -28.557523727416992, "logps/rejected": -141.61058044433594, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.0736676454544067, "rewards/margins": 4.986570358276367, "rewards/rejected": -6.060237884521484, "step": 5089 }, { "epoch": 0.79, "learning_rate": 1.0414122101768763e-05, "logits/chosen": -1.0867888927459717, "logits/rejected": -2.5157358646392822, "logps/chosen": -102.67603302001953, "logps/rejected": -364.9696044921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.20211371779441833, "rewards/margins": 7.808107376098633, "rewards/rejected": -8.010221481323242, "step": 5090 }, { "epoch": 0.79, "learning_rate": 1.0413388661237617e-05, "logits/chosen": -2.6160683631896973, "logits/rejected": -3.029789924621582, "logps/chosen": -43.506492614746094, "logps/rejected": -183.0247039794922, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -1.057849645614624, "rewards/margins": 4.518435478210449, "rewards/rejected": -5.576285362243652, "step": 5091 }, { "epoch": 0.79, "learning_rate": 1.0412655220706469e-05, "logits/chosen": -1.9252039194107056, "logits/rejected": -3.0463263988494873, "logps/chosen": -113.22967529296875, "logps/rejected": -346.43658447265625, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -0.4225258231163025, "rewards/margins": 4.22412633895874, "rewards/rejected": -4.6466522216796875, "step": 5092 }, { "epoch": 0.79, "learning_rate": 1.041192178017532e-05, "logits/chosen": -2.3435702323913574, "logits/rejected": -3.02005934715271, "logps/chosen": -90.82605743408203, "logps/rejected": -273.375, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -0.41562843322753906, "rewards/margins": 4.937697410583496, "rewards/rejected": -5.353325843811035, "step": 5093 }, { "epoch": 0.79, "learning_rate": 1.0411188339644172e-05, "logits/chosen": -1.963765025138855, "logits/rejected": -2.7157695293426514, "logps/chosen": -467.3038330078125, "logps/rejected": -696.6444702148438, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.4066741466522217, "rewards/margins": 6.592578411102295, "rewards/rejected": -7.9992523193359375, "step": 5094 }, { "epoch": 0.79, "learning_rate": 1.0410454899113024e-05, "logits/chosen": -2.3482472896575928, "logits/rejected": -2.8032045364379883, "logps/chosen": -166.70809936523438, "logps/rejected": -361.30328369140625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.5222557187080383, "rewards/margins": 5.085966110229492, "rewards/rejected": -5.608222007751465, "step": 5095 }, { "epoch": 0.79, "learning_rate": 1.0409721458581876e-05, "logits/chosen": -2.84185528755188, "logits/rejected": -2.980339527130127, "logps/chosen": -158.4236602783203, "logps/rejected": -227.74826049804688, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6744239926338196, "rewards/margins": 5.922748565673828, "rewards/rejected": -6.597172737121582, "step": 5096 }, { "epoch": 0.79, "learning_rate": 1.0408988018050728e-05, "logits/chosen": -2.526066303253174, "logits/rejected": -1.6514068841934204, "logps/chosen": -184.94044494628906, "logps/rejected": -250.28306579589844, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 0.18509522080421448, "rewards/margins": 6.618593692779541, "rewards/rejected": -6.433498382568359, "step": 5097 }, { "epoch": 0.79, "learning_rate": 1.040825457751958e-05, "logits/chosen": -1.6913011074066162, "logits/rejected": -2.3972134590148926, "logps/chosen": -216.43325805664062, "logps/rejected": -337.5552978515625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.07391631603240967, "rewards/margins": 7.801072120666504, "rewards/rejected": -7.727155685424805, "step": 5098 }, { "epoch": 0.79, "learning_rate": 1.0407521136988432e-05, "logits/chosen": -2.3778293132781982, "logits/rejected": -2.9382214546203613, "logps/chosen": -142.7541961669922, "logps/rejected": -337.01251220703125, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -0.9340133666992188, "rewards/margins": 4.676807403564453, "rewards/rejected": -5.610820770263672, "step": 5099 }, { "epoch": 0.79, "learning_rate": 1.0406787696457285e-05, "logits/chosen": -2.730588674545288, "logits/rejected": -2.8773391246795654, "logps/chosen": -96.15553283691406, "logps/rejected": -220.61280822753906, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.35864031314849854, "rewards/margins": 4.540522575378418, "rewards/rejected": -4.181882858276367, "step": 5100 }, { "epoch": 0.79, "learning_rate": 1.0406054255926137e-05, "logits/chosen": -2.658220052719116, "logits/rejected": -2.0637638568878174, "logps/chosen": -321.0684814453125, "logps/rejected": -310.5213317871094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6282787322998047, "rewards/margins": 7.161438465118408, "rewards/rejected": -8.789716720581055, "step": 5101 }, { "epoch": 0.79, "learning_rate": 1.040532081539499e-05, "logits/chosen": -2.3512513637542725, "logits/rejected": -3.033208131790161, "logps/chosen": -32.4099006652832, "logps/rejected": -225.05438232421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.35884928703308105, "rewards/margins": 6.644227027893066, "rewards/rejected": -6.285377502441406, "step": 5102 }, { "epoch": 0.79, "learning_rate": 1.0404587374863841e-05, "logits/chosen": -1.901463508605957, "logits/rejected": -3.2757439613342285, "logps/chosen": -185.2623291015625, "logps/rejected": -679.758544921875, "loss": 2.6872, "rewards/accuracies": 0.5, "rewards/chosen": -2.5069198608398438, "rewards/margins": 1.2355804443359375, "rewards/rejected": -3.7425003051757812, "step": 5103 }, { "epoch": 0.79, "learning_rate": 1.0403853934332693e-05, "logits/chosen": -3.102597236633301, "logits/rejected": -2.715772867202759, "logps/chosen": -192.08323669433594, "logps/rejected": -127.6793441772461, "loss": 0.8767, "rewards/accuracies": 0.5, "rewards/chosen": -1.1145915985107422, "rewards/margins": 1.5080101490020752, "rewards/rejected": -2.6226017475128174, "step": 5104 }, { "epoch": 0.79, "learning_rate": 1.0403120493801545e-05, "logits/chosen": -1.9809972047805786, "logits/rejected": -2.7935869693756104, "logps/chosen": -49.68380355834961, "logps/rejected": -495.84088134765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.0003464818000793457, "rewards/margins": 7.460135459899902, "rewards/rejected": -7.459788799285889, "step": 5105 }, { "epoch": 0.79, "learning_rate": 1.0402387053270397e-05, "logits/chosen": -2.439361810684204, "logits/rejected": -2.828709602355957, "logps/chosen": -99.06555938720703, "logps/rejected": -296.5649719238281, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6564933657646179, "rewards/margins": 6.519400596618652, "rewards/rejected": -7.175894260406494, "step": 5106 }, { "epoch": 0.79, "learning_rate": 1.040165361273925e-05, "logits/chosen": -2.9422783851623535, "logits/rejected": -2.626110315322876, "logps/chosen": -162.8180389404297, "logps/rejected": -111.13826751708984, "loss": 2.0805, "rewards/accuracies": 0.5, "rewards/chosen": -2.451084852218628, "rewards/margins": 1.8539087772369385, "rewards/rejected": -4.304993629455566, "step": 5107 }, { "epoch": 0.79, "learning_rate": 1.0400920172208102e-05, "logits/chosen": -2.840252161026001, "logits/rejected": -3.093573808670044, "logps/chosen": -217.1398162841797, "logps/rejected": -342.5404357910156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4016495943069458, "rewards/margins": 6.769641399383545, "rewards/rejected": -7.171290874481201, "step": 5108 }, { "epoch": 0.79, "learning_rate": 1.0400186731676956e-05, "logits/chosen": -2.3973276615142822, "logits/rejected": -3.2541022300720215, "logps/chosen": -58.99763488769531, "logps/rejected": -227.51205444335938, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.2729108929634094, "rewards/margins": 5.074019432067871, "rewards/rejected": -4.801108360290527, "step": 5109 }, { "epoch": 0.79, "learning_rate": 1.0399453291145808e-05, "logits/chosen": -2.7033262252807617, "logits/rejected": -2.779935836791992, "logps/chosen": -111.90470123291016, "logps/rejected": -251.27906799316406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.19385813176631927, "rewards/margins": 7.261162757873535, "rewards/rejected": -7.455020904541016, "step": 5110 }, { "epoch": 0.79, "learning_rate": 1.039871985061466e-05, "logits/chosen": -1.7229948043823242, "logits/rejected": -3.0091936588287354, "logps/chosen": -186.35964965820312, "logps/rejected": -449.2769775390625, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 0.5038139224052429, "rewards/margins": 6.1979451179504395, "rewards/rejected": -5.694131374359131, "step": 5111 }, { "epoch": 0.8, "learning_rate": 1.0397986410083511e-05, "logits/chosen": -2.1331260204315186, "logits/rejected": -2.4068925380706787, "logps/chosen": -88.23876190185547, "logps/rejected": -420.56829833984375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.9932619333267212, "rewards/margins": 6.816376686096191, "rewards/rejected": -7.809638023376465, "step": 5112 }, { "epoch": 0.8, "learning_rate": 1.0397252969552363e-05, "logits/chosen": -2.701340436935425, "logits/rejected": -3.1397643089294434, "logps/chosen": -107.097412109375, "logps/rejected": -271.77227783203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.9687831401824951, "rewards/margins": 9.423407554626465, "rewards/rejected": -8.45462417602539, "step": 5113 }, { "epoch": 0.8, "learning_rate": 1.0396519529021215e-05, "logits/chosen": -1.7055187225341797, "logits/rejected": -2.7783894538879395, "logps/chosen": -175.6165771484375, "logps/rejected": -341.22930908203125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.5706218481063843, "rewards/margins": 6.446007251739502, "rewards/rejected": -7.016629219055176, "step": 5114 }, { "epoch": 0.8, "learning_rate": 1.0395786088490067e-05, "logits/chosen": -1.781857967376709, "logits/rejected": -2.082709789276123, "logps/chosen": -273.97900390625, "logps/rejected": -382.8023986816406, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7328948974609375, "rewards/margins": 7.121482849121094, "rewards/rejected": -7.854377746582031, "step": 5115 }, { "epoch": 0.8, "learning_rate": 1.0395052647958919e-05, "logits/chosen": -1.2117670774459839, "logits/rejected": -2.9771244525909424, "logps/chosen": -156.97218322753906, "logps/rejected": -564.84912109375, "loss": 2.9995, "rewards/accuracies": 0.5, "rewards/chosen": -4.494669437408447, "rewards/margins": -0.6121587753295898, "rewards/rejected": -3.8825104236602783, "step": 5116 }, { "epoch": 0.8, "learning_rate": 1.0394319207427772e-05, "logits/chosen": -2.7204549312591553, "logits/rejected": -2.8905577659606934, "logps/chosen": -65.8247299194336, "logps/rejected": -183.43394470214844, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.27829551696777344, "rewards/margins": 6.023542404174805, "rewards/rejected": -6.301837921142578, "step": 5117 }, { "epoch": 0.8, "learning_rate": 1.0393585766896624e-05, "logits/chosen": -2.453800916671753, "logits/rejected": -2.919517993927002, "logps/chosen": -145.64154052734375, "logps/rejected": -327.8026123046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.3730552196502686, "rewards/margins": 8.21349811553955, "rewards/rejected": -6.840442657470703, "step": 5118 }, { "epoch": 0.8, "learning_rate": 1.0392852326365476e-05, "logits/chosen": -2.7882988452911377, "logits/rejected": -2.433030366897583, "logps/chosen": -520.987548828125, "logps/rejected": -538.2044067382812, "loss": 3.5899, "rewards/accuracies": 0.5, "rewards/chosen": -3.0096428394317627, "rewards/margins": -0.9508941173553467, "rewards/rejected": -2.058748722076416, "step": 5119 }, { "epoch": 0.8, "learning_rate": 1.0392118885834328e-05, "logits/chosen": -1.433874249458313, "logits/rejected": -2.763734817504883, "logps/chosen": -130.10731506347656, "logps/rejected": -333.83740234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.38886016607284546, "rewards/margins": 6.607137680053711, "rewards/rejected": -6.995997428894043, "step": 5120 }, { "epoch": 0.8, "learning_rate": 1.039138544530318e-05, "logits/chosen": -2.6843934059143066, "logits/rejected": -2.954352855682373, "logps/chosen": -388.7527770996094, "logps/rejected": -544.074951171875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.15300637483596802, "rewards/margins": 7.19676399230957, "rewards/rejected": -7.349769592285156, "step": 5121 }, { "epoch": 0.8, "learning_rate": 1.0390652004772032e-05, "logits/chosen": -2.9059653282165527, "logits/rejected": -2.00126051902771, "logps/chosen": -331.0352783203125, "logps/rejected": -52.61238479614258, "loss": 7.1302, "rewards/accuracies": 0.0, "rewards/chosen": -6.948205947875977, "rewards/margins": -7.127051830291748, "rewards/rejected": 0.17884612083435059, "step": 5122 }, { "epoch": 0.8, "learning_rate": 1.0389918564240884e-05, "logits/chosen": -3.072760581970215, "logits/rejected": -3.323978900909424, "logps/chosen": -300.88665771484375, "logps/rejected": -567.3245849609375, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.30357199907302856, "rewards/margins": 6.95015811920166, "rewards/rejected": -7.253729820251465, "step": 5123 }, { "epoch": 0.8, "learning_rate": 1.0389185123709736e-05, "logits/chosen": -1.633311152458191, "logits/rejected": -2.8230228424072266, "logps/chosen": -104.1319808959961, "logps/rejected": -271.2655029296875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.4945641756057739, "rewards/margins": 5.880636215209961, "rewards/rejected": -6.375200271606445, "step": 5124 }, { "epoch": 0.8, "learning_rate": 1.0388451683178587e-05, "logits/chosen": -2.842245578765869, "logits/rejected": -1.7141774892807007, "logps/chosen": -232.98190307617188, "logps/rejected": -121.62519836425781, "loss": 2.5337, "rewards/accuracies": 0.5, "rewards/chosen": -2.4993889331817627, "rewards/margins": -0.1903831958770752, "rewards/rejected": -2.3090057373046875, "step": 5125 }, { "epoch": 0.8, "learning_rate": 1.0387718242647441e-05, "logits/chosen": -2.490586280822754, "logits/rejected": -3.0934484004974365, "logps/chosen": -94.0157241821289, "logps/rejected": -262.0191345214844, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.3018913269042969, "rewards/margins": 4.665848731994629, "rewards/rejected": -4.967740058898926, "step": 5126 }, { "epoch": 0.8, "learning_rate": 1.0386984802116293e-05, "logits/chosen": -2.617060422897339, "logits/rejected": -1.9133210182189941, "logps/chosen": -551.288818359375, "logps/rejected": -513.04150390625, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -1.0375159978866577, "rewards/margins": 7.527379989624023, "rewards/rejected": -8.564895629882812, "step": 5127 }, { "epoch": 0.8, "learning_rate": 1.0386251361585145e-05, "logits/chosen": -2.4938912391662598, "logits/rejected": -2.836562395095825, "logps/chosen": -258.6038818359375, "logps/rejected": -346.60028076171875, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -0.5940490961074829, "rewards/margins": 3.6856143474578857, "rewards/rejected": -4.2796630859375, "step": 5128 }, { "epoch": 0.8, "learning_rate": 1.0385517921053997e-05, "logits/chosen": -1.7693772315979004, "logits/rejected": -2.7297489643096924, "logps/chosen": -81.91771697998047, "logps/rejected": -255.34365844726562, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.23243027925491333, "rewards/margins": 5.890615463256836, "rewards/rejected": -5.6581854820251465, "step": 5129 }, { "epoch": 0.8, "learning_rate": 1.0384784480522849e-05, "logits/chosen": -2.6836256980895996, "logits/rejected": -2.321885585784912, "logps/chosen": -176.24452209472656, "logps/rejected": -117.28414154052734, "loss": 1.9375, "rewards/accuracies": 0.5, "rewards/chosen": -1.4833579063415527, "rewards/margins": 1.2549453973770142, "rewards/rejected": -2.7383034229278564, "step": 5130 }, { "epoch": 0.8, "learning_rate": 1.03840510399917e-05, "logits/chosen": -3.1114723682403564, "logits/rejected": -3.0020716190338135, "logps/chosen": -379.0128173828125, "logps/rejected": -368.1905517578125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.7948987483978271, "rewards/margins": 5.0605926513671875, "rewards/rejected": -5.855491638183594, "step": 5131 }, { "epoch": 0.8, "learning_rate": 1.0383317599460552e-05, "logits/chosen": -1.8481334447860718, "logits/rejected": -2.451967239379883, "logps/chosen": -230.13658142089844, "logps/rejected": -244.71319580078125, "loss": 0.9742, "rewards/accuracies": 0.5, "rewards/chosen": -2.206761360168457, "rewards/margins": 4.082930088043213, "rewards/rejected": -6.28969144821167, "step": 5132 }, { "epoch": 0.8, "learning_rate": 1.0382584158929404e-05, "logits/chosen": -2.767976760864258, "logits/rejected": -2.85296630859375, "logps/chosen": -319.00457763671875, "logps/rejected": -242.59889221191406, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": -1.2041053771972656, "rewards/margins": 3.5605669021606445, "rewards/rejected": -4.76467227935791, "step": 5133 }, { "epoch": 0.8, "learning_rate": 1.0381850718398256e-05, "logits/chosen": -1.8729277849197388, "logits/rejected": -2.791381597518921, "logps/chosen": -179.8480224609375, "logps/rejected": -493.4389343261719, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.2790588438510895, "rewards/margins": 10.59050178527832, "rewards/rejected": -10.869560241699219, "step": 5134 }, { "epoch": 0.8, "learning_rate": 1.038111727786711e-05, "logits/chosen": -1.719643235206604, "logits/rejected": -2.7160003185272217, "logps/chosen": -132.98806762695312, "logps/rejected": -136.1864776611328, "loss": 2.5604, "rewards/accuracies": 0.5, "rewards/chosen": -3.134953737258911, "rewards/margins": -0.13909244537353516, "rewards/rejected": -2.995861291885376, "step": 5135 }, { "epoch": 0.8, "learning_rate": 1.0380383837335962e-05, "logits/chosen": -2.0175859928131104, "logits/rejected": -2.488513708114624, "logps/chosen": -1023.0655517578125, "logps/rejected": -1102.8233642578125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.5232429504394531, "rewards/margins": 5.4658684730529785, "rewards/rejected": -6.989111423492432, "step": 5136 }, { "epoch": 0.8, "learning_rate": 1.0379650396804813e-05, "logits/chosen": -2.6657156944274902, "logits/rejected": -2.4123260974884033, "logps/chosen": -229.21670532226562, "logps/rejected": -180.3372802734375, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -0.08785629272460938, "rewards/margins": 5.23284387588501, "rewards/rejected": -5.320700168609619, "step": 5137 }, { "epoch": 0.8, "learning_rate": 1.0378916956273665e-05, "logits/chosen": -2.6903769969940186, "logits/rejected": -2.958280563354492, "logps/chosen": -741.790283203125, "logps/rejected": -696.4118041992188, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.06546401232481003, "rewards/margins": 7.08396053314209, "rewards/rejected": -7.018496513366699, "step": 5138 }, { "epoch": 0.8, "learning_rate": 1.0378183515742517e-05, "logits/chosen": -2.6219494342803955, "logits/rejected": -2.744136333465576, "logps/chosen": -122.07310485839844, "logps/rejected": -248.07391357421875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5386165380477905, "rewards/margins": 6.123676300048828, "rewards/rejected": -7.66229248046875, "step": 5139 }, { "epoch": 0.8, "learning_rate": 1.0377450075211369e-05, "logits/chosen": -1.2077161073684692, "logits/rejected": -2.8745956420898438, "logps/chosen": -102.42385864257812, "logps/rejected": -368.04132080078125, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.48816415667533875, "rewards/margins": 5.3218183517456055, "rewards/rejected": -5.8099822998046875, "step": 5140 }, { "epoch": 0.8, "learning_rate": 1.0376716634680223e-05, "logits/chosen": -2.055579662322998, "logits/rejected": -3.1091909408569336, "logps/chosen": -126.83247375488281, "logps/rejected": -363.4600830078125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.33840543031692505, "rewards/margins": 5.7209320068359375, "rewards/rejected": -6.059337615966797, "step": 5141 }, { "epoch": 0.8, "learning_rate": 1.0375983194149074e-05, "logits/chosen": -2.79168963432312, "logits/rejected": -2.24544095993042, "logps/chosen": -129.6629180908203, "logps/rejected": -131.5270233154297, "loss": 0.1385, "rewards/accuracies": 1.0, "rewards/chosen": -2.1026523113250732, "rewards/margins": 3.413675308227539, "rewards/rejected": -5.516327857971191, "step": 5142 }, { "epoch": 0.8, "learning_rate": 1.0375249753617926e-05, "logits/chosen": -2.9970765113830566, "logits/rejected": -3.3004608154296875, "logps/chosen": -36.56858825683594, "logps/rejected": -182.4159698486328, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 0.27250805497169495, "rewards/margins": 5.1039886474609375, "rewards/rejected": -4.831480503082275, "step": 5143 }, { "epoch": 0.8, "learning_rate": 1.037451631308678e-05, "logits/chosen": -2.617309093475342, "logits/rejected": -2.732510566711426, "logps/chosen": -44.87089920043945, "logps/rejected": -172.46998596191406, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.3738676309585571, "rewards/margins": 6.587520599365234, "rewards/rejected": -7.961388111114502, "step": 5144 }, { "epoch": 0.8, "learning_rate": 1.0373782872555632e-05, "logits/chosen": -2.9477767944335938, "logits/rejected": -2.481492280960083, "logps/chosen": -488.1209716796875, "logps/rejected": -377.27325439453125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.1722267866134644, "rewards/margins": 4.499896049499512, "rewards/rejected": -5.672122955322266, "step": 5145 }, { "epoch": 0.8, "learning_rate": 1.0373049432024484e-05, "logits/chosen": -2.2528295516967773, "logits/rejected": -2.9691872596740723, "logps/chosen": -112.19014739990234, "logps/rejected": -191.76210021972656, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.23698121309280396, "rewards/margins": 4.767646789550781, "rewards/rejected": -5.0046281814575195, "step": 5146 }, { "epoch": 0.8, "learning_rate": 1.0372315991493336e-05, "logits/chosen": -1.67745840549469, "logits/rejected": -3.2187113761901855, "logps/chosen": -234.83255004882812, "logps/rejected": -747.000244140625, "loss": 3.0252, "rewards/accuracies": 0.5, "rewards/chosen": -4.329594135284424, "rewards/margins": 0.6229820251464844, "rewards/rejected": -4.95257568359375, "step": 5147 }, { "epoch": 0.8, "learning_rate": 1.0371582550962187e-05, "logits/chosen": -1.9994008541107178, "logits/rejected": -2.5589921474456787, "logps/chosen": -202.21743774414062, "logps/rejected": -345.896240234375, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": -1.9974517822265625, "rewards/margins": 5.460868835449219, "rewards/rejected": -7.458320617675781, "step": 5148 }, { "epoch": 0.8, "learning_rate": 1.037084911043104e-05, "logits/chosen": -2.0916483402252197, "logits/rejected": -2.9502737522125244, "logps/chosen": -199.66339111328125, "logps/rejected": -362.7744140625, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.5236557126045227, "rewards/margins": 4.582427978515625, "rewards/rejected": -5.106083869934082, "step": 5149 }, { "epoch": 0.8, "learning_rate": 1.0370115669899891e-05, "logits/chosen": -1.5971723794937134, "logits/rejected": -2.8089523315429688, "logps/chosen": -185.14755249023438, "logps/rejected": -355.38525390625, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -0.22930069267749786, "rewards/margins": 5.951068878173828, "rewards/rejected": -6.180369853973389, "step": 5150 }, { "epoch": 0.8, "learning_rate": 1.0369382229368743e-05, "logits/chosen": -2.887789487838745, "logits/rejected": -2.682663917541504, "logps/chosen": -344.6347351074219, "logps/rejected": -140.70518493652344, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": 0.198558047413826, "rewards/margins": 4.2373151779174805, "rewards/rejected": -4.03875732421875, "step": 5151 }, { "epoch": 0.8, "learning_rate": 1.0368648788837595e-05, "logits/chosen": -2.333981513977051, "logits/rejected": -2.771226167678833, "logps/chosen": -59.461517333984375, "logps/rejected": -183.304443359375, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -0.7721017599105835, "rewards/margins": 5.282195091247559, "rewards/rejected": -6.054296970367432, "step": 5152 }, { "epoch": 0.8, "learning_rate": 1.0367915348306449e-05, "logits/chosen": -1.975219488143921, "logits/rejected": -2.415178060531616, "logps/chosen": -66.36640167236328, "logps/rejected": -135.7534637451172, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -1.317360281944275, "rewards/margins": 3.014653444290161, "rewards/rejected": -4.332014083862305, "step": 5153 }, { "epoch": 0.8, "learning_rate": 1.03671819077753e-05, "logits/chosen": -2.992187023162842, "logits/rejected": -2.414884567260742, "logps/chosen": -643.3523559570312, "logps/rejected": -810.8479614257812, "loss": 2.8674, "rewards/accuracies": 0.5, "rewards/chosen": -3.2382309436798096, "rewards/margins": 1.5113496780395508, "rewards/rejected": -4.749580383300781, "step": 5154 }, { "epoch": 0.8, "learning_rate": 1.0366448467244152e-05, "logits/chosen": -2.8503458499908447, "logits/rejected": -1.4230903387069702, "logps/chosen": -443.41217041015625, "logps/rejected": -215.410400390625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.11700135469436646, "rewards/margins": 6.379217624664307, "rewards/rejected": -6.496218681335449, "step": 5155 }, { "epoch": 0.8, "learning_rate": 1.0365715026713004e-05, "logits/chosen": -2.7707581520080566, "logits/rejected": -3.0927977561950684, "logps/chosen": -49.56232452392578, "logps/rejected": -188.9976806640625, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -0.7717962861061096, "rewards/margins": 4.412134170532227, "rewards/rejected": -5.183930397033691, "step": 5156 }, { "epoch": 0.8, "learning_rate": 1.0364981586181856e-05, "logits/chosen": -2.843635320663452, "logits/rejected": -2.769132137298584, "logps/chosen": -160.67193603515625, "logps/rejected": -210.06820678710938, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.5542659759521484, "rewards/margins": 5.922485828399658, "rewards/rejected": -6.476751804351807, "step": 5157 }, { "epoch": 0.8, "learning_rate": 1.0364248145650708e-05, "logits/chosen": -2.0416691303253174, "logits/rejected": -2.9816813468933105, "logps/chosen": -47.081417083740234, "logps/rejected": -196.54818725585938, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.0830751359462738, "rewards/margins": 5.471843719482422, "rewards/rejected": -5.5549187660217285, "step": 5158 }, { "epoch": 0.8, "learning_rate": 1.036351470511956e-05, "logits/chosen": -2.4242730140686035, "logits/rejected": -2.5957159996032715, "logps/chosen": -56.3137092590332, "logps/rejected": -178.072021484375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.31927594542503357, "rewards/margins": 6.211583137512207, "rewards/rejected": -6.530858993530273, "step": 5159 }, { "epoch": 0.8, "learning_rate": 1.0362781264588412e-05, "logits/chosen": -2.936676025390625, "logits/rejected": -2.711893081665039, "logps/chosen": -146.99151611328125, "logps/rejected": -197.02056884765625, "loss": 0.4318, "rewards/accuracies": 0.5, "rewards/chosen": -3.086911201477051, "rewards/margins": 3.2099311351776123, "rewards/rejected": -6.296842575073242, "step": 5160 }, { "epoch": 0.8, "learning_rate": 1.0362047824057264e-05, "logits/chosen": -2.7501120567321777, "logits/rejected": -2.9222323894500732, "logps/chosen": -132.62823486328125, "logps/rejected": -320.17279052734375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.8779011368751526, "rewards/margins": 6.131442070007324, "rewards/rejected": -7.009343147277832, "step": 5161 }, { "epoch": 0.8, "learning_rate": 1.0361314383526117e-05, "logits/chosen": -2.8642871379852295, "logits/rejected": -2.3717775344848633, "logps/chosen": -428.4945068359375, "logps/rejected": -196.14186096191406, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.44075697660446167, "rewards/margins": 6.793713569641113, "rewards/rejected": -7.234470367431641, "step": 5162 }, { "epoch": 0.8, "learning_rate": 1.0360580942994969e-05, "logits/chosen": -2.9167635440826416, "logits/rejected": -3.2514374256134033, "logps/chosen": -62.70022201538086, "logps/rejected": -204.49362182617188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4346381425857544, "rewards/margins": 6.669313430786133, "rewards/rejected": -8.103951454162598, "step": 5163 }, { "epoch": 0.8, "learning_rate": 1.0359847502463821e-05, "logits/chosen": -1.9775073528289795, "logits/rejected": -3.059589385986328, "logps/chosen": -50.217350006103516, "logps/rejected": -327.03070068359375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0843921899795532, "rewards/margins": 7.985940456390381, "rewards/rejected": -9.070333480834961, "step": 5164 }, { "epoch": 0.8, "learning_rate": 1.0359114061932673e-05, "logits/chosen": -2.1621036529541016, "logits/rejected": -2.8395071029663086, "logps/chosen": -309.70611572265625, "logps/rejected": -531.28857421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.8119148015975952, "rewards/margins": 7.906705379486084, "rewards/rejected": -9.718620300292969, "step": 5165 }, { "epoch": 0.8, "learning_rate": 1.0358380621401525e-05, "logits/chosen": -2.477174758911133, "logits/rejected": -3.187720775604248, "logps/chosen": -122.00244140625, "logps/rejected": -266.04168701171875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.07472915947437286, "rewards/margins": 6.714061737060547, "rewards/rejected": -6.6393327713012695, "step": 5166 }, { "epoch": 0.8, "learning_rate": 1.0357647180870377e-05, "logits/chosen": -1.601003885269165, "logits/rejected": -2.5981197357177734, "logps/chosen": -68.67269897460938, "logps/rejected": -323.95843505859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.8182012438774109, "rewards/margins": 8.571520805358887, "rewards/rejected": -9.389721870422363, "step": 5167 }, { "epoch": 0.8, "learning_rate": 1.0356913740339228e-05, "logits/chosen": -1.444877028465271, "logits/rejected": -2.6013364791870117, "logps/chosen": -104.31934356689453, "logps/rejected": -191.74569702148438, "loss": 1.3788, "rewards/accuracies": 0.5, "rewards/chosen": -2.311048984527588, "rewards/margins": 1.5531771183013916, "rewards/rejected": -3.8642261028289795, "step": 5168 }, { "epoch": 0.8, "learning_rate": 1.035618029980808e-05, "logits/chosen": -3.168757200241089, "logits/rejected": -1.2026492357254028, "logps/chosen": -351.16851806640625, "logps/rejected": -79.61686706542969, "loss": 1.8809, "rewards/accuracies": 0.0, "rewards/chosen": -2.661390781402588, "rewards/margins": -1.6927109956741333, "rewards/rejected": -0.9686797857284546, "step": 5169 }, { "epoch": 0.8, "learning_rate": 1.0355446859276932e-05, "logits/chosen": -1.9200668334960938, "logits/rejected": -3.178312301635742, "logps/chosen": -32.29728317260742, "logps/rejected": -330.6689453125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.5731850862503052, "rewards/margins": 5.663748264312744, "rewards/rejected": -6.23693323135376, "step": 5170 }, { "epoch": 0.8, "learning_rate": 1.0354713418745786e-05, "logits/chosen": -3.07859206199646, "logits/rejected": -3.0611109733581543, "logps/chosen": -346.0644836425781, "logps/rejected": -350.93304443359375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8103376626968384, "rewards/margins": 6.741819381713867, "rewards/rejected": -7.552156448364258, "step": 5171 }, { "epoch": 0.8, "learning_rate": 1.0353979978214638e-05, "logits/chosen": -2.403503179550171, "logits/rejected": -2.876650094985962, "logps/chosen": -295.2138671875, "logps/rejected": -347.63623046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.665722668170929, "rewards/margins": 7.369292259216309, "rewards/rejected": -6.703569412231445, "step": 5172 }, { "epoch": 0.8, "learning_rate": 1.035324653768349e-05, "logits/chosen": -1.9038546085357666, "logits/rejected": -3.0182862281799316, "logps/chosen": -72.68318176269531, "logps/rejected": -370.21783447265625, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 0.009769052267074585, "rewards/margins": 5.393395900726318, "rewards/rejected": -5.383626937866211, "step": 5173 }, { "epoch": 0.8, "learning_rate": 1.0352513097152341e-05, "logits/chosen": -1.8320281505584717, "logits/rejected": -3.017225503921509, "logps/chosen": -32.60396194458008, "logps/rejected": -244.9625244140625, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.3275763988494873, "rewards/margins": 4.712249755859375, "rewards/rejected": -6.039826393127441, "step": 5174 }, { "epoch": 0.8, "learning_rate": 1.0351779656621195e-05, "logits/chosen": -2.7680561542510986, "logits/rejected": -3.1739532947540283, "logps/chosen": -101.04905700683594, "logps/rejected": -303.52386474609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.10071903467178345, "rewards/margins": 7.204688549041748, "rewards/rejected": -7.305407524108887, "step": 5175 }, { "epoch": 0.8, "learning_rate": 1.0351046216090047e-05, "logits/chosen": -1.5180437564849854, "logits/rejected": -1.6550010442733765, "logps/chosen": -358.76690673828125, "logps/rejected": -133.13438415527344, "loss": 2.3328, "rewards/accuracies": 0.5, "rewards/chosen": -3.338301181793213, "rewards/margins": 1.5480382442474365, "rewards/rejected": -4.8863396644592285, "step": 5176 }, { "epoch": 0.81, "learning_rate": 1.0350312775558899e-05, "logits/chosen": -3.124875783920288, "logits/rejected": -2.679401397705078, "logps/chosen": -200.1118621826172, "logps/rejected": -226.85842895507812, "loss": 2.5904, "rewards/accuracies": 0.5, "rewards/chosen": -3.3831827640533447, "rewards/margins": -0.24790501594543457, "rewards/rejected": -3.13527774810791, "step": 5177 }, { "epoch": 0.81, "learning_rate": 1.034957933502775e-05, "logits/chosen": -2.6537888050079346, "logits/rejected": -2.7177274227142334, "logps/chosen": -169.52052307128906, "logps/rejected": -355.7276611328125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.342803955078125, "rewards/margins": 6.0338335037231445, "rewards/rejected": -6.3766374588012695, "step": 5178 }, { "epoch": 0.81, "learning_rate": 1.0348845894496602e-05, "logits/chosen": -2.983476161956787, "logits/rejected": -3.013939142227173, "logps/chosen": -189.28475952148438, "logps/rejected": -284.221923828125, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.3777187466621399, "rewards/margins": 5.139290809631348, "rewards/rejected": -5.517009735107422, "step": 5179 }, { "epoch": 0.81, "learning_rate": 1.0348112453965456e-05, "logits/chosen": -2.8480396270751953, "logits/rejected": -2.570051908493042, "logps/chosen": -337.0774230957031, "logps/rejected": -209.7841033935547, "loss": 6.5499, "rewards/accuracies": 0.5, "rewards/chosen": -6.328139305114746, "rewards/margins": -3.7620203495025635, "rewards/rejected": -2.5661187171936035, "step": 5180 }, { "epoch": 0.81, "learning_rate": 1.0347379013434308e-05, "logits/chosen": -2.9071173667907715, "logits/rejected": -1.4907900094985962, "logps/chosen": -244.02316284179688, "logps/rejected": -101.83863830566406, "loss": 0.5751, "rewards/accuracies": 0.5, "rewards/chosen": -3.6711673736572266, "rewards/margins": 1.8966360092163086, "rewards/rejected": -5.567803382873535, "step": 5181 }, { "epoch": 0.81, "learning_rate": 1.034664557290316e-05, "logits/chosen": -2.6064510345458984, "logits/rejected": -2.641737222671509, "logps/chosen": -166.42205810546875, "logps/rejected": -284.11456298828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.535400390625, "rewards/margins": 8.89266586303711, "rewards/rejected": -8.35726547241211, "step": 5182 }, { "epoch": 0.81, "learning_rate": 1.0345912132372012e-05, "logits/chosen": -1.1063833236694336, "logits/rejected": -2.673661470413208, "logps/chosen": -43.53203582763672, "logps/rejected": -223.690673828125, "loss": 0.0522, "rewards/accuracies": 1.0, "rewards/chosen": -1.918891191482544, "rewards/margins": 4.5023417472839355, "rewards/rejected": -6.421233177185059, "step": 5183 }, { "epoch": 0.81, "learning_rate": 1.0345178691840864e-05, "logits/chosen": -2.319125175476074, "logits/rejected": -2.4130477905273438, "logps/chosen": -167.73348999023438, "logps/rejected": -246.6943359375, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -3.8004672527313232, "rewards/margins": 4.831615447998047, "rewards/rejected": -8.63208293914795, "step": 5184 }, { "epoch": 0.81, "learning_rate": 1.0344445251309715e-05, "logits/chosen": -2.9010932445526123, "logits/rejected": -2.898001194000244, "logps/chosen": -85.52304077148438, "logps/rejected": -160.25274658203125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.38311728835105896, "rewards/margins": 5.974325656890869, "rewards/rejected": -6.357442855834961, "step": 5185 }, { "epoch": 0.81, "learning_rate": 1.0343711810778567e-05, "logits/chosen": -3.0383400917053223, "logits/rejected": -2.1537842750549316, "logps/chosen": -459.6511535644531, "logps/rejected": -82.39842987060547, "loss": 4.0217, "rewards/accuracies": 0.0, "rewards/chosen": -3.732870578765869, "rewards/margins": -3.9766435623168945, "rewards/rejected": 0.2437729835510254, "step": 5186 }, { "epoch": 0.81, "learning_rate": 1.034297837024742e-05, "logits/chosen": -2.7714221477508545, "logits/rejected": -2.947676181793213, "logps/chosen": -124.2452392578125, "logps/rejected": -101.4262924194336, "loss": 3.6021, "rewards/accuracies": 0.5, "rewards/chosen": -3.6679680347442627, "rewards/margins": 0.035974740982055664, "rewards/rejected": -3.7039427757263184, "step": 5187 }, { "epoch": 0.81, "learning_rate": 1.0342244929716271e-05, "logits/chosen": -2.869335889816284, "logits/rejected": -2.784095525741577, "logps/chosen": -153.63685607910156, "logps/rejected": -54.486454010009766, "loss": 3.304, "rewards/accuracies": 0.5, "rewards/chosen": -5.490464210510254, "rewards/margins": -2.4878101348876953, "rewards/rejected": -3.0026538372039795, "step": 5188 }, { "epoch": 0.81, "learning_rate": 1.0341511489185125e-05, "logits/chosen": -2.2046351432800293, "logits/rejected": -2.5873546600341797, "logps/chosen": -86.15646362304688, "logps/rejected": -264.3404235839844, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.5928196907043457, "rewards/margins": 6.426267623901367, "rewards/rejected": -8.019087791442871, "step": 5189 }, { "epoch": 0.81, "learning_rate": 1.0340778048653977e-05, "logits/chosen": -2.475403308868408, "logits/rejected": -2.577664375305176, "logps/chosen": -119.34635925292969, "logps/rejected": -315.32843017578125, "loss": 0.4419, "rewards/accuracies": 0.5, "rewards/chosen": -2.273716688156128, "rewards/margins": 6.2092695236206055, "rewards/rejected": -8.482986450195312, "step": 5190 }, { "epoch": 0.81, "learning_rate": 1.0340044608122828e-05, "logits/chosen": -1.6390810012817383, "logits/rejected": -2.9016356468200684, "logps/chosen": -76.63706970214844, "logps/rejected": -310.570068359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5799152851104736, "rewards/margins": 6.647312164306641, "rewards/rejected": -7.227227687835693, "step": 5191 }, { "epoch": 0.81, "learning_rate": 1.033931116759168e-05, "logits/chosen": -2.3264503479003906, "logits/rejected": -3.1960651874542236, "logps/chosen": -155.10494995117188, "logps/rejected": -301.6610412597656, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.25656548142433167, "rewards/margins": 5.2009992599487305, "rewards/rejected": -5.457564830780029, "step": 5192 }, { "epoch": 0.81, "learning_rate": 1.0338577727060532e-05, "logits/chosen": -1.941507339477539, "logits/rejected": -3.0136399269104004, "logps/chosen": -154.29925537109375, "logps/rejected": -358.74810791015625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.2807376980781555, "rewards/margins": 6.510581016540527, "rewards/rejected": -6.2298431396484375, "step": 5193 }, { "epoch": 0.81, "learning_rate": 1.0337844286529384e-05, "logits/chosen": -1.4083945751190186, "logits/rejected": -2.3900628089904785, "logps/chosen": -245.68504333496094, "logps/rejected": -269.318115234375, "loss": 3.0561, "rewards/accuracies": 0.5, "rewards/chosen": -4.872538089752197, "rewards/margins": -1.3656843900680542, "rewards/rejected": -3.5068535804748535, "step": 5194 }, { "epoch": 0.81, "learning_rate": 1.0337110845998236e-05, "logits/chosen": -2.634033203125, "logits/rejected": -2.911769151687622, "logps/chosen": -90.77780151367188, "logps/rejected": -227.46035766601562, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7031833529472351, "rewards/margins": 6.020648002624512, "rewards/rejected": -6.7238311767578125, "step": 5195 }, { "epoch": 0.81, "learning_rate": 1.0336377405467088e-05, "logits/chosen": -2.2959342002868652, "logits/rejected": -2.787785530090332, "logps/chosen": -196.69085693359375, "logps/rejected": -255.4097137451172, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.6845978498458862, "rewards/margins": 5.7515740394592285, "rewards/rejected": -5.066976070404053, "step": 5196 }, { "epoch": 0.81, "learning_rate": 1.033564396493594e-05, "logits/chosen": -2.519089460372925, "logits/rejected": -1.6152936220169067, "logps/chosen": -465.96484375, "logps/rejected": -434.1007080078125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.400055408477783, "rewards/margins": 5.802401542663574, "rewards/rejected": -8.202457427978516, "step": 5197 }, { "epoch": 0.81, "learning_rate": 1.0334910524404793e-05, "logits/chosen": -2.7845475673675537, "logits/rejected": -2.6885199546813965, "logps/chosen": -273.5407409667969, "logps/rejected": -377.739990234375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.9293785095214844, "rewards/margins": 6.9503583908081055, "rewards/rejected": -7.87973690032959, "step": 5198 }, { "epoch": 0.81, "learning_rate": 1.0334177083873645e-05, "logits/chosen": -1.5909196138381958, "logits/rejected": -2.7773187160491943, "logps/chosen": -43.04014587402344, "logps/rejected": -222.24356079101562, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.5845057964324951, "rewards/margins": 5.025134086608887, "rewards/rejected": -6.609639644622803, "step": 5199 }, { "epoch": 0.81, "learning_rate": 1.0333443643342497e-05, "logits/chosen": -1.5857874155044556, "logits/rejected": -2.777306079864502, "logps/chosen": -99.36823272705078, "logps/rejected": -355.052001953125, "loss": 0.0906, "rewards/accuracies": 1.0, "rewards/chosen": -1.4155789613723755, "rewards/margins": 6.378514289855957, "rewards/rejected": -7.794093132019043, "step": 5200 }, { "epoch": 0.81, "learning_rate": 1.0332710202811349e-05, "logits/chosen": -2.5869903564453125, "logits/rejected": -3.0951671600341797, "logps/chosen": -565.549072265625, "logps/rejected": -638.880615234375, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.0061874389648438, "rewards/margins": 5.623533725738525, "rewards/rejected": -7.629721641540527, "step": 5201 }, { "epoch": 0.81, "learning_rate": 1.03319767622802e-05, "logits/chosen": -2.0135135650634766, "logits/rejected": -2.8698575496673584, "logps/chosen": -52.66373825073242, "logps/rejected": -241.08921813964844, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.2699365615844727, "rewards/margins": 6.476037979125977, "rewards/rejected": -7.745974540710449, "step": 5202 }, { "epoch": 0.81, "learning_rate": 1.0331243321749053e-05, "logits/chosen": -1.9002081155776978, "logits/rejected": -2.7187039852142334, "logps/chosen": -208.6241455078125, "logps/rejected": -405.040771484375, "loss": 3.7567, "rewards/accuracies": 0.5, "rewards/chosen": -4.106136798858643, "rewards/margins": -0.5863776206970215, "rewards/rejected": -3.5197594165802, "step": 5203 }, { "epoch": 0.81, "learning_rate": 1.0330509881217904e-05, "logits/chosen": -2.5973291397094727, "logits/rejected": -2.7767703533172607, "logps/chosen": -116.54624938964844, "logps/rejected": -362.11956787109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.16887015104293823, "rewards/margins": 7.880987167358398, "rewards/rejected": -8.049857139587402, "step": 5204 }, { "epoch": 0.81, "learning_rate": 1.0329776440686756e-05, "logits/chosen": -2.6615042686462402, "logits/rejected": -2.935309648513794, "logps/chosen": -218.21759033203125, "logps/rejected": -360.634033203125, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 0.6418914794921875, "rewards/margins": 5.735670566558838, "rewards/rejected": -5.09377908706665, "step": 5205 }, { "epoch": 0.81, "learning_rate": 1.032904300015561e-05, "logits/chosen": -2.9045774936676025, "logits/rejected": -2.944965362548828, "logps/chosen": -523.025146484375, "logps/rejected": -474.51373291015625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.31842803955078125, "rewards/margins": 5.117883682250977, "rewards/rejected": -5.436311721801758, "step": 5206 }, { "epoch": 0.81, "learning_rate": 1.0328309559624462e-05, "logits/chosen": -3.1541337966918945, "logits/rejected": -2.72082781791687, "logps/chosen": -726.5738525390625, "logps/rejected": -586.21337890625, "loss": 3.886, "rewards/accuracies": 0.5, "rewards/chosen": -4.748293876647949, "rewards/margins": -0.44341421127319336, "rewards/rejected": -4.304879665374756, "step": 5207 }, { "epoch": 0.81, "learning_rate": 1.0327576119093314e-05, "logits/chosen": -2.842665195465088, "logits/rejected": -3.1564009189605713, "logps/chosen": -168.32424926757812, "logps/rejected": -325.9385681152344, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.24430809915065765, "rewards/margins": 7.758511066436768, "rewards/rejected": -7.514203071594238, "step": 5208 }, { "epoch": 0.81, "learning_rate": 1.0326842678562167e-05, "logits/chosen": -2.756267786026001, "logits/rejected": -3.1826493740081787, "logps/chosen": -35.0364990234375, "logps/rejected": -147.27316284179688, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -2.114607810974121, "rewards/margins": 4.272562026977539, "rewards/rejected": -6.38716983795166, "step": 5209 }, { "epoch": 0.81, "learning_rate": 1.032610923803102e-05, "logits/chosen": -3.192458391189575, "logits/rejected": -2.8055195808410645, "logps/chosen": -151.3026580810547, "logps/rejected": -93.21467590332031, "loss": 2.8061, "rewards/accuracies": 0.0, "rewards/chosen": -4.795945167541504, "rewards/margins": -2.7384352684020996, "rewards/rejected": -2.0575101375579834, "step": 5210 }, { "epoch": 0.81, "learning_rate": 1.0325375797499871e-05, "logits/chosen": -2.3503317832946777, "logits/rejected": -2.7740418910980225, "logps/chosen": -106.56971740722656, "logps/rejected": -309.6590270996094, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.1989986896514893, "rewards/margins": 5.717281341552734, "rewards/rejected": -7.916280269622803, "step": 5211 }, { "epoch": 0.81, "learning_rate": 1.0324642356968723e-05, "logits/chosen": -2.51495623588562, "logits/rejected": -2.7638158798217773, "logps/chosen": -234.6341094970703, "logps/rejected": -358.96453857421875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.311307668685913, "rewards/margins": 5.635904312133789, "rewards/rejected": -6.947212219238281, "step": 5212 }, { "epoch": 0.81, "learning_rate": 1.0323908916437575e-05, "logits/chosen": -2.814326047897339, "logits/rejected": -2.962369203567505, "logps/chosen": -132.40328979492188, "logps/rejected": -222.875732421875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.16605912148952484, "rewards/margins": 6.944919586181641, "rewards/rejected": -6.778861045837402, "step": 5213 }, { "epoch": 0.81, "learning_rate": 1.0323175475906427e-05, "logits/chosen": -3.1477556228637695, "logits/rejected": -3.2340097427368164, "logps/chosen": -434.6312255859375, "logps/rejected": -429.57037353515625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.480710029602051, "rewards/margins": 5.928203582763672, "rewards/rejected": -8.408913612365723, "step": 5214 }, { "epoch": 0.81, "learning_rate": 1.032244203537528e-05, "logits/chosen": -3.148358106613159, "logits/rejected": -2.4864559173583984, "logps/chosen": -209.05197143554688, "logps/rejected": -147.64752197265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7613977789878845, "rewards/margins": 6.338953018188477, "rewards/rejected": -7.100350856781006, "step": 5215 }, { "epoch": 0.81, "learning_rate": 1.0321708594844132e-05, "logits/chosen": -2.6763951778411865, "logits/rejected": -2.612929344177246, "logps/chosen": -390.45361328125, "logps/rejected": -447.309326171875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.0197442770004272, "rewards/margins": 6.30460786819458, "rewards/rejected": -7.324352264404297, "step": 5216 }, { "epoch": 0.81, "learning_rate": 1.0320975154312984e-05, "logits/chosen": -2.7718653678894043, "logits/rejected": -3.0907645225524902, "logps/chosen": -179.33432006835938, "logps/rejected": -268.06451416015625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.12144164741039276, "rewards/margins": 5.217313766479492, "rewards/rejected": -5.3387556076049805, "step": 5217 }, { "epoch": 0.81, "learning_rate": 1.0320241713781836e-05, "logits/chosen": -2.144665479660034, "logits/rejected": -2.8328161239624023, "logps/chosen": -149.8355255126953, "logps/rejected": -261.9803161621094, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.17329025268554688, "rewards/margins": 6.685244083404541, "rewards/rejected": -6.511953830718994, "step": 5218 }, { "epoch": 0.81, "learning_rate": 1.0319508273250688e-05, "logits/chosen": -2.957291603088379, "logits/rejected": -3.208578586578369, "logps/chosen": -513.5609130859375, "logps/rejected": -647.3434448242188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9023352861404419, "rewards/margins": 6.661456108093262, "rewards/rejected": -7.563791275024414, "step": 5219 }, { "epoch": 0.81, "learning_rate": 1.031877483271954e-05, "logits/chosen": -2.6292519569396973, "logits/rejected": -2.9870715141296387, "logps/chosen": -95.67862701416016, "logps/rejected": -177.67453002929688, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.5260780453681946, "rewards/margins": 5.544055461883545, "rewards/rejected": -6.070133209228516, "step": 5220 }, { "epoch": 0.81, "learning_rate": 1.0318041392188392e-05, "logits/chosen": -2.7086181640625, "logits/rejected": -2.2868096828460693, "logps/chosen": -277.53515625, "logps/rejected": -428.17449951171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7296662330627441, "rewards/margins": 8.24057388305664, "rewards/rejected": -9.970239639282227, "step": 5221 }, { "epoch": 0.81, "learning_rate": 1.0317307951657243e-05, "logits/chosen": -2.7066874504089355, "logits/rejected": -2.8183963298797607, "logps/chosen": -230.24273681640625, "logps/rejected": -341.012939453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.23297004401683807, "rewards/margins": 8.341466903686523, "rewards/rejected": -8.10849666595459, "step": 5222 }, { "epoch": 0.81, "learning_rate": 1.0316574511126095e-05, "logits/chosen": -1.3303591012954712, "logits/rejected": -2.402095079421997, "logps/chosen": -113.53449249267578, "logps/rejected": -161.64674377441406, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -1.1571006774902344, "rewards/margins": 5.535830974578857, "rewards/rejected": -6.692931652069092, "step": 5223 }, { "epoch": 0.81, "learning_rate": 1.0315841070594949e-05, "logits/chosen": -2.5407497882843018, "logits/rejected": -2.7909345626831055, "logps/chosen": -112.87994384765625, "logps/rejected": -235.70364379882812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0786882638931274, "rewards/margins": 9.078185081481934, "rewards/rejected": -10.15687370300293, "step": 5224 }, { "epoch": 0.81, "learning_rate": 1.03151076300638e-05, "logits/chosen": -1.3597379922866821, "logits/rejected": -2.129746198654175, "logps/chosen": -278.40399169921875, "logps/rejected": -465.15618896484375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.1521193981170654, "rewards/margins": 6.401186943054199, "rewards/rejected": -7.553306579589844, "step": 5225 }, { "epoch": 0.81, "learning_rate": 1.0314374189532653e-05, "logits/chosen": -2.928586721420288, "logits/rejected": -2.552187442779541, "logps/chosen": -142.09524536132812, "logps/rejected": -193.7811279296875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.29669567942619324, "rewards/margins": 6.97871208190918, "rewards/rejected": -7.275407791137695, "step": 5226 }, { "epoch": 0.81, "learning_rate": 1.0313640749001504e-05, "logits/chosen": -2.7437238693237305, "logits/rejected": -2.9340484142303467, "logps/chosen": -169.44729614257812, "logps/rejected": -289.51763916015625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.3401474058628082, "rewards/margins": 7.079046249389648, "rewards/rejected": -7.419194221496582, "step": 5227 }, { "epoch": 0.81, "learning_rate": 1.0312907308470356e-05, "logits/chosen": -1.638558030128479, "logits/rejected": -2.9975497722625732, "logps/chosen": -332.25042724609375, "logps/rejected": -475.44677734375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.3325546383857727, "rewards/margins": 4.684635162353516, "rewards/rejected": -5.017189979553223, "step": 5228 }, { "epoch": 0.81, "learning_rate": 1.0312173867939208e-05, "logits/chosen": -2.612191915512085, "logits/rejected": -2.949183702468872, "logps/chosen": -108.75868225097656, "logps/rejected": -243.2042694091797, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.045798420906067, "rewards/margins": 7.796813011169434, "rewards/rejected": -8.842611312866211, "step": 5229 }, { "epoch": 0.81, "learning_rate": 1.031144042740806e-05, "logits/chosen": -3.002756118774414, "logits/rejected": -2.3206984996795654, "logps/chosen": -351.66839599609375, "logps/rejected": -275.2580261230469, "loss": 2.0254, "rewards/accuracies": 0.5, "rewards/chosen": -2.326326847076416, "rewards/margins": 0.44050073623657227, "rewards/rejected": -2.7668275833129883, "step": 5230 }, { "epoch": 0.81, "learning_rate": 1.0310706986876912e-05, "logits/chosen": -1.0948678255081177, "logits/rejected": -2.5298514366149902, "logps/chosen": -212.4031982421875, "logps/rejected": -399.84027099609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.12429618835449219, "rewards/margins": 8.506660461425781, "rewards/rejected": -8.382364273071289, "step": 5231 }, { "epoch": 0.81, "learning_rate": 1.0309973546345764e-05, "logits/chosen": -1.8959710597991943, "logits/rejected": -3.0359201431274414, "logps/chosen": -109.13009643554688, "logps/rejected": -318.09869384765625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.0034202337265015, "rewards/margins": 5.003446578979492, "rewards/rejected": -6.006866455078125, "step": 5232 }, { "epoch": 0.81, "learning_rate": 1.0309240105814617e-05, "logits/chosen": -2.721036911010742, "logits/rejected": -2.2479183673858643, "logps/chosen": -339.47247314453125, "logps/rejected": -333.0784606933594, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.10292662680149078, "rewards/margins": 5.851068496704102, "rewards/rejected": -5.9539947509765625, "step": 5233 }, { "epoch": 0.81, "learning_rate": 1.030850666528347e-05, "logits/chosen": -2.517839193344116, "logits/rejected": -2.441479206085205, "logps/chosen": -417.15618896484375, "logps/rejected": -441.091552734375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.5008549690246582, "rewards/margins": 5.708614349365234, "rewards/rejected": -7.209469795227051, "step": 5234 }, { "epoch": 0.81, "learning_rate": 1.0307773224752321e-05, "logits/chosen": -2.612694263458252, "logits/rejected": -2.9391231536865234, "logps/chosen": -85.91202545166016, "logps/rejected": -153.25523376464844, "loss": 1.0072, "rewards/accuracies": 0.5, "rewards/chosen": -3.509347677230835, "rewards/margins": 3.4320788383483887, "rewards/rejected": -6.9414262771606445, "step": 5235 }, { "epoch": 0.81, "learning_rate": 1.0307039784221173e-05, "logits/chosen": -1.9421851634979248, "logits/rejected": -3.1394617557525635, "logps/chosen": -317.8182067871094, "logps/rejected": -484.1474609375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 1.2050747871398926, "rewards/margins": 7.175451278686523, "rewards/rejected": -5.970376968383789, "step": 5236 }, { "epoch": 0.81, "learning_rate": 1.0306306343690025e-05, "logits/chosen": -2.1514089107513428, "logits/rejected": -3.036583423614502, "logps/chosen": -63.524993896484375, "logps/rejected": -256.63006591796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.6706321835517883, "rewards/margins": 6.127752780914307, "rewards/rejected": -5.457120895385742, "step": 5237 }, { "epoch": 0.81, "learning_rate": 1.0305572903158877e-05, "logits/chosen": -3.17991304397583, "logits/rejected": -3.32436203956604, "logps/chosen": -87.60265350341797, "logps/rejected": -179.06077575683594, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5126457810401917, "rewards/margins": 6.667834281921387, "rewards/rejected": -7.180480003356934, "step": 5238 }, { "epoch": 0.81, "learning_rate": 1.0304839462627729e-05, "logits/chosen": -2.6254327297210693, "logits/rejected": -2.90346622467041, "logps/chosen": -540.9209594726562, "logps/rejected": -472.5389099121094, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.23934268951416016, "rewards/margins": 6.695034027099609, "rewards/rejected": -6.9343767166137695, "step": 5239 }, { "epoch": 0.81, "learning_rate": 1.030410602209658e-05, "logits/chosen": -2.293360471725464, "logits/rejected": -3.0883116722106934, "logps/chosen": -259.329833984375, "logps/rejected": -508.8082580566406, "loss": 3.1872, "rewards/accuracies": 0.5, "rewards/chosen": -3.8518941402435303, "rewards/margins": -0.11374545097351074, "rewards/rejected": -3.7381486892700195, "step": 5240 }, { "epoch": 0.82, "learning_rate": 1.0303372581565434e-05, "logits/chosen": -1.174331784248352, "logits/rejected": -2.8272721767425537, "logps/chosen": -102.08059692382812, "logps/rejected": -429.8642578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.739183783531189, "rewards/margins": 7.888164520263672, "rewards/rejected": -8.627347946166992, "step": 5241 }, { "epoch": 0.82, "learning_rate": 1.0302639141034286e-05, "logits/chosen": -2.956033706665039, "logits/rejected": -2.8909482955932617, "logps/chosen": -173.2043914794922, "logps/rejected": -169.72457885742188, "loss": 3.0926, "rewards/accuracies": 0.5, "rewards/chosen": -4.579984188079834, "rewards/margins": 0.4817171096801758, "rewards/rejected": -5.061700820922852, "step": 5242 }, { "epoch": 0.82, "learning_rate": 1.030190570050314e-05, "logits/chosen": -2.818290948867798, "logits/rejected": -1.8127566576004028, "logps/chosen": -573.42138671875, "logps/rejected": -393.28729248046875, "loss": 3.0945, "rewards/accuracies": 0.5, "rewards/chosen": -3.86033034324646, "rewards/margins": 0.9111237525939941, "rewards/rejected": -4.771453857421875, "step": 5243 }, { "epoch": 0.82, "learning_rate": 1.0301172259971991e-05, "logits/chosen": -2.3963873386383057, "logits/rejected": -2.865032434463501, "logps/chosen": -79.1755599975586, "logps/rejected": -218.0487060546875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": 0.06481199711561203, "rewards/margins": 6.312605857849121, "rewards/rejected": -6.247793674468994, "step": 5244 }, { "epoch": 0.82, "learning_rate": 1.0300438819440843e-05, "logits/chosen": -2.680880546569824, "logits/rejected": -3.183366060256958, "logps/chosen": -43.4063606262207, "logps/rejected": -160.60992431640625, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -1.109940767288208, "rewards/margins": 3.8019800186157227, "rewards/rejected": -4.911920547485352, "step": 5245 }, { "epoch": 0.82, "learning_rate": 1.0299705378909695e-05, "logits/chosen": -1.5783483982086182, "logits/rejected": -2.7418720722198486, "logps/chosen": -69.60188293457031, "logps/rejected": -248.29519653320312, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.09927558898925781, "rewards/margins": 5.84444522857666, "rewards/rejected": -5.745169639587402, "step": 5246 }, { "epoch": 0.82, "learning_rate": 1.0298971938378547e-05, "logits/chosen": -0.8978656530380249, "logits/rejected": -1.1961568593978882, "logps/chosen": -50.63671112060547, "logps/rejected": -270.7663879394531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.2276073694229126, "rewards/margins": 7.320704936981201, "rewards/rejected": -7.093097686767578, "step": 5247 }, { "epoch": 0.82, "learning_rate": 1.0298238497847399e-05, "logits/chosen": -2.38854718208313, "logits/rejected": -2.788675308227539, "logps/chosen": -450.9521179199219, "logps/rejected": -441.117919921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.7406372427940369, "rewards/margins": 7.721865653991699, "rewards/rejected": -8.462503433227539, "step": 5248 }, { "epoch": 0.82, "learning_rate": 1.0297505057316251e-05, "logits/chosen": -1.621151328086853, "logits/rejected": -2.949620485305786, "logps/chosen": -77.93342590332031, "logps/rejected": -391.2226257324219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7950937151908875, "rewards/margins": 9.301830291748047, "rewards/rejected": -10.096923828125, "step": 5249 }, { "epoch": 0.82, "learning_rate": 1.0296771616785103e-05, "logits/chosen": -1.9601813554763794, "logits/rejected": -3.123425245285034, "logps/chosen": -144.5511016845703, "logps/rejected": -422.31646728515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.1871788054704666, "rewards/margins": 8.839719772338867, "rewards/rejected": -9.026899337768555, "step": 5250 }, { "epoch": 0.82, "learning_rate": 1.0296038176253956e-05, "logits/chosen": -1.6798081398010254, "logits/rejected": -2.7225146293640137, "logps/chosen": -51.27384567260742, "logps/rejected": -253.75413513183594, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.6966524720191956, "rewards/margins": 6.506781578063965, "rewards/rejected": -7.203433990478516, "step": 5251 }, { "epoch": 0.82, "learning_rate": 1.0295304735722808e-05, "logits/chosen": -1.7803049087524414, "logits/rejected": -2.336426258087158, "logps/chosen": -338.40386962890625, "logps/rejected": -450.1436462402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.3086647391319275, "rewards/margins": 11.264245986938477, "rewards/rejected": -10.955581665039062, "step": 5252 }, { "epoch": 0.82, "learning_rate": 1.029457129519166e-05, "logits/chosen": -2.761639356613159, "logits/rejected": -3.0312769412994385, "logps/chosen": -34.51852035522461, "logps/rejected": -150.11874389648438, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.0520610809326172, "rewards/margins": 5.411799430847168, "rewards/rejected": -6.463860511779785, "step": 5253 }, { "epoch": 0.82, "learning_rate": 1.0293837854660512e-05, "logits/chosen": -2.7849762439727783, "logits/rejected": -2.0667006969451904, "logps/chosen": -140.60012817382812, "logps/rejected": -111.53343200683594, "loss": 0.2845, "rewards/accuracies": 1.0, "rewards/chosen": -2.9904208183288574, "rewards/margins": 1.885677695274353, "rewards/rejected": -4.8760986328125, "step": 5254 }, { "epoch": 0.82, "learning_rate": 1.0293104414129364e-05, "logits/chosen": -1.8302884101867676, "logits/rejected": -2.888171672821045, "logps/chosen": -291.81005859375, "logps/rejected": -432.4853210449219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.148217797279358, "rewards/margins": 8.68980598449707, "rewards/rejected": -9.838024139404297, "step": 5255 }, { "epoch": 0.82, "learning_rate": 1.0292370973598216e-05, "logits/chosen": -2.9733195304870605, "logits/rejected": -1.328916311264038, "logps/chosen": -453.75189208984375, "logps/rejected": -184.03549194335938, "loss": 2.0454, "rewards/accuracies": 0.5, "rewards/chosen": -2.9149627685546875, "rewards/margins": 1.88114333152771, "rewards/rejected": -4.796106338500977, "step": 5256 }, { "epoch": 0.82, "learning_rate": 1.0291637533067068e-05, "logits/chosen": -2.345280885696411, "logits/rejected": -2.6848881244659424, "logps/chosen": -433.38482666015625, "logps/rejected": -359.5556945800781, "loss": 0.1122, "rewards/accuracies": 1.0, "rewards/chosen": -3.316633462905884, "rewards/margins": 3.0766420364379883, "rewards/rejected": -6.393275260925293, "step": 5257 }, { "epoch": 0.82, "learning_rate": 1.029090409253592e-05, "logits/chosen": -1.3696647882461548, "logits/rejected": -2.3267343044281006, "logps/chosen": -186.51841735839844, "logps/rejected": -404.83062744140625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.5557953119277954, "rewards/margins": 6.103643417358398, "rewards/rejected": -7.6594390869140625, "step": 5258 }, { "epoch": 0.82, "learning_rate": 1.0290170652004771e-05, "logits/chosen": -2.523529291152954, "logits/rejected": -3.0187506675720215, "logps/chosen": -84.50606536865234, "logps/rejected": -93.34812927246094, "loss": 4.5486, "rewards/accuracies": 0.5, "rewards/chosen": -4.542543411254883, "rewards/margins": -0.027637481689453125, "rewards/rejected": -4.51490592956543, "step": 5259 }, { "epoch": 0.82, "learning_rate": 1.0289437211473625e-05, "logits/chosen": -2.6526291370391846, "logits/rejected": -2.9083850383758545, "logps/chosen": -127.57026672363281, "logps/rejected": -280.838134765625, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": -0.08492012321949005, "rewards/margins": 4.079577445983887, "rewards/rejected": -4.164497375488281, "step": 5260 }, { "epoch": 0.82, "learning_rate": 1.0288703770942477e-05, "logits/chosen": -1.7343556880950928, "logits/rejected": -2.2178919315338135, "logps/chosen": -77.98528289794922, "logps/rejected": -263.97393798828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.40692442655563354, "rewards/margins": 8.0426664352417, "rewards/rejected": -7.635741233825684, "step": 5261 }, { "epoch": 0.82, "learning_rate": 1.0287970330411329e-05, "logits/chosen": -2.733912706375122, "logits/rejected": -2.349534749984741, "logps/chosen": -424.4010009765625, "logps/rejected": -347.2050476074219, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.0759319067001343, "rewards/margins": 4.225348472595215, "rewards/rejected": -5.3012800216674805, "step": 5262 }, { "epoch": 0.82, "learning_rate": 1.028723688988018e-05, "logits/chosen": -2.489133834838867, "logits/rejected": -2.9083731174468994, "logps/chosen": -334.00579833984375, "logps/rejected": -404.96343994140625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -0.5017410516738892, "rewards/margins": 5.176802158355713, "rewards/rejected": -5.6785430908203125, "step": 5263 }, { "epoch": 0.82, "learning_rate": 1.0286503449349032e-05, "logits/chosen": -2.595231294631958, "logits/rejected": -2.7864489555358887, "logps/chosen": -175.614990234375, "logps/rejected": -397.5794372558594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.766039252281189, "rewards/margins": 7.815732479095459, "rewards/rejected": -7.0496931076049805, "step": 5264 }, { "epoch": 0.82, "learning_rate": 1.0285770008817884e-05, "logits/chosen": -1.9331971406936646, "logits/rejected": -3.037597894668579, "logps/chosen": -323.5995178222656, "logps/rejected": -837.0791625976562, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.1088987588882446, "rewards/margins": 8.619266510009766, "rewards/rejected": -9.728164672851562, "step": 5265 }, { "epoch": 0.82, "learning_rate": 1.0285036568286736e-05, "logits/chosen": -2.8645761013031006, "logits/rejected": -2.448751926422119, "logps/chosen": -232.08230590820312, "logps/rejected": -201.61056518554688, "loss": 0.7459, "rewards/accuracies": 0.5, "rewards/chosen": -3.584064483642578, "rewards/margins": 2.9195494651794434, "rewards/rejected": -6.5036139488220215, "step": 5266 }, { "epoch": 0.82, "learning_rate": 1.0284303127755588e-05, "logits/chosen": -3.1661536693573, "logits/rejected": -3.2878189086914062, "logps/chosen": -31.679767608642578, "logps/rejected": -144.05702209472656, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.5102729797363281, "rewards/margins": 7.669308662414551, "rewards/rejected": -9.179581642150879, "step": 5267 }, { "epoch": 0.82, "learning_rate": 1.028356968722444e-05, "logits/chosen": -1.6080896854400635, "logits/rejected": -2.9713916778564453, "logps/chosen": -330.7593688964844, "logps/rejected": -488.0924987792969, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -0.5507583618164062, "rewards/margins": 4.668342590332031, "rewards/rejected": -5.2191009521484375, "step": 5268 }, { "epoch": 0.82, "learning_rate": 1.0282836246693294e-05, "logits/chosen": -2.7467586994171143, "logits/rejected": -2.541100263595581, "logps/chosen": -808.1124267578125, "logps/rejected": -688.653564453125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.5621306896209717, "rewards/margins": 5.328778266906738, "rewards/rejected": -6.890909194946289, "step": 5269 }, { "epoch": 0.82, "learning_rate": 1.0282102806162145e-05, "logits/chosen": -2.821305751800537, "logits/rejected": -2.640387535095215, "logps/chosen": -614.8804931640625, "logps/rejected": -561.6187744140625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.20587845146656036, "rewards/margins": 7.12868595123291, "rewards/rejected": -6.922807693481445, "step": 5270 }, { "epoch": 0.82, "learning_rate": 1.0281369365630997e-05, "logits/chosen": -1.2660139799118042, "logits/rejected": -2.122910976409912, "logps/chosen": -272.53948974609375, "logps/rejected": -700.62841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.078059434890747, "rewards/margins": 10.617834091186523, "rewards/rejected": -11.695892333984375, "step": 5271 }, { "epoch": 0.82, "learning_rate": 1.028063592509985e-05, "logits/chosen": -2.949871063232422, "logits/rejected": -2.083949089050293, "logps/chosen": -361.46893310546875, "logps/rejected": -194.30836486816406, "loss": 1.7075, "rewards/accuracies": 0.5, "rewards/chosen": -2.3894195556640625, "rewards/margins": 0.9719535112380981, "rewards/rejected": -3.36137318611145, "step": 5272 }, { "epoch": 0.82, "learning_rate": 1.0279902484568701e-05, "logits/chosen": -1.3689121007919312, "logits/rejected": -2.994041919708252, "logps/chosen": -111.59761047363281, "logps/rejected": -289.0641174316406, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.0204015970230103, "rewards/margins": 6.2091522216796875, "rewards/rejected": -7.229554176330566, "step": 5273 }, { "epoch": 0.82, "learning_rate": 1.0279169044037553e-05, "logits/chosen": -1.926403522491455, "logits/rejected": -2.749326705932617, "logps/chosen": -185.19464111328125, "logps/rejected": -377.99774169921875, "loss": 4.1448, "rewards/accuracies": 0.5, "rewards/chosen": -4.8615641593933105, "rewards/margins": -0.755934476852417, "rewards/rejected": -4.1056294441223145, "step": 5274 }, { "epoch": 0.82, "learning_rate": 1.0278435603506406e-05, "logits/chosen": -2.752363920211792, "logits/rejected": -1.400623083114624, "logps/chosen": -637.5277709960938, "logps/rejected": -248.13128662109375, "loss": 2.6307, "rewards/accuracies": 0.5, "rewards/chosen": -4.041801452636719, "rewards/margins": 2.0832290649414062, "rewards/rejected": -6.125030517578125, "step": 5275 }, { "epoch": 0.82, "learning_rate": 1.0277702162975258e-05, "logits/chosen": -2.464160203933716, "logits/rejected": -2.9004223346710205, "logps/chosen": -101.9955062866211, "logps/rejected": -469.3946533203125, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -1.21672523021698, "rewards/margins": 7.575526714324951, "rewards/rejected": -8.792251586914062, "step": 5276 }, { "epoch": 0.82, "learning_rate": 1.027696872244411e-05, "logits/chosen": -2.7842931747436523, "logits/rejected": -2.3276314735412598, "logps/chosen": -152.90676879882812, "logps/rejected": -242.7550048828125, "loss": 1.7508, "rewards/accuracies": 0.5, "rewards/chosen": -4.833200454711914, "rewards/margins": 3.0053811073303223, "rewards/rejected": -7.838581562042236, "step": 5277 }, { "epoch": 0.82, "learning_rate": 1.0276235281912964e-05, "logits/chosen": -2.4283745288848877, "logits/rejected": -1.5656652450561523, "logps/chosen": -313.1412658691406, "logps/rejected": -359.94647216796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6338802576065063, "rewards/margins": 9.015844345092773, "rewards/rejected": -9.649724006652832, "step": 5278 }, { "epoch": 0.82, "learning_rate": 1.0275501841381816e-05, "logits/chosen": -1.858828067779541, "logits/rejected": -2.058328866958618, "logps/chosen": -142.4856719970703, "logps/rejected": -303.0205078125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.7249549627304077, "rewards/margins": 6.907750606536865, "rewards/rejected": -7.6327056884765625, "step": 5279 }, { "epoch": 0.82, "learning_rate": 1.0274768400850668e-05, "logits/chosen": -2.538682222366333, "logits/rejected": -3.008516788482666, "logps/chosen": -366.93206787109375, "logps/rejected": -480.36566162109375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.3358306884765625, "rewards/margins": 6.487448215484619, "rewards/rejected": -6.151618003845215, "step": 5280 }, { "epoch": 0.82, "learning_rate": 1.027403496031952e-05, "logits/chosen": -1.654831051826477, "logits/rejected": -2.3296422958374023, "logps/chosen": -101.12494659423828, "logps/rejected": -223.86758422851562, "loss": 0.9561, "rewards/accuracies": 0.5, "rewards/chosen": -3.7568485736846924, "rewards/margins": 3.6148433685302734, "rewards/rejected": -7.371691703796387, "step": 5281 }, { "epoch": 0.82, "learning_rate": 1.0273301519788371e-05, "logits/chosen": -2.6349706649780273, "logits/rejected": -2.7730400562286377, "logps/chosen": -328.38836669921875, "logps/rejected": -288.22589111328125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.5414547324180603, "rewards/margins": 6.821971893310547, "rewards/rejected": -7.363426208496094, "step": 5282 }, { "epoch": 0.82, "learning_rate": 1.0272568079257223e-05, "logits/chosen": -2.8800108432769775, "logits/rejected": -3.201101303100586, "logps/chosen": -111.61517333984375, "logps/rejected": -278.6584167480469, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5252903699874878, "rewards/margins": 7.858182907104492, "rewards/rejected": -9.38347339630127, "step": 5283 }, { "epoch": 0.82, "learning_rate": 1.0271834638726075e-05, "logits/chosen": -2.973902702331543, "logits/rejected": -2.479022264480591, "logps/chosen": -258.5026550292969, "logps/rejected": -244.7171630859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.7067481875419617, "rewards/margins": 7.441204071044922, "rewards/rejected": -8.14795207977295, "step": 5284 }, { "epoch": 0.82, "learning_rate": 1.0271101198194927e-05, "logits/chosen": -1.8914653062820435, "logits/rejected": -2.8429622650146484, "logps/chosen": -78.11228942871094, "logps/rejected": -270.68212890625, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -2.0990986824035645, "rewards/margins": 5.9142231941223145, "rewards/rejected": -8.013321876525879, "step": 5285 }, { "epoch": 0.82, "learning_rate": 1.0270367757663779e-05, "logits/chosen": -1.985324501991272, "logits/rejected": -2.9491665363311768, "logps/chosen": -68.45326232910156, "logps/rejected": -244.45506286621094, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.607708752155304, "rewards/margins": 6.091558456420898, "rewards/rejected": -5.48384952545166, "step": 5286 }, { "epoch": 0.82, "learning_rate": 1.0269634317132632e-05, "logits/chosen": -0.6019729971885681, "logits/rejected": -2.462287425994873, "logps/chosen": -76.53973388671875, "logps/rejected": -428.63189697265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.098950147628784, "rewards/margins": 8.971630096435547, "rewards/rejected": -11.070579528808594, "step": 5287 }, { "epoch": 0.82, "learning_rate": 1.0268900876601484e-05, "logits/chosen": -1.918163776397705, "logits/rejected": -2.846801996231079, "logps/chosen": -92.1609878540039, "logps/rejected": -205.38516235351562, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": -3.3631534576416016, "rewards/margins": 4.404825210571289, "rewards/rejected": -7.767978668212891, "step": 5288 }, { "epoch": 0.82, "learning_rate": 1.0268167436070336e-05, "logits/chosen": -2.853558301925659, "logits/rejected": -2.826908826828003, "logps/chosen": -612.80078125, "logps/rejected": -476.9912414550781, "loss": 2.6143, "rewards/accuracies": 0.5, "rewards/chosen": -3.4842629432678223, "rewards/margins": 1.0703506469726562, "rewards/rejected": -4.5546135902404785, "step": 5289 }, { "epoch": 0.82, "learning_rate": 1.0267433995539188e-05, "logits/chosen": -2.3330225944519043, "logits/rejected": -2.797832727432251, "logps/chosen": -147.3917236328125, "logps/rejected": -253.955078125, "loss": 2.4905, "rewards/accuracies": 0.5, "rewards/chosen": -4.479443550109863, "rewards/margins": 0.41338014602661133, "rewards/rejected": -4.892823696136475, "step": 5290 }, { "epoch": 0.82, "learning_rate": 1.026670055500804e-05, "logits/chosen": -1.979341983795166, "logits/rejected": -2.96134877204895, "logps/chosen": -78.6717529296875, "logps/rejected": -198.4337158203125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.7561564445495605, "rewards/margins": 6.24766731262207, "rewards/rejected": -8.003823280334473, "step": 5291 }, { "epoch": 0.82, "learning_rate": 1.0265967114476892e-05, "logits/chosen": -2.421865224838257, "logits/rejected": -2.755847692489624, "logps/chosen": -344.9034729003906, "logps/rejected": -326.51910400390625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.634093999862671, "rewards/margins": 5.764278411865234, "rewards/rejected": -8.398372650146484, "step": 5292 }, { "epoch": 0.82, "learning_rate": 1.0265233673945744e-05, "logits/chosen": -2.8164687156677246, "logits/rejected": -3.2304816246032715, "logps/chosen": -653.0548095703125, "logps/rejected": -623.8941040039062, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.5670181512832642, "rewards/margins": 7.097152233123779, "rewards/rejected": -8.664170265197754, "step": 5293 }, { "epoch": 0.82, "learning_rate": 1.0264500233414596e-05, "logits/chosen": -2.7809927463531494, "logits/rejected": -3.0340464115142822, "logps/chosen": -64.25489807128906, "logps/rejected": -151.1416015625, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": -3.1962783336639404, "rewards/margins": 3.3453550338745117, "rewards/rejected": -6.541633605957031, "step": 5294 }, { "epoch": 0.82, "learning_rate": 1.0263766792883447e-05, "logits/chosen": -2.4273948669433594, "logits/rejected": -2.776853084564209, "logps/chosen": -51.6440544128418, "logps/rejected": -220.85348510742188, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": -3.480161428451538, "rewards/margins": 3.281801700592041, "rewards/rejected": -6.761962890625, "step": 5295 }, { "epoch": 0.82, "learning_rate": 1.0263033352352301e-05, "logits/chosen": -1.7928434610366821, "logits/rejected": -2.80641508102417, "logps/chosen": -203.94969177246094, "logps/rejected": -347.046875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.8777996301651001, "rewards/margins": 5.683791160583496, "rewards/rejected": -6.561590671539307, "step": 5296 }, { "epoch": 0.82, "learning_rate": 1.0262299911821153e-05, "logits/chosen": -2.1414477825164795, "logits/rejected": -2.0587902069091797, "logps/chosen": -216.67343139648438, "logps/rejected": -357.130859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6157115697860718, "rewards/margins": 7.60359001159668, "rewards/rejected": -9.219301223754883, "step": 5297 }, { "epoch": 0.82, "learning_rate": 1.0261566471290005e-05, "logits/chosen": -3.0164191722869873, "logits/rejected": -2.8761661052703857, "logps/chosen": -158.9188995361328, "logps/rejected": -196.95103454589844, "loss": 2.593, "rewards/accuracies": 0.5, "rewards/chosen": -1.348854422569275, "rewards/margins": 1.6863536834716797, "rewards/rejected": -3.035208225250244, "step": 5298 }, { "epoch": 0.82, "learning_rate": 1.0260833030758857e-05, "logits/chosen": -0.8907749056816101, "logits/rejected": -2.7370662689208984, "logps/chosen": -134.9130096435547, "logps/rejected": -567.432373046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.6284340023994446, "rewards/margins": 8.191919326782227, "rewards/rejected": -8.820352554321289, "step": 5299 }, { "epoch": 0.82, "learning_rate": 1.0260099590227709e-05, "logits/chosen": -2.1489381790161133, "logits/rejected": -2.9277660846710205, "logps/chosen": -56.4281005859375, "logps/rejected": -321.05126953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0382297039031982, "rewards/margins": 8.524518966674805, "rewards/rejected": -9.562748908996582, "step": 5300 }, { "epoch": 0.82, "learning_rate": 1.025936614969656e-05, "logits/chosen": -2.5674846172332764, "logits/rejected": -2.7631137371063232, "logps/chosen": -70.78779602050781, "logps/rejected": -277.285400390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3748635053634644, "rewards/margins": 7.296702861785889, "rewards/rejected": -8.671566009521484, "step": 5301 }, { "epoch": 0.82, "learning_rate": 1.0258632709165412e-05, "logits/chosen": -2.647578001022339, "logits/rejected": -2.79372501373291, "logps/chosen": -297.9197998046875, "logps/rejected": -347.28594970703125, "loss": 2.9058, "rewards/accuracies": 0.5, "rewards/chosen": -4.154162406921387, "rewards/margins": -0.2806885242462158, "rewards/rejected": -3.87347412109375, "step": 5302 }, { "epoch": 0.82, "learning_rate": 1.0257899268634264e-05, "logits/chosen": -2.834263563156128, "logits/rejected": -3.0913164615631104, "logps/chosen": -493.62689208984375, "logps/rejected": -488.86773681640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0143508911132812, "rewards/margins": 8.869915008544922, "rewards/rejected": -9.884265899658203, "step": 5303 }, { "epoch": 0.82, "learning_rate": 1.0257165828103118e-05, "logits/chosen": -2.024925947189331, "logits/rejected": -2.6687092781066895, "logps/chosen": -123.98776245117188, "logps/rejected": -404.2215881347656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7463043332099915, "rewards/margins": 8.03936767578125, "rewards/rejected": -8.785672187805176, "step": 5304 }, { "epoch": 0.83, "learning_rate": 1.025643238757197e-05, "logits/chosen": -3.048682689666748, "logits/rejected": -2.7305572032928467, "logps/chosen": -137.96957397460938, "logps/rejected": -162.4710235595703, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.230449914932251, "rewards/margins": 6.242794036865234, "rewards/rejected": -7.473244667053223, "step": 5305 }, { "epoch": 0.83, "learning_rate": 1.0255698947040821e-05, "logits/chosen": -1.5288314819335938, "logits/rejected": -3.0033798217773438, "logps/chosen": -129.35366821289062, "logps/rejected": -435.74090576171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.06085777282714844, "rewards/margins": 7.864537239074707, "rewards/rejected": -7.9253950119018555, "step": 5306 }, { "epoch": 0.83, "learning_rate": 1.0254965506509673e-05, "logits/chosen": -3.0703563690185547, "logits/rejected": -2.637930154800415, "logps/chosen": -111.15008544921875, "logps/rejected": -175.26646423339844, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -1.6921968460083008, "rewards/margins": 4.881038665771484, "rewards/rejected": -6.573235511779785, "step": 5307 }, { "epoch": 0.83, "learning_rate": 1.0254232065978525e-05, "logits/chosen": -2.597433567047119, "logits/rejected": -2.089014768600464, "logps/chosen": -315.2335205078125, "logps/rejected": -204.5992431640625, "loss": 5.7029, "rewards/accuracies": 0.5, "rewards/chosen": -4.771605968475342, "rewards/margins": -1.1788969039916992, "rewards/rejected": -3.5927090644836426, "step": 5308 }, { "epoch": 0.83, "learning_rate": 1.0253498625447377e-05, "logits/chosen": -2.6079413890838623, "logits/rejected": -3.0727286338806152, "logps/chosen": -224.71743774414062, "logps/rejected": -347.73004150390625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.11605632305145264, "rewards/margins": 7.258412837982178, "rewards/rejected": -7.374468803405762, "step": 5309 }, { "epoch": 0.83, "learning_rate": 1.025276518491623e-05, "logits/chosen": -2.5981650352478027, "logits/rejected": -2.7859745025634766, "logps/chosen": -31.8507080078125, "logps/rejected": -236.38706970214844, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": -0.2480744570493698, "rewards/margins": 4.625123977661133, "rewards/rejected": -4.873198509216309, "step": 5310 }, { "epoch": 0.83, "learning_rate": 1.0252031744385083e-05, "logits/chosen": -2.5554354190826416, "logits/rejected": -2.480947732925415, "logps/chosen": -330.65625, "logps/rejected": -260.3932800292969, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.2535283863544464, "rewards/margins": 6.141963481903076, "rewards/rejected": -6.395491600036621, "step": 5311 }, { "epoch": 0.83, "learning_rate": 1.0251298303853934e-05, "logits/chosen": -2.5359156131744385, "logits/rejected": -2.688845634460449, "logps/chosen": -98.30521392822266, "logps/rejected": -264.2567138671875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.17149290442466736, "rewards/margins": 7.021517276763916, "rewards/rejected": -7.193010330200195, "step": 5312 }, { "epoch": 0.83, "learning_rate": 1.0250564863322788e-05, "logits/chosen": -1.2924997806549072, "logits/rejected": -2.403965950012207, "logps/chosen": -37.11341094970703, "logps/rejected": -272.1921691894531, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.3488105535507202, "rewards/margins": 7.274548530578613, "rewards/rejected": -8.623358726501465, "step": 5313 }, { "epoch": 0.83, "learning_rate": 1.024983142279164e-05, "logits/chosen": -1.885775089263916, "logits/rejected": -2.7140939235687256, "logps/chosen": -104.9627685546875, "logps/rejected": -364.1593322753906, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6794917583465576, "rewards/margins": 8.60055923461914, "rewards/rejected": -9.280050277709961, "step": 5314 }, { "epoch": 0.83, "learning_rate": 1.0249097982260492e-05, "logits/chosen": -2.371067523956299, "logits/rejected": -2.9345226287841797, "logps/chosen": -178.72557067871094, "logps/rejected": -303.74261474609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9978193640708923, "rewards/margins": 6.967038154602051, "rewards/rejected": -7.964857578277588, "step": 5315 }, { "epoch": 0.83, "learning_rate": 1.0248364541729344e-05, "logits/chosen": -3.0741965770721436, "logits/rejected": -2.3696184158325195, "logps/chosen": -357.25592041015625, "logps/rejected": -127.817626953125, "loss": 5.2613, "rewards/accuracies": 0.5, "rewards/chosen": -5.459586143493652, "rewards/margins": -1.4829514026641846, "rewards/rejected": -3.9766345024108887, "step": 5316 }, { "epoch": 0.83, "learning_rate": 1.0247631101198196e-05, "logits/chosen": -2.666388988494873, "logits/rejected": -2.110985040664673, "logps/chosen": -239.22752380371094, "logps/rejected": -207.24591064453125, "loss": 0.0328, "rewards/accuracies": 1.0, "rewards/chosen": -0.30651551485061646, "rewards/margins": 3.9642977714538574, "rewards/rejected": -4.270813465118408, "step": 5317 }, { "epoch": 0.83, "learning_rate": 1.0246897660667047e-05, "logits/chosen": -3.0811820030212402, "logits/rejected": -2.356642007827759, "logps/chosen": -361.6628112792969, "logps/rejected": -159.189208984375, "loss": 5.2368, "rewards/accuracies": 0.0, "rewards/chosen": -5.635504722595215, "rewards/margins": -5.230879783630371, "rewards/rejected": -0.4046253561973572, "step": 5318 }, { "epoch": 0.83, "learning_rate": 1.02461642201359e-05, "logits/chosen": -1.2360204458236694, "logits/rejected": -2.9999468326568604, "logps/chosen": -147.41622924804688, "logps/rejected": -561.6878662109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.2109367400407791, "rewards/margins": 9.960502624511719, "rewards/rejected": -10.171439170837402, "step": 5319 }, { "epoch": 0.83, "learning_rate": 1.0245430779604751e-05, "logits/chosen": -1.6691055297851562, "logits/rejected": -2.9792070388793945, "logps/chosen": -93.58111572265625, "logps/rejected": -454.58734130859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5312540531158447, "rewards/margins": 7.87816047668457, "rewards/rejected": -8.409414291381836, "step": 5320 }, { "epoch": 0.83, "learning_rate": 1.0244697339073603e-05, "logits/chosen": -2.1114754676818848, "logits/rejected": -2.729172945022583, "logps/chosen": -163.34890747070312, "logps/rejected": -215.0812530517578, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.6394892930984497, "rewards/margins": 6.121298789978027, "rewards/rejected": -6.7607879638671875, "step": 5321 }, { "epoch": 0.83, "learning_rate": 1.0243963898542457e-05, "logits/chosen": -2.9156792163848877, "logits/rejected": -2.442559003829956, "logps/chosen": -514.2825927734375, "logps/rejected": -373.6954345703125, "loss": 3.0113, "rewards/accuracies": 0.5, "rewards/chosen": -2.6149048805236816, "rewards/margins": 0.7207427024841309, "rewards/rejected": -3.3356475830078125, "step": 5322 }, { "epoch": 0.83, "learning_rate": 1.0243230458011309e-05, "logits/chosen": -2.3026840686798096, "logits/rejected": -3.017381191253662, "logps/chosen": -255.271728515625, "logps/rejected": -461.0183410644531, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.6694374084472656, "rewards/margins": 6.846131324768066, "rewards/rejected": -7.515568733215332, "step": 5323 }, { "epoch": 0.83, "learning_rate": 1.024249701748016e-05, "logits/chosen": -2.708817481994629, "logits/rejected": -2.6594438552856445, "logps/chosen": -84.53849792480469, "logps/rejected": -150.25225830078125, "loss": 3.4559, "rewards/accuracies": 0.5, "rewards/chosen": -5.101307392120361, "rewards/margins": -0.1349167823791504, "rewards/rejected": -4.966390609741211, "step": 5324 }, { "epoch": 0.83, "learning_rate": 1.0241763576949012e-05, "logits/chosen": -2.745516300201416, "logits/rejected": -2.9672555923461914, "logps/chosen": -311.726806640625, "logps/rejected": -246.47193908691406, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.4742765426635742, "rewards/margins": 6.607600212097168, "rewards/rejected": -7.081876754760742, "step": 5325 }, { "epoch": 0.83, "learning_rate": 1.0241030136417864e-05, "logits/chosen": -2.5111424922943115, "logits/rejected": -2.705146074295044, "logps/chosen": -112.73443603515625, "logps/rejected": -118.28164672851562, "loss": 1.573, "rewards/accuracies": 0.5, "rewards/chosen": -1.968986988067627, "rewards/margins": 2.814957857131958, "rewards/rejected": -4.783944606781006, "step": 5326 }, { "epoch": 0.83, "learning_rate": 1.0240296695886716e-05, "logits/chosen": -2.360507011413574, "logits/rejected": -2.826476573944092, "logps/chosen": -190.22564697265625, "logps/rejected": -262.88714599609375, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -1.146763563156128, "rewards/margins": 4.295408248901367, "rewards/rejected": -5.442171573638916, "step": 5327 }, { "epoch": 0.83, "learning_rate": 1.0239563255355568e-05, "logits/chosen": -1.8018133640289307, "logits/rejected": -2.436034679412842, "logps/chosen": -197.166015625, "logps/rejected": -168.9696502685547, "loss": 4.836, "rewards/accuracies": 0.5, "rewards/chosen": -4.909428596496582, "rewards/margins": -2.1050994396209717, "rewards/rejected": -2.8043291568756104, "step": 5328 }, { "epoch": 0.83, "learning_rate": 1.023882981482442e-05, "logits/chosen": -2.429393768310547, "logits/rejected": -2.956803560256958, "logps/chosen": -336.0588073730469, "logps/rejected": -475.8126220703125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.315789818763733, "rewards/margins": 4.616822719573975, "rewards/rejected": -5.932612419128418, "step": 5329 }, { "epoch": 0.83, "learning_rate": 1.0238096374293272e-05, "logits/chosen": -2.308892011642456, "logits/rejected": -2.9728031158447266, "logps/chosen": -39.35813903808594, "logps/rejected": -341.9683837890625, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -0.8197218179702759, "rewards/margins": 3.3468475341796875, "rewards/rejected": -4.166569709777832, "step": 5330 }, { "epoch": 0.83, "learning_rate": 1.0237362933762125e-05, "logits/chosen": -2.0833799839019775, "logits/rejected": -2.679926872253418, "logps/chosen": -37.16484832763672, "logps/rejected": -158.9716796875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.3279980421066284, "rewards/margins": 5.550698280334473, "rewards/rejected": -6.878696441650391, "step": 5331 }, { "epoch": 0.83, "learning_rate": 1.0236629493230977e-05, "logits/chosen": -2.3444106578826904, "logits/rejected": -3.08497953414917, "logps/chosen": -167.7646942138672, "logps/rejected": -382.49835205078125, "loss": 3.5873, "rewards/accuracies": 0.5, "rewards/chosen": -4.843103408813477, "rewards/margins": -0.9084658622741699, "rewards/rejected": -3.9346375465393066, "step": 5332 }, { "epoch": 0.83, "learning_rate": 1.0235896052699829e-05, "logits/chosen": -2.142566204071045, "logits/rejected": -2.867671012878418, "logps/chosen": -366.21221923828125, "logps/rejected": -597.50048828125, "loss": 2.2538, "rewards/accuracies": 0.5, "rewards/chosen": -2.477013111114502, "rewards/margins": 2.841926097869873, "rewards/rejected": -5.318939208984375, "step": 5333 }, { "epoch": 0.83, "learning_rate": 1.0235162612168681e-05, "logits/chosen": -2.648547649383545, "logits/rejected": -2.7997968196868896, "logps/chosen": -104.03202819824219, "logps/rejected": -238.26260375976562, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 0.15360145270824432, "rewards/margins": 4.939192771911621, "rewards/rejected": -4.785591125488281, "step": 5334 }, { "epoch": 0.83, "learning_rate": 1.0234429171637533e-05, "logits/chosen": -2.8458447456359863, "logits/rejected": -2.8480987548828125, "logps/chosen": -187.74449157714844, "logps/rejected": -174.61300659179688, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.09704649448394775, "rewards/margins": 6.4917426109313965, "rewards/rejected": -6.588788986206055, "step": 5335 }, { "epoch": 0.83, "learning_rate": 1.0233695731106385e-05, "logits/chosen": -3.0286903381347656, "logits/rejected": -1.9214597940444946, "logps/chosen": -710.2467651367188, "logps/rejected": -264.43267822265625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.3450256586074829, "rewards/margins": 5.71870231628418, "rewards/rejected": -6.063727378845215, "step": 5336 }, { "epoch": 0.83, "learning_rate": 1.0232962290575236e-05, "logits/chosen": -2.2335591316223145, "logits/rejected": -2.9842841625213623, "logps/chosen": -49.68959426879883, "logps/rejected": -352.6950378417969, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 1.0736486911773682, "rewards/margins": 6.918549060821533, "rewards/rejected": -5.844900608062744, "step": 5337 }, { "epoch": 0.83, "learning_rate": 1.0232228850044088e-05, "logits/chosen": -1.999177098274231, "logits/rejected": -2.7477784156799316, "logps/chosen": -126.35543823242188, "logps/rejected": -220.21762084960938, "loss": 2.5609, "rewards/accuracies": 0.5, "rewards/chosen": -3.5263869762420654, "rewards/margins": 1.2850017547607422, "rewards/rejected": -4.811388969421387, "step": 5338 }, { "epoch": 0.83, "learning_rate": 1.023149540951294e-05, "logits/chosen": -3.0024852752685547, "logits/rejected": -1.8826526403427124, "logps/chosen": -186.57980346679688, "logps/rejected": -119.8425521850586, "loss": 1.6406, "rewards/accuracies": 0.5, "rewards/chosen": -2.092473030090332, "rewards/margins": 2.2973504066467285, "rewards/rejected": -4.3898234367370605, "step": 5339 }, { "epoch": 0.83, "learning_rate": 1.0230761968981794e-05, "logits/chosen": -2.031534194946289, "logits/rejected": -2.429088592529297, "logps/chosen": -385.36712646484375, "logps/rejected": -473.9183654785156, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.45601654052734375, "rewards/margins": 7.131434440612793, "rewards/rejected": -7.587450981140137, "step": 5340 }, { "epoch": 0.83, "learning_rate": 1.0230028528450646e-05, "logits/chosen": -2.80633807182312, "logits/rejected": -2.7384536266326904, "logps/chosen": -275.926025390625, "logps/rejected": -218.7198486328125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -0.31621477007865906, "rewards/margins": 4.8956756591796875, "rewards/rejected": -5.211890697479248, "step": 5341 }, { "epoch": 0.83, "learning_rate": 1.0229295087919498e-05, "logits/chosen": -3.092740297317505, "logits/rejected": -2.8254990577697754, "logps/chosen": -120.33502197265625, "logps/rejected": -94.8897476196289, "loss": 1.6237, "rewards/accuracies": 0.5, "rewards/chosen": -2.6906018257141113, "rewards/margins": 1.0993114709854126, "rewards/rejected": -3.7899131774902344, "step": 5342 }, { "epoch": 0.83, "learning_rate": 1.022856164738835e-05, "logits/chosen": -2.4717979431152344, "logits/rejected": -2.903749942779541, "logps/chosen": -210.48760986328125, "logps/rejected": -207.89138793945312, "loss": 3.4904, "rewards/accuracies": 0.5, "rewards/chosen": -3.7011430263519287, "rewards/margins": -0.5418686866760254, "rewards/rejected": -3.1592743396759033, "step": 5343 }, { "epoch": 0.83, "learning_rate": 1.0227828206857203e-05, "logits/chosen": -2.4765918254852295, "logits/rejected": -3.182234764099121, "logps/chosen": -173.86207580566406, "logps/rejected": -385.9873352050781, "loss": 2.7971, "rewards/accuracies": 0.5, "rewards/chosen": -3.087185859680176, "rewards/margins": 2.8127410411834717, "rewards/rejected": -5.899926662445068, "step": 5344 }, { "epoch": 0.83, "learning_rate": 1.0227094766326055e-05, "logits/chosen": -2.7171645164489746, "logits/rejected": -2.340764284133911, "logps/chosen": -398.2395324707031, "logps/rejected": -185.29135131835938, "loss": 4.652, "rewards/accuracies": 0.5, "rewards/chosen": -4.699603080749512, "rewards/margins": -2.1766042709350586, "rewards/rejected": -2.522998809814453, "step": 5345 }, { "epoch": 0.83, "learning_rate": 1.0226361325794907e-05, "logits/chosen": -2.621647834777832, "logits/rejected": -2.684596300125122, "logps/chosen": -193.85601806640625, "logps/rejected": -104.22007751464844, "loss": 1.942, "rewards/accuracies": 0.5, "rewards/chosen": -3.096553087234497, "rewards/margins": 0.8056581020355225, "rewards/rejected": -3.9022111892700195, "step": 5346 }, { "epoch": 0.83, "learning_rate": 1.0225627885263759e-05, "logits/chosen": -0.9816669225692749, "logits/rejected": -1.0653300285339355, "logps/chosen": -211.14915466308594, "logps/rejected": -458.104736328125, "loss": 0.2151, "rewards/accuracies": 1.0, "rewards/chosen": -2.712864398956299, "rewards/margins": 2.8348636627197266, "rewards/rejected": -5.547728061676025, "step": 5347 }, { "epoch": 0.83, "learning_rate": 1.022489444473261e-05, "logits/chosen": -3.1727113723754883, "logits/rejected": -3.2272980213165283, "logps/chosen": -50.2042350769043, "logps/rejected": -135.54855346679688, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.676450252532959, "rewards/margins": 5.723859786987305, "rewards/rejected": -6.4003095626831055, "step": 5348 }, { "epoch": 0.83, "learning_rate": 1.0224161004201464e-05, "logits/chosen": -1.5683186054229736, "logits/rejected": -2.9121391773223877, "logps/chosen": -114.10611724853516, "logps/rejected": -234.9935302734375, "loss": 2.1117, "rewards/accuracies": 0.5, "rewards/chosen": -1.2364253997802734, "rewards/margins": 1.4460668563842773, "rewards/rejected": -2.682492256164551, "step": 5349 }, { "epoch": 0.83, "learning_rate": 1.0223427563670316e-05, "logits/chosen": -2.499807119369507, "logits/rejected": -2.6781091690063477, "logps/chosen": -149.06619262695312, "logps/rejected": -170.28121948242188, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": 0.5429973602294922, "rewards/margins": 5.008187294006348, "rewards/rejected": -4.4651899337768555, "step": 5350 }, { "epoch": 0.83, "learning_rate": 1.0222694123139168e-05, "logits/chosen": -2.6957175731658936, "logits/rejected": -2.985443353652954, "logps/chosen": -30.806175231933594, "logps/rejected": -192.81687927246094, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": 0.669169545173645, "rewards/margins": 5.855879783630371, "rewards/rejected": -5.186710357666016, "step": 5351 }, { "epoch": 0.83, "learning_rate": 1.022196068260802e-05, "logits/chosen": -2.819215774536133, "logits/rejected": -2.272731065750122, "logps/chosen": -485.57940673828125, "logps/rejected": -307.0955810546875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.4111695289611816, "rewards/margins": 4.602823257446289, "rewards/rejected": -6.0139923095703125, "step": 5352 }, { "epoch": 0.83, "learning_rate": 1.0221227242076872e-05, "logits/chosen": -2.75909686088562, "logits/rejected": -3.045267343521118, "logps/chosen": -100.49726104736328, "logps/rejected": -203.0068359375, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -0.5181598663330078, "rewards/margins": 3.973658561706543, "rewards/rejected": -4.491818428039551, "step": 5353 }, { "epoch": 0.83, "learning_rate": 1.0220493801545724e-05, "logits/chosen": -1.6088416576385498, "logits/rejected": -1.9224146604537964, "logps/chosen": -307.7560729980469, "logps/rejected": -456.3284606933594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.36367493867874146, "rewards/margins": 8.20175552368164, "rewards/rejected": -8.565431594848633, "step": 5354 }, { "epoch": 0.83, "learning_rate": 1.0219760361014575e-05, "logits/chosen": -2.614365339279175, "logits/rejected": -3.1126322746276855, "logps/chosen": -38.11226272583008, "logps/rejected": -266.22003173828125, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -0.7971175909042358, "rewards/margins": 4.9521965980529785, "rewards/rejected": -5.749314308166504, "step": 5355 }, { "epoch": 0.83, "learning_rate": 1.0219026920483427e-05, "logits/chosen": -2.8092689514160156, "logits/rejected": -1.9187651872634888, "logps/chosen": -203.34056091308594, "logps/rejected": -302.58831787109375, "loss": 2.3111, "rewards/accuracies": 0.5, "rewards/chosen": -1.8216474056243896, "rewards/margins": 2.4691314697265625, "rewards/rejected": -4.290779113769531, "step": 5356 }, { "epoch": 0.83, "learning_rate": 1.0218293479952279e-05, "logits/chosen": -1.887311339378357, "logits/rejected": -2.7090907096862793, "logps/chosen": -169.01968383789062, "logps/rejected": -352.3883972167969, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.3010299801826477, "rewards/margins": 6.173835754394531, "rewards/rejected": -6.474865913391113, "step": 5357 }, { "epoch": 0.83, "learning_rate": 1.0217560039421133e-05, "logits/chosen": -2.811840295791626, "logits/rejected": -2.599752187728882, "logps/chosen": -196.23696899414062, "logps/rejected": -237.5482635498047, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.4299575984477997, "rewards/margins": 4.391118049621582, "rewards/rejected": -4.821075439453125, "step": 5358 }, { "epoch": 0.83, "learning_rate": 1.0216826598889985e-05, "logits/chosen": -2.58243989944458, "logits/rejected": -2.6866612434387207, "logps/chosen": -43.07714080810547, "logps/rejected": -236.70068359375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.8902395367622375, "rewards/margins": 8.169610023498535, "rewards/rejected": -7.279370307922363, "step": 5359 }, { "epoch": 0.83, "learning_rate": 1.0216093158358836e-05, "logits/chosen": -3.2944493293762207, "logits/rejected": -3.204160690307617, "logps/chosen": -75.9757308959961, "logps/rejected": -125.81906127929688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.20949502289295197, "rewards/margins": 6.629018783569336, "rewards/rejected": -6.838513374328613, "step": 5360 }, { "epoch": 0.83, "learning_rate": 1.0215359717827688e-05, "logits/chosen": -2.9090325832366943, "logits/rejected": -3.137104034423828, "logps/chosen": -51.692100524902344, "logps/rejected": -150.56321716308594, "loss": 0.9809, "rewards/accuracies": 0.5, "rewards/chosen": -3.3257832527160645, "rewards/margins": 2.823909044265747, "rewards/rejected": -6.149692535400391, "step": 5361 }, { "epoch": 0.83, "learning_rate": 1.021462627729654e-05, "logits/chosen": -2.7372779846191406, "logits/rejected": -2.759789228439331, "logps/chosen": -45.741363525390625, "logps/rejected": -127.58296203613281, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.7509517669677734, "rewards/margins": 4.957793235778809, "rewards/rejected": -5.708745002746582, "step": 5362 }, { "epoch": 0.83, "learning_rate": 1.0213892836765392e-05, "logits/chosen": -1.5903916358947754, "logits/rejected": -1.8661483526229858, "logps/chosen": -70.99115753173828, "logps/rejected": -308.9609069824219, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": 0.9297952055931091, "rewards/margins": 4.942297458648682, "rewards/rejected": -4.012502670288086, "step": 5363 }, { "epoch": 0.83, "learning_rate": 1.0213159396234244e-05, "logits/chosen": -2.265249252319336, "logits/rejected": -2.7355282306671143, "logps/chosen": -192.67001342773438, "logps/rejected": -310.1994934082031, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.31851083040237427, "rewards/margins": 7.695492744445801, "rewards/rejected": -8.01400375366211, "step": 5364 }, { "epoch": 0.83, "learning_rate": 1.0212425955703096e-05, "logits/chosen": -2.602182388305664, "logits/rejected": -2.960628032684326, "logps/chosen": -466.2303466796875, "logps/rejected": -886.8829956054688, "loss": 2.0465, "rewards/accuracies": 0.5, "rewards/chosen": -3.1571197509765625, "rewards/margins": 1.49875807762146, "rewards/rejected": -4.655878067016602, "step": 5365 }, { "epoch": 0.83, "learning_rate": 1.0211692515171948e-05, "logits/chosen": -3.2577505111694336, "logits/rejected": -3.027693510055542, "logps/chosen": -405.37127685546875, "logps/rejected": -241.46583557128906, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": 0.1284378170967102, "rewards/margins": 5.017071723937988, "rewards/rejected": -4.888634204864502, "step": 5366 }, { "epoch": 0.83, "learning_rate": 1.0210959074640801e-05, "logits/chosen": -2.736536979675293, "logits/rejected": -2.5008456707000732, "logps/chosen": -242.05889892578125, "logps/rejected": -235.41845703125, "loss": 2.5696, "rewards/accuracies": 0.5, "rewards/chosen": -1.7573720216751099, "rewards/margins": 0.852360725402832, "rewards/rejected": -2.6097328662872314, "step": 5367 }, { "epoch": 0.83, "learning_rate": 1.0210225634109653e-05, "logits/chosen": -1.9684323072433472, "logits/rejected": -2.8581697940826416, "logps/chosen": -391.00543212890625, "logps/rejected": -576.1127319335938, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -0.7561420798301697, "rewards/margins": 4.969925880432129, "rewards/rejected": -5.726068019866943, "step": 5368 }, { "epoch": 0.83, "learning_rate": 1.0209492193578505e-05, "logits/chosen": -2.7473556995391846, "logits/rejected": -2.4255075454711914, "logps/chosen": -106.73411560058594, "logps/rejected": -249.43411254882812, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.019300464540719986, "rewards/margins": 6.888010025024414, "rewards/rejected": -6.907310485839844, "step": 5369 }, { "epoch": 0.84, "learning_rate": 1.0208758753047357e-05, "logits/chosen": -3.139814615249634, "logits/rejected": -2.823206901550293, "logps/chosen": -387.7667236328125, "logps/rejected": -440.6257019042969, "loss": 5.5872, "rewards/accuracies": 0.0, "rewards/chosen": -5.1986470222473145, "rewards/margins": -5.578736305236816, "rewards/rejected": 0.38008958101272583, "step": 5370 }, { "epoch": 0.84, "learning_rate": 1.0208025312516209e-05, "logits/chosen": -1.7907830476760864, "logits/rejected": -2.639824867248535, "logps/chosen": -159.9112548828125, "logps/rejected": -394.1849670410156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.11722946166992188, "rewards/margins": 6.741039276123047, "rewards/rejected": -6.858268737792969, "step": 5371 }, { "epoch": 0.84, "learning_rate": 1.020729187198506e-05, "logits/chosen": -2.7215194702148438, "logits/rejected": -1.6759408712387085, "logps/chosen": -129.9404296875, "logps/rejected": -123.9532470703125, "loss": 0.4747, "rewards/accuracies": 0.5, "rewards/chosen": -0.31805193424224854, "rewards/margins": 5.425124168395996, "rewards/rejected": -5.743175983428955, "step": 5372 }, { "epoch": 0.84, "learning_rate": 1.0206558431453913e-05, "logits/chosen": -2.938065528869629, "logits/rejected": -2.627121925354004, "logps/chosen": -341.2236633300781, "logps/rejected": -161.09933471679688, "loss": 3.4464, "rewards/accuracies": 0.5, "rewards/chosen": -2.536608934402466, "rewards/margins": -1.586511254310608, "rewards/rejected": -0.9500976800918579, "step": 5373 }, { "epoch": 0.84, "learning_rate": 1.0205824990922764e-05, "logits/chosen": -1.8624051809310913, "logits/rejected": -2.9884121417999268, "logps/chosen": -266.04205322265625, "logps/rejected": -357.9117431640625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.06554870307445526, "rewards/margins": 7.482945919036865, "rewards/rejected": -7.548494338989258, "step": 5374 }, { "epoch": 0.84, "learning_rate": 1.0205091550391616e-05, "logits/chosen": -3.3528618812561035, "logits/rejected": -3.18127703666687, "logps/chosen": -88.89511108398438, "logps/rejected": -175.22943115234375, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": 0.1254890412092209, "rewards/margins": 4.135983467102051, "rewards/rejected": -4.010494709014893, "step": 5375 }, { "epoch": 0.84, "learning_rate": 1.020435810986047e-05, "logits/chosen": -2.5399370193481445, "logits/rejected": -2.541196823120117, "logps/chosen": -171.24041748046875, "logps/rejected": -217.488525390625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.42592453956604004, "rewards/margins": 5.464751720428467, "rewards/rejected": -5.890676498413086, "step": 5376 }, { "epoch": 0.84, "learning_rate": 1.0203624669329322e-05, "logits/chosen": -1.819949746131897, "logits/rejected": -2.964827060699463, "logps/chosen": -144.86703491210938, "logps/rejected": -402.90472412109375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.2458038330078125, "rewards/margins": 6.332171440124512, "rewards/rejected": -6.086367607116699, "step": 5377 }, { "epoch": 0.84, "learning_rate": 1.0202891228798175e-05, "logits/chosen": -1.543147325515747, "logits/rejected": -2.5931951999664307, "logps/chosen": -271.93011474609375, "logps/rejected": -285.0776672363281, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.9418067932128906, "rewards/margins": 6.9839277267456055, "rewards/rejected": -6.042120933532715, "step": 5378 }, { "epoch": 0.84, "learning_rate": 1.0202157788267027e-05, "logits/chosen": -2.6468005180358887, "logits/rejected": -1.8601839542388916, "logps/chosen": -157.53079223632812, "logps/rejected": -168.7404327392578, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 0.1972263753414154, "rewards/margins": 5.332791805267334, "rewards/rejected": -5.135565280914307, "step": 5379 }, { "epoch": 0.84, "learning_rate": 1.0201424347735879e-05, "logits/chosen": -1.756083607673645, "logits/rejected": -2.7401037216186523, "logps/chosen": -77.30062866210938, "logps/rejected": -253.0561065673828, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.9197723865509033, "rewards/margins": 6.361991882324219, "rewards/rejected": -5.4422197341918945, "step": 5380 }, { "epoch": 0.84, "learning_rate": 1.0200690907204731e-05, "logits/chosen": -2.6801211833953857, "logits/rejected": -2.966277837753296, "logps/chosen": -592.2755126953125, "logps/rejected": -499.07769775390625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.8804771900177002, "rewards/margins": 5.914216041564941, "rewards/rejected": -5.03373908996582, "step": 5381 }, { "epoch": 0.84, "learning_rate": 1.0199957466673583e-05, "logits/chosen": -2.530805826187134, "logits/rejected": -2.4854037761688232, "logps/chosen": -478.90728759765625, "logps/rejected": -440.17169189453125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.07432860136032104, "rewards/margins": 6.117093563079834, "rewards/rejected": -6.042765140533447, "step": 5382 }, { "epoch": 0.84, "learning_rate": 1.0199224026142435e-05, "logits/chosen": -2.4321234226226807, "logits/rejected": -2.6433026790618896, "logps/chosen": -35.32819366455078, "logps/rejected": -128.44813537597656, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -1.0092194080352783, "rewards/margins": 5.526586532592773, "rewards/rejected": -6.535805702209473, "step": 5383 }, { "epoch": 0.84, "learning_rate": 1.0198490585611287e-05, "logits/chosen": -2.5828440189361572, "logits/rejected": -2.7702417373657227, "logps/chosen": -139.70339965820312, "logps/rejected": -207.81515502929688, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": 0.24257126450538635, "rewards/margins": 4.516792297363281, "rewards/rejected": -4.274220943450928, "step": 5384 }, { "epoch": 0.84, "learning_rate": 1.019775714508014e-05, "logits/chosen": -2.441833734512329, "logits/rejected": -2.9152979850769043, "logps/chosen": -111.35892486572266, "logps/rejected": -410.3870544433594, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.09721909463405609, "rewards/margins": 10.187475204467773, "rewards/rejected": -10.284693717956543, "step": 5385 }, { "epoch": 0.84, "learning_rate": 1.0197023704548992e-05, "logits/chosen": -2.3966145515441895, "logits/rejected": -2.877279758453369, "logps/chosen": -180.1105499267578, "logps/rejected": -276.2644348144531, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -0.4003662168979645, "rewards/margins": 5.976251602172852, "rewards/rejected": -6.376617431640625, "step": 5386 }, { "epoch": 0.84, "learning_rate": 1.0196290264017844e-05, "logits/chosen": -2.422348737716675, "logits/rejected": -2.9367470741271973, "logps/chosen": -236.91668701171875, "logps/rejected": -391.1275634765625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.269989013671875, "rewards/margins": 8.05636215209961, "rewards/rejected": -8.326351165771484, "step": 5387 }, { "epoch": 0.84, "learning_rate": 1.0195556823486696e-05, "logits/chosen": -2.113647699356079, "logits/rejected": -2.640711784362793, "logps/chosen": -276.20404052734375, "logps/rejected": -298.95977783203125, "loss": 3.3832, "rewards/accuracies": 0.5, "rewards/chosen": -2.951235294342041, "rewards/margins": -0.3048524856567383, "rewards/rejected": -2.6463828086853027, "step": 5388 }, { "epoch": 0.84, "learning_rate": 1.0194823382955548e-05, "logits/chosen": -2.5011606216430664, "logits/rejected": -3.1688427925109863, "logps/chosen": -63.512874603271484, "logps/rejected": -220.89776611328125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": 0.9966874122619629, "rewards/margins": 4.614096641540527, "rewards/rejected": -3.6174092292785645, "step": 5389 }, { "epoch": 0.84, "learning_rate": 1.01940899424244e-05, "logits/chosen": -1.798823356628418, "logits/rejected": -2.8903534412384033, "logps/chosen": -66.45974731445312, "logps/rejected": -346.68798828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.07626715302467346, "rewards/margins": 7.479482173919678, "rewards/rejected": -7.403215408325195, "step": 5390 }, { "epoch": 0.84, "learning_rate": 1.0193356501893251e-05, "logits/chosen": -2.734121084213257, "logits/rejected": -2.8251595497131348, "logps/chosen": -186.65017700195312, "logps/rejected": -321.301025390625, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": -0.6716415882110596, "rewards/margins": 2.986813545227051, "rewards/rejected": -3.6584548950195312, "step": 5391 }, { "epoch": 0.84, "learning_rate": 1.0192623061362103e-05, "logits/chosen": -2.5453696250915527, "logits/rejected": -2.5753602981567383, "logps/chosen": -225.33441162109375, "logps/rejected": -303.09246826171875, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.004437640309333801, "rewards/margins": 5.48046875, "rewards/rejected": -5.484906196594238, "step": 5392 }, { "epoch": 0.84, "learning_rate": 1.0191889620830957e-05, "logits/chosen": -2.720100164413452, "logits/rejected": -2.544156312942505, "logps/chosen": -173.5098876953125, "logps/rejected": -236.37722778320312, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.515777587890625, "rewards/margins": 5.448485374450684, "rewards/rejected": -5.964262962341309, "step": 5393 }, { "epoch": 0.84, "learning_rate": 1.0191156180299809e-05, "logits/chosen": -2.8721764087677, "logits/rejected": -2.648050308227539, "logps/chosen": -229.90557861328125, "logps/rejected": -220.34873962402344, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.2266281247138977, "rewards/margins": 5.467637062072754, "rewards/rejected": -5.694265365600586, "step": 5394 }, { "epoch": 0.84, "learning_rate": 1.019042273976866e-05, "logits/chosen": -1.6027165651321411, "logits/rejected": -2.7596566677093506, "logps/chosen": -105.11894989013672, "logps/rejected": -396.1966857910156, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.4856855869293213, "rewards/margins": 7.713247776031494, "rewards/rejected": -7.227561950683594, "step": 5395 }, { "epoch": 0.84, "learning_rate": 1.0189689299237513e-05, "logits/chosen": -2.8073697090148926, "logits/rejected": -2.620305061340332, "logps/chosen": -224.4644775390625, "logps/rejected": -236.1611785888672, "loss": 0.2207, "rewards/accuracies": 1.0, "rewards/chosen": -0.9616625308990479, "rewards/margins": 2.9715168476104736, "rewards/rejected": -3.9331793785095215, "step": 5396 }, { "epoch": 0.84, "learning_rate": 1.0188955858706364e-05, "logits/chosen": -2.588798761367798, "logits/rejected": -2.8636653423309326, "logps/chosen": -280.2501220703125, "logps/rejected": -390.8492431640625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.1169651746749878, "rewards/margins": 5.487701416015625, "rewards/rejected": -6.604666709899902, "step": 5397 }, { "epoch": 0.84, "learning_rate": 1.0188222418175216e-05, "logits/chosen": -2.1448452472686768, "logits/rejected": -2.5393569469451904, "logps/chosen": -189.6361083984375, "logps/rejected": -412.78656005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6929840445518494, "rewards/margins": 11.411470413208008, "rewards/rejected": -10.718485832214355, "step": 5398 }, { "epoch": 0.84, "learning_rate": 1.0187488977644068e-05, "logits/chosen": -2.952075481414795, "logits/rejected": -1.8420140743255615, "logps/chosen": -652.8555908203125, "logps/rejected": -272.849365234375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.10272179543972015, "rewards/margins": 6.280818939208984, "rewards/rejected": -6.383540153503418, "step": 5399 }, { "epoch": 0.84, "learning_rate": 1.018675553711292e-05, "logits/chosen": -2.521557331085205, "logits/rejected": -2.6284291744232178, "logps/chosen": -148.28045654296875, "logps/rejected": -150.80215454101562, "loss": 1.9445, "rewards/accuracies": 0.5, "rewards/chosen": -3.1113693714141846, "rewards/margins": 1.3484128713607788, "rewards/rejected": -4.459782123565674, "step": 5400 }, { "epoch": 0.84, "learning_rate": 1.0186022096581772e-05, "logits/chosen": -1.0487644672393799, "logits/rejected": -2.724289894104004, "logps/chosen": -61.0433464050293, "logps/rejected": -180.16220092773438, "loss": 0.5474, "rewards/accuracies": 0.5, "rewards/chosen": -4.126895427703857, "rewards/margins": 2.5991926193237305, "rewards/rejected": -6.726088047027588, "step": 5401 }, { "epoch": 0.84, "learning_rate": 1.0185288656050626e-05, "logits/chosen": -2.6221542358398438, "logits/rejected": -1.345824122428894, "logps/chosen": -247.86167907714844, "logps/rejected": -157.66876220703125, "loss": 0.741, "rewards/accuracies": 0.5, "rewards/chosen": -1.386415958404541, "rewards/margins": 3.1553800106048584, "rewards/rejected": -4.54179573059082, "step": 5402 }, { "epoch": 0.84, "learning_rate": 1.0184555215519477e-05, "logits/chosen": -1.7265487909317017, "logits/rejected": -2.5338294506073, "logps/chosen": -114.99299621582031, "logps/rejected": -290.6312255859375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.1920210123062134, "rewards/margins": 7.112401962280273, "rewards/rejected": -8.304422378540039, "step": 5403 }, { "epoch": 0.84, "learning_rate": 1.018382177498833e-05, "logits/chosen": -1.7627331018447876, "logits/rejected": -2.991917371749878, "logps/chosen": -96.81938171386719, "logps/rejected": -489.9453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.07962837815284729, "rewards/margins": 8.995765686035156, "rewards/rejected": -9.075393676757812, "step": 5404 }, { "epoch": 0.84, "learning_rate": 1.0183088334457181e-05, "logits/chosen": -2.443804979324341, "logits/rejected": -2.814250946044922, "logps/chosen": -184.6247100830078, "logps/rejected": -280.7375183105469, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.4068443775177002, "rewards/margins": 6.391500949859619, "rewards/rejected": -7.798345565795898, "step": 5405 }, { "epoch": 0.84, "learning_rate": 1.0182354893926033e-05, "logits/chosen": -2.4968650341033936, "logits/rejected": -2.5672988891601562, "logps/chosen": -176.48365783691406, "logps/rejected": -375.678466796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5291076898574829, "rewards/margins": 9.033032417297363, "rewards/rejected": -9.562139511108398, "step": 5406 }, { "epoch": 0.84, "learning_rate": 1.0181621453394885e-05, "logits/chosen": -2.501737356185913, "logits/rejected": -1.7177772521972656, "logps/chosen": -428.5250549316406, "logps/rejected": -340.2760009765625, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.0359535217285156, "rewards/margins": 4.383378028869629, "rewards/rejected": -6.4193315505981445, "step": 5407 }, { "epoch": 0.84, "learning_rate": 1.0180888012863737e-05, "logits/chosen": -2.848097324371338, "logits/rejected": -2.4928207397460938, "logps/chosen": -119.11295318603516, "logps/rejected": -73.54586029052734, "loss": 2.3559, "rewards/accuracies": 0.0, "rewards/chosen": -6.052364349365234, "rewards/margins": -2.0298118591308594, "rewards/rejected": -4.022552013397217, "step": 5408 }, { "epoch": 0.84, "learning_rate": 1.0180154572332589e-05, "logits/chosen": -2.873114585876465, "logits/rejected": -2.285342216491699, "logps/chosen": -215.53956604003906, "logps/rejected": -158.89593505859375, "loss": 3.41, "rewards/accuracies": 0.5, "rewards/chosen": -4.442965984344482, "rewards/margins": -0.6285302639007568, "rewards/rejected": -3.8144357204437256, "step": 5409 }, { "epoch": 0.84, "learning_rate": 1.0179421131801442e-05, "logits/chosen": -1.9596115350723267, "logits/rejected": -2.703366279602051, "logps/chosen": -70.6585922241211, "logps/rejected": -476.35150146484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.1901935636997223, "rewards/margins": 7.450228214263916, "rewards/rejected": -7.6404218673706055, "step": 5410 }, { "epoch": 0.84, "learning_rate": 1.0178687691270294e-05, "logits/chosen": -2.704218864440918, "logits/rejected": -2.522783041000366, "logps/chosen": -129.0606689453125, "logps/rejected": -200.65155029296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6730648279190063, "rewards/margins": 6.809845924377441, "rewards/rejected": -7.482910633087158, "step": 5411 }, { "epoch": 0.84, "learning_rate": 1.0177954250739148e-05, "logits/chosen": -2.5729198455810547, "logits/rejected": -2.866316318511963, "logps/chosen": -122.88274383544922, "logps/rejected": -299.945068359375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 0.9325066208839417, "rewards/margins": 6.047781944274902, "rewards/rejected": -5.1152753829956055, "step": 5412 }, { "epoch": 0.84, "learning_rate": 1.0177220810208e-05, "logits/chosen": -1.9365721940994263, "logits/rejected": -2.5402753353118896, "logps/chosen": -265.39434814453125, "logps/rejected": -262.7035827636719, "loss": 0.4189, "rewards/accuracies": 0.5, "rewards/chosen": -1.7961578369140625, "rewards/margins": 2.8873097896575928, "rewards/rejected": -4.683467864990234, "step": 5413 }, { "epoch": 0.84, "learning_rate": 1.0176487369676851e-05, "logits/chosen": -2.401185989379883, "logits/rejected": -2.725271463394165, "logps/chosen": -49.5355110168457, "logps/rejected": -309.77801513671875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.6253849267959595, "rewards/margins": 6.440854072570801, "rewards/rejected": -8.066239356994629, "step": 5414 }, { "epoch": 0.84, "learning_rate": 1.0175753929145703e-05, "logits/chosen": -2.383629322052002, "logits/rejected": -2.7371256351470947, "logps/chosen": -54.132843017578125, "logps/rejected": -158.63259887695312, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -2.317239761352539, "rewards/margins": 5.264382362365723, "rewards/rejected": -7.581622123718262, "step": 5415 }, { "epoch": 0.84, "learning_rate": 1.0175020488614555e-05, "logits/chosen": -3.1638376712799072, "logits/rejected": -2.9678092002868652, "logps/chosen": -1039.2337646484375, "logps/rejected": -682.8526000976562, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.5835937261581421, "rewards/margins": 6.376333236694336, "rewards/rejected": -6.959927558898926, "step": 5416 }, { "epoch": 0.84, "learning_rate": 1.0174287048083407e-05, "logits/chosen": -3.0471928119659424, "logits/rejected": -2.1875662803649902, "logps/chosen": -631.3973999023438, "logps/rejected": -225.01510620117188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0143754482269287, "rewards/margins": 7.104292869567871, "rewards/rejected": -8.118668556213379, "step": 5417 }, { "epoch": 0.84, "learning_rate": 1.0173553607552259e-05, "logits/chosen": -3.0249826908111572, "logits/rejected": -2.496428966522217, "logps/chosen": -357.7093505859375, "logps/rejected": -170.24803161621094, "loss": 4.1008, "rewards/accuracies": 0.5, "rewards/chosen": -5.050886154174805, "rewards/margins": -0.8500185012817383, "rewards/rejected": -4.200867652893066, "step": 5418 }, { "epoch": 0.84, "learning_rate": 1.017282016702111e-05, "logits/chosen": -2.8104426860809326, "logits/rejected": -2.910431385040283, "logps/chosen": -125.92704772949219, "logps/rejected": -239.849853515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.22881737351417542, "rewards/margins": 7.592275619506836, "rewards/rejected": -7.821093559265137, "step": 5419 }, { "epoch": 0.84, "learning_rate": 1.0172086726489964e-05, "logits/chosen": -2.0871424674987793, "logits/rejected": -2.966925621032715, "logps/chosen": -227.60302734375, "logps/rejected": -337.2774963378906, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.6763557195663452, "rewards/margins": 5.123342514038086, "rewards/rejected": -5.799698829650879, "step": 5420 }, { "epoch": 0.84, "learning_rate": 1.0171353285958816e-05, "logits/chosen": -1.1463178396224976, "logits/rejected": -1.6351028680801392, "logps/chosen": -118.7669677734375, "logps/rejected": -332.42724609375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.20510812103748322, "rewards/margins": 5.137659072875977, "rewards/rejected": -5.342767715454102, "step": 5421 }, { "epoch": 0.84, "learning_rate": 1.0170619845427668e-05, "logits/chosen": -2.80702805519104, "logits/rejected": -2.51310396194458, "logps/chosen": -248.65289306640625, "logps/rejected": -78.28611755371094, "loss": 4.2015, "rewards/accuracies": 0.5, "rewards/chosen": -4.300327777862549, "rewards/margins": -1.434142827987671, "rewards/rejected": -2.866185426712036, "step": 5422 }, { "epoch": 0.84, "learning_rate": 1.016988640489652e-05, "logits/chosen": -2.0760140419006348, "logits/rejected": -2.8240585327148438, "logps/chosen": -48.28756332397461, "logps/rejected": -165.98101806640625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.8188316822052002, "rewards/margins": 6.17350435256958, "rewards/rejected": -7.992336273193359, "step": 5423 }, { "epoch": 0.84, "learning_rate": 1.0169152964365372e-05, "logits/chosen": -1.50094473361969, "logits/rejected": -2.780728578567505, "logps/chosen": -104.68719482421875, "logps/rejected": -287.65008544921875, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.0784801244735718, "rewards/margins": 4.509004592895508, "rewards/rejected": -5.587484836578369, "step": 5424 }, { "epoch": 0.84, "learning_rate": 1.0168419523834224e-05, "logits/chosen": -2.700608730316162, "logits/rejected": -2.7852416038513184, "logps/chosen": -229.32955932617188, "logps/rejected": -213.04702758789062, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.9584286212921143, "rewards/margins": 5.670191764831543, "rewards/rejected": -7.628620147705078, "step": 5425 }, { "epoch": 0.84, "learning_rate": 1.0167686083303076e-05, "logits/chosen": -2.3429105281829834, "logits/rejected": -2.5940022468566895, "logps/chosen": -84.38459777832031, "logps/rejected": -260.906494140625, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -2.8089566230773926, "rewards/margins": 3.5167996883392334, "rewards/rejected": -6.325756072998047, "step": 5426 }, { "epoch": 0.84, "learning_rate": 1.0166952642771928e-05, "logits/chosen": -1.5424193143844604, "logits/rejected": -2.8832199573516846, "logps/chosen": -91.75094604492188, "logps/rejected": -315.2899169921875, "loss": 0.2555, "rewards/accuracies": 1.0, "rewards/chosen": -4.01582145690918, "rewards/margins": 4.245861530303955, "rewards/rejected": -8.261683464050293, "step": 5427 }, { "epoch": 0.84, "learning_rate": 1.016621920224078e-05, "logits/chosen": -1.4529247283935547, "logits/rejected": -2.3328683376312256, "logps/chosen": -137.5891571044922, "logps/rejected": -282.302001953125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.6619457006454468, "rewards/margins": 5.236166954040527, "rewards/rejected": -5.8981122970581055, "step": 5428 }, { "epoch": 0.84, "learning_rate": 1.0165485761709633e-05, "logits/chosen": -2.117184638977051, "logits/rejected": -2.753889560699463, "logps/chosen": -255.98548889160156, "logps/rejected": -494.55450439453125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.2783222198486328, "rewards/margins": 6.429975509643555, "rewards/rejected": -7.7082977294921875, "step": 5429 }, { "epoch": 0.84, "learning_rate": 1.0164752321178485e-05, "logits/chosen": -2.6267752647399902, "logits/rejected": -2.4842731952667236, "logps/chosen": -102.38194274902344, "logps/rejected": -152.4288787841797, "loss": 0.0645, "rewards/accuracies": 1.0, "rewards/chosen": -2.4369118213653564, "rewards/margins": 4.4956440925598145, "rewards/rejected": -6.93255615234375, "step": 5430 }, { "epoch": 0.84, "learning_rate": 1.0164018880647337e-05, "logits/chosen": -1.992472529411316, "logits/rejected": -2.7794861793518066, "logps/chosen": -95.09273529052734, "logps/rejected": -325.4092712402344, "loss": 0.0494, "rewards/accuracies": 1.0, "rewards/chosen": -3.734577178955078, "rewards/margins": 3.9724483489990234, "rewards/rejected": -7.707025527954102, "step": 5431 }, { "epoch": 0.84, "learning_rate": 1.0163285440116189e-05, "logits/chosen": -1.6511437892913818, "logits/rejected": -2.1343283653259277, "logps/chosen": -129.42108154296875, "logps/rejected": -368.312744140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0137076377868652, "rewards/margins": 7.611382007598877, "rewards/rejected": -8.625089645385742, "step": 5432 }, { "epoch": 0.84, "learning_rate": 1.016255199958504e-05, "logits/chosen": -2.5695877075195312, "logits/rejected": -2.508354663848877, "logps/chosen": -288.8708801269531, "logps/rejected": -430.59027099609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.809413433074951, "rewards/margins": 8.8330717086792, "rewards/rejected": -11.642484664916992, "step": 5433 }, { "epoch": 0.85, "learning_rate": 1.0161818559053892e-05, "logits/chosen": -2.6471383571624756, "logits/rejected": -3.0406270027160645, "logps/chosen": -28.285213470458984, "logps/rejected": -240.7387237548828, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.5481820106506348, "rewards/margins": 8.577354431152344, "rewards/rejected": -9.12553596496582, "step": 5434 }, { "epoch": 0.85, "learning_rate": 1.0161085118522744e-05, "logits/chosen": -2.832198143005371, "logits/rejected": -1.9367319345474243, "logps/chosen": -187.03463745117188, "logps/rejected": -147.42657470703125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.3520435094833374, "rewards/margins": 6.1997528076171875, "rewards/rejected": -6.551795959472656, "step": 5435 }, { "epoch": 0.85, "learning_rate": 1.0160351677991596e-05, "logits/chosen": -1.7009387016296387, "logits/rejected": -2.840881109237671, "logps/chosen": -123.73445129394531, "logps/rejected": -331.7643737792969, "loss": 0.2442, "rewards/accuracies": 1.0, "rewards/chosen": -2.6366119384765625, "rewards/margins": 4.0975236892700195, "rewards/rejected": -6.734135627746582, "step": 5436 }, { "epoch": 0.85, "learning_rate": 1.0159618237460448e-05, "logits/chosen": -1.7676793336868286, "logits/rejected": -2.587696075439453, "logps/chosen": -205.98243713378906, "logps/rejected": -293.02874755859375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.0542190074920654, "rewards/margins": 7.232434272766113, "rewards/rejected": -8.286653518676758, "step": 5437 }, { "epoch": 0.85, "learning_rate": 1.0158884796929302e-05, "logits/chosen": -2.903867244720459, "logits/rejected": -2.0973622798919678, "logps/chosen": -214.16986083984375, "logps/rejected": -83.79087829589844, "loss": 1.9895, "rewards/accuracies": 0.5, "rewards/chosen": -5.0945305824279785, "rewards/margins": -1.5865038633346558, "rewards/rejected": -3.508026599884033, "step": 5438 }, { "epoch": 0.85, "learning_rate": 1.0158151356398153e-05, "logits/chosen": -2.083411931991577, "logits/rejected": -2.98931622505188, "logps/chosen": -171.15200805664062, "logps/rejected": -592.1376342773438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2285130023956299, "rewards/margins": 12.999046325683594, "rewards/rejected": -14.227559089660645, "step": 5439 }, { "epoch": 0.85, "learning_rate": 1.0157417915867005e-05, "logits/chosen": -1.863904595375061, "logits/rejected": -3.0166516304016113, "logps/chosen": -66.263671875, "logps/rejected": -305.9683532714844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2567453384399414, "rewards/margins": 7.036115646362305, "rewards/rejected": -8.292860984802246, "step": 5440 }, { "epoch": 0.85, "learning_rate": 1.0156684475335857e-05, "logits/chosen": -2.1138341426849365, "logits/rejected": -2.575604200363159, "logps/chosen": -150.39389038085938, "logps/rejected": -241.99143981933594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.22152289748191833, "rewards/margins": 8.061623573303223, "rewards/rejected": -7.8401007652282715, "step": 5441 }, { "epoch": 0.85, "learning_rate": 1.0155951034804709e-05, "logits/chosen": -1.5644994974136353, "logits/rejected": -2.6334121227264404, "logps/chosen": -160.1785888671875, "logps/rejected": -319.83465576171875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.0449260473251343, "rewards/margins": 7.553004264831543, "rewards/rejected": -8.597929954528809, "step": 5442 }, { "epoch": 0.85, "learning_rate": 1.0155217594273561e-05, "logits/chosen": -2.663663864135742, "logits/rejected": -1.8107991218566895, "logps/chosen": -201.1173858642578, "logps/rejected": -129.97317504882812, "loss": 2.2051, "rewards/accuracies": 0.5, "rewards/chosen": -4.519281387329102, "rewards/margins": 0.42562103271484375, "rewards/rejected": -4.944902420043945, "step": 5443 }, { "epoch": 0.85, "learning_rate": 1.0154484153742415e-05, "logits/chosen": -2.065268039703369, "logits/rejected": -2.817456007003784, "logps/chosen": -77.44670104980469, "logps/rejected": -263.145751953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0063925981521606, "rewards/margins": 6.561391830444336, "rewards/rejected": -7.567784309387207, "step": 5444 }, { "epoch": 0.85, "learning_rate": 1.0153750713211266e-05, "logits/chosen": -1.5377168655395508, "logits/rejected": -2.6557300090789795, "logps/chosen": -109.77217102050781, "logps/rejected": -270.59808349609375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7388797998428345, "rewards/margins": 7.166932106018066, "rewards/rejected": -7.9058122634887695, "step": 5445 }, { "epoch": 0.85, "learning_rate": 1.0153017272680118e-05, "logits/chosen": -2.2190170288085938, "logits/rejected": -2.9204063415527344, "logps/chosen": -91.9146957397461, "logps/rejected": -338.98431396484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7601273059844971, "rewards/margins": 7.472052097320557, "rewards/rejected": -8.232179641723633, "step": 5446 }, { "epoch": 0.85, "learning_rate": 1.0152283832148972e-05, "logits/chosen": -1.9518946409225464, "logits/rejected": -2.7601842880249023, "logps/chosen": -84.75144958496094, "logps/rejected": -275.9171142578125, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -1.9227025508880615, "rewards/margins": 3.6714389324188232, "rewards/rejected": -5.594141483306885, "step": 5447 }, { "epoch": 0.85, "learning_rate": 1.0151550391617824e-05, "logits/chosen": -2.4163501262664795, "logits/rejected": -3.131843090057373, "logps/chosen": -38.37456512451172, "logps/rejected": -283.29681396484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9163949489593506, "rewards/margins": 8.438407897949219, "rewards/rejected": -9.354803085327148, "step": 5448 }, { "epoch": 0.85, "learning_rate": 1.0150816951086676e-05, "logits/chosen": -2.9051342010498047, "logits/rejected": -2.549453020095825, "logps/chosen": -194.7889404296875, "logps/rejected": -105.25216674804688, "loss": 3.2348, "rewards/accuracies": 0.0, "rewards/chosen": -5.0507330894470215, "rewards/margins": -3.111405372619629, "rewards/rejected": -1.9393280744552612, "step": 5449 }, { "epoch": 0.85, "learning_rate": 1.0150083510555528e-05, "logits/chosen": -2.887160539627075, "logits/rejected": -2.9147355556488037, "logps/chosen": -129.5459747314453, "logps/rejected": -204.45773315429688, "loss": 2.6001, "rewards/accuracies": 0.5, "rewards/chosen": -3.130152463912964, "rewards/margins": 2.016995906829834, "rewards/rejected": -5.147148609161377, "step": 5450 }, { "epoch": 0.85, "learning_rate": 1.014935007002438e-05, "logits/chosen": -2.7101521492004395, "logits/rejected": -2.5002596378326416, "logps/chosen": -363.1300354003906, "logps/rejected": -333.42877197265625, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -0.31342852115631104, "rewards/margins": 6.31695032119751, "rewards/rejected": -6.6303791999816895, "step": 5451 }, { "epoch": 0.85, "learning_rate": 1.0148616629493231e-05, "logits/chosen": -2.4844753742218018, "logits/rejected": -3.1277658939361572, "logps/chosen": -48.70012283325195, "logps/rejected": -200.93154907226562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.4120120704174042, "rewards/margins": 7.753609657287598, "rewards/rejected": -8.165621757507324, "step": 5452 }, { "epoch": 0.85, "learning_rate": 1.0147883188962083e-05, "logits/chosen": -2.9072089195251465, "logits/rejected": -1.9480661153793335, "logps/chosen": -360.7107238769531, "logps/rejected": -286.0713806152344, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -1.637690782546997, "rewards/margins": 4.766879081726074, "rewards/rejected": -6.40457010269165, "step": 5453 }, { "epoch": 0.85, "learning_rate": 1.0147149748430935e-05, "logits/chosen": -2.881685972213745, "logits/rejected": -2.1260673999786377, "logps/chosen": -247.52894592285156, "logps/rejected": -180.82052612304688, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -1.1301517486572266, "rewards/margins": 3.9692254066467285, "rewards/rejected": -5.099377632141113, "step": 5454 }, { "epoch": 0.85, "learning_rate": 1.0146416307899787e-05, "logits/chosen": -2.733243703842163, "logits/rejected": -2.7106146812438965, "logps/chosen": -152.44070434570312, "logps/rejected": -193.751220703125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.3600940704345703, "rewards/margins": 6.76092529296875, "rewards/rejected": -7.12101936340332, "step": 5455 }, { "epoch": 0.85, "learning_rate": 1.014568286736864e-05, "logits/chosen": -2.573901891708374, "logits/rejected": -2.3672714233398438, "logps/chosen": -476.4048156738281, "logps/rejected": -544.581298828125, "loss": 2.7223, "rewards/accuracies": 0.5, "rewards/chosen": -4.659963130950928, "rewards/margins": 0.49248409271240234, "rewards/rejected": -5.15244722366333, "step": 5456 }, { "epoch": 0.85, "learning_rate": 1.0144949426837492e-05, "logits/chosen": -2.606173276901245, "logits/rejected": -2.7862863540649414, "logps/chosen": -179.49896240234375, "logps/rejected": -306.1455993652344, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0865898132324219, "rewards/margins": 6.230978488922119, "rewards/rejected": -7.317568302154541, "step": 5457 }, { "epoch": 0.85, "learning_rate": 1.0144215986306344e-05, "logits/chosen": -2.207916498184204, "logits/rejected": -2.555692434310913, "logps/chosen": -176.61395263671875, "logps/rejected": -233.01235961914062, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.569226086139679, "rewards/margins": 6.3747453689575195, "rewards/rejected": -6.943971633911133, "step": 5458 }, { "epoch": 0.85, "learning_rate": 1.0143482545775196e-05, "logits/chosen": -2.234790325164795, "logits/rejected": -2.4789488315582275, "logps/chosen": -230.59381103515625, "logps/rejected": -421.1326904296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.5696756839752197, "rewards/margins": 8.424184799194336, "rewards/rejected": -9.993860244750977, "step": 5459 }, { "epoch": 0.85, "learning_rate": 1.0142749105244048e-05, "logits/chosen": -2.518470048904419, "logits/rejected": -1.9573103189468384, "logps/chosen": -262.48870849609375, "logps/rejected": -273.8224792480469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.1112411618232727, "rewards/margins": 8.55434799194336, "rewards/rejected": -8.66558837890625, "step": 5460 }, { "epoch": 0.85, "learning_rate": 1.01420156647129e-05, "logits/chosen": -2.8350846767425537, "logits/rejected": -2.977893590927124, "logps/chosen": -201.37420654296875, "logps/rejected": -357.0943298339844, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.5551223754882812, "rewards/margins": 7.3404693603515625, "rewards/rejected": -7.895591735839844, "step": 5461 }, { "epoch": 0.85, "learning_rate": 1.0141282224181752e-05, "logits/chosen": -1.9529417753219604, "logits/rejected": -2.7664639949798584, "logps/chosen": -277.51531982421875, "logps/rejected": -319.9870300292969, "loss": 2.6081, "rewards/accuracies": 0.5, "rewards/chosen": -4.759659767150879, "rewards/margins": 0.7038857936859131, "rewards/rejected": -5.463545322418213, "step": 5462 }, { "epoch": 0.85, "learning_rate": 1.0140548783650604e-05, "logits/chosen": -1.7357439994812012, "logits/rejected": -2.8529560565948486, "logps/chosen": -158.78564453125, "logps/rejected": -356.3135986328125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.121957778930664, "rewards/margins": 6.511486530303955, "rewards/rejected": -7.633444309234619, "step": 5463 }, { "epoch": 0.85, "learning_rate": 1.0139815343119456e-05, "logits/chosen": -2.443895101547241, "logits/rejected": -2.72370982170105, "logps/chosen": -53.65545654296875, "logps/rejected": -200.3848114013672, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9276517033576965, "rewards/margins": 8.165534019470215, "rewards/rejected": -9.093185424804688, "step": 5464 }, { "epoch": 0.85, "learning_rate": 1.0139081902588309e-05, "logits/chosen": -2.1825010776519775, "logits/rejected": -2.881596803665161, "logps/chosen": -114.7957763671875, "logps/rejected": -250.22425842285156, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.155362367630005, "rewards/margins": 6.460938453674316, "rewards/rejected": -8.616300582885742, "step": 5465 }, { "epoch": 0.85, "learning_rate": 1.0138348462057161e-05, "logits/chosen": -2.765238046646118, "logits/rejected": -2.747260570526123, "logps/chosen": -420.7652282714844, "logps/rejected": -519.5397338867188, "loss": 3.2603, "rewards/accuracies": 0.5, "rewards/chosen": -5.553620338439941, "rewards/margins": -0.055071115493774414, "rewards/rejected": -5.498548984527588, "step": 5466 }, { "epoch": 0.85, "learning_rate": 1.0137615021526013e-05, "logits/chosen": -1.605947732925415, "logits/rejected": -2.7508175373077393, "logps/chosen": -116.67485809326172, "logps/rejected": -587.8111572265625, "loss": 4.5168, "rewards/accuracies": 0.5, "rewards/chosen": -4.071020603179932, "rewards/margins": -0.9356002807617188, "rewards/rejected": -3.135420322418213, "step": 5467 }, { "epoch": 0.85, "learning_rate": 1.0136881580994865e-05, "logits/chosen": -2.392247438430786, "logits/rejected": -2.7299606800079346, "logps/chosen": -95.7047348022461, "logps/rejected": -350.970703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.866192638874054, "rewards/margins": 8.325557708740234, "rewards/rejected": -9.191749572753906, "step": 5468 }, { "epoch": 0.85, "learning_rate": 1.0136148140463717e-05, "logits/chosen": -1.5257731676101685, "logits/rejected": -2.9174413681030273, "logps/chosen": -534.4522705078125, "logps/rejected": -727.8197631835938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4834625720977783, "rewards/margins": 8.555959701538086, "rewards/rejected": -10.039421081542969, "step": 5469 }, { "epoch": 0.85, "learning_rate": 1.0135414699932568e-05, "logits/chosen": -1.8956917524337769, "logits/rejected": -2.758620500564575, "logps/chosen": -416.51666259765625, "logps/rejected": -712.2196044921875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.9678258895874023, "rewards/margins": 6.422521591186523, "rewards/rejected": -8.390347480773926, "step": 5470 }, { "epoch": 0.85, "learning_rate": 1.013468125940142e-05, "logits/chosen": -1.6520171165466309, "logits/rejected": -2.55804705619812, "logps/chosen": -95.25093841552734, "logps/rejected": -249.1148681640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.3768675327301025, "rewards/margins": 6.918136119842529, "rewards/rejected": -8.295003890991211, "step": 5471 }, { "epoch": 0.85, "learning_rate": 1.0133947818870272e-05, "logits/chosen": -2.710916042327881, "logits/rejected": -3.164478063583374, "logps/chosen": -268.7595520019531, "logps/rejected": -267.99456787109375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.441375732421875, "rewards/margins": 5.29745626449585, "rewards/rejected": -6.738831996917725, "step": 5472 }, { "epoch": 0.85, "learning_rate": 1.0133214378339124e-05, "logits/chosen": -1.5335078239440918, "logits/rejected": -3.135118007659912, "logps/chosen": -54.249916076660156, "logps/rejected": -370.59466552734375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -2.2360029220581055, "rewards/margins": 4.201326370239258, "rewards/rejected": -6.437329292297363, "step": 5473 }, { "epoch": 0.85, "learning_rate": 1.0132480937807978e-05, "logits/chosen": -1.2970048189163208, "logits/rejected": -2.8502795696258545, "logps/chosen": -136.57867431640625, "logps/rejected": -299.6847839355469, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -2.158254623413086, "rewards/margins": 5.529726505279541, "rewards/rejected": -7.687980651855469, "step": 5474 }, { "epoch": 0.85, "learning_rate": 1.013174749727683e-05, "logits/chosen": -2.754434585571289, "logits/rejected": -3.2309749126434326, "logps/chosen": -247.83416748046875, "logps/rejected": -441.66009521484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.5887252688407898, "rewards/margins": 7.40312385559082, "rewards/rejected": -7.991848945617676, "step": 5475 }, { "epoch": 0.85, "learning_rate": 1.0131014056745681e-05, "logits/chosen": -2.5886874198913574, "logits/rejected": -2.8393733501434326, "logps/chosen": -125.71975708007812, "logps/rejected": -281.2151184082031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.292802095413208, "rewards/margins": 8.619901657104492, "rewards/rejected": -9.912703514099121, "step": 5476 }, { "epoch": 0.85, "learning_rate": 1.0130280616214533e-05, "logits/chosen": -2.6513664722442627, "logits/rejected": -1.8751118183135986, "logps/chosen": -253.26324462890625, "logps/rejected": -299.23223876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4453933238983154, "rewards/margins": 8.457594871520996, "rewards/rejected": -9.90298843383789, "step": 5477 }, { "epoch": 0.85, "learning_rate": 1.0129547175683387e-05, "logits/chosen": -2.4867477416992188, "logits/rejected": -2.7458958625793457, "logps/chosen": -502.0019836425781, "logps/rejected": -492.1638488769531, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -3.265655994415283, "rewards/margins": 5.463947772979736, "rewards/rejected": -8.72960376739502, "step": 5478 }, { "epoch": 0.85, "learning_rate": 1.0128813735152239e-05, "logits/chosen": -2.723863363265991, "logits/rejected": -1.8389363288879395, "logps/chosen": -669.24755859375, "logps/rejected": -438.7355651855469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.44363176822662354, "rewards/margins": 8.744977951049805, "rewards/rejected": -9.18860912322998, "step": 5479 }, { "epoch": 0.85, "learning_rate": 1.012808029462109e-05, "logits/chosen": -2.6674084663391113, "logits/rejected": -2.77858567237854, "logps/chosen": -130.02639770507812, "logps/rejected": -241.8079833984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.861158013343811, "rewards/margins": 7.092014789581299, "rewards/rejected": -7.95317268371582, "step": 5480 }, { "epoch": 0.85, "learning_rate": 1.0127346854089943e-05, "logits/chosen": -1.920580506324768, "logits/rejected": -2.792553424835205, "logps/chosen": -240.0009765625, "logps/rejected": -660.4918212890625, "loss": 2.8842, "rewards/accuracies": 0.5, "rewards/chosen": -4.792350769042969, "rewards/margins": 1.2225754261016846, "rewards/rejected": -6.014926433563232, "step": 5481 }, { "epoch": 0.85, "learning_rate": 1.0126613413558794e-05, "logits/chosen": -2.433971405029297, "logits/rejected": -2.8508052825927734, "logps/chosen": -195.83212280273438, "logps/rejected": -345.6236572265625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.6668503284454346, "rewards/margins": 5.444732666015625, "rewards/rejected": -7.1115827560424805, "step": 5482 }, { "epoch": 0.85, "learning_rate": 1.0125879973027648e-05, "logits/chosen": -1.7981464862823486, "logits/rejected": -2.4474575519561768, "logps/chosen": -154.97183227539062, "logps/rejected": -415.73675537109375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.568666458129883, "rewards/margins": 5.61737060546875, "rewards/rejected": -8.186037063598633, "step": 5483 }, { "epoch": 0.85, "learning_rate": 1.01251465324965e-05, "logits/chosen": -2.750633955001831, "logits/rejected": -2.6009175777435303, "logps/chosen": -641.6754760742188, "logps/rejected": -609.9673461914062, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.6668281555175781, "rewards/margins": 8.137472152709961, "rewards/rejected": -8.804300308227539, "step": 5484 }, { "epoch": 0.85, "learning_rate": 1.0124413091965352e-05, "logits/chosen": -1.5725947618484497, "logits/rejected": -2.7497239112854004, "logps/chosen": -159.5113983154297, "logps/rejected": -383.923828125, "loss": 3.2215, "rewards/accuracies": 0.5, "rewards/chosen": -4.295314311981201, "rewards/margins": 1.4695825576782227, "rewards/rejected": -5.764896869659424, "step": 5485 }, { "epoch": 0.85, "learning_rate": 1.0123679651434204e-05, "logits/chosen": -2.9067676067352295, "logits/rejected": -2.8076565265655518, "logps/chosen": -417.91180419921875, "logps/rejected": -207.234130859375, "loss": 7.6312, "rewards/accuracies": 0.0, "rewards/chosen": -8.722221374511719, "rewards/margins": -7.629298210144043, "rewards/rejected": -1.0929229259490967, "step": 5486 }, { "epoch": 0.85, "learning_rate": 1.0122946210903056e-05, "logits/chosen": -1.5220497846603394, "logits/rejected": -2.9770777225494385, "logps/chosen": -53.456382751464844, "logps/rejected": -474.8238830566406, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.24017333984375, "rewards/margins": 7.91965389251709, "rewards/rejected": -9.15982723236084, "step": 5487 }, { "epoch": 0.85, "learning_rate": 1.0122212770371907e-05, "logits/chosen": -3.0173962116241455, "logits/rejected": -2.6732232570648193, "logps/chosen": -283.0360412597656, "logps/rejected": -358.0788269042969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.0973153114318848, "rewards/margins": 9.342472076416016, "rewards/rejected": -11.439786911010742, "step": 5488 }, { "epoch": 0.85, "learning_rate": 1.012147932984076e-05, "logits/chosen": -2.901048183441162, "logits/rejected": -2.354475736618042, "logps/chosen": -288.9391174316406, "logps/rejected": -105.80241394042969, "loss": 6.7009, "rewards/accuracies": 0.0, "rewards/chosen": -9.306595802307129, "rewards/margins": -6.699501037597656, "rewards/rejected": -2.6070947647094727, "step": 5489 }, { "epoch": 0.85, "learning_rate": 1.0120745889309611e-05, "logits/chosen": -1.3748480081558228, "logits/rejected": -1.812052845954895, "logps/chosen": -177.52279663085938, "logps/rejected": -297.53369140625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.5794460773468018, "rewards/margins": 6.053184986114502, "rewards/rejected": -8.632631301879883, "step": 5490 }, { "epoch": 0.85, "learning_rate": 1.0120012448778465e-05, "logits/chosen": -2.0249195098876953, "logits/rejected": -2.85219407081604, "logps/chosen": -97.67530059814453, "logps/rejected": -241.96951293945312, "loss": 2.336, "rewards/accuracies": 0.5, "rewards/chosen": -5.26835823059082, "rewards/margins": 2.333230495452881, "rewards/rejected": -7.601588726043701, "step": 5491 }, { "epoch": 0.85, "learning_rate": 1.0119279008247317e-05, "logits/chosen": -0.9169069528579712, "logits/rejected": -2.7636475563049316, "logps/chosen": -39.24369430541992, "logps/rejected": -262.4831848144531, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.311229705810547, "rewards/margins": 6.068760871887207, "rewards/rejected": -8.379990577697754, "step": 5492 }, { "epoch": 0.85, "learning_rate": 1.0118545567716168e-05, "logits/chosen": -2.9488718509674072, "logits/rejected": -2.309170961380005, "logps/chosen": -177.7176513671875, "logps/rejected": -202.84664916992188, "loss": 0.1998, "rewards/accuracies": 1.0, "rewards/chosen": -1.5757637023925781, "rewards/margins": 5.353277683258057, "rewards/rejected": -6.929041385650635, "step": 5493 }, { "epoch": 0.85, "learning_rate": 1.011781212718502e-05, "logits/chosen": -1.169130802154541, "logits/rejected": -2.4284913539886475, "logps/chosen": -64.00823974609375, "logps/rejected": -275.4500732421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.237160325050354, "rewards/margins": 6.931643962860107, "rewards/rejected": -8.168804168701172, "step": 5494 }, { "epoch": 0.85, "learning_rate": 1.0117078686653872e-05, "logits/chosen": -2.130974531173706, "logits/rejected": -2.602827310562134, "logps/chosen": -121.9847640991211, "logps/rejected": -397.0296630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6387038230895996, "rewards/margins": 11.064468383789062, "rewards/rejected": -12.70317268371582, "step": 5495 }, { "epoch": 0.85, "learning_rate": 1.0116345246122724e-05, "logits/chosen": -2.8842625617980957, "logits/rejected": -2.5806386470794678, "logps/chosen": -271.4696960449219, "logps/rejected": -207.25033569335938, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.1771202087402344, "rewards/margins": 4.933786392211914, "rewards/rejected": -6.110906600952148, "step": 5496 }, { "epoch": 0.85, "learning_rate": 1.0115611805591576e-05, "logits/chosen": -1.1364707946777344, "logits/rejected": -2.6090567111968994, "logps/chosen": -70.1654052734375, "logps/rejected": -330.3968811035156, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6680545806884766, "rewards/margins": 7.555201530456543, "rewards/rejected": -9.22325611114502, "step": 5497 }, { "epoch": 0.86, "learning_rate": 1.0114878365060428e-05, "logits/chosen": -2.161043643951416, "logits/rejected": -2.6323792934417725, "logps/chosen": -40.311439514160156, "logps/rejected": -224.02952575683594, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.8404054641723633, "rewards/margins": 5.569549560546875, "rewards/rejected": -7.409955024719238, "step": 5498 }, { "epoch": 0.86, "learning_rate": 1.011414492452928e-05, "logits/chosen": -1.8902627229690552, "logits/rejected": -2.9596328735351562, "logps/chosen": -88.87928771972656, "logps/rejected": -278.67828369140625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.947105050086975, "rewards/margins": 7.26735782623291, "rewards/rejected": -9.214462280273438, "step": 5499 }, { "epoch": 0.86, "learning_rate": 1.0113411483998133e-05, "logits/chosen": -2.811939001083374, "logits/rejected": -3.042187213897705, "logps/chosen": -123.70230102539062, "logps/rejected": -233.35031127929688, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -2.238931655883789, "rewards/margins": 5.384729385375977, "rewards/rejected": -7.623661041259766, "step": 5500 }, { "epoch": 0.86, "learning_rate": 1.0112678043466985e-05, "logits/chosen": -2.738604784011841, "logits/rejected": -2.4702906608581543, "logps/chosen": -620.983642578125, "logps/rejected": -540.5973510742188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.6319046020507812, "rewards/margins": 7.806057929992676, "rewards/rejected": -11.437962532043457, "step": 5501 }, { "epoch": 0.86, "learning_rate": 1.0111944602935837e-05, "logits/chosen": -3.015341281890869, "logits/rejected": -2.0271804332733154, "logps/chosen": -247.21707153320312, "logps/rejected": -145.95108032226562, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 0.137098491191864, "rewards/margins": 5.033106803894043, "rewards/rejected": -4.896008491516113, "step": 5502 }, { "epoch": 0.86, "learning_rate": 1.0111211162404689e-05, "logits/chosen": -1.6468490362167358, "logits/rejected": -3.030276298522949, "logps/chosen": -147.75250244140625, "logps/rejected": -326.736328125, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -2.337843418121338, "rewards/margins": 5.286306381225586, "rewards/rejected": -7.624149799346924, "step": 5503 }, { "epoch": 0.86, "learning_rate": 1.011047772187354e-05, "logits/chosen": -1.6996616125106812, "logits/rejected": -1.8476186990737915, "logps/chosen": -112.28103637695312, "logps/rejected": -213.97409057617188, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.835534691810608, "rewards/margins": 7.082625389099121, "rewards/rejected": -8.918160438537598, "step": 5504 }, { "epoch": 0.86, "learning_rate": 1.0109744281342393e-05, "logits/chosen": -2.7588589191436768, "logits/rejected": -2.977820634841919, "logps/chosen": -139.7034454345703, "logps/rejected": -108.656982421875, "loss": 3.9566, "rewards/accuracies": 0.5, "rewards/chosen": -5.135817527770996, "rewards/margins": 0.2644937038421631, "rewards/rejected": -5.40031099319458, "step": 5505 }, { "epoch": 0.86, "learning_rate": 1.0109010840811245e-05, "logits/chosen": -2.8392207622528076, "logits/rejected": -3.1449058055877686, "logps/chosen": -74.10813903808594, "logps/rejected": -161.690673828125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.201277017593384, "rewards/margins": 6.28146505355835, "rewards/rejected": -8.482742309570312, "step": 5506 }, { "epoch": 0.86, "learning_rate": 1.0108277400280096e-05, "logits/chosen": -3.2116472721099854, "logits/rejected": -2.5088858604431152, "logps/chosen": -351.836669921875, "logps/rejected": -98.84896850585938, "loss": 5.7346, "rewards/accuracies": 0.0, "rewards/chosen": -7.690986633300781, "rewards/margins": -5.731122970581055, "rewards/rejected": -1.9598634243011475, "step": 5507 }, { "epoch": 0.86, "learning_rate": 1.0107543959748948e-05, "logits/chosen": -1.542871117591858, "logits/rejected": -3.16747784614563, "logps/chosen": -62.826812744140625, "logps/rejected": -637.3553466796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.5632930994033813, "rewards/margins": 7.090647220611572, "rewards/rejected": -8.653940200805664, "step": 5508 }, { "epoch": 0.86, "learning_rate": 1.0106810519217802e-05, "logits/chosen": -1.383330225944519, "logits/rejected": -2.387529134750366, "logps/chosen": -59.49872970581055, "logps/rejected": -346.73748779296875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.9937065839767456, "rewards/margins": 6.44339656829834, "rewards/rejected": -8.437103271484375, "step": 5509 }, { "epoch": 0.86, "learning_rate": 1.0106077078686654e-05, "logits/chosen": -3.0215258598327637, "logits/rejected": -2.9503495693206787, "logps/chosen": -286.15472412109375, "logps/rejected": -194.74395751953125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.4674382209777832, "rewards/margins": 5.129329681396484, "rewards/rejected": -6.596767425537109, "step": 5510 }, { "epoch": 0.86, "learning_rate": 1.0105343638155506e-05, "logits/chosen": -2.8415610790252686, "logits/rejected": -3.0809192657470703, "logps/chosen": -218.9393768310547, "logps/rejected": -471.0825500488281, "loss": 3.3307, "rewards/accuracies": 0.5, "rewards/chosen": -5.6937360763549805, "rewards/margins": 1.2665541172027588, "rewards/rejected": -6.960290908813477, "step": 5511 }, { "epoch": 0.86, "learning_rate": 1.010461019762436e-05, "logits/chosen": -2.6173195838928223, "logits/rejected": -2.9803919792175293, "logps/chosen": -69.5645751953125, "logps/rejected": -201.08731079101562, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.346341609954834, "rewards/margins": 6.649134635925293, "rewards/rejected": -8.995475769042969, "step": 5512 }, { "epoch": 0.86, "learning_rate": 1.0103876757093211e-05, "logits/chosen": -2.3917763233184814, "logits/rejected": -2.522681951522827, "logps/chosen": -125.34122467041016, "logps/rejected": -124.54092407226562, "loss": 1.9429, "rewards/accuracies": 0.5, "rewards/chosen": -3.531163215637207, "rewards/margins": 1.0477386713027954, "rewards/rejected": -4.578901767730713, "step": 5513 }, { "epoch": 0.86, "learning_rate": 1.0103143316562063e-05, "logits/chosen": -1.9682515859603882, "logits/rejected": -3.0876078605651855, "logps/chosen": -106.43829345703125, "logps/rejected": -354.428466796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.6538368463516235, "rewards/margins": 8.01552963256836, "rewards/rejected": -9.669365882873535, "step": 5514 }, { "epoch": 0.86, "learning_rate": 1.0102409876030915e-05, "logits/chosen": -2.5515003204345703, "logits/rejected": -2.7857558727264404, "logps/chosen": -100.60513305664062, "logps/rejected": -209.9100341796875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.7460885047912598, "rewards/margins": 4.487005233764648, "rewards/rejected": -7.233093738555908, "step": 5515 }, { "epoch": 0.86, "learning_rate": 1.0101676435499767e-05, "logits/chosen": -1.4262689352035522, "logits/rejected": -2.7965409755706787, "logps/chosen": -64.44393157958984, "logps/rejected": -239.51480102539062, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.696990966796875, "rewards/margins": 5.841818809509277, "rewards/rejected": -8.538809776306152, "step": 5516 }, { "epoch": 0.86, "learning_rate": 1.0100942994968619e-05, "logits/chosen": -2.643918514251709, "logits/rejected": -2.142402172088623, "logps/chosen": -345.91864013671875, "logps/rejected": -343.433837890625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.2307348251342773, "rewards/margins": 5.684565544128418, "rewards/rejected": -7.915300369262695, "step": 5517 }, { "epoch": 0.86, "learning_rate": 1.0100209554437472e-05, "logits/chosen": -2.8995931148529053, "logits/rejected": -2.9739928245544434, "logps/chosen": -351.8351135253906, "logps/rejected": -464.2156982421875, "loss": 3.284, "rewards/accuracies": 0.5, "rewards/chosen": -4.6096978187561035, "rewards/margins": 0.5838415622711182, "rewards/rejected": -5.193539619445801, "step": 5518 }, { "epoch": 0.86, "learning_rate": 1.0099476113906324e-05, "logits/chosen": -2.5960817337036133, "logits/rejected": -2.6994919776916504, "logps/chosen": -223.9044189453125, "logps/rejected": -253.170654296875, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": -2.4946837425231934, "rewards/margins": 2.7785630226135254, "rewards/rejected": -5.273246765136719, "step": 5519 }, { "epoch": 0.86, "learning_rate": 1.0098742673375176e-05, "logits/chosen": -1.7973014116287231, "logits/rejected": -2.872070789337158, "logps/chosen": -174.74343872070312, "logps/rejected": -330.45806884765625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.9305671453475952, "rewards/margins": 5.628837585449219, "rewards/rejected": -6.5594048500061035, "step": 5520 }, { "epoch": 0.86, "learning_rate": 1.0098009232844028e-05, "logits/chosen": -2.6902964115142822, "logits/rejected": -2.9345715045928955, "logps/chosen": -171.38699340820312, "logps/rejected": -340.1741943359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6418533325195312, "rewards/margins": 7.04649019241333, "rewards/rejected": -8.688343048095703, "step": 5521 }, { "epoch": 0.86, "learning_rate": 1.009727579231288e-05, "logits/chosen": -1.7244571447372437, "logits/rejected": -2.2349162101745605, "logps/chosen": -101.27937316894531, "logps/rejected": -296.18310546875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.8963723182678223, "rewards/margins": 6.515963077545166, "rewards/rejected": -8.412335395812988, "step": 5522 }, { "epoch": 0.86, "learning_rate": 1.0096542351781732e-05, "logits/chosen": -2.2825303077697754, "logits/rejected": -2.8788509368896484, "logps/chosen": -90.88802337646484, "logps/rejected": -248.923828125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.690814733505249, "rewards/margins": 5.526162147521973, "rewards/rejected": -8.216976165771484, "step": 5523 }, { "epoch": 0.86, "learning_rate": 1.0095808911250583e-05, "logits/chosen": -2.5449764728546143, "logits/rejected": -2.4542689323425293, "logps/chosen": -377.35943603515625, "logps/rejected": -462.3561096191406, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.7994616031646729, "rewards/margins": 5.603846549987793, "rewards/rejected": -7.403307914733887, "step": 5524 }, { "epoch": 0.86, "learning_rate": 1.0095075470719435e-05, "logits/chosen": -3.233914613723755, "logits/rejected": -2.863584041595459, "logps/chosen": -509.1689453125, "logps/rejected": -340.8197937011719, "loss": 0.4933, "rewards/accuracies": 0.5, "rewards/chosen": -1.960778832435608, "rewards/margins": 4.286053657531738, "rewards/rejected": -6.246832847595215, "step": 5525 }, { "epoch": 0.86, "learning_rate": 1.0094342030188287e-05, "logits/chosen": -2.7781083583831787, "logits/rejected": -2.8110909461975098, "logps/chosen": -84.40262603759766, "logps/rejected": -284.702392578125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.9181795120239258, "rewards/margins": 6.97667121887207, "rewards/rejected": -8.894850730895996, "step": 5526 }, { "epoch": 0.86, "learning_rate": 1.009360858965714e-05, "logits/chosen": -1.7143652439117432, "logits/rejected": -3.1105270385742188, "logps/chosen": -39.45960235595703, "logps/rejected": -284.6380615234375, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -2.0115442276000977, "rewards/margins": 5.279932975769043, "rewards/rejected": -7.291477203369141, "step": 5527 }, { "epoch": 0.86, "learning_rate": 1.0092875149125993e-05, "logits/chosen": -2.530792474746704, "logits/rejected": -2.8512909412384033, "logps/chosen": -804.6056518554688, "logps/rejected": -699.5028076171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.8385757207870483, "rewards/margins": 7.708987236022949, "rewards/rejected": -9.547563552856445, "step": 5528 }, { "epoch": 0.86, "learning_rate": 1.0092141708594845e-05, "logits/chosen": -2.7511603832244873, "logits/rejected": -2.3092386722564697, "logps/chosen": -390.4339294433594, "logps/rejected": -374.7425231933594, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.9379287958145142, "rewards/margins": 7.369551181793213, "rewards/rejected": -8.307479858398438, "step": 5529 }, { "epoch": 0.86, "learning_rate": 1.0091408268063696e-05, "logits/chosen": -2.60729718208313, "logits/rejected": -2.6145739555358887, "logps/chosen": -641.77783203125, "logps/rejected": -506.1251220703125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.4468995332717896, "rewards/margins": 5.73142147064209, "rewards/rejected": -7.17832088470459, "step": 5530 }, { "epoch": 0.86, "learning_rate": 1.0090674827532548e-05, "logits/chosen": -2.0047507286071777, "logits/rejected": -3.120790481567383, "logps/chosen": -154.51531982421875, "logps/rejected": -507.38726806640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.3921715021133423, "rewards/margins": 8.815662384033203, "rewards/rejected": -10.207834243774414, "step": 5531 }, { "epoch": 0.86, "learning_rate": 1.00899413870014e-05, "logits/chosen": -1.9336637258529663, "logits/rejected": -2.9948384761810303, "logps/chosen": -86.25341033935547, "logps/rejected": -281.3837890625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.5456175804138184, "rewards/margins": 5.186573505401611, "rewards/rejected": -7.73219108581543, "step": 5532 }, { "epoch": 0.86, "learning_rate": 1.0089207946470252e-05, "logits/chosen": -2.675780773162842, "logits/rejected": -2.803621292114258, "logps/chosen": -142.0683135986328, "logps/rejected": -246.80345153808594, "loss": 3.5367, "rewards/accuracies": 0.5, "rewards/chosen": -4.648172378540039, "rewards/margins": 0.7087984085083008, "rewards/rejected": -5.35697078704834, "step": 5533 }, { "epoch": 0.86, "learning_rate": 1.0088474505939104e-05, "logits/chosen": -2.9216833114624023, "logits/rejected": -1.4619195461273193, "logps/chosen": -379.96466064453125, "logps/rejected": -59.47272872924805, "loss": 7.219, "rewards/accuracies": 0.0, "rewards/chosen": -8.579498291015625, "rewards/margins": -7.218306541442871, "rewards/rejected": -1.3611912727355957, "step": 5534 }, { "epoch": 0.86, "learning_rate": 1.0087741065407956e-05, "logits/chosen": -2.189246416091919, "logits/rejected": -2.495767116546631, "logps/chosen": -437.97357177734375, "logps/rejected": -351.9684753417969, "loss": 3.098, "rewards/accuracies": 0.5, "rewards/chosen": -5.555088996887207, "rewards/margins": 0.6095466613769531, "rewards/rejected": -6.16463565826416, "step": 5535 }, { "epoch": 0.86, "learning_rate": 1.008700762487681e-05, "logits/chosen": -2.6480019092559814, "logits/rejected": -2.867961883544922, "logps/chosen": -143.45880126953125, "logps/rejected": -321.88946533203125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.2696104049682617, "rewards/margins": 6.77128791809082, "rewards/rejected": -9.040898323059082, "step": 5536 }, { "epoch": 0.86, "learning_rate": 1.0086274184345661e-05, "logits/chosen": -2.920504331588745, "logits/rejected": -1.8883253335952759, "logps/chosen": -394.61767578125, "logps/rejected": -347.5259704589844, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.6589255332946777, "rewards/margins": 5.895435333251953, "rewards/rejected": -8.554361343383789, "step": 5537 }, { "epoch": 0.86, "learning_rate": 1.0085540743814513e-05, "logits/chosen": -2.799192190170288, "logits/rejected": -2.1213223934173584, "logps/chosen": -630.57666015625, "logps/rejected": -522.1105346679688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.112112522125244, "rewards/margins": 8.129260063171387, "rewards/rejected": -10.241373062133789, "step": 5538 }, { "epoch": 0.86, "learning_rate": 1.0084807303283365e-05, "logits/chosen": -2.1835787296295166, "logits/rejected": -2.846351146697998, "logps/chosen": -212.3102264404297, "logps/rejected": -334.873779296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1680176258087158, "rewards/margins": 6.897523403167725, "rewards/rejected": -8.06554126739502, "step": 5539 }, { "epoch": 0.86, "learning_rate": 1.0084073862752217e-05, "logits/chosen": -2.7263171672821045, "logits/rejected": -2.4436233043670654, "logps/chosen": -360.66632080078125, "logps/rejected": -242.05535888671875, "loss": 1.3304, "rewards/accuracies": 0.5, "rewards/chosen": -3.3968894481658936, "rewards/margins": -0.49906349182128906, "rewards/rejected": -2.8978259563446045, "step": 5540 }, { "epoch": 0.86, "learning_rate": 1.0083340422221069e-05, "logits/chosen": -2.5119314193725586, "logits/rejected": -2.478403091430664, "logps/chosen": -126.83578491210938, "logps/rejected": -233.05789184570312, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.3568620681762695, "rewards/margins": 5.6385393142700195, "rewards/rejected": -7.995401382446289, "step": 5541 }, { "epoch": 0.86, "learning_rate": 1.008260698168992e-05, "logits/chosen": -2.890471935272217, "logits/rejected": -2.905956745147705, "logps/chosen": -73.07258605957031, "logps/rejected": -138.9951171875, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -2.335866928100586, "rewards/margins": 4.437099456787109, "rewards/rejected": -6.772966384887695, "step": 5542 }, { "epoch": 0.86, "learning_rate": 1.0081873541158773e-05, "logits/chosen": -3.071601629257202, "logits/rejected": -2.396965742111206, "logps/chosen": -416.02423095703125, "logps/rejected": -312.5234375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.818555474281311, "rewards/margins": 6.718517303466797, "rewards/rejected": -7.537072658538818, "step": 5543 }, { "epoch": 0.86, "learning_rate": 1.0081140100627626e-05, "logits/chosen": -2.3619022369384766, "logits/rejected": -2.948308229446411, "logps/chosen": -148.719970703125, "logps/rejected": -267.76458740234375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.891503095626831, "rewards/margins": 5.866981506347656, "rewards/rejected": -7.758484840393066, "step": 5544 }, { "epoch": 0.86, "learning_rate": 1.0080406660096478e-05, "logits/chosen": -3.0986690521240234, "logits/rejected": -2.742462158203125, "logps/chosen": -229.76437377929688, "logps/rejected": -276.89031982421875, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.8110321760177612, "rewards/margins": 6.176904678344727, "rewards/rejected": -7.987936973571777, "step": 5545 }, { "epoch": 0.86, "learning_rate": 1.0079673219565332e-05, "logits/chosen": -2.942009925842285, "logits/rejected": -2.9920196533203125, "logps/chosen": -293.1193542480469, "logps/rejected": -317.1246337890625, "loss": 2.8308, "rewards/accuracies": 0.5, "rewards/chosen": -5.370902061462402, "rewards/margins": -1.2727328538894653, "rewards/rejected": -4.098169326782227, "step": 5546 }, { "epoch": 0.86, "learning_rate": 1.0078939779034183e-05, "logits/chosen": -3.017500877380371, "logits/rejected": -2.581301689147949, "logps/chosen": -460.05987548828125, "logps/rejected": -358.4773254394531, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.3427963256835938, "rewards/margins": 6.790073871612549, "rewards/rejected": -8.1328706741333, "step": 5547 }, { "epoch": 0.86, "learning_rate": 1.0078206338503035e-05, "logits/chosen": -2.615649938583374, "logits/rejected": -2.965881824493408, "logps/chosen": -110.37030029296875, "logps/rejected": -258.7177734375, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.399209976196289, "rewards/margins": 5.057221412658691, "rewards/rejected": -6.4564313888549805, "step": 5548 }, { "epoch": 0.86, "learning_rate": 1.0077472897971887e-05, "logits/chosen": -1.40794837474823, "logits/rejected": -1.9314067363739014, "logps/chosen": -107.07930755615234, "logps/rejected": -328.02178955078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.6752346754074097, "rewards/margins": 6.585353851318359, "rewards/rejected": -8.260588645935059, "step": 5549 }, { "epoch": 0.86, "learning_rate": 1.0076739457440739e-05, "logits/chosen": -2.911872625350952, "logits/rejected": -2.0853865146636963, "logps/chosen": -435.77374267578125, "logps/rejected": -346.13525390625, "loss": 2.6701, "rewards/accuracies": 0.5, "rewards/chosen": -5.130975246429443, "rewards/margins": 0.8346476554870605, "rewards/rejected": -5.965623378753662, "step": 5550 }, { "epoch": 0.86, "learning_rate": 1.0076006016909591e-05, "logits/chosen": -3.0294077396392822, "logits/rejected": -2.8811192512512207, "logps/chosen": -251.23040771484375, "logps/rejected": -197.7306365966797, "loss": 5.4636, "rewards/accuracies": 0.0, "rewards/chosen": -7.058073043823242, "rewards/margins": -5.457235336303711, "rewards/rejected": -1.6008375883102417, "step": 5551 }, { "epoch": 0.86, "learning_rate": 1.0075272576378443e-05, "logits/chosen": -2.555508613586426, "logits/rejected": -1.975955843925476, "logps/chosen": -468.4844055175781, "logps/rejected": -381.226806640625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.564117193222046, "rewards/margins": 5.779872417449951, "rewards/rejected": -8.343989372253418, "step": 5552 }, { "epoch": 0.86, "learning_rate": 1.0074539135847295e-05, "logits/chosen": -2.7260684967041016, "logits/rejected": -1.7269476652145386, "logps/chosen": -216.40615844726562, "logps/rejected": -107.60377502441406, "loss": 3.3212, "rewards/accuracies": 0.5, "rewards/chosen": -4.463901042938232, "rewards/margins": -0.9049441814422607, "rewards/rejected": -3.5589568614959717, "step": 5553 }, { "epoch": 0.86, "learning_rate": 1.0073805695316148e-05, "logits/chosen": -1.868483066558838, "logits/rejected": -2.622631072998047, "logps/chosen": -58.58516311645508, "logps/rejected": -414.3589782714844, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.715542197227478, "rewards/margins": 4.909084796905518, "rewards/rejected": -6.624627113342285, "step": 5554 }, { "epoch": 0.86, "learning_rate": 1.0073072254785e-05, "logits/chosen": -2.787715196609497, "logits/rejected": -3.1256585121154785, "logps/chosen": -86.63563537597656, "logps/rejected": -198.79153442382812, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.5651073455810547, "rewards/margins": 4.918036937713623, "rewards/rejected": -7.483144760131836, "step": 5555 }, { "epoch": 0.86, "learning_rate": 1.0072338814253852e-05, "logits/chosen": -1.5304309129714966, "logits/rejected": -2.9443106651306152, "logps/chosen": -246.32305908203125, "logps/rejected": -540.3681030273438, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.103479027748108, "rewards/margins": 7.072798728942871, "rewards/rejected": -8.176277160644531, "step": 5556 }, { "epoch": 0.86, "learning_rate": 1.0071605373722704e-05, "logits/chosen": -1.4172027111053467, "logits/rejected": -2.8046445846557617, "logps/chosen": -199.38113403320312, "logps/rejected": -386.02886962890625, "loss": 0.3283, "rewards/accuracies": 1.0, "rewards/chosen": -3.6616158485412598, "rewards/margins": 1.7057448625564575, "rewards/rejected": -5.367360591888428, "step": 5557 }, { "epoch": 0.86, "learning_rate": 1.0070871933191556e-05, "logits/chosen": -2.645733594894409, "logits/rejected": -2.2181248664855957, "logps/chosen": -347.4229431152344, "logps/rejected": -262.1614685058594, "loss": 3.3931, "rewards/accuracies": 0.5, "rewards/chosen": -4.228884696960449, "rewards/margins": 0.30352354049682617, "rewards/rejected": -4.532408237457275, "step": 5558 }, { "epoch": 0.86, "learning_rate": 1.0070138492660408e-05, "logits/chosen": -1.1818355321884155, "logits/rejected": -2.9262919425964355, "logps/chosen": -131.02024841308594, "logps/rejected": -378.8350830078125, "loss": 0.1016, "rewards/accuracies": 1.0, "rewards/chosen": -1.431033730506897, "rewards/margins": 3.899116039276123, "rewards/rejected": -5.3301496505737305, "step": 5559 }, { "epoch": 0.86, "learning_rate": 1.006940505212926e-05, "logits/chosen": -2.900190830230713, "logits/rejected": -2.9143452644348145, "logps/chosen": -47.441043853759766, "logps/rejected": -112.38796997070312, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -2.3437206745147705, "rewards/margins": 4.408933639526367, "rewards/rejected": -6.752654552459717, "step": 5560 }, { "epoch": 0.86, "learning_rate": 1.0068671611598111e-05, "logits/chosen": -2.3920204639434814, "logits/rejected": -3.1939122676849365, "logps/chosen": -175.68792724609375, "logps/rejected": -474.098388671875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.5460147857666016, "rewards/margins": 4.88059139251709, "rewards/rejected": -7.426606178283691, "step": 5561 }, { "epoch": 0.87, "learning_rate": 1.0067938171066963e-05, "logits/chosen": -2.033125162124634, "logits/rejected": -2.7445452213287354, "logps/chosen": -112.18524932861328, "logps/rejected": -310.1507568359375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.3157999515533447, "rewards/margins": 5.483338356018066, "rewards/rejected": -6.799138069152832, "step": 5562 }, { "epoch": 0.87, "learning_rate": 1.0067204730535817e-05, "logits/chosen": -1.9453693628311157, "logits/rejected": -2.777745246887207, "logps/chosen": -139.7985076904297, "logps/rejected": -265.3316955566406, "loss": 1.4414, "rewards/accuracies": 0.5, "rewards/chosen": -4.649344444274902, "rewards/margins": 1.368105411529541, "rewards/rejected": -6.017450332641602, "step": 5563 }, { "epoch": 0.87, "learning_rate": 1.0066471290004669e-05, "logits/chosen": -2.6321043968200684, "logits/rejected": -2.9123241901397705, "logps/chosen": -142.1702423095703, "logps/rejected": -278.7652893066406, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -1.4500923156738281, "rewards/margins": 4.336234092712402, "rewards/rejected": -5.7863264083862305, "step": 5564 }, { "epoch": 0.87, "learning_rate": 1.006573784947352e-05, "logits/chosen": -2.588911533355713, "logits/rejected": -2.093621253967285, "logps/chosen": -177.56622314453125, "logps/rejected": -401.6297607421875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0996220111846924, "rewards/margins": 6.89876651763916, "rewards/rejected": -7.998388767242432, "step": 5565 }, { "epoch": 0.87, "learning_rate": 1.0065004408942373e-05, "logits/chosen": -2.8664143085479736, "logits/rejected": -3.085339307785034, "logps/chosen": -199.7145233154297, "logps/rejected": -208.7278289794922, "loss": 0.1588, "rewards/accuracies": 1.0, "rewards/chosen": -3.3688979148864746, "rewards/margins": 2.574341058731079, "rewards/rejected": -5.943239212036133, "step": 5566 }, { "epoch": 0.87, "learning_rate": 1.0064270968411224e-05, "logits/chosen": -2.174678087234497, "logits/rejected": -2.612252950668335, "logps/chosen": -361.30804443359375, "logps/rejected": -308.4443359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.2862991392612457, "rewards/margins": 7.533877849578857, "rewards/rejected": -7.82017707824707, "step": 5567 }, { "epoch": 0.87, "learning_rate": 1.0063537527880076e-05, "logits/chosen": -3.2452776432037354, "logits/rejected": -1.255126714706421, "logps/chosen": -341.511474609375, "logps/rejected": -78.55709838867188, "loss": 0.1051, "rewards/accuracies": 1.0, "rewards/chosen": -3.643002510070801, "rewards/margins": 2.532912254333496, "rewards/rejected": -6.175914764404297, "step": 5568 }, { "epoch": 0.87, "learning_rate": 1.0062804087348928e-05, "logits/chosen": -2.829930305480957, "logits/rejected": -2.2988102436065674, "logps/chosen": -494.2481994628906, "logps/rejected": -384.3216552734375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.6101776361465454, "rewards/margins": 5.362539768218994, "rewards/rejected": -5.97271728515625, "step": 5569 }, { "epoch": 0.87, "learning_rate": 1.006207064681778e-05, "logits/chosen": -2.8664257526397705, "logits/rejected": -2.069912910461426, "logps/chosen": -279.61419677734375, "logps/rejected": -161.60523986816406, "loss": 3.1508, "rewards/accuracies": 0.5, "rewards/chosen": -4.82839298248291, "rewards/margins": 0.6884279251098633, "rewards/rejected": -5.516820907592773, "step": 5570 }, { "epoch": 0.87, "learning_rate": 1.0061337206286632e-05, "logits/chosen": -2.29818058013916, "logits/rejected": -2.8360891342163086, "logps/chosen": -228.0891571044922, "logps/rejected": -384.2354736328125, "loss": 2.3099, "rewards/accuracies": 0.5, "rewards/chosen": -3.4040513038635254, "rewards/margins": 1.2763948440551758, "rewards/rejected": -4.680446147918701, "step": 5571 }, { "epoch": 0.87, "learning_rate": 1.0060603765755485e-05, "logits/chosen": -2.5507051944732666, "logits/rejected": -2.975152015686035, "logps/chosen": -718.6465454101562, "logps/rejected": -734.7256469726562, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -1.1488479375839233, "rewards/margins": 4.786066055297852, "rewards/rejected": -5.934913635253906, "step": 5572 }, { "epoch": 0.87, "learning_rate": 1.0059870325224337e-05, "logits/chosen": -2.449188232421875, "logits/rejected": -2.7998032569885254, "logps/chosen": -97.54010009765625, "logps/rejected": -231.03408813476562, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -2.591277837753296, "rewards/margins": 4.986860752105713, "rewards/rejected": -7.57813835144043, "step": 5573 }, { "epoch": 0.87, "learning_rate": 1.005913688469319e-05, "logits/chosen": -2.4883322715759277, "logits/rejected": -1.4463812112808228, "logps/chosen": -190.83177185058594, "logps/rejected": -129.1712646484375, "loss": 1.6508, "rewards/accuracies": 0.5, "rewards/chosen": -3.6757867336273193, "rewards/margins": 1.1498368978500366, "rewards/rejected": -4.825623512268066, "step": 5574 }, { "epoch": 0.87, "learning_rate": 1.0058403444162041e-05, "logits/chosen": -1.7814518213272095, "logits/rejected": -2.9341938495635986, "logps/chosen": -148.5810546875, "logps/rejected": -318.50164794921875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.881903886795044, "rewards/margins": 6.56650447845459, "rewards/rejected": -8.448408126831055, "step": 5575 }, { "epoch": 0.87, "learning_rate": 1.0057670003630893e-05, "logits/chosen": -1.6663401126861572, "logits/rejected": -3.2086751461029053, "logps/chosen": -84.39796447753906, "logps/rejected": -411.31549072265625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.2172874212265015, "rewards/margins": 4.9817376136779785, "rewards/rejected": -6.1990251541137695, "step": 5576 }, { "epoch": 0.87, "learning_rate": 1.0056936563099745e-05, "logits/chosen": -2.832394599914551, "logits/rejected": -2.5562503337860107, "logps/chosen": -294.99993896484375, "logps/rejected": -381.8385009765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.028758227825164795, "rewards/margins": 7.994956016540527, "rewards/rejected": -8.023714065551758, "step": 5577 }, { "epoch": 0.87, "learning_rate": 1.0056203122568598e-05, "logits/chosen": -2.8721892833709717, "logits/rejected": -2.9226760864257812, "logps/chosen": -80.17897033691406, "logps/rejected": -217.5802764892578, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": -0.8712891340255737, "rewards/margins": 3.649458885192871, "rewards/rejected": -4.520748138427734, "step": 5578 }, { "epoch": 0.87, "learning_rate": 1.005546968203745e-05, "logits/chosen": -2.6653568744659424, "logits/rejected": -2.625049352645874, "logps/chosen": -85.12257385253906, "logps/rejected": -160.90789794921875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.9181966781616211, "rewards/margins": 4.394401550292969, "rewards/rejected": -5.31259822845459, "step": 5579 }, { "epoch": 0.87, "learning_rate": 1.0054736241506304e-05, "logits/chosen": -2.310777425765991, "logits/rejected": -3.096400022506714, "logps/chosen": -93.3267822265625, "logps/rejected": -312.6395263671875, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": -0.39580079913139343, "rewards/margins": 4.780365943908691, "rewards/rejected": -5.176166534423828, "step": 5580 }, { "epoch": 0.87, "learning_rate": 1.0054002800975156e-05, "logits/chosen": -2.8550803661346436, "logits/rejected": -2.3274240493774414, "logps/chosen": -737.3882446289062, "logps/rejected": -434.66033935546875, "loss": 0.2496, "rewards/accuracies": 1.0, "rewards/chosen": -2.5231246948242188, "rewards/margins": 1.7818984985351562, "rewards/rejected": -4.305023193359375, "step": 5581 }, { "epoch": 0.87, "learning_rate": 1.0053269360444008e-05, "logits/chosen": -2.8204104900360107, "logits/rejected": -2.6350111961364746, "logps/chosen": -134.77105712890625, "logps/rejected": -316.364501953125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.6145005226135254, "rewards/margins": 5.875662803649902, "rewards/rejected": -8.49016284942627, "step": 5582 }, { "epoch": 0.87, "learning_rate": 1.005253591991286e-05, "logits/chosen": -1.3061742782592773, "logits/rejected": -2.754056215286255, "logps/chosen": -102.60657501220703, "logps/rejected": -402.1769714355469, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.2657146453857422, "rewards/margins": 6.230154991149902, "rewards/rejected": -6.4958696365356445, "step": 5583 }, { "epoch": 0.87, "learning_rate": 1.0051802479381711e-05, "logits/chosen": -2.2812962532043457, "logits/rejected": -2.8112165927886963, "logps/chosen": -116.6604232788086, "logps/rejected": -250.72154235839844, "loss": 2.2687, "rewards/accuracies": 0.5, "rewards/chosen": -4.8573408126831055, "rewards/margins": 0.9057579040527344, "rewards/rejected": -5.763099193572998, "step": 5584 }, { "epoch": 0.87, "learning_rate": 1.0051069038850563e-05, "logits/chosen": -2.6763405799865723, "logits/rejected": -2.8705689907073975, "logps/chosen": -597.7230834960938, "logps/rejected": -474.96002197265625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.004595566540956497, "rewards/margins": 5.183974266052246, "rewards/rejected": -5.188570022583008, "step": 5585 }, { "epoch": 0.87, "learning_rate": 1.0050335598319415e-05, "logits/chosen": -2.2712466716766357, "logits/rejected": -2.8755996227264404, "logps/chosen": -380.62347412109375, "logps/rejected": -365.6716613769531, "loss": 1.7415, "rewards/accuracies": 0.5, "rewards/chosen": -1.3795113563537598, "rewards/margins": 2.3392157554626465, "rewards/rejected": -3.718726634979248, "step": 5586 }, { "epoch": 0.87, "learning_rate": 1.0049602157788267e-05, "logits/chosen": -2.5382142066955566, "logits/rejected": -2.9094014167785645, "logps/chosen": -36.16315460205078, "logps/rejected": -201.53167724609375, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.422272801399231, "rewards/margins": 4.454187393188477, "rewards/rejected": -5.876460075378418, "step": 5587 }, { "epoch": 0.87, "learning_rate": 1.0048868717257119e-05, "logits/chosen": -2.820122718811035, "logits/rejected": -3.326531410217285, "logps/chosen": -422.70416259765625, "logps/rejected": -457.76239013671875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.801353931427002, "rewards/margins": 5.079545021057129, "rewards/rejected": -6.880899429321289, "step": 5588 }, { "epoch": 0.87, "learning_rate": 1.0048135276725973e-05, "logits/chosen": -3.1528666019439697, "logits/rejected": -3.0103859901428223, "logps/chosen": -291.0231018066406, "logps/rejected": -181.51809692382812, "loss": 2.1155, "rewards/accuracies": 0.5, "rewards/chosen": -2.580972194671631, "rewards/margins": 0.45566892623901367, "rewards/rejected": -3.0366411209106445, "step": 5589 }, { "epoch": 0.87, "learning_rate": 1.0047401836194824e-05, "logits/chosen": -2.7522809505462646, "logits/rejected": -3.0297372341156006, "logps/chosen": -128.62774658203125, "logps/rejected": -323.7415771484375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.13872492313385, "rewards/margins": 6.1751179695129395, "rewards/rejected": -7.3138427734375, "step": 5590 }, { "epoch": 0.87, "learning_rate": 1.0046668395663676e-05, "logits/chosen": -2.734654426574707, "logits/rejected": -3.1326372623443604, "logps/chosen": -122.90095520019531, "logps/rejected": -236.99813842773438, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.1141773462295532, "rewards/margins": 4.479727745056152, "rewards/rejected": -5.593905448913574, "step": 5591 }, { "epoch": 0.87, "learning_rate": 1.0045934955132528e-05, "logits/chosen": -2.820312023162842, "logits/rejected": -2.4076719284057617, "logps/chosen": -543.3565063476562, "logps/rejected": -391.45172119140625, "loss": 2.3893, "rewards/accuracies": 0.5, "rewards/chosen": -3.8137056827545166, "rewards/margins": 0.44502973556518555, "rewards/rejected": -4.258735656738281, "step": 5592 }, { "epoch": 0.87, "learning_rate": 1.004520151460138e-05, "logits/chosen": -2.8956100940704346, "logits/rejected": -2.456044912338257, "logps/chosen": -134.45401000976562, "logps/rejected": -192.83982849121094, "loss": 2.0353, "rewards/accuracies": 0.5, "rewards/chosen": -3.2109127044677734, "rewards/margins": 1.0354790687561035, "rewards/rejected": -4.246391773223877, "step": 5593 }, { "epoch": 0.87, "learning_rate": 1.0044468074070232e-05, "logits/chosen": -2.7219128608703613, "logits/rejected": -2.779649496078491, "logps/chosen": -372.99444580078125, "logps/rejected": -351.0928955078125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.039784252643585205, "rewards/margins": 6.083687782287598, "rewards/rejected": -6.123472213745117, "step": 5594 }, { "epoch": 0.87, "learning_rate": 1.0043734633539084e-05, "logits/chosen": -2.419452667236328, "logits/rejected": -2.792006015777588, "logps/chosen": -122.1148681640625, "logps/rejected": -285.43048095703125, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/chosen": -1.3324886560440063, "rewards/margins": 3.080794334411621, "rewards/rejected": -4.413282871246338, "step": 5595 }, { "epoch": 0.87, "learning_rate": 1.0043001193007936e-05, "logits/chosen": -2.4659502506256104, "logits/rejected": -3.1988649368286133, "logps/chosen": -34.183353424072266, "logps/rejected": -244.08473205566406, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.0296412706375122, "rewards/margins": 6.432451248168945, "rewards/rejected": -7.462092399597168, "step": 5596 }, { "epoch": 0.87, "learning_rate": 1.0042267752476788e-05, "logits/chosen": -2.852895975112915, "logits/rejected": -2.9091222286224365, "logps/chosen": -433.5649108886719, "logps/rejected": -349.47564697265625, "loss": 3.6679, "rewards/accuracies": 0.5, "rewards/chosen": -3.567394971847534, "rewards/margins": -0.3249053955078125, "rewards/rejected": -3.2424895763397217, "step": 5597 }, { "epoch": 0.87, "learning_rate": 1.0041534311945641e-05, "logits/chosen": -1.8674681186676025, "logits/rejected": -2.7737972736358643, "logps/chosen": -175.60491943359375, "logps/rejected": -420.4385986328125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6331550478935242, "rewards/margins": 6.692197799682617, "rewards/rejected": -7.325352668762207, "step": 5598 }, { "epoch": 0.87, "learning_rate": 1.0040800871414493e-05, "logits/chosen": -1.5361518859863281, "logits/rejected": -2.825457811355591, "logps/chosen": -83.72663879394531, "logps/rejected": -294.5505065917969, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.3891825675964355, "rewards/margins": 5.261170387268066, "rewards/rejected": -6.650352478027344, "step": 5599 }, { "epoch": 0.87, "learning_rate": 1.0040067430883345e-05, "logits/chosen": -1.4602593183517456, "logits/rejected": -3.0826196670532227, "logps/chosen": -112.8991928100586, "logps/rejected": -318.4632568359375, "loss": 0.1005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7561733722686768, "rewards/margins": 2.250375747680664, "rewards/rejected": -3.006549119949341, "step": 5600 }, { "epoch": 0.87, "learning_rate": 1.0039333990352197e-05, "logits/chosen": -2.266695737838745, "logits/rejected": -2.920081853866577, "logps/chosen": -180.2122802734375, "logps/rejected": -346.60235595703125, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.15816651284694672, "rewards/margins": 5.301705360412598, "rewards/rejected": -5.459871768951416, "step": 5601 }, { "epoch": 0.87, "learning_rate": 1.0038600549821049e-05, "logits/chosen": -2.158904790878296, "logits/rejected": -2.9103283882141113, "logps/chosen": -135.04440307617188, "logps/rejected": -339.3163146972656, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": -0.20058059692382812, "rewards/margins": 3.629706621170044, "rewards/rejected": -3.830287218093872, "step": 5602 }, { "epoch": 0.87, "learning_rate": 1.00378671092899e-05, "logits/chosen": -2.982851266860962, "logits/rejected": -2.8253684043884277, "logps/chosen": -79.22499084472656, "logps/rejected": -116.6207275390625, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.833795428276062, "rewards/margins": 4.686361312866211, "rewards/rejected": -6.5201568603515625, "step": 5603 }, { "epoch": 0.87, "learning_rate": 1.0037133668758752e-05, "logits/chosen": -2.958682060241699, "logits/rejected": -2.8364017009735107, "logps/chosen": -113.95114135742188, "logps/rejected": -141.58383178710938, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.9620254039764404, "rewards/margins": 6.008544921875, "rewards/rejected": -6.9705705642700195, "step": 5604 }, { "epoch": 0.87, "learning_rate": 1.0036400228227604e-05, "logits/chosen": -3.0464601516723633, "logits/rejected": -2.6976919174194336, "logps/chosen": -343.28216552734375, "logps/rejected": -341.0450439453125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -0.6277313232421875, "rewards/margins": 4.8556623458862305, "rewards/rejected": -5.483393669128418, "step": 5605 }, { "epoch": 0.87, "learning_rate": 1.0035666787696456e-05, "logits/chosen": -1.9335330724716187, "logits/rejected": -2.5683910846710205, "logps/chosen": -167.08946228027344, "logps/rejected": -403.6595458984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.2552349269390106, "rewards/margins": 8.037187576293945, "rewards/rejected": -8.292423248291016, "step": 5606 }, { "epoch": 0.87, "learning_rate": 1.003493334716531e-05, "logits/chosen": -2.924910306930542, "logits/rejected": -2.9723434448242188, "logps/chosen": -214.24441528320312, "logps/rejected": -212.2311248779297, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.41213685274124146, "rewards/margins": 5.849590301513672, "rewards/rejected": -5.437453746795654, "step": 5607 }, { "epoch": 0.87, "learning_rate": 1.0034199906634162e-05, "logits/chosen": -2.322953701019287, "logits/rejected": -2.8405399322509766, "logps/chosen": -187.15234375, "logps/rejected": -350.1131591796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3541618585586548, "rewards/margins": 8.986956596374512, "rewards/rejected": -9.341117858886719, "step": 5608 }, { "epoch": 0.87, "learning_rate": 1.0033466466103013e-05, "logits/chosen": -0.9165393114089966, "logits/rejected": -2.5180678367614746, "logps/chosen": -107.3103256225586, "logps/rejected": -268.7427978515625, "loss": 2.0514, "rewards/accuracies": 0.5, "rewards/chosen": -2.844970226287842, "rewards/margins": 0.3016500473022461, "rewards/rejected": -3.146620273590088, "step": 5609 }, { "epoch": 0.87, "learning_rate": 1.0032733025571865e-05, "logits/chosen": -2.8858025074005127, "logits/rejected": -3.0180649757385254, "logps/chosen": -71.18162536621094, "logps/rejected": -255.4438018798828, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -1.772942304611206, "rewards/margins": 4.667114734649658, "rewards/rejected": -6.440056800842285, "step": 5610 }, { "epoch": 0.87, "learning_rate": 1.0031999585040717e-05, "logits/chosen": -2.6134700775146484, "logits/rejected": -3.1548666954040527, "logps/chosen": -331.532470703125, "logps/rejected": -432.72076416015625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.9174942374229431, "rewards/margins": 5.652522087097168, "rewards/rejected": -6.570016860961914, "step": 5611 }, { "epoch": 0.87, "learning_rate": 1.003126614450957e-05, "logits/chosen": -2.531893253326416, "logits/rejected": -2.7775797843933105, "logps/chosen": -141.5482940673828, "logps/rejected": -173.2435302734375, "loss": 0.316, "rewards/accuracies": 1.0, "rewards/chosen": -2.2947804927825928, "rewards/margins": 3.4039182662963867, "rewards/rejected": -5.698698997497559, "step": 5612 }, { "epoch": 0.87, "learning_rate": 1.0030532703978423e-05, "logits/chosen": -2.8483083248138428, "logits/rejected": -3.2019622325897217, "logps/chosen": -147.85983276367188, "logps/rejected": -218.75279235839844, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": -1.1619383096694946, "rewards/margins": 3.2656426429748535, "rewards/rejected": -4.427580833435059, "step": 5613 }, { "epoch": 0.87, "learning_rate": 1.0029799263447275e-05, "logits/chosen": -2.8134007453918457, "logits/rejected": -1.8816595077514648, "logps/chosen": -554.8240356445312, "logps/rejected": -277.8399658203125, "loss": 1.5291, "rewards/accuracies": 0.5, "rewards/chosen": -2.859002113342285, "rewards/margins": 2.197701930999756, "rewards/rejected": -5.056704044342041, "step": 5614 }, { "epoch": 0.87, "learning_rate": 1.0029065822916126e-05, "logits/chosen": -2.1245720386505127, "logits/rejected": -2.9095351696014404, "logps/chosen": -120.22615051269531, "logps/rejected": -393.08392333984375, "loss": 0.1589, "rewards/accuracies": 1.0, "rewards/chosen": -0.9553331732749939, "rewards/margins": 2.416689872741699, "rewards/rejected": -3.372023105621338, "step": 5615 }, { "epoch": 0.87, "learning_rate": 1.002833238238498e-05, "logits/chosen": -2.2780327796936035, "logits/rejected": -2.6057136058807373, "logps/chosen": -209.21437072753906, "logps/rejected": -374.2034912109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8593933582305908, "rewards/margins": 7.3607330322265625, "rewards/rejected": -8.220126152038574, "step": 5616 }, { "epoch": 0.87, "learning_rate": 1.0027598941853832e-05, "logits/chosen": -1.3509247303009033, "logits/rejected": -2.880502700805664, "logps/chosen": -68.41227722167969, "logps/rejected": -293.9206237792969, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.07496432960033417, "rewards/margins": 4.822340965270996, "rewards/rejected": -4.897305488586426, "step": 5617 }, { "epoch": 0.87, "learning_rate": 1.0026865501322684e-05, "logits/chosen": -2.671261787414551, "logits/rejected": -2.9103779792785645, "logps/chosen": -205.65786743164062, "logps/rejected": -313.34002685546875, "loss": 3.4782, "rewards/accuracies": 0.5, "rewards/chosen": -3.931520700454712, "rewards/margins": 0.09535002708435059, "rewards/rejected": -4.0268707275390625, "step": 5618 }, { "epoch": 0.87, "learning_rate": 1.0026132060791536e-05, "logits/chosen": -2.785255193710327, "logits/rejected": -2.576378107070923, "logps/chosen": -280.5787048339844, "logps/rejected": -525.118896484375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.23009264469146729, "rewards/margins": 7.604008197784424, "rewards/rejected": -7.834100723266602, "step": 5619 }, { "epoch": 0.87, "learning_rate": 1.0025398620260388e-05, "logits/chosen": -1.9019358158111572, "logits/rejected": -2.8498358726501465, "logps/chosen": -102.04852294921875, "logps/rejected": -338.9501037597656, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": 0.08176802843809128, "rewards/margins": 4.986670970916748, "rewards/rejected": -4.904902935028076, "step": 5620 }, { "epoch": 0.87, "learning_rate": 1.002466517972924e-05, "logits/chosen": -2.3630666732788086, "logits/rejected": -2.9807164669036865, "logps/chosen": -302.62799072265625, "logps/rejected": -504.5469970703125, "loss": 2.1957, "rewards/accuracies": 0.5, "rewards/chosen": -3.8204078674316406, "rewards/margins": 0.045454978942871094, "rewards/rejected": -3.8658628463745117, "step": 5621 }, { "epoch": 0.87, "learning_rate": 1.0023931739198091e-05, "logits/chosen": -2.707495927810669, "logits/rejected": -2.2428393363952637, "logps/chosen": -265.0842590332031, "logps/rejected": -222.68374633789062, "loss": 1.3238, "rewards/accuracies": 0.5, "rewards/chosen": -3.150336980819702, "rewards/margins": 2.4705424308776855, "rewards/rejected": -5.620879650115967, "step": 5622 }, { "epoch": 0.87, "learning_rate": 1.0023198298666943e-05, "logits/chosen": -1.402966022491455, "logits/rejected": -2.808009624481201, "logps/chosen": -78.70695495605469, "logps/rejected": -375.52459716796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5309808850288391, "rewards/margins": 7.148970603942871, "rewards/rejected": -7.6799516677856445, "step": 5623 }, { "epoch": 0.87, "learning_rate": 1.0022464858135795e-05, "logits/chosen": -2.68597149848938, "logits/rejected": -2.6946170330047607, "logps/chosen": -147.41363525390625, "logps/rejected": -300.6914367675781, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.4899276793003082, "rewards/margins": 5.8423614501953125, "rewards/rejected": -6.332289218902588, "step": 5624 }, { "epoch": 0.87, "learning_rate": 1.0021731417604649e-05, "logits/chosen": -2.753183603286743, "logits/rejected": -1.2976874113082886, "logps/chosen": -533.5159912109375, "logps/rejected": -249.7931671142578, "loss": 1.237, "rewards/accuracies": 0.5, "rewards/chosen": -3.9195022583007812, "rewards/margins": 1.1985363960266113, "rewards/rejected": -5.118038654327393, "step": 5625 }, { "epoch": 0.87, "learning_rate": 1.00209979770735e-05, "logits/chosen": -1.4856293201446533, "logits/rejected": -2.710477828979492, "logps/chosen": -42.706024169921875, "logps/rejected": -338.3203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.9072527885437012, "rewards/margins": 8.008923530578613, "rewards/rejected": -8.916175842285156, "step": 5626 }, { "epoch": 0.88, "learning_rate": 1.0020264536542352e-05, "logits/chosen": -1.8747228384017944, "logits/rejected": -2.9065349102020264, "logps/chosen": -103.21793365478516, "logps/rejected": -331.90802001953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.25954172015190125, "rewards/margins": 6.9765167236328125, "rewards/rejected": -6.71697473526001, "step": 5627 }, { "epoch": 0.88, "learning_rate": 1.0019531096011204e-05, "logits/chosen": -1.9377131462097168, "logits/rejected": -2.6485416889190674, "logps/chosen": -72.74758911132812, "logps/rejected": -237.44000244140625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.4424619674682617, "rewards/margins": 5.565364837646484, "rewards/rejected": -6.007826805114746, "step": 5628 }, { "epoch": 0.88, "learning_rate": 1.0018797655480056e-05, "logits/chosen": -2.878274440765381, "logits/rejected": -2.960756301879883, "logps/chosen": -322.34088134765625, "logps/rejected": -389.3494567871094, "loss": 1.8339, "rewards/accuracies": 0.5, "rewards/chosen": -2.0761475563049316, "rewards/margins": 0.7985596656799316, "rewards/rejected": -2.8747072219848633, "step": 5629 }, { "epoch": 0.88, "learning_rate": 1.0018064214948908e-05, "logits/chosen": -2.8947126865386963, "logits/rejected": -2.1648976802825928, "logps/chosen": -363.3779296875, "logps/rejected": -360.278076171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.2289581298828125, "rewards/margins": 8.071859359741211, "rewards/rejected": -8.30081844329834, "step": 5630 }, { "epoch": 0.88, "learning_rate": 1.001733077441776e-05, "logits/chosen": -1.3123533725738525, "logits/rejected": -2.327322244644165, "logps/chosen": -280.98162841796875, "logps/rejected": -403.9006042480469, "loss": 0.8123, "rewards/accuracies": 0.5, "rewards/chosen": -3.484325408935547, "rewards/margins": 4.205874919891357, "rewards/rejected": -7.6902008056640625, "step": 5631 }, { "epoch": 0.88, "learning_rate": 1.0016597333886612e-05, "logits/chosen": -2.8933258056640625, "logits/rejected": -2.653240203857422, "logps/chosen": -702.8603515625, "logps/rejected": -581.030517578125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.083824872970581, "rewards/margins": 5.297292709350586, "rewards/rejected": -6.381117343902588, "step": 5632 }, { "epoch": 0.88, "learning_rate": 1.0015863893355464e-05, "logits/chosen": -2.493845224380493, "logits/rejected": -2.8876688480377197, "logps/chosen": -245.2439727783203, "logps/rejected": -516.7398071289062, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 0.5871871709823608, "rewards/margins": 6.516988754272461, "rewards/rejected": -5.929801940917969, "step": 5633 }, { "epoch": 0.88, "learning_rate": 1.0015130452824317e-05, "logits/chosen": -2.9868996143341064, "logits/rejected": -2.924631357192993, "logps/chosen": -295.8369445800781, "logps/rejected": -278.114013671875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.4132692217826843, "rewards/margins": 5.875924587249756, "rewards/rejected": -6.289194107055664, "step": 5634 }, { "epoch": 0.88, "learning_rate": 1.0014397012293169e-05, "logits/chosen": -2.811882734298706, "logits/rejected": -2.838045597076416, "logps/chosen": -249.80628967285156, "logps/rejected": -125.35955810546875, "loss": 3.6591, "rewards/accuracies": 0.0, "rewards/chosen": -4.288693428039551, "rewards/margins": -3.6317782402038574, "rewards/rejected": -0.6569149494171143, "step": 5635 }, { "epoch": 0.88, "learning_rate": 1.0013663571762021e-05, "logits/chosen": -2.741487979888916, "logits/rejected": -3.004589796066284, "logps/chosen": -37.152137756347656, "logps/rejected": -140.14044189453125, "loss": 0.1564, "rewards/accuracies": 1.0, "rewards/chosen": -1.932309627532959, "rewards/margins": 3.985670328140259, "rewards/rejected": -5.917980194091797, "step": 5636 }, { "epoch": 0.88, "learning_rate": 1.0012930131230873e-05, "logits/chosen": -2.682117462158203, "logits/rejected": -3.1542859077453613, "logps/chosen": -334.8637390136719, "logps/rejected": -391.6864318847656, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.37907713651657104, "rewards/margins": 5.839938163757324, "rewards/rejected": -6.219015598297119, "step": 5637 }, { "epoch": 0.88, "learning_rate": 1.0012196690699725e-05, "logits/chosen": -1.6133089065551758, "logits/rejected": -2.6177639961242676, "logps/chosen": -107.99645233154297, "logps/rejected": -310.5566101074219, "loss": 2.6408, "rewards/accuracies": 0.5, "rewards/chosen": -2.6687703132629395, "rewards/margins": 2.08186674118042, "rewards/rejected": -4.750637054443359, "step": 5638 }, { "epoch": 0.88, "learning_rate": 1.0011463250168577e-05, "logits/chosen": -2.3964993953704834, "logits/rejected": -2.8928890228271484, "logps/chosen": -94.15582275390625, "logps/rejected": -230.11822509765625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.356580913066864, "rewards/margins": 6.033492088317871, "rewards/rejected": -6.390072822570801, "step": 5639 }, { "epoch": 0.88, "learning_rate": 1.0010729809637428e-05, "logits/chosen": -3.019534111022949, "logits/rejected": -3.0203464031219482, "logps/chosen": -415.16729736328125, "logps/rejected": -317.719970703125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.36003801226615906, "rewards/margins": 5.982989311218262, "rewards/rejected": -6.343027591705322, "step": 5640 }, { "epoch": 0.88, "learning_rate": 1.000999636910628e-05, "logits/chosen": -2.129765033721924, "logits/rejected": -2.7312891483306885, "logps/chosen": -145.42391967773438, "logps/rejected": -245.2867889404297, "loss": 2.4935, "rewards/accuracies": 0.5, "rewards/chosen": -2.877516269683838, "rewards/margins": 0.3032186031341553, "rewards/rejected": -3.180734872817993, "step": 5641 }, { "epoch": 0.88, "learning_rate": 1.0009262928575132e-05, "logits/chosen": -1.7937164306640625, "logits/rejected": -2.7694833278656006, "logps/chosen": -107.73977661132812, "logps/rejected": -208.01026916503906, "loss": 2.193, "rewards/accuracies": 0.5, "rewards/chosen": -2.225648880004883, "rewards/margins": 1.3298602104187012, "rewards/rejected": -3.555509090423584, "step": 5642 }, { "epoch": 0.88, "learning_rate": 1.0008529488043986e-05, "logits/chosen": -2.9028687477111816, "logits/rejected": -3.13366436958313, "logps/chosen": -51.14771270751953, "logps/rejected": -303.95855712890625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.49478572607040405, "rewards/margins": 5.895295143127441, "rewards/rejected": -6.39008092880249, "step": 5643 }, { "epoch": 0.88, "learning_rate": 1.0007796047512838e-05, "logits/chosen": -2.0722179412841797, "logits/rejected": -3.1715047359466553, "logps/chosen": -512.1987915039062, "logps/rejected": -696.1790771484375, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.019086450338363647, "rewards/margins": 5.477952003479004, "rewards/rejected": -5.4970383644104, "step": 5644 }, { "epoch": 0.88, "learning_rate": 1.000706260698169e-05, "logits/chosen": -2.3405072689056396, "logits/rejected": -3.093320369720459, "logps/chosen": -85.90385437011719, "logps/rejected": -337.66278076171875, "loss": 0.9779, "rewards/accuracies": 0.5, "rewards/chosen": -2.2802000045776367, "rewards/margins": 2.255385160446167, "rewards/rejected": -4.535585403442383, "step": 5645 }, { "epoch": 0.88, "learning_rate": 1.0006329166450543e-05, "logits/chosen": -2.8155832290649414, "logits/rejected": -1.9564350843429565, "logps/chosen": -204.2503204345703, "logps/rejected": -189.46043395996094, "loss": 2.7904, "rewards/accuracies": 0.5, "rewards/chosen": -1.7887241840362549, "rewards/margins": -0.32320594787597656, "rewards/rejected": -1.4655182361602783, "step": 5646 }, { "epoch": 0.88, "learning_rate": 1.0005595725919395e-05, "logits/chosen": -1.1944459676742554, "logits/rejected": -2.832984447479248, "logps/chosen": -231.1747283935547, "logps/rejected": -434.598876953125, "loss": 1.2148, "rewards/accuracies": 0.5, "rewards/chosen": -2.2454001903533936, "rewards/margins": 2.2070560455322266, "rewards/rejected": -4.452456474304199, "step": 5647 }, { "epoch": 0.88, "learning_rate": 1.0004862285388247e-05, "logits/chosen": -3.0882935523986816, "logits/rejected": -1.9246025085449219, "logps/chosen": -615.3536376953125, "logps/rejected": -347.447509765625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.44479256868362427, "rewards/margins": 5.930062294006348, "rewards/rejected": -5.485270023345947, "step": 5648 }, { "epoch": 0.88, "learning_rate": 1.0004128844857099e-05, "logits/chosen": -2.306288242340088, "logits/rejected": -1.429432988166809, "logps/chosen": -357.95947265625, "logps/rejected": -326.54376220703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.36468470096588135, "rewards/margins": 7.217372894287109, "rewards/rejected": -7.582056999206543, "step": 5649 }, { "epoch": 0.88, "learning_rate": 1.000339540432595e-05, "logits/chosen": -2.875169515609741, "logits/rejected": -3.207242965698242, "logps/chosen": -70.09326934814453, "logps/rejected": -198.35897827148438, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.015684008598327637, "rewards/margins": 6.148097991943359, "rewards/rejected": -6.163782119750977, "step": 5650 }, { "epoch": 0.88, "learning_rate": 1.0002661963794803e-05, "logits/chosen": -2.851372003555298, "logits/rejected": -2.336758613586426, "logps/chosen": -481.8984680175781, "logps/rejected": -913.46240234375, "loss": 3.3212, "rewards/accuracies": 0.5, "rewards/chosen": -3.4482948780059814, "rewards/margins": 0.051801204681396484, "rewards/rejected": -3.500096082687378, "step": 5651 }, { "epoch": 0.88, "learning_rate": 1.0001928523263656e-05, "logits/chosen": -2.176948070526123, "logits/rejected": -2.8452389240264893, "logps/chosen": -376.70562744140625, "logps/rejected": -466.26544189453125, "loss": 1.5427, "rewards/accuracies": 0.5, "rewards/chosen": 0.5037781000137329, "rewards/margins": 2.414273262023926, "rewards/rejected": -1.9104950428009033, "step": 5652 }, { "epoch": 0.88, "learning_rate": 1.0001195082732508e-05, "logits/chosen": -2.61606502532959, "logits/rejected": -3.1559371948242188, "logps/chosen": -122.21865844726562, "logps/rejected": -231.90359497070312, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.12411995232105255, "rewards/margins": 6.11064338684082, "rewards/rejected": -5.986523628234863, "step": 5653 }, { "epoch": 0.88, "learning_rate": 1.000046164220136e-05, "logits/chosen": -2.031397819519043, "logits/rejected": -2.8688745498657227, "logps/chosen": -291.68292236328125, "logps/rejected": -422.853515625, "loss": 2.4757, "rewards/accuracies": 0.5, "rewards/chosen": -2.4663026332855225, "rewards/margins": 1.998481035232544, "rewards/rejected": -4.464783668518066, "step": 5654 }, { "epoch": 0.88, "learning_rate": 9.999728201670212e-06, "logits/chosen": -2.863959550857544, "logits/rejected": -3.094620943069458, "logps/chosen": -43.35408020019531, "logps/rejected": -146.2268829345703, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.8330605626106262, "rewards/margins": 4.4839582443237305, "rewards/rejected": -5.317018508911133, "step": 5655 }, { "epoch": 0.88, "learning_rate": 9.998994761139064e-06, "logits/chosen": -1.951949119567871, "logits/rejected": -2.3939898014068604, "logps/chosen": -176.42120361328125, "logps/rejected": -456.63897705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.30355149507522583, "rewards/margins": 9.679393768310547, "rewards/rejected": -9.37584114074707, "step": 5656 }, { "epoch": 0.88, "learning_rate": 9.998261320607915e-06, "logits/chosen": -2.845510482788086, "logits/rejected": -3.0151150226593018, "logps/chosen": -127.76539611816406, "logps/rejected": -275.3089599609375, "loss": 1.691, "rewards/accuracies": 0.5, "rewards/chosen": -2.5346124172210693, "rewards/margins": 0.9886404275894165, "rewards/rejected": -3.5232529640197754, "step": 5657 }, { "epoch": 0.88, "learning_rate": 9.997527880076767e-06, "logits/chosen": -2.7063028812408447, "logits/rejected": -3.0844109058380127, "logps/chosen": -43.39409255981445, "logps/rejected": -356.6982116699219, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.5773754119873047, "rewards/margins": 7.077393054962158, "rewards/rejected": -7.654768943786621, "step": 5658 }, { "epoch": 0.88, "learning_rate": 9.99679443954562e-06, "logits/chosen": -2.818958282470703, "logits/rejected": -3.1444311141967773, "logps/chosen": -133.39979553222656, "logps/rejected": -334.8403015136719, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.8607732653617859, "rewards/margins": 5.459692001342773, "rewards/rejected": -6.320465087890625, "step": 5659 }, { "epoch": 0.88, "learning_rate": 9.996060999014471e-06, "logits/chosen": -2.1542489528656006, "logits/rejected": -2.9173457622528076, "logps/chosen": -384.23486328125, "logps/rejected": -425.5696105957031, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5022903680801392, "rewards/margins": 5.217329978942871, "rewards/rejected": -5.7196197509765625, "step": 5660 }, { "epoch": 0.88, "learning_rate": 9.995327558483325e-06, "logits/chosen": -2.1585965156555176, "logits/rejected": -2.9842822551727295, "logps/chosen": -215.8408660888672, "logps/rejected": -511.05474853515625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.1279037445783615, "rewards/margins": 7.999859809875488, "rewards/rejected": -7.871955871582031, "step": 5661 }, { "epoch": 0.88, "learning_rate": 9.994594117952177e-06, "logits/chosen": -2.7464725971221924, "logits/rejected": -3.131950855255127, "logps/chosen": -194.2125701904297, "logps/rejected": -155.75796508789062, "loss": 1.7432, "rewards/accuracies": 0.5, "rewards/chosen": -2.6178247928619385, "rewards/margins": 1.16456937789917, "rewards/rejected": -3.7823941707611084, "step": 5662 }, { "epoch": 0.88, "learning_rate": 9.993860677421028e-06, "logits/chosen": -2.230541229248047, "logits/rejected": -2.681976795196533, "logps/chosen": -270.3637390136719, "logps/rejected": -342.80792236328125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6126447916030884, "rewards/margins": 6.584423065185547, "rewards/rejected": -7.197068214416504, "step": 5663 }, { "epoch": 0.88, "learning_rate": 9.99312723688988e-06, "logits/chosen": -2.912947654724121, "logits/rejected": -3.2232415676116943, "logps/chosen": -59.92424774169922, "logps/rejected": -260.6067199707031, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6553252339363098, "rewards/margins": 5.890480995178223, "rewards/rejected": -6.545806407928467, "step": 5664 }, { "epoch": 0.88, "learning_rate": 9.992393796358732e-06, "logits/chosen": -2.3326194286346436, "logits/rejected": -3.093626022338867, "logps/chosen": -110.70690155029297, "logps/rejected": -391.8503723144531, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.2833206057548523, "rewards/margins": 6.599957466125488, "rewards/rejected": -6.31663703918457, "step": 5665 }, { "epoch": 0.88, "learning_rate": 9.991660355827584e-06, "logits/chosen": -1.42362380027771, "logits/rejected": -2.830624580383301, "logps/chosen": -35.30327606201172, "logps/rejected": -201.88551330566406, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.10899262130260468, "rewards/margins": 4.5394086837768555, "rewards/rejected": -4.648401260375977, "step": 5666 }, { "epoch": 0.88, "learning_rate": 9.990926915296436e-06, "logits/chosen": -2.710195302963257, "logits/rejected": -2.6884663105010986, "logps/chosen": -81.31254577636719, "logps/rejected": -67.65852355957031, "loss": 2.5384, "rewards/accuracies": 0.5, "rewards/chosen": -2.945726156234741, "rewards/margins": 0.47405123710632324, "rewards/rejected": -3.4197773933410645, "step": 5667 }, { "epoch": 0.88, "learning_rate": 9.990193474765288e-06, "logits/chosen": -2.8428049087524414, "logits/rejected": -3.0568010807037354, "logps/chosen": -548.3380126953125, "logps/rejected": -452.3216247558594, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.16360700130462646, "rewards/margins": 6.096044540405273, "rewards/rejected": -5.932437419891357, "step": 5668 }, { "epoch": 0.88, "learning_rate": 9.98946003423414e-06, "logits/chosen": -2.514005184173584, "logits/rejected": -2.845552921295166, "logps/chosen": -149.94232177734375, "logps/rejected": -253.23910522460938, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.389006644487381, "rewards/margins": 5.052712440490723, "rewards/rejected": -5.441719055175781, "step": 5669 }, { "epoch": 0.88, "learning_rate": 9.988726593702993e-06, "logits/chosen": -2.2035186290740967, "logits/rejected": -2.9755947589874268, "logps/chosen": -100.58619689941406, "logps/rejected": -212.80624389648438, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": -0.37023890018463135, "rewards/margins": 3.5842556953430176, "rewards/rejected": -3.9544949531555176, "step": 5670 }, { "epoch": 0.88, "learning_rate": 9.987993153171845e-06, "logits/chosen": -2.4940173625946045, "logits/rejected": -2.9547390937805176, "logps/chosen": -279.76336669921875, "logps/rejected": -337.241943359375, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -0.7070732712745667, "rewards/margins": 3.452427625656128, "rewards/rejected": -4.159501075744629, "step": 5671 }, { "epoch": 0.88, "learning_rate": 9.987259712640697e-06, "logits/chosen": -2.5372629165649414, "logits/rejected": -2.768998622894287, "logps/chosen": -105.84626770019531, "logps/rejected": -328.914306640625, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.49421292543411255, "rewards/margins": 5.121789932250977, "rewards/rejected": -5.616002559661865, "step": 5672 }, { "epoch": 0.88, "learning_rate": 9.986526272109549e-06, "logits/chosen": -3.0312018394470215, "logits/rejected": -2.806309938430786, "logps/chosen": -144.656982421875, "logps/rejected": -40.08338928222656, "loss": 4.7374, "rewards/accuracies": 0.0, "rewards/chosen": -5.158358573913574, "rewards/margins": -4.689239978790283, "rewards/rejected": -0.46911850571632385, "step": 5673 }, { "epoch": 0.88, "learning_rate": 9.9857928315784e-06, "logits/chosen": -2.8472745418548584, "logits/rejected": -1.4743491411209106, "logps/chosen": -551.2529907226562, "logps/rejected": -268.73211669921875, "loss": 4.3195, "rewards/accuracies": 0.5, "rewards/chosen": -6.360503673553467, "rewards/margins": -3.205784320831299, "rewards/rejected": -3.154719352722168, "step": 5674 }, { "epoch": 0.88, "learning_rate": 9.985059391047253e-06, "logits/chosen": -2.555189371109009, "logits/rejected": -2.6029796600341797, "logps/chosen": -185.76856994628906, "logps/rejected": -257.87176513671875, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -0.7368562817573547, "rewards/margins": 3.675339937210083, "rewards/rejected": -4.412196159362793, "step": 5675 }, { "epoch": 0.88, "learning_rate": 9.984325950516105e-06, "logits/chosen": -2.7170400619506836, "logits/rejected": -2.3901314735412598, "logps/chosen": -282.05084228515625, "logps/rejected": -187.11312866210938, "loss": 0.2912, "rewards/accuracies": 1.0, "rewards/chosen": -0.2835460901260376, "rewards/margins": 4.723507404327393, "rewards/rejected": -5.007053375244141, "step": 5676 }, { "epoch": 0.88, "learning_rate": 9.983592509984956e-06, "logits/chosen": -2.6376888751983643, "logits/rejected": -2.3791348934173584, "logps/chosen": -488.8548583984375, "logps/rejected": -388.2565002441406, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -0.264609158039093, "rewards/margins": 4.6548309326171875, "rewards/rejected": -4.919440269470215, "step": 5677 }, { "epoch": 0.88, "learning_rate": 9.98285906945381e-06, "logits/chosen": -2.067769765853882, "logits/rejected": -2.9331960678100586, "logps/chosen": -106.41666412353516, "logps/rejected": -386.2734375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.09215927124023438, "rewards/margins": 6.351546764373779, "rewards/rejected": -6.443706035614014, "step": 5678 }, { "epoch": 0.88, "learning_rate": 9.982125628922662e-06, "logits/chosen": -2.0093765258789062, "logits/rejected": -2.9217641353607178, "logps/chosen": -131.12171936035156, "logps/rejected": -288.11962890625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -0.607282280921936, "rewards/margins": 4.705842971801758, "rewards/rejected": -5.313125133514404, "step": 5679 }, { "epoch": 0.88, "learning_rate": 9.981392188391515e-06, "logits/chosen": -2.581399440765381, "logits/rejected": -2.772306442260742, "logps/chosen": -25.321998596191406, "logps/rejected": -111.74708557128906, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.9637115597724915, "rewards/margins": 4.692346572875977, "rewards/rejected": -5.656058311462402, "step": 5680 }, { "epoch": 0.88, "learning_rate": 9.980658747860367e-06, "logits/chosen": -1.5705112218856812, "logits/rejected": -2.557021141052246, "logps/chosen": -182.74180603027344, "logps/rejected": -521.4396362304688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4692276120185852, "rewards/margins": 7.870659828186035, "rewards/rejected": -8.339886665344238, "step": 5681 }, { "epoch": 0.88, "learning_rate": 9.97992530732922e-06, "logits/chosen": -2.8336448669433594, "logits/rejected": -2.954251289367676, "logps/chosen": -75.75395965576172, "logps/rejected": -156.91651916503906, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.13454151153564453, "rewards/margins": 5.593565940856934, "rewards/rejected": -5.728107452392578, "step": 5682 }, { "epoch": 0.88, "learning_rate": 9.979191866798071e-06, "logits/chosen": -2.511897087097168, "logits/rejected": -2.7242624759674072, "logps/chosen": -66.62796783447266, "logps/rejected": -228.49111938476562, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.005141362547874451, "rewards/margins": 5.392834663391113, "rewards/rejected": -5.397976398468018, "step": 5683 }, { "epoch": 0.88, "learning_rate": 9.978458426266923e-06, "logits/chosen": -2.4859797954559326, "logits/rejected": -2.3744165897369385, "logps/chosen": -88.56845092773438, "logps/rejected": -219.372314453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.05192255973815918, "rewards/margins": 6.397000312805176, "rewards/rejected": -6.448922634124756, "step": 5684 }, { "epoch": 0.88, "learning_rate": 9.977724985735775e-06, "logits/chosen": -2.970060110092163, "logits/rejected": -2.8758440017700195, "logps/chosen": -529.9896240234375, "logps/rejected": -264.5376281738281, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.5535034537315369, "rewards/margins": 5.431265830993652, "rewards/rejected": -5.984768867492676, "step": 5685 }, { "epoch": 0.88, "learning_rate": 9.976991545204627e-06, "logits/chosen": -2.9349749088287354, "logits/rejected": -3.081472635269165, "logps/chosen": -183.86831665039062, "logps/rejected": -338.353271484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.5005233883857727, "rewards/margins": 7.773889541625977, "rewards/rejected": -7.2733659744262695, "step": 5686 }, { "epoch": 0.88, "learning_rate": 9.97625810467348e-06, "logits/chosen": -2.6711063385009766, "logits/rejected": -2.529487371444702, "logps/chosen": -94.57040405273438, "logps/rejected": -246.71603393554688, "loss": 1.1135, "rewards/accuracies": 0.5, "rewards/chosen": -1.8826475143432617, "rewards/margins": 3.265320301055908, "rewards/rejected": -5.147967338562012, "step": 5687 }, { "epoch": 0.88, "learning_rate": 9.975524664142332e-06, "logits/chosen": -1.872104525566101, "logits/rejected": -2.6780223846435547, "logps/chosen": -168.39154052734375, "logps/rejected": -283.9013366699219, "loss": 0.1102, "rewards/accuracies": 1.0, "rewards/chosen": -2.397184371948242, "rewards/margins": 3.621838092803955, "rewards/rejected": -6.019022464752197, "step": 5688 }, { "epoch": 0.88, "learning_rate": 9.974791223611184e-06, "logits/chosen": -2.7293541431427, "logits/rejected": -2.919677495956421, "logps/chosen": -90.66876983642578, "logps/rejected": -212.36207580566406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9379094839096069, "rewards/margins": 6.515567779541016, "rewards/rejected": -7.453477382659912, "step": 5689 }, { "epoch": 0.88, "learning_rate": 9.974057783080036e-06, "logits/chosen": -2.8835337162017822, "logits/rejected": -2.874687433242798, "logps/chosen": -137.21258544921875, "logps/rejected": -161.02603149414062, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 0.6335812211036682, "rewards/margins": 5.032159328460693, "rewards/rejected": -4.398577690124512, "step": 5690 }, { "epoch": 0.89, "learning_rate": 9.973324342548888e-06, "logits/chosen": -2.880232572555542, "logits/rejected": -2.039900064468384, "logps/chosen": -809.5751953125, "logps/rejected": -504.2154235839844, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -1.3880584239959717, "rewards/margins": 6.093304634094238, "rewards/rejected": -7.481363296508789, "step": 5691 }, { "epoch": 0.89, "learning_rate": 9.97259090201774e-06, "logits/chosen": -0.605430543422699, "logits/rejected": -2.1465373039245605, "logps/chosen": -75.09232330322266, "logps/rejected": -433.77105712890625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.7578796148300171, "rewards/margins": 5.984401702880859, "rewards/rejected": -6.742281436920166, "step": 5692 }, { "epoch": 0.89, "learning_rate": 9.971857461486592e-06, "logits/chosen": -2.870776891708374, "logits/rejected": -2.7546253204345703, "logps/chosen": -142.14137268066406, "logps/rejected": -294.45037841796875, "loss": 1.4287, "rewards/accuracies": 0.5, "rewards/chosen": -1.1186950206756592, "rewards/margins": 2.870851755142212, "rewards/rejected": -3.989546775817871, "step": 5693 }, { "epoch": 0.89, "learning_rate": 9.971124020955443e-06, "logits/chosen": -2.146925210952759, "logits/rejected": -2.791355609893799, "logps/chosen": -230.94659423828125, "logps/rejected": -502.4549865722656, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.7022407054901123, "rewards/margins": 7.16343355178833, "rewards/rejected": -8.865674018859863, "step": 5694 }, { "epoch": 0.89, "learning_rate": 9.970390580424295e-06, "logits/chosen": -1.828788161277771, "logits/rejected": -2.7498204708099365, "logps/chosen": -119.00169372558594, "logps/rejected": -303.75421142578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8309078216552734, "rewards/margins": 7.273561000823975, "rewards/rejected": -8.104469299316406, "step": 5695 }, { "epoch": 0.89, "learning_rate": 9.969657139893149e-06, "logits/chosen": -1.4551924467086792, "logits/rejected": -2.8686273097991943, "logps/chosen": -219.41159057617188, "logps/rejected": -462.10125732421875, "loss": 0.3647, "rewards/accuracies": 0.5, "rewards/chosen": -1.4074398279190063, "rewards/margins": 3.016637086868286, "rewards/rejected": -4.424077033996582, "step": 5696 }, { "epoch": 0.89, "learning_rate": 9.968923699362e-06, "logits/chosen": -2.730161190032959, "logits/rejected": -2.8493165969848633, "logps/chosen": -334.9944152832031, "logps/rejected": -426.60882568359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.1961875855922699, "rewards/margins": 8.037744522094727, "rewards/rejected": -8.233932495117188, "step": 5697 }, { "epoch": 0.89, "learning_rate": 9.968190258830853e-06, "logits/chosen": -0.803069531917572, "logits/rejected": -2.6722774505615234, "logps/chosen": -32.511566162109375, "logps/rejected": -413.43377685546875, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -1.1144765615463257, "rewards/margins": 5.84064245223999, "rewards/rejected": -6.9551191329956055, "step": 5698 }, { "epoch": 0.89, "learning_rate": 9.967456818299705e-06, "logits/chosen": -2.7454802989959717, "logits/rejected": -1.9599374532699585, "logps/chosen": -244.68276977539062, "logps/rejected": -161.42807006835938, "loss": 3.0674, "rewards/accuracies": 0.5, "rewards/chosen": -3.5880234241485596, "rewards/margins": 0.3575608730316162, "rewards/rejected": -3.945584297180176, "step": 5699 }, { "epoch": 0.89, "learning_rate": 9.966723377768556e-06, "logits/chosen": -1.4063506126403809, "logits/rejected": -2.276139974594116, "logps/chosen": -230.56121826171875, "logps/rejected": -238.82432556152344, "loss": 2.2618, "rewards/accuracies": 0.5, "rewards/chosen": -2.793567657470703, "rewards/margins": 0.7169528007507324, "rewards/rejected": -3.5105204582214355, "step": 5700 }, { "epoch": 0.89, "learning_rate": 9.965989937237408e-06, "logits/chosen": -2.8193368911743164, "logits/rejected": -3.089533805847168, "logps/chosen": -279.32080078125, "logps/rejected": -452.907958984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.4636902809143066, "rewards/margins": 7.011819839477539, "rewards/rejected": -8.475509643554688, "step": 5701 }, { "epoch": 0.89, "learning_rate": 9.96525649670626e-06, "logits/chosen": -1.5199873447418213, "logits/rejected": -2.148928165435791, "logps/chosen": -142.48268127441406, "logps/rejected": -331.3230285644531, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.576432466506958, "rewards/margins": 7.224453926086426, "rewards/rejected": -7.800887107849121, "step": 5702 }, { "epoch": 0.89, "learning_rate": 9.964523056175112e-06, "logits/chosen": -2.190743923187256, "logits/rejected": -2.2437477111816406, "logps/chosen": -198.5747528076172, "logps/rejected": -245.002197265625, "loss": 1.3561, "rewards/accuracies": 0.5, "rewards/chosen": -2.797211170196533, "rewards/margins": 3.205705165863037, "rewards/rejected": -6.002915859222412, "step": 5703 }, { "epoch": 0.89, "learning_rate": 9.963789615643964e-06, "logits/chosen": -2.402005195617676, "logits/rejected": -3.1726863384246826, "logps/chosen": -93.34423828125, "logps/rejected": -261.56488037109375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.2469266653060913, "rewards/margins": 6.08937406539917, "rewards/rejected": -7.336300849914551, "step": 5704 }, { "epoch": 0.89, "learning_rate": 9.963056175112817e-06, "logits/chosen": -2.627293825149536, "logits/rejected": -2.9927985668182373, "logps/chosen": -65.27360534667969, "logps/rejected": -211.5875701904297, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.5631130337715149, "rewards/margins": 5.269336700439453, "rewards/rejected": -5.832449436187744, "step": 5705 }, { "epoch": 0.89, "learning_rate": 9.96232273458167e-06, "logits/chosen": -1.5335028171539307, "logits/rejected": -2.6179425716400146, "logps/chosen": -168.55947875976562, "logps/rejected": -347.19610595703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.0871204361319542, "rewards/margins": 7.49400520324707, "rewards/rejected": -7.5811262130737305, "step": 5706 }, { "epoch": 0.89, "learning_rate": 9.961589294050521e-06, "logits/chosen": -1.984194278717041, "logits/rejected": -2.9678502082824707, "logps/chosen": -110.86148834228516, "logps/rejected": -268.709716796875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 0.13355980813503265, "rewards/margins": 5.176119804382324, "rewards/rejected": -5.042559623718262, "step": 5707 }, { "epoch": 0.89, "learning_rate": 9.960855853519373e-06, "logits/chosen": -1.9531985521316528, "logits/rejected": -2.755734443664551, "logps/chosen": -50.70320129394531, "logps/rejected": -157.15560913085938, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.45458871126174927, "rewards/margins": 5.471090316772461, "rewards/rejected": -5.925678730010986, "step": 5708 }, { "epoch": 0.89, "learning_rate": 9.960122412988225e-06, "logits/chosen": -2.3211095333099365, "logits/rejected": -2.8557348251342773, "logps/chosen": -93.02033996582031, "logps/rejected": -182.13417053222656, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.7234681844711304, "rewards/margins": 5.936623573303223, "rewards/rejected": -6.660091400146484, "step": 5709 }, { "epoch": 0.89, "learning_rate": 9.959388972457077e-06, "logits/chosen": -3.096806764602661, "logits/rejected": -2.2721152305603027, "logps/chosen": -271.79400634765625, "logps/rejected": -146.92626953125, "loss": 0.0732, "rewards/accuracies": 1.0, "rewards/chosen": -2.5893735885620117, "rewards/margins": 4.162630081176758, "rewards/rejected": -6.7520036697387695, "step": 5710 }, { "epoch": 0.89, "learning_rate": 9.958655531925929e-06, "logits/chosen": -0.7999206781387329, "logits/rejected": -3.16402006149292, "logps/chosen": -40.554908752441406, "logps/rejected": -450.55462646484375, "loss": 0.241, "rewards/accuracies": 1.0, "rewards/chosen": -0.49096137285232544, "rewards/margins": 3.4655299186706543, "rewards/rejected": -3.956491231918335, "step": 5711 }, { "epoch": 0.89, "learning_rate": 9.95792209139478e-06, "logits/chosen": -2.006748676300049, "logits/rejected": -2.607642412185669, "logps/chosen": -57.8027229309082, "logps/rejected": -188.63404846191406, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.8743003606796265, "rewards/margins": 5.744390964508057, "rewards/rejected": -6.618691444396973, "step": 5712 }, { "epoch": 0.89, "learning_rate": 9.957188650863634e-06, "logits/chosen": -2.734318733215332, "logits/rejected": -2.080040216445923, "logps/chosen": -447.7718505859375, "logps/rejected": -379.06658935546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.46075552701950073, "rewards/margins": 7.813166618347168, "rewards/rejected": -7.352410793304443, "step": 5713 }, { "epoch": 0.89, "learning_rate": 9.956455210332486e-06, "logits/chosen": -2.7579572200775146, "logits/rejected": -3.173424005508423, "logps/chosen": -311.57513427734375, "logps/rejected": -493.7960205078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.11338385939598083, "rewards/margins": 6.996270179748535, "rewards/rejected": -7.109654426574707, "step": 5714 }, { "epoch": 0.89, "learning_rate": 9.95572176980134e-06, "logits/chosen": -2.81353497505188, "logits/rejected": -2.1906607151031494, "logps/chosen": -449.9749755859375, "logps/rejected": -512.2706298828125, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -1.0965652465820312, "rewards/margins": 6.08175802230835, "rewards/rejected": -7.178323745727539, "step": 5715 }, { "epoch": 0.89, "learning_rate": 9.954988329270192e-06, "logits/chosen": -1.975908875465393, "logits/rejected": -2.5241811275482178, "logps/chosen": -351.9805908203125, "logps/rejected": -346.3341064453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.587561845779419, "rewards/margins": 7.22038459777832, "rewards/rejected": -7.80794620513916, "step": 5716 }, { "epoch": 0.89, "learning_rate": 9.954254888739043e-06, "logits/chosen": -2.3409249782562256, "logits/rejected": -1.942855715751648, "logps/chosen": -190.32162475585938, "logps/rejected": -230.984619140625, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -0.29549142718315125, "rewards/margins": 4.402289867401123, "rewards/rejected": -4.697781562805176, "step": 5717 }, { "epoch": 0.89, "learning_rate": 9.953521448207895e-06, "logits/chosen": -2.9753172397613525, "logits/rejected": -1.4689207077026367, "logps/chosen": -287.8990173339844, "logps/rejected": -151.96751403808594, "loss": 0.7704, "rewards/accuracies": 0.5, "rewards/chosen": -2.7177960872650146, "rewards/margins": 2.802727460861206, "rewards/rejected": -5.520523548126221, "step": 5718 }, { "epoch": 0.89, "learning_rate": 9.952788007676747e-06, "logits/chosen": -2.7944910526275635, "logits/rejected": -2.999589681625366, "logps/chosen": -40.632354736328125, "logps/rejected": -151.32342529296875, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -1.4321119785308838, "rewards/margins": 4.3161492347717285, "rewards/rejected": -5.748260974884033, "step": 5719 }, { "epoch": 0.89, "learning_rate": 9.952054567145599e-06, "logits/chosen": -2.3464972972869873, "logits/rejected": -3.2260189056396484, "logps/chosen": -109.36015319824219, "logps/rejected": -339.2099914550781, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -0.6742122769355774, "rewards/margins": 4.5670061111450195, "rewards/rejected": -5.241218566894531, "step": 5720 }, { "epoch": 0.89, "learning_rate": 9.951321126614451e-06, "logits/chosen": -1.1948750019073486, "logits/rejected": -2.6279869079589844, "logps/chosen": -88.41975402832031, "logps/rejected": -168.67703247070312, "loss": 1.3723, "rewards/accuracies": 0.5, "rewards/chosen": -2.7212777137756348, "rewards/margins": 3.3157010078430176, "rewards/rejected": -6.036978721618652, "step": 5721 }, { "epoch": 0.89, "learning_rate": 9.950587686083303e-06, "logits/chosen": -2.3251914978027344, "logits/rejected": -2.8691163063049316, "logps/chosen": -119.30990600585938, "logps/rejected": -206.40521240234375, "loss": 2.8703, "rewards/accuracies": 0.5, "rewards/chosen": -2.7714593410491943, "rewards/margins": 0.6196298599243164, "rewards/rejected": -3.3910892009735107, "step": 5722 }, { "epoch": 0.89, "learning_rate": 9.949854245552156e-06, "logits/chosen": -2.277312755584717, "logits/rejected": -2.968111753463745, "logps/chosen": -267.40447998046875, "logps/rejected": -596.2135009765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7221970558166504, "rewards/margins": 7.238846302032471, "rewards/rejected": -8.961043357849121, "step": 5723 }, { "epoch": 0.89, "learning_rate": 9.949120805021008e-06, "logits/chosen": -2.543200969696045, "logits/rejected": -2.7941370010375977, "logps/chosen": -191.03919982910156, "logps/rejected": -239.4535675048828, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.6977894306182861, "rewards/margins": 6.317200660705566, "rewards/rejected": -7.014990329742432, "step": 5724 }, { "epoch": 0.89, "learning_rate": 9.94838736448986e-06, "logits/chosen": -2.7415246963500977, "logits/rejected": -3.062981367111206, "logps/chosen": -56.680381774902344, "logps/rejected": -168.9853515625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -1.4252276420593262, "rewards/margins": 5.34065055847168, "rewards/rejected": -6.765878677368164, "step": 5725 }, { "epoch": 0.89, "learning_rate": 9.947653923958712e-06, "logits/chosen": -2.6511504650115967, "logits/rejected": -2.7044811248779297, "logps/chosen": -44.22333526611328, "logps/rejected": -118.0901107788086, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.486142635345459, "rewards/margins": 5.235713005065918, "rewards/rejected": -6.721855640411377, "step": 5726 }, { "epoch": 0.89, "learning_rate": 9.946920483427564e-06, "logits/chosen": -1.5367296934127808, "logits/rejected": -1.5717319250106812, "logps/chosen": -68.54466247558594, "logps/rejected": -173.8023681640625, "loss": 0.0684, "rewards/accuracies": 1.0, "rewards/chosen": -1.7695993185043335, "rewards/margins": 5.7256035804748535, "rewards/rejected": -7.495203018188477, "step": 5727 }, { "epoch": 0.89, "learning_rate": 9.946187042896416e-06, "logits/chosen": -3.0356385707855225, "logits/rejected": -2.8614745140075684, "logps/chosen": -512.1083984375, "logps/rejected": -515.8050537109375, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.5497932434082031, "rewards/margins": 4.905246734619141, "rewards/rejected": -6.455039978027344, "step": 5728 }, { "epoch": 0.89, "learning_rate": 9.945453602365268e-06, "logits/chosen": -2.3676931858062744, "logits/rejected": -2.9069485664367676, "logps/chosen": -383.32879638671875, "logps/rejected": -465.2037658691406, "loss": 4.5683, "rewards/accuracies": 0.5, "rewards/chosen": -4.349684238433838, "rewards/margins": -1.4098389148712158, "rewards/rejected": -2.939845085144043, "step": 5729 }, { "epoch": 0.89, "learning_rate": 9.94472016183412e-06, "logits/chosen": -2.411261796951294, "logits/rejected": -1.2447922229766846, "logps/chosen": -210.88458251953125, "logps/rejected": -169.67391967773438, "loss": 3.5934, "rewards/accuracies": 0.5, "rewards/chosen": -4.08914041519165, "rewards/margins": 1.3099114894866943, "rewards/rejected": -5.399051666259766, "step": 5730 }, { "epoch": 0.89, "learning_rate": 9.943986721302971e-06, "logits/chosen": -2.840214729309082, "logits/rejected": -2.1581661701202393, "logps/chosen": -321.03765869140625, "logps/rejected": -230.6016082763672, "loss": 1.9181, "rewards/accuracies": 0.5, "rewards/chosen": -1.8231582641601562, "rewards/margins": 1.8328157663345337, "rewards/rejected": -3.6559739112854004, "step": 5731 }, { "epoch": 0.89, "learning_rate": 9.943253280771825e-06, "logits/chosen": -2.629399538040161, "logits/rejected": -2.9635438919067383, "logps/chosen": -780.43701171875, "logps/rejected": -499.3825378417969, "loss": 1.319, "rewards/accuracies": 0.5, "rewards/chosen": -3.227471113204956, "rewards/margins": 2.777982711791992, "rewards/rejected": -6.005454063415527, "step": 5732 }, { "epoch": 0.89, "learning_rate": 9.942519840240677e-06, "logits/chosen": -2.5725560188293457, "logits/rejected": -2.0975418090820312, "logps/chosen": -483.093505859375, "logps/rejected": -522.612548828125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.371939092874527, "rewards/margins": 6.2548017501831055, "rewards/rejected": -6.626741409301758, "step": 5733 }, { "epoch": 0.89, "learning_rate": 9.941786399709529e-06, "logits/chosen": -2.436652660369873, "logits/rejected": -2.299255132675171, "logps/chosen": -135.51068115234375, "logps/rejected": -171.03182983398438, "loss": 0.1653, "rewards/accuracies": 1.0, "rewards/chosen": -1.6511942148208618, "rewards/margins": 2.68272066116333, "rewards/rejected": -4.333914756774902, "step": 5734 }, { "epoch": 0.89, "learning_rate": 9.94105295917838e-06, "logits/chosen": -3.024238109588623, "logits/rejected": -1.7881531715393066, "logps/chosen": -166.25831604003906, "logps/rejected": -58.883018493652344, "loss": 3.0225, "rewards/accuracies": 0.0, "rewards/chosen": -6.024999141693115, "rewards/margins": -2.6921238899230957, "rewards/rejected": -3.3328752517700195, "step": 5735 }, { "epoch": 0.89, "learning_rate": 9.940319518647232e-06, "logits/chosen": -2.995452642440796, "logits/rejected": -2.3485255241394043, "logps/chosen": -204.7310028076172, "logps/rejected": -195.58433532714844, "loss": 1.1397, "rewards/accuracies": 0.5, "rewards/chosen": -1.766572117805481, "rewards/margins": 1.2852816581726074, "rewards/rejected": -3.051853656768799, "step": 5736 }, { "epoch": 0.89, "learning_rate": 9.939586078116084e-06, "logits/chosen": -2.602497100830078, "logits/rejected": -2.7481987476348877, "logps/chosen": -201.55374145507812, "logps/rejected": -203.9619140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.33849257230758667, "rewards/margins": 6.43117094039917, "rewards/rejected": -6.7696638107299805, "step": 5737 }, { "epoch": 0.89, "learning_rate": 9.938852637584936e-06, "logits/chosen": -2.976284980773926, "logits/rejected": -2.832766056060791, "logps/chosen": -252.1402130126953, "logps/rejected": -300.7699890136719, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.06302948296070099, "rewards/margins": 7.5280046463012695, "rewards/rejected": -7.591033935546875, "step": 5738 }, { "epoch": 0.89, "learning_rate": 9.938119197053788e-06, "logits/chosen": -2.977144479751587, "logits/rejected": -2.1065776348114014, "logps/chosen": -192.2368927001953, "logps/rejected": -86.66670989990234, "loss": 0.2151, "rewards/accuracies": 1.0, "rewards/chosen": -1.944532036781311, "rewards/margins": 3.055966854095459, "rewards/rejected": -5.0004987716674805, "step": 5739 }, { "epoch": 0.89, "learning_rate": 9.93738575652264e-06, "logits/chosen": -1.9952588081359863, "logits/rejected": -2.870593309402466, "logps/chosen": -112.30683135986328, "logps/rejected": -290.33685302734375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9567657709121704, "rewards/margins": 5.73118257522583, "rewards/rejected": -6.687948226928711, "step": 5740 }, { "epoch": 0.89, "learning_rate": 9.936652315991494e-06, "logits/chosen": -2.2804360389709473, "logits/rejected": -2.7703745365142822, "logps/chosen": -87.36559295654297, "logps/rejected": -264.7440490722656, "loss": 0.072, "rewards/accuracies": 1.0, "rewards/chosen": -2.1095566749572754, "rewards/margins": 4.69219446182251, "rewards/rejected": -6.801751136779785, "step": 5741 }, { "epoch": 0.89, "learning_rate": 9.935918875460345e-06, "logits/chosen": -2.729504108428955, "logits/rejected": -3.2431893348693848, "logps/chosen": -62.87944030761719, "logps/rejected": -269.0151672363281, "loss": 0.5357, "rewards/accuracies": 0.5, "rewards/chosen": -2.8781049251556396, "rewards/margins": 3.1629533767700195, "rewards/rejected": -6.041058540344238, "step": 5742 }, { "epoch": 0.89, "learning_rate": 9.935185434929197e-06, "logits/chosen": -2.2511212825775146, "logits/rejected": -2.6790006160736084, "logps/chosen": -96.26958465576172, "logps/rejected": -287.5985412597656, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -0.8730497360229492, "rewards/margins": 4.728179454803467, "rewards/rejected": -5.601229190826416, "step": 5743 }, { "epoch": 0.89, "learning_rate": 9.93445199439805e-06, "logits/chosen": -2.2148427963256836, "logits/rejected": -2.6436753273010254, "logps/chosen": -295.77911376953125, "logps/rejected": -280.041259765625, "loss": 1.1746, "rewards/accuracies": 0.5, "rewards/chosen": -1.726377010345459, "rewards/margins": 2.438913345336914, "rewards/rejected": -4.165290355682373, "step": 5744 }, { "epoch": 0.89, "learning_rate": 9.933718553866901e-06, "logits/chosen": -2.12066388130188, "logits/rejected": -3.005218029022217, "logps/chosen": -94.35464477539062, "logps/rejected": -407.503173828125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.015173330903053284, "rewards/margins": 7.026065349578857, "rewards/rejected": -7.041238784790039, "step": 5745 }, { "epoch": 0.89, "learning_rate": 9.932985113335753e-06, "logits/chosen": -2.2246530055999756, "logits/rejected": -2.767352819442749, "logps/chosen": -240.59555053710938, "logps/rejected": -398.68267822265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.42966651916503906, "rewards/margins": 6.798037528991699, "rewards/rejected": -7.227704048156738, "step": 5746 }, { "epoch": 0.89, "learning_rate": 9.932251672804607e-06, "logits/chosen": -1.9635920524597168, "logits/rejected": -2.5467374324798584, "logps/chosen": -320.22552490234375, "logps/rejected": -375.16864013671875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.024840164929628372, "rewards/margins": 6.537544250488281, "rewards/rejected": -6.562384605407715, "step": 5747 }, { "epoch": 0.89, "learning_rate": 9.931518232273458e-06, "logits/chosen": -1.7353060245513916, "logits/rejected": -2.915492534637451, "logps/chosen": -52.87384033203125, "logps/rejected": -305.78826904296875, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -0.83428955078125, "rewards/margins": 4.594694137573242, "rewards/rejected": -5.428983211517334, "step": 5748 }, { "epoch": 0.89, "learning_rate": 9.93078479174231e-06, "logits/chosen": -2.657472848892212, "logits/rejected": -1.682446837425232, "logps/chosen": -241.37547302246094, "logps/rejected": -280.2729187011719, "loss": 3.6623, "rewards/accuracies": 0.5, "rewards/chosen": -3.8335981369018555, "rewards/margins": 0.5687522888183594, "rewards/rejected": -4.402350425720215, "step": 5749 }, { "epoch": 0.89, "learning_rate": 9.930051351211164e-06, "logits/chosen": -2.5798356533050537, "logits/rejected": -2.7242753505706787, "logps/chosen": -44.51084899902344, "logps/rejected": -160.2381591796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.18405723571777344, "rewards/margins": 6.858466625213623, "rewards/rejected": -7.0425238609313965, "step": 5750 }, { "epoch": 0.89, "learning_rate": 9.929317910680016e-06, "logits/chosen": -2.762748956680298, "logits/rejected": -2.377898931503296, "logps/chosen": -101.19285583496094, "logps/rejected": -144.79769897460938, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.8450946807861328, "rewards/margins": 5.602428436279297, "rewards/rejected": -6.44752311706543, "step": 5751 }, { "epoch": 0.89, "learning_rate": 9.928584470148868e-06, "logits/chosen": -2.385099172592163, "logits/rejected": -2.803529977798462, "logps/chosen": -101.18467712402344, "logps/rejected": -336.7873229980469, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9066213965415955, "rewards/margins": 6.981300354003906, "rewards/rejected": -7.887921333312988, "step": 5752 }, { "epoch": 0.89, "learning_rate": 9.92785102961772e-06, "logits/chosen": -2.6607491970062256, "logits/rejected": -2.4226269721984863, "logps/chosen": -226.81446838378906, "logps/rejected": -247.690673828125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5350555777549744, "rewards/margins": 6.823108673095703, "rewards/rejected": -7.358163833618164, "step": 5753 }, { "epoch": 0.89, "learning_rate": 9.927117589086571e-06, "logits/chosen": -2.7841572761535645, "logits/rejected": -2.4100074768066406, "logps/chosen": -487.67333984375, "logps/rejected": -489.41265869140625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -2.2202911376953125, "rewards/margins": 5.797938823699951, "rewards/rejected": -8.018229484558105, "step": 5754 }, { "epoch": 0.9, "learning_rate": 9.926384148555423e-06, "logits/chosen": -2.3942275047302246, "logits/rejected": -2.5684092044830322, "logps/chosen": -184.64019775390625, "logps/rejected": -313.89111328125, "loss": 2.5676, "rewards/accuracies": 0.5, "rewards/chosen": -2.5563905239105225, "rewards/margins": 2.050097942352295, "rewards/rejected": -4.606488227844238, "step": 5755 }, { "epoch": 0.9, "learning_rate": 9.925650708024275e-06, "logits/chosen": -2.796485185623169, "logits/rejected": -2.8932483196258545, "logps/chosen": -158.70851135253906, "logps/rejected": -147.94754028320312, "loss": 2.667, "rewards/accuracies": 0.5, "rewards/chosen": -5.006256103515625, "rewards/margins": -1.0179539918899536, "rewards/rejected": -3.988302230834961, "step": 5756 }, { "epoch": 0.9, "learning_rate": 9.924917267493127e-06, "logits/chosen": -2.7207162380218506, "logits/rejected": -2.4020378589630127, "logps/chosen": -226.96018981933594, "logps/rejected": -300.27081298828125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1716797351837158, "rewards/margins": 6.999478816986084, "rewards/rejected": -8.171158790588379, "step": 5757 }, { "epoch": 0.9, "learning_rate": 9.924183826961979e-06, "logits/chosen": -2.803647994995117, "logits/rejected": -2.0740907192230225, "logps/chosen": -254.32362365722656, "logps/rejected": -127.23297119140625, "loss": 1.7872, "rewards/accuracies": 0.5, "rewards/chosen": -2.0775885581970215, "rewards/margins": 0.4393397569656372, "rewards/rejected": -2.5169284343719482, "step": 5758 }, { "epoch": 0.9, "learning_rate": 9.923450386430832e-06, "logits/chosen": -2.080430030822754, "logits/rejected": -2.822653293609619, "logps/chosen": -36.374168395996094, "logps/rejected": -366.66546630859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.291840672492981, "rewards/margins": 8.157451629638672, "rewards/rejected": -9.449292182922363, "step": 5759 }, { "epoch": 0.9, "learning_rate": 9.922716945899684e-06, "logits/chosen": -2.544696092605591, "logits/rejected": -2.6992404460906982, "logps/chosen": -298.53094482421875, "logps/rejected": -300.61175537109375, "loss": 1.4882, "rewards/accuracies": 0.5, "rewards/chosen": -2.719050645828247, "rewards/margins": 1.868154764175415, "rewards/rejected": -4.587205410003662, "step": 5760 }, { "epoch": 0.9, "learning_rate": 9.921983505368536e-06, "logits/chosen": -2.655701160430908, "logits/rejected": -2.931043863296509, "logps/chosen": -72.08231353759766, "logps/rejected": -175.0947265625, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -1.0671974420547485, "rewards/margins": 4.127018928527832, "rewards/rejected": -5.194216251373291, "step": 5761 }, { "epoch": 0.9, "learning_rate": 9.921250064837388e-06, "logits/chosen": -2.0869100093841553, "logits/rejected": -2.71986722946167, "logps/chosen": -140.9322052001953, "logps/rejected": -269.1107177734375, "loss": 2.5693, "rewards/accuracies": 0.5, "rewards/chosen": -3.579906463623047, "rewards/margins": 1.7753067016601562, "rewards/rejected": -5.355213165283203, "step": 5762 }, { "epoch": 0.9, "learning_rate": 9.92051662430624e-06, "logits/chosen": -2.6802079677581787, "logits/rejected": -2.786250591278076, "logps/chosen": -158.15072631835938, "logps/rejected": -226.27865600585938, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.20508310198783875, "rewards/margins": 5.241634845733643, "rewards/rejected": -5.446718215942383, "step": 5763 }, { "epoch": 0.9, "learning_rate": 9.919783183775092e-06, "logits/chosen": -2.743131637573242, "logits/rejected": -1.996586561203003, "logps/chosen": -239.9400634765625, "logps/rejected": -283.09375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -1.229047417640686, "rewards/margins": 4.220668792724609, "rewards/rejected": -5.449716091156006, "step": 5764 }, { "epoch": 0.9, "learning_rate": 9.919049743243944e-06, "logits/chosen": -2.7832064628601074, "logits/rejected": -2.4169344902038574, "logps/chosen": -463.94354248046875, "logps/rejected": -453.294921875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.1301347017288208, "rewards/margins": 5.291704177856445, "rewards/rejected": -6.421839237213135, "step": 5765 }, { "epoch": 0.9, "learning_rate": 9.918316302712796e-06, "logits/chosen": -2.132469654083252, "logits/rejected": -2.9169020652770996, "logps/chosen": -218.56900024414062, "logps/rejected": -305.80078125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.39637529850006104, "rewards/margins": 5.39334774017334, "rewards/rejected": -5.7897233963012695, "step": 5766 }, { "epoch": 0.9, "learning_rate": 9.91758286218165e-06, "logits/chosen": -2.661552906036377, "logits/rejected": -2.7198781967163086, "logps/chosen": -209.86607360839844, "logps/rejected": -138.8738555908203, "loss": 2.8346, "rewards/accuracies": 0.5, "rewards/chosen": -4.193073272705078, "rewards/margins": -0.9296395778656006, "rewards/rejected": -3.2634334564208984, "step": 5767 }, { "epoch": 0.9, "learning_rate": 9.916849421650501e-06, "logits/chosen": -2.2239809036254883, "logits/rejected": -2.7746100425720215, "logps/chosen": -169.65853881835938, "logps/rejected": -358.7296142578125, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": -3.948871374130249, "rewards/margins": 4.285001277923584, "rewards/rejected": -8.233872413635254, "step": 5768 }, { "epoch": 0.9, "learning_rate": 9.916115981119353e-06, "logits/chosen": -2.9626572132110596, "logits/rejected": -2.685664176940918, "logps/chosen": -606.0579223632812, "logps/rejected": -309.11846923828125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -0.05917969346046448, "rewards/margins": 5.074542045593262, "rewards/rejected": -5.133722305297852, "step": 5769 }, { "epoch": 0.9, "learning_rate": 9.915382540588205e-06, "logits/chosen": -2.760420560836792, "logits/rejected": -2.5650222301483154, "logps/chosen": -118.25154113769531, "logps/rejected": -112.91124725341797, "loss": 2.1676, "rewards/accuracies": 0.5, "rewards/chosen": -4.771180629730225, "rewards/margins": -0.34148168563842773, "rewards/rejected": -4.429698944091797, "step": 5770 }, { "epoch": 0.9, "learning_rate": 9.914649100057057e-06, "logits/chosen": -2.102940082550049, "logits/rejected": -2.71769380569458, "logps/chosen": -183.32305908203125, "logps/rejected": -319.30718994140625, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.8366859555244446, "rewards/margins": 4.48336124420166, "rewards/rejected": -5.320047378540039, "step": 5771 }, { "epoch": 0.9, "learning_rate": 9.913915659525909e-06, "logits/chosen": -2.3982999324798584, "logits/rejected": -2.7581260204315186, "logps/chosen": -540.0261840820312, "logps/rejected": -548.028564453125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.6784579753875732, "rewards/margins": 6.019875526428223, "rewards/rejected": -7.698333740234375, "step": 5772 }, { "epoch": 0.9, "learning_rate": 9.91318221899476e-06, "logits/chosen": -2.118813991546631, "logits/rejected": -2.6619415283203125, "logps/chosen": -276.51934814453125, "logps/rejected": -383.86175537109375, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -1.4253191947937012, "rewards/margins": 4.560061931610107, "rewards/rejected": -5.985381126403809, "step": 5773 }, { "epoch": 0.9, "learning_rate": 9.912448778463612e-06, "logits/chosen": -1.9378652572631836, "logits/rejected": -2.9409172534942627, "logps/chosen": -144.68618774414062, "logps/rejected": -421.21417236328125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.1087058782577515, "rewards/margins": 6.354584693908691, "rewards/rejected": -7.463290691375732, "step": 5774 }, { "epoch": 0.9, "learning_rate": 9.911715337932464e-06, "logits/chosen": -0.8591586351394653, "logits/rejected": -1.6245503425598145, "logps/chosen": -219.49342346191406, "logps/rejected": -537.59619140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.9238581657409668, "rewards/margins": 6.7676897048950195, "rewards/rejected": -8.691548347473145, "step": 5775 }, { "epoch": 0.9, "learning_rate": 9.910981897401318e-06, "logits/chosen": -1.824196219444275, "logits/rejected": -2.6044790744781494, "logps/chosen": -223.83140563964844, "logps/rejected": -316.9258117675781, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -0.4201544523239136, "rewards/margins": 4.315638542175293, "rewards/rejected": -4.735793113708496, "step": 5776 }, { "epoch": 0.9, "learning_rate": 9.91024845687017e-06, "logits/chosen": -2.9383387565612793, "logits/rejected": -2.2802088260650635, "logps/chosen": -331.9986267089844, "logps/rejected": -259.06298828125, "loss": 3.0294, "rewards/accuracies": 0.5, "rewards/chosen": -3.5367510318756104, "rewards/margins": -0.5199556350708008, "rewards/rejected": -3.0167953968048096, "step": 5777 }, { "epoch": 0.9, "learning_rate": 9.909515016339022e-06, "logits/chosen": -2.663869619369507, "logits/rejected": -2.5774435997009277, "logps/chosen": -308.7899475097656, "logps/rejected": -174.89889526367188, "loss": 2.8032, "rewards/accuracies": 0.5, "rewards/chosen": -3.8407912254333496, "rewards/margins": -0.2610490322113037, "rewards/rejected": -3.579742193222046, "step": 5778 }, { "epoch": 0.9, "learning_rate": 9.908781575807873e-06, "logits/chosen": -2.8190956115722656, "logits/rejected": -3.0383682250976562, "logps/chosen": -252.57308959960938, "logps/rejected": -383.9014587402344, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -1.0860222578048706, "rewards/margins": 4.684454917907715, "rewards/rejected": -5.770477294921875, "step": 5779 }, { "epoch": 0.9, "learning_rate": 9.908048135276725e-06, "logits/chosen": -2.1582937240600586, "logits/rejected": -3.2074975967407227, "logps/chosen": -135.88026428222656, "logps/rejected": -309.68621826171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5601650476455688, "rewards/margins": 7.039437770843506, "rewards/rejected": -7.599602699279785, "step": 5780 }, { "epoch": 0.9, "learning_rate": 9.907314694745579e-06, "logits/chosen": -1.6394425630569458, "logits/rejected": -2.7145743370056152, "logps/chosen": -104.61537170410156, "logps/rejected": -388.71258544921875, "loss": 0.1434, "rewards/accuracies": 1.0, "rewards/chosen": -1.9702725410461426, "rewards/margins": 4.572751998901367, "rewards/rejected": -6.543024063110352, "step": 5781 }, { "epoch": 0.9, "learning_rate": 9.90658125421443e-06, "logits/chosen": -1.6398863792419434, "logits/rejected": -2.747162342071533, "logps/chosen": -71.39930725097656, "logps/rejected": -206.33749389648438, "loss": 0.1801, "rewards/accuracies": 1.0, "rewards/chosen": -2.2562479972839355, "rewards/margins": 2.831796169281006, "rewards/rejected": -5.088044166564941, "step": 5782 }, { "epoch": 0.9, "learning_rate": 9.905847813683283e-06, "logits/chosen": -1.6664003133773804, "logits/rejected": -2.7200424671173096, "logps/chosen": -131.56954956054688, "logps/rejected": -230.23861694335938, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -1.6631333827972412, "rewards/margins": 3.3470382690429688, "rewards/rejected": -5.010171890258789, "step": 5783 }, { "epoch": 0.9, "learning_rate": 9.905114373152135e-06, "logits/chosen": -2.288419008255005, "logits/rejected": -2.982757806777954, "logps/chosen": -77.72706604003906, "logps/rejected": -368.93463134765625, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -2.900499105453491, "rewards/margins": 5.0375447273254395, "rewards/rejected": -7.938043594360352, "step": 5784 }, { "epoch": 0.9, "learning_rate": 9.904380932620988e-06, "logits/chosen": -3.0231590270996094, "logits/rejected": -2.3287172317504883, "logps/chosen": -301.8930969238281, "logps/rejected": -193.77845764160156, "loss": 0.2596, "rewards/accuracies": 1.0, "rewards/chosen": -1.5696136951446533, "rewards/margins": 3.5217199325561523, "rewards/rejected": -5.091333866119385, "step": 5785 }, { "epoch": 0.9, "learning_rate": 9.90364749208984e-06, "logits/chosen": -2.699357271194458, "logits/rejected": -3.2554075717926025, "logps/chosen": -48.205047607421875, "logps/rejected": -249.72683715820312, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -0.45385047793388367, "rewards/margins": 4.466029644012451, "rewards/rejected": -4.919879913330078, "step": 5786 }, { "epoch": 0.9, "learning_rate": 9.902914051558692e-06, "logits/chosen": -2.603084087371826, "logits/rejected": -2.7864890098571777, "logps/chosen": -87.3704605102539, "logps/rejected": -290.7558898925781, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.4996435642242432, "rewards/margins": 6.427074432373047, "rewards/rejected": -7.926717758178711, "step": 5787 }, { "epoch": 0.9, "learning_rate": 9.902180611027544e-06, "logits/chosen": -2.8494439125061035, "logits/rejected": -2.22571063041687, "logps/chosen": -193.59521484375, "logps/rejected": -175.21630859375, "loss": 2.0281, "rewards/accuracies": 0.5, "rewards/chosen": -1.8536698818206787, "rewards/margins": 0.39664721488952637, "rewards/rejected": -2.250317096710205, "step": 5788 }, { "epoch": 0.9, "learning_rate": 9.901447170496396e-06, "logits/chosen": -2.3031368255615234, "logits/rejected": -2.753631114959717, "logps/chosen": -499.6752014160156, "logps/rejected": -452.7340393066406, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.017825335264205933, "rewards/margins": 5.352077960968018, "rewards/rejected": -5.369903564453125, "step": 5789 }, { "epoch": 0.9, "learning_rate": 9.900713729965247e-06, "logits/chosen": -2.6900253295898438, "logits/rejected": -1.7566773891448975, "logps/chosen": -200.7504425048828, "logps/rejected": -329.1529846191406, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": -1.4929478168487549, "rewards/margins": 7.004608631134033, "rewards/rejected": -8.497556686401367, "step": 5790 }, { "epoch": 0.9, "learning_rate": 9.8999802894341e-06, "logits/chosen": -3.1538984775543213, "logits/rejected": -2.5566954612731934, "logps/chosen": -300.7915954589844, "logps/rejected": -291.9630432128906, "loss": 4.7387, "rewards/accuracies": 0.0, "rewards/chosen": -6.010014533996582, "rewards/margins": -4.728811264038086, "rewards/rejected": -1.281203031539917, "step": 5791 }, { "epoch": 0.9, "learning_rate": 9.899246848902951e-06, "logits/chosen": -2.905768871307373, "logits/rejected": -2.656649589538574, "logps/chosen": -232.9371795654297, "logps/rejected": -233.6729736328125, "loss": 2.405, "rewards/accuracies": 0.5, "rewards/chosen": -3.4483978748321533, "rewards/margins": 0.30188441276550293, "rewards/rejected": -3.7502822875976562, "step": 5792 }, { "epoch": 0.9, "learning_rate": 9.898513408371803e-06, "logits/chosen": -2.534761667251587, "logits/rejected": -2.792559862136841, "logps/chosen": -207.23382568359375, "logps/rejected": -467.2758483886719, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.3099888563156128, "rewards/margins": 6.356893539428711, "rewards/rejected": -7.666882514953613, "step": 5793 }, { "epoch": 0.9, "learning_rate": 9.897779967840657e-06, "logits/chosen": -1.5485748052597046, "logits/rejected": -2.970005512237549, "logps/chosen": -110.44435119628906, "logps/rejected": -466.74102783203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9900420904159546, "rewards/margins": 8.792703628540039, "rewards/rejected": -9.782745361328125, "step": 5794 }, { "epoch": 0.9, "learning_rate": 9.897046527309509e-06, "logits/chosen": -2.5041983127593994, "logits/rejected": -2.796022415161133, "logps/chosen": -92.90838623046875, "logps/rejected": -139.10443115234375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -2.850198268890381, "rewards/margins": 4.609634876251221, "rewards/rejected": -7.459833145141602, "step": 5795 }, { "epoch": 0.9, "learning_rate": 9.89631308677836e-06, "logits/chosen": -1.6354769468307495, "logits/rejected": -2.8864803314208984, "logps/chosen": -191.99078369140625, "logps/rejected": -295.2913818359375, "loss": 2.6998, "rewards/accuracies": 0.5, "rewards/chosen": -3.4724533557891846, "rewards/margins": -1.275355577468872, "rewards/rejected": -2.1970977783203125, "step": 5796 }, { "epoch": 0.9, "learning_rate": 9.895579646247212e-06, "logits/chosen": -2.5222995281219482, "logits/rejected": -2.8373491764068604, "logps/chosen": -43.423099517822266, "logps/rejected": -141.23306274414062, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -2.277336597442627, "rewards/margins": 4.078804016113281, "rewards/rejected": -6.356140613555908, "step": 5797 }, { "epoch": 0.9, "learning_rate": 9.894846205716064e-06, "logits/chosen": -2.146562099456787, "logits/rejected": -2.6595938205718994, "logps/chosen": -351.98211669921875, "logps/rejected": -495.5712890625, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.4051647186279297, "rewards/margins": 4.816648483276367, "rewards/rejected": -6.221813201904297, "step": 5798 }, { "epoch": 0.9, "learning_rate": 9.894112765184916e-06, "logits/chosen": -2.2831130027770996, "logits/rejected": -2.2695186138153076, "logps/chosen": -160.17190551757812, "logps/rejected": -284.5201416015625, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -1.1766343116760254, "rewards/margins": 6.681513786315918, "rewards/rejected": -7.858147621154785, "step": 5799 }, { "epoch": 0.9, "learning_rate": 9.893379324653768e-06, "logits/chosen": -2.447866201400757, "logits/rejected": -2.6416711807250977, "logps/chosen": -302.8183288574219, "logps/rejected": -345.98260498046875, "loss": 2.5424, "rewards/accuracies": 0.5, "rewards/chosen": -3.9308321475982666, "rewards/margins": 1.765578269958496, "rewards/rejected": -5.696410179138184, "step": 5800 }, { "epoch": 0.9, "learning_rate": 9.89264588412262e-06, "logits/chosen": -2.0069189071655273, "logits/rejected": -2.997116804122925, "logps/chosen": -148.26220703125, "logps/rejected": -407.8495178222656, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.2941116392612457, "rewards/margins": 6.905398845672607, "rewards/rejected": -7.19951057434082, "step": 5801 }, { "epoch": 0.9, "learning_rate": 9.891912443591472e-06, "logits/chosen": -2.715672254562378, "logits/rejected": -3.1354293823242188, "logps/chosen": -277.52569580078125, "logps/rejected": -265.8117980957031, "loss": 3.1228, "rewards/accuracies": 0.5, "rewards/chosen": -3.6732680797576904, "rewards/margins": 0.28423023223876953, "rewards/rejected": -3.95749831199646, "step": 5802 }, { "epoch": 0.9, "learning_rate": 9.891179003060325e-06, "logits/chosen": -2.8893558979034424, "logits/rejected": -0.803005576133728, "logps/chosen": -1029.551513671875, "logps/rejected": -304.54388427734375, "loss": 2.8173, "rewards/accuracies": 0.5, "rewards/chosen": -3.679840087890625, "rewards/margins": 0.7247047424316406, "rewards/rejected": -4.404544830322266, "step": 5803 }, { "epoch": 0.9, "learning_rate": 9.890445562529177e-06, "logits/chosen": -2.862297773361206, "logits/rejected": -2.307438373565674, "logps/chosen": -270.1888122558594, "logps/rejected": -225.85755920410156, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.3923706114292145, "rewards/margins": 6.461906433105469, "rewards/rejected": -6.854276657104492, "step": 5804 }, { "epoch": 0.9, "learning_rate": 9.889712121998029e-06, "logits/chosen": -2.4830281734466553, "logits/rejected": -3.052983522415161, "logps/chosen": -81.04474639892578, "logps/rejected": -366.44219970703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.2725913524627686, "rewards/margins": 7.647841453552246, "rewards/rejected": -9.920433044433594, "step": 5805 }, { "epoch": 0.9, "learning_rate": 9.888978681466881e-06, "logits/chosen": -2.70965576171875, "logits/rejected": -2.956585645675659, "logps/chosen": -227.96841430664062, "logps/rejected": -171.2490997314453, "loss": 2.3954, "rewards/accuracies": 0.5, "rewards/chosen": -5.581336975097656, "rewards/margins": -1.5188817977905273, "rewards/rejected": -4.062455177307129, "step": 5806 }, { "epoch": 0.9, "learning_rate": 9.888245240935733e-06, "logits/chosen": -1.389852523803711, "logits/rejected": -2.4925429821014404, "logps/chosen": -199.30340576171875, "logps/rejected": -334.8433837890625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.9555187225341797, "rewards/margins": 5.541994571685791, "rewards/rejected": -6.497513771057129, "step": 5807 }, { "epoch": 0.9, "learning_rate": 9.887511800404585e-06, "logits/chosen": -1.6496531963348389, "logits/rejected": -2.511951446533203, "logps/chosen": -81.55623626708984, "logps/rejected": -190.9942626953125, "loss": 0.3254, "rewards/accuracies": 1.0, "rewards/chosen": -1.2609957456588745, "rewards/margins": 2.258024215698242, "rewards/rejected": -3.5190200805664062, "step": 5808 }, { "epoch": 0.9, "learning_rate": 9.886778359873437e-06, "logits/chosen": -2.715949773788452, "logits/rejected": -1.5000886917114258, "logps/chosen": -358.0762939453125, "logps/rejected": -113.48164367675781, "loss": 0.5861, "rewards/accuracies": 0.5, "rewards/chosen": -1.1642346382141113, "rewards/margins": 1.1566784381866455, "rewards/rejected": -2.320913076400757, "step": 5809 }, { "epoch": 0.9, "learning_rate": 9.886044919342288e-06, "logits/chosen": -2.8381001949310303, "logits/rejected": -3.3165903091430664, "logps/chosen": -405.2252502441406, "logps/rejected": -352.5318908691406, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.2449724674224854, "rewards/margins": 5.3376312255859375, "rewards/rejected": -6.582603931427002, "step": 5810 }, { "epoch": 0.9, "learning_rate": 9.88531147881114e-06, "logits/chosen": -2.0778133869171143, "logits/rejected": -2.9055845737457275, "logps/chosen": -91.27025604248047, "logps/rejected": -269.22607421875, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": 0.4019542634487152, "rewards/margins": 4.7576398849487305, "rewards/rejected": -4.355685710906982, "step": 5811 }, { "epoch": 0.9, "learning_rate": 9.884578038279994e-06, "logits/chosen": -2.5329060554504395, "logits/rejected": -2.9325144290924072, "logps/chosen": -96.39016723632812, "logps/rejected": -211.45782470703125, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -1.086134672164917, "rewards/margins": 4.126335620880127, "rewards/rejected": -5.212470054626465, "step": 5812 }, { "epoch": 0.9, "learning_rate": 9.883844597748846e-06, "logits/chosen": -2.709214448928833, "logits/rejected": -3.010718584060669, "logps/chosen": -63.921199798583984, "logps/rejected": -152.72911071777344, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.06485581398010254, "rewards/margins": 4.741078853607178, "rewards/rejected": -4.805934429168701, "step": 5813 }, { "epoch": 0.9, "learning_rate": 9.883111157217698e-06, "logits/chosen": -1.7780718803405762, "logits/rejected": -2.9046707153320312, "logps/chosen": -137.88311767578125, "logps/rejected": -432.1279296875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -0.3814578950405121, "rewards/margins": 5.067224025726318, "rewards/rejected": -5.448681831359863, "step": 5814 }, { "epoch": 0.9, "learning_rate": 9.882377716686551e-06, "logits/chosen": -3.1582770347595215, "logits/rejected": -1.6563751697540283, "logps/chosen": -1150.01708984375, "logps/rejected": -804.359130859375, "loss": 2.4103, "rewards/accuracies": 0.5, "rewards/chosen": -3.4377686977386475, "rewards/margins": 0.9438505172729492, "rewards/rejected": -4.381619453430176, "step": 5815 }, { "epoch": 0.9, "learning_rate": 9.881644276155403e-06, "logits/chosen": -2.9508185386657715, "logits/rejected": -2.0039777755737305, "logps/chosen": -596.74560546875, "logps/rejected": -253.74014282226562, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.23612672090530396, "rewards/margins": 6.646591663360596, "rewards/rejected": -6.410464763641357, "step": 5816 }, { "epoch": 0.9, "learning_rate": 9.880910835624255e-06, "logits/chosen": -2.872330665588379, "logits/rejected": -2.091813087463379, "logps/chosen": -526.7044067382812, "logps/rejected": -344.81475830078125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.5125831365585327, "rewards/margins": 5.909116268157959, "rewards/rejected": -5.396533012390137, "step": 5817 }, { "epoch": 0.9, "learning_rate": 9.880177395093107e-06, "logits/chosen": -2.4768266677856445, "logits/rejected": -2.9828567504882812, "logps/chosen": -79.14151000976562, "logps/rejected": -340.3466796875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -1.3308101892471313, "rewards/margins": 5.937333106994629, "rewards/rejected": -7.268143653869629, "step": 5818 }, { "epoch": 0.9, "learning_rate": 9.879443954561959e-06, "logits/chosen": -2.673893451690674, "logits/rejected": -3.0649242401123047, "logps/chosen": -248.32537841796875, "logps/rejected": -260.11962890625, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -0.644887387752533, "rewards/margins": 5.632598876953125, "rewards/rejected": -6.277486801147461, "step": 5819 }, { "epoch": 0.91, "learning_rate": 9.87871051403081e-06, "logits/chosen": -2.3747799396514893, "logits/rejected": -3.1553826332092285, "logps/chosen": -137.54299926757812, "logps/rejected": -238.9124755859375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.21069030463695526, "rewards/margins": 5.616903781890869, "rewards/rejected": -5.827593803405762, "step": 5820 }, { "epoch": 0.91, "learning_rate": 9.877977073499664e-06, "logits/chosen": -1.5876352787017822, "logits/rejected": -3.049936056137085, "logps/chosen": -43.20575714111328, "logps/rejected": -259.0962829589844, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.2647058665752411, "rewards/margins": 5.339325428009033, "rewards/rejected": -5.604031562805176, "step": 5821 }, { "epoch": 0.91, "learning_rate": 9.877243632968516e-06, "logits/chosen": -2.6927454471588135, "logits/rejected": -2.002539873123169, "logps/chosen": -242.16259765625, "logps/rejected": -192.17141723632812, "loss": 1.3053, "rewards/accuracies": 0.5, "rewards/chosen": -1.720607042312622, "rewards/margins": 1.88397216796875, "rewards/rejected": -3.604579210281372, "step": 5822 }, { "epoch": 0.91, "learning_rate": 9.876510192437368e-06, "logits/chosen": -2.0225517749786377, "logits/rejected": -2.635580062866211, "logps/chosen": -108.39027404785156, "logps/rejected": -204.82139587402344, "loss": 2.0765, "rewards/accuracies": 0.5, "rewards/chosen": -2.101898670196533, "rewards/margins": 1.098799467086792, "rewards/rejected": -3.200698137283325, "step": 5823 }, { "epoch": 0.91, "learning_rate": 9.87577675190622e-06, "logits/chosen": -2.6770339012145996, "logits/rejected": -2.90383243560791, "logps/chosen": -151.77012634277344, "logps/rejected": -253.0860137939453, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.7587540149688721, "rewards/margins": 5.073451995849609, "rewards/rejected": -5.8322062492370605, "step": 5824 }, { "epoch": 0.91, "learning_rate": 9.875043311375072e-06, "logits/chosen": -2.2262046337127686, "logits/rejected": -2.7032084465026855, "logps/chosen": -166.3567657470703, "logps/rejected": -197.72723388671875, "loss": 1.4467, "rewards/accuracies": 0.5, "rewards/chosen": -1.3284636735916138, "rewards/margins": 2.307746410369873, "rewards/rejected": -3.6362099647521973, "step": 5825 }, { "epoch": 0.91, "learning_rate": 9.874309870843924e-06, "logits/chosen": -2.8854284286499023, "logits/rejected": -2.8764610290527344, "logps/chosen": -157.7899169921875, "logps/rejected": -224.91436767578125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.2607414424419403, "rewards/margins": 6.115932464599609, "rewards/rejected": -5.855190753936768, "step": 5826 }, { "epoch": 0.91, "learning_rate": 9.873576430312775e-06, "logits/chosen": -2.531399965286255, "logits/rejected": -2.681340217590332, "logps/chosen": -137.75999450683594, "logps/rejected": -211.9152374267578, "loss": 1.6189, "rewards/accuracies": 0.5, "rewards/chosen": -3.6921398639678955, "rewards/margins": 0.5775072574615479, "rewards/rejected": -4.269647121429443, "step": 5827 }, { "epoch": 0.91, "learning_rate": 9.872842989781627e-06, "logits/chosen": -2.3985908031463623, "logits/rejected": -2.4093875885009766, "logps/chosen": -118.41903686523438, "logps/rejected": -218.40184020996094, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.868589460849762, "rewards/margins": 5.740039825439453, "rewards/rejected": -6.6086297035217285, "step": 5828 }, { "epoch": 0.91, "learning_rate": 9.87210954925048e-06, "logits/chosen": -2.465100049972534, "logits/rejected": -2.6341052055358887, "logps/chosen": -141.0110626220703, "logps/rejected": -124.0728759765625, "loss": 1.7768, "rewards/accuracies": 0.5, "rewards/chosen": -3.5355756282806396, "rewards/margins": 0.07662022113800049, "rewards/rejected": -3.6121959686279297, "step": 5829 }, { "epoch": 0.91, "learning_rate": 9.871376108719333e-06, "logits/chosen": -2.449625015258789, "logits/rejected": -2.881364583969116, "logps/chosen": -264.3497314453125, "logps/rejected": -222.8043212890625, "loss": 2.3295, "rewards/accuracies": 0.5, "rewards/chosen": -2.6740503311157227, "rewards/margins": 0.9488255977630615, "rewards/rejected": -3.622875928878784, "step": 5830 }, { "epoch": 0.91, "learning_rate": 9.870642668188185e-06, "logits/chosen": -1.9503225088119507, "logits/rejected": -2.9961771965026855, "logps/chosen": -46.927589416503906, "logps/rejected": -184.75039672851562, "loss": 0.2632, "rewards/accuracies": 1.0, "rewards/chosen": -2.2962141036987305, "rewards/margins": 2.680335521697998, "rewards/rejected": -4.9765496253967285, "step": 5831 }, { "epoch": 0.91, "learning_rate": 9.869909227657037e-06, "logits/chosen": -1.0165926218032837, "logits/rejected": -2.9534432888031006, "logps/chosen": -30.444334030151367, "logps/rejected": -305.90264892578125, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -0.5436262488365173, "rewards/margins": 4.315288543701172, "rewards/rejected": -4.858914375305176, "step": 5832 }, { "epoch": 0.91, "learning_rate": 9.869175787125888e-06, "logits/chosen": -2.695819616317749, "logits/rejected": -2.232076644897461, "logps/chosen": -82.63367462158203, "logps/rejected": -118.54086303710938, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -0.3486603796482086, "rewards/margins": 4.242107391357422, "rewards/rejected": -4.590767860412598, "step": 5833 }, { "epoch": 0.91, "learning_rate": 9.86844234659474e-06, "logits/chosen": -0.9655407071113586, "logits/rejected": -2.3250956535339355, "logps/chosen": -216.8092041015625, "logps/rejected": -356.77740478515625, "loss": 0.1233, "rewards/accuracies": 1.0, "rewards/chosen": -0.3982223570346832, "rewards/margins": 4.878868103027344, "rewards/rejected": -5.277090549468994, "step": 5834 }, { "epoch": 0.91, "learning_rate": 9.867708906063592e-06, "logits/chosen": -2.506495714187622, "logits/rejected": -3.1402699947357178, "logps/chosen": -486.83184814453125, "logps/rejected": -520.6695556640625, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -0.580822765827179, "rewards/margins": 3.5636918544769287, "rewards/rejected": -4.144514560699463, "step": 5835 }, { "epoch": 0.91, "learning_rate": 9.866975465532444e-06, "logits/chosen": -1.5726226568222046, "logits/rejected": -2.6620049476623535, "logps/chosen": -157.60887145996094, "logps/rejected": -319.787841796875, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.5137859582901001, "rewards/margins": 5.221909046173096, "rewards/rejected": -5.735694885253906, "step": 5836 }, { "epoch": 0.91, "learning_rate": 9.866242025001296e-06, "logits/chosen": -2.8187379837036133, "logits/rejected": -2.863199472427368, "logps/chosen": -151.08920288085938, "logps/rejected": -185.9791259765625, "loss": 2.1638, "rewards/accuracies": 0.5, "rewards/chosen": -2.011561155319214, "rewards/margins": 0.9743843078613281, "rewards/rejected": -2.985945463180542, "step": 5837 }, { "epoch": 0.91, "learning_rate": 9.865508584470148e-06, "logits/chosen": -3.2438716888427734, "logits/rejected": -3.206695079803467, "logps/chosen": -350.94183349609375, "logps/rejected": -271.1927795410156, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.038747787475586, "rewards/margins": 4.886526107788086, "rewards/rejected": -5.925273895263672, "step": 5838 }, { "epoch": 0.91, "learning_rate": 9.864775143939001e-06, "logits/chosen": -1.7605291604995728, "logits/rejected": -2.895052433013916, "logps/chosen": -68.44580078125, "logps/rejected": -215.0463409423828, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.1620977371931076, "rewards/margins": 5.051458835601807, "rewards/rejected": -5.21355676651001, "step": 5839 }, { "epoch": 0.91, "learning_rate": 9.864041703407853e-06, "logits/chosen": -1.360337257385254, "logits/rejected": -2.489614248275757, "logps/chosen": -367.6754150390625, "logps/rejected": -578.2967529296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.778020441532135, "rewards/margins": 7.523736000061035, "rewards/rejected": -6.745715141296387, "step": 5840 }, { "epoch": 0.91, "learning_rate": 9.863308262876705e-06, "logits/chosen": -2.8183891773223877, "logits/rejected": -1.975881814956665, "logps/chosen": -346.17578125, "logps/rejected": -242.82101440429688, "loss": 0.1552, "rewards/accuracies": 1.0, "rewards/chosen": -3.623464584350586, "rewards/margins": 1.7867251634597778, "rewards/rejected": -5.410189628601074, "step": 5841 }, { "epoch": 0.91, "learning_rate": 9.862574822345557e-06, "logits/chosen": -1.0211925506591797, "logits/rejected": -2.6265976428985596, "logps/chosen": -96.46923828125, "logps/rejected": -310.57501220703125, "loss": 0.0853, "rewards/accuracies": 1.0, "rewards/chosen": -2.670354127883911, "rewards/margins": 4.631381988525391, "rewards/rejected": -7.301735877990723, "step": 5842 }, { "epoch": 0.91, "learning_rate": 9.861841381814409e-06, "logits/chosen": -1.5907564163208008, "logits/rejected": -2.723923683166504, "logps/chosen": -308.62261962890625, "logps/rejected": -450.3211975097656, "loss": 2.8648, "rewards/accuracies": 0.5, "rewards/chosen": -3.203787326812744, "rewards/margins": 0.13213038444519043, "rewards/rejected": -3.3359177112579346, "step": 5843 }, { "epoch": 0.91, "learning_rate": 9.86110794128326e-06, "logits/chosen": -2.527366876602173, "logits/rejected": -2.851633071899414, "logps/chosen": -108.5745849609375, "logps/rejected": -224.3372802734375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.7718068957328796, "rewards/margins": 5.586640357971191, "rewards/rejected": -6.358447551727295, "step": 5844 }, { "epoch": 0.91, "learning_rate": 9.860374500752113e-06, "logits/chosen": -2.9783191680908203, "logits/rejected": -2.595487356185913, "logps/chosen": -154.77047729492188, "logps/rejected": -142.75735473632812, "loss": 1.4311, "rewards/accuracies": 0.5, "rewards/chosen": -1.0628010034561157, "rewards/margins": 1.1199944019317627, "rewards/rejected": -2.182795524597168, "step": 5845 }, { "epoch": 0.91, "learning_rate": 9.859641060220965e-06, "logits/chosen": -2.4720306396484375, "logits/rejected": -2.8594279289245605, "logps/chosen": -54.12726593017578, "logps/rejected": -165.49227905273438, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -1.283151388168335, "rewards/margins": 4.879741668701172, "rewards/rejected": -6.162893295288086, "step": 5846 }, { "epoch": 0.91, "learning_rate": 9.858907619689818e-06, "logits/chosen": -2.4382450580596924, "logits/rejected": -2.792111873626709, "logps/chosen": -116.85335540771484, "logps/rejected": -264.31884765625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.199953556060791, "rewards/margins": 5.499838829040527, "rewards/rejected": -7.699792385101318, "step": 5847 }, { "epoch": 0.91, "learning_rate": 9.85817417915867e-06, "logits/chosen": -2.7056407928466797, "logits/rejected": -2.67099928855896, "logps/chosen": -89.18375396728516, "logps/rejected": -94.3472671508789, "loss": 2.5559, "rewards/accuracies": 0.5, "rewards/chosen": -4.177188873291016, "rewards/margins": 0.07210040092468262, "rewards/rejected": -4.249289512634277, "step": 5848 }, { "epoch": 0.91, "learning_rate": 9.857440738627524e-06, "logits/chosen": -3.033761501312256, "logits/rejected": -2.8622853755950928, "logps/chosen": -433.45037841796875, "logps/rejected": -436.9486083984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8169647455215454, "rewards/margins": 6.940190315246582, "rewards/rejected": -7.757155418395996, "step": 5849 }, { "epoch": 0.91, "learning_rate": 9.856707298096375e-06, "logits/chosen": -1.9207262992858887, "logits/rejected": -2.889498710632324, "logps/chosen": -130.9777374267578, "logps/rejected": -440.1949157714844, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.08517532050609589, "rewards/margins": 6.600344181060791, "rewards/rejected": -6.685519695281982, "step": 5850 }, { "epoch": 0.91, "learning_rate": 9.855973857565227e-06, "logits/chosen": -2.37919020652771, "logits/rejected": -2.8492214679718018, "logps/chosen": -287.6724548339844, "logps/rejected": -311.0816650390625, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/chosen": -0.27436724305152893, "rewards/margins": 4.141010284423828, "rewards/rejected": -4.415377616882324, "step": 5851 }, { "epoch": 0.91, "learning_rate": 9.85524041703408e-06, "logits/chosen": -2.4773142337799072, "logits/rejected": -2.708024263381958, "logps/chosen": -406.6221008300781, "logps/rejected": -481.1553955078125, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": -0.7715281844139099, "rewards/margins": 4.669526100158691, "rewards/rejected": -5.441053867340088, "step": 5852 }, { "epoch": 0.91, "learning_rate": 9.854506976502931e-06, "logits/chosen": -2.8048582077026367, "logits/rejected": -1.8778387308120728, "logps/chosen": -241.72296142578125, "logps/rejected": -208.4564208984375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.5430095195770264, "rewards/margins": 5.693416595458984, "rewards/rejected": -6.236425876617432, "step": 5853 }, { "epoch": 0.91, "learning_rate": 9.853773535971783e-06, "logits/chosen": -1.5917248725891113, "logits/rejected": -2.7079176902770996, "logps/chosen": -106.58123779296875, "logps/rejected": -325.98492431640625, "loss": 0.099, "rewards/accuracies": 1.0, "rewards/chosen": -1.9481651782989502, "rewards/margins": 5.339204788208008, "rewards/rejected": -7.287370681762695, "step": 5854 }, { "epoch": 0.91, "learning_rate": 9.853040095440635e-06, "logits/chosen": -2.23189377784729, "logits/rejected": -2.6057963371276855, "logps/chosen": -104.14593505859375, "logps/rejected": -270.0055847167969, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8189258575439453, "rewards/margins": 6.121935844421387, "rewards/rejected": -6.940861701965332, "step": 5855 }, { "epoch": 0.91, "learning_rate": 9.852306654909487e-06, "logits/chosen": -2.6756911277770996, "logits/rejected": -2.9049720764160156, "logps/chosen": -127.68403625488281, "logps/rejected": -163.63906860351562, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.8201904892921448, "rewards/margins": 5.174030780792236, "rewards/rejected": -5.994221210479736, "step": 5856 }, { "epoch": 0.91, "learning_rate": 9.85157321437834e-06, "logits/chosen": -2.758913278579712, "logits/rejected": -2.263946294784546, "logps/chosen": -401.590576171875, "logps/rejected": -255.80223083496094, "loss": 2.0311, "rewards/accuracies": 0.5, "rewards/chosen": -2.945526123046875, "rewards/margins": 0.13450026512145996, "rewards/rejected": -3.080026388168335, "step": 5857 }, { "epoch": 0.91, "learning_rate": 9.850839773847192e-06, "logits/chosen": -1.651079535484314, "logits/rejected": -2.6230788230895996, "logps/chosen": -185.48556518554688, "logps/rejected": -392.8035583496094, "loss": 2.2181, "rewards/accuracies": 0.5, "rewards/chosen": -2.8178551197052, "rewards/margins": 1.5880959033966064, "rewards/rejected": -4.405951023101807, "step": 5858 }, { "epoch": 0.91, "learning_rate": 9.850106333316044e-06, "logits/chosen": -2.69701886177063, "logits/rejected": -3.339561700820923, "logps/chosen": -177.38848876953125, "logps/rejected": -333.27783203125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.2706794738769531, "rewards/margins": 5.587947845458984, "rewards/rejected": -5.8586273193359375, "step": 5859 }, { "epoch": 0.91, "learning_rate": 9.849372892784896e-06, "logits/chosen": -2.512962579727173, "logits/rejected": -1.7037993669509888, "logps/chosen": -173.88848876953125, "logps/rejected": -109.9954833984375, "loss": 2.7149, "rewards/accuracies": 0.5, "rewards/chosen": -2.3826353549957275, "rewards/margins": -0.20504093170166016, "rewards/rejected": -2.1775944232940674, "step": 5860 }, { "epoch": 0.91, "learning_rate": 9.848639452253748e-06, "logits/chosen": -2.59024715423584, "logits/rejected": -3.05210280418396, "logps/chosen": -128.07003784179688, "logps/rejected": -262.4541015625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.5430682897567749, "rewards/margins": 5.95469856262207, "rewards/rejected": -6.497766971588135, "step": 5861 }, { "epoch": 0.91, "learning_rate": 9.8479060117226e-06, "logits/chosen": -1.8187497854232788, "logits/rejected": -2.4429550170898438, "logps/chosen": -208.21388244628906, "logps/rejected": -311.4026794433594, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": 0.32680779695510864, "rewards/margins": 5.335500240325928, "rewards/rejected": -5.008692264556885, "step": 5862 }, { "epoch": 0.91, "learning_rate": 9.847172571191452e-06, "logits/chosen": -2.839710235595703, "logits/rejected": -1.6745375394821167, "logps/chosen": -187.01084899902344, "logps/rejected": -71.2112808227539, "loss": 0.3161, "rewards/accuracies": 1.0, "rewards/chosen": -0.8316711783409119, "rewards/margins": 2.050250291824341, "rewards/rejected": -2.8819215297698975, "step": 5863 }, { "epoch": 0.91, "learning_rate": 9.846439130660303e-06, "logits/chosen": -1.3492852449417114, "logits/rejected": -3.0378870964050293, "logps/chosen": -111.1461181640625, "logps/rejected": -377.9053039550781, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.47634392976760864, "rewards/margins": 4.900334358215332, "rewards/rejected": -5.376678466796875, "step": 5864 }, { "epoch": 0.91, "learning_rate": 9.845705690129157e-06, "logits/chosen": -2.2208755016326904, "logits/rejected": -2.698261260986328, "logps/chosen": -268.7560119628906, "logps/rejected": -265.5375061035156, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": -0.47231101989746094, "rewards/margins": 3.471419095993042, "rewards/rejected": -3.943730354309082, "step": 5865 }, { "epoch": 0.91, "learning_rate": 9.844972249598009e-06, "logits/chosen": -2.714524745941162, "logits/rejected": -3.0926694869995117, "logps/chosen": -103.17970275878906, "logps/rejected": -265.3994140625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.35412463545799255, "rewards/margins": 5.757694244384766, "rewards/rejected": -5.40356969833374, "step": 5866 }, { "epoch": 0.91, "learning_rate": 9.84423880906686e-06, "logits/chosen": -2.7224440574645996, "logits/rejected": -2.6518173217773438, "logps/chosen": -406.0426025390625, "logps/rejected": -419.32733154296875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.6680206060409546, "rewards/margins": 5.358630180358887, "rewards/rejected": -7.026651382446289, "step": 5867 }, { "epoch": 0.91, "learning_rate": 9.843505368535713e-06, "logits/chosen": -3.179612159729004, "logits/rejected": -2.6911585330963135, "logps/chosen": -362.34332275390625, "logps/rejected": -207.58177185058594, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.18668478727340698, "rewards/margins": 4.732670307159424, "rewards/rejected": -4.919355392456055, "step": 5868 }, { "epoch": 0.91, "learning_rate": 9.842771928004564e-06, "logits/chosen": -2.8109488487243652, "logits/rejected": -2.314075231552124, "logps/chosen": -129.37081909179688, "logps/rejected": -254.3818359375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.9629188776016235, "rewards/margins": 5.933024883270264, "rewards/rejected": -6.895943641662598, "step": 5869 }, { "epoch": 0.91, "learning_rate": 9.842038487473416e-06, "logits/chosen": -2.185710906982422, "logits/rejected": -2.9840240478515625, "logps/chosen": -59.73137664794922, "logps/rejected": -240.00860595703125, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -0.3777567148208618, "rewards/margins": 5.463845252990723, "rewards/rejected": -5.841601848602295, "step": 5870 }, { "epoch": 0.91, "learning_rate": 9.841305046942268e-06, "logits/chosen": -2.8954076766967773, "logits/rejected": -3.153437852859497, "logps/chosen": -360.09405517578125, "logps/rejected": -391.68035888671875, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": 0.7624784708023071, "rewards/margins": 5.515412330627441, "rewards/rejected": -4.752933502197266, "step": 5871 }, { "epoch": 0.91, "learning_rate": 9.84057160641112e-06, "logits/chosen": -1.4923031330108643, "logits/rejected": -2.5210299491882324, "logps/chosen": -103.462646484375, "logps/rejected": -265.3637390136719, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.40579405426979065, "rewards/margins": 6.119609355926514, "rewards/rejected": -6.5254034996032715, "step": 5872 }, { "epoch": 0.91, "learning_rate": 9.839838165879972e-06, "logits/chosen": -2.902592420578003, "logits/rejected": -2.0282552242279053, "logps/chosen": -394.0785827636719, "logps/rejected": -258.93218994140625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.6677734851837158, "rewards/margins": 5.409707546234131, "rewards/rejected": -7.077481269836426, "step": 5873 }, { "epoch": 0.91, "learning_rate": 9.839104725348826e-06, "logits/chosen": -2.7365355491638184, "logits/rejected": -2.2193968296051025, "logps/chosen": -370.857177734375, "logps/rejected": -525.7005615234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.784075140953064, "rewards/margins": 8.678455352783203, "rewards/rejected": -9.462530136108398, "step": 5874 }, { "epoch": 0.91, "learning_rate": 9.838371284817677e-06, "logits/chosen": -1.3849724531173706, "logits/rejected": -2.3870739936828613, "logps/chosen": -59.678958892822266, "logps/rejected": -201.22096252441406, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": -1.6611063480377197, "rewards/margins": 4.273190498352051, "rewards/rejected": -5.934296607971191, "step": 5875 }, { "epoch": 0.91, "learning_rate": 9.83763784428653e-06, "logits/chosen": -2.9193856716156006, "logits/rejected": -2.6060221195220947, "logps/chosen": -169.06027221679688, "logps/rejected": -211.3192901611328, "loss": 0.1036, "rewards/accuracies": 1.0, "rewards/chosen": -1.8799644708633423, "rewards/margins": 3.961239814758301, "rewards/rejected": -5.8412041664123535, "step": 5876 }, { "epoch": 0.91, "learning_rate": 9.836904403755381e-06, "logits/chosen": -2.8296375274658203, "logits/rejected": -3.2229363918304443, "logps/chosen": -29.164064407348633, "logps/rejected": -198.95167541503906, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.7964014410972595, "rewards/margins": 5.337688446044922, "rewards/rejected": -6.134089469909668, "step": 5877 }, { "epoch": 0.91, "learning_rate": 9.836170963224233e-06, "logits/chosen": -2.69301438331604, "logits/rejected": -2.834174156188965, "logps/chosen": -522.5460205078125, "logps/rejected": -628.7210693359375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.8336365222930908, "rewards/margins": 6.475550651550293, "rewards/rejected": -7.309186935424805, "step": 5878 }, { "epoch": 0.91, "learning_rate": 9.835437522693085e-06, "logits/chosen": -2.2524285316467285, "logits/rejected": -2.755300998687744, "logps/chosen": -279.120849609375, "logps/rejected": -382.2933654785156, "loss": 4.0189, "rewards/accuracies": 0.5, "rewards/chosen": -3.4520723819732666, "rewards/margins": 1.2923212051391602, "rewards/rejected": -4.744393825531006, "step": 5879 }, { "epoch": 0.91, "learning_rate": 9.834704082161937e-06, "logits/chosen": -1.5577418804168701, "logits/rejected": -2.6208348274230957, "logps/chosen": -142.443603515625, "logps/rejected": -393.5491027832031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.23651599884033203, "rewards/margins": 8.901819229125977, "rewards/rejected": -9.138335227966309, "step": 5880 }, { "epoch": 0.91, "learning_rate": 9.83397064163079e-06, "logits/chosen": -2.4781758785247803, "logits/rejected": -2.718196392059326, "logps/chosen": -83.6588363647461, "logps/rejected": -232.81491088867188, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9734705686569214, "rewards/margins": 5.602367401123047, "rewards/rejected": -6.575838088989258, "step": 5881 }, { "epoch": 0.91, "learning_rate": 9.833237201099642e-06, "logits/chosen": -2.1035711765289307, "logits/rejected": -2.7588586807250977, "logps/chosen": -260.6512451171875, "logps/rejected": -401.91754150390625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 0.8669269680976868, "rewards/margins": 4.847118377685547, "rewards/rejected": -3.980191230773926, "step": 5882 }, { "epoch": 0.91, "learning_rate": 9.832503760568496e-06, "logits/chosen": -2.5352749824523926, "logits/rejected": -2.820077657699585, "logps/chosen": -607.21826171875, "logps/rejected": -568.3145751953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.030517578125, "rewards/margins": 6.454612731933594, "rewards/rejected": -7.485130310058594, "step": 5883 }, { "epoch": 0.92, "learning_rate": 9.831770320037348e-06, "logits/chosen": -1.7420756816864014, "logits/rejected": -2.8551769256591797, "logps/chosen": -88.8487777709961, "logps/rejected": -257.7508850097656, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": -0.03493118658661842, "rewards/margins": 3.7619504928588867, "rewards/rejected": -3.796881675720215, "step": 5884 }, { "epoch": 0.92, "learning_rate": 9.8310368795062e-06, "logits/chosen": -2.2213456630706787, "logits/rejected": -2.6659486293792725, "logps/chosen": -102.26777648925781, "logps/rejected": -236.5529327392578, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.10773152858018875, "rewards/margins": 6.344671249389648, "rewards/rejected": -6.236939430236816, "step": 5885 }, { "epoch": 0.92, "learning_rate": 9.830303438975052e-06, "logits/chosen": -2.411173105239868, "logits/rejected": -2.9294047355651855, "logps/chosen": -566.7733154296875, "logps/rejected": -541.0370483398438, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.03294980525970459, "rewards/margins": 6.872228145599365, "rewards/rejected": -6.839278221130371, "step": 5886 }, { "epoch": 0.92, "learning_rate": 9.829569998443903e-06, "logits/chosen": -2.6468961238861084, "logits/rejected": -1.6003047227859497, "logps/chosen": -423.7191162109375, "logps/rejected": -257.80035400390625, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -1.9060394763946533, "rewards/margins": 4.357403755187988, "rewards/rejected": -6.2634429931640625, "step": 5887 }, { "epoch": 0.92, "learning_rate": 9.828836557912755e-06, "logits/chosen": -1.324471116065979, "logits/rejected": -2.313206195831299, "logps/chosen": -177.26150512695312, "logps/rejected": -388.8116455078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.6032663583755493, "rewards/margins": 8.43453311920166, "rewards/rejected": -9.037799835205078, "step": 5888 }, { "epoch": 0.92, "learning_rate": 9.828103117381607e-06, "logits/chosen": -2.0407161712646484, "logits/rejected": -2.9591903686523438, "logps/chosen": -263.4071350097656, "logps/rejected": -512.6400756835938, "loss": 4.0902, "rewards/accuracies": 0.5, "rewards/chosen": -4.69321870803833, "rewards/margins": -1.6227962970733643, "rewards/rejected": -3.070422410964966, "step": 5889 }, { "epoch": 0.92, "learning_rate": 9.827369676850459e-06, "logits/chosen": -2.8134429454803467, "logits/rejected": -3.1802523136138916, "logps/chosen": -64.47999572753906, "logps/rejected": -311.7378234863281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.4260087311267853, "rewards/margins": 8.476497650146484, "rewards/rejected": -8.05048942565918, "step": 5890 }, { "epoch": 0.92, "learning_rate": 9.826636236319311e-06, "logits/chosen": -2.1811749935150146, "logits/rejected": -3.015089511871338, "logps/chosen": -197.20855712890625, "logps/rejected": -432.1929016113281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.27782973647117615, "rewards/margins": 9.055929183959961, "rewards/rejected": -8.778099060058594, "step": 5891 }, { "epoch": 0.92, "learning_rate": 9.825902795788164e-06, "logits/chosen": -2.861163854598999, "logits/rejected": -2.155179262161255, "logps/chosen": -203.64498901367188, "logps/rejected": -63.89409255981445, "loss": 0.3944, "rewards/accuracies": 0.5, "rewards/chosen": -1.014349102973938, "rewards/margins": 3.2403478622436523, "rewards/rejected": -4.254696846008301, "step": 5892 }, { "epoch": 0.92, "learning_rate": 9.825169355257016e-06, "logits/chosen": -2.4857258796691895, "logits/rejected": -2.8854708671569824, "logps/chosen": -203.1852264404297, "logps/rejected": -425.2703857421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0171046257019043, "rewards/margins": 7.01828670501709, "rewards/rejected": -8.035391807556152, "step": 5893 }, { "epoch": 0.92, "learning_rate": 9.824435914725868e-06, "logits/chosen": -2.5588479042053223, "logits/rejected": -2.578162908554077, "logps/chosen": -212.02017211914062, "logps/rejected": -321.786865234375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.8962299823760986, "rewards/margins": 5.854428291320801, "rewards/rejected": -6.7506585121154785, "step": 5894 }, { "epoch": 0.92, "learning_rate": 9.82370247419472e-06, "logits/chosen": -2.8009119033813477, "logits/rejected": -2.94295597076416, "logps/chosen": -258.82025146484375, "logps/rejected": -459.46136474609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.5433799624443054, "rewards/margins": 6.966006278991699, "rewards/rejected": -6.42262601852417, "step": 5895 }, { "epoch": 0.92, "learning_rate": 9.822969033663572e-06, "logits/chosen": -2.0620713233947754, "logits/rejected": -3.1210684776306152, "logps/chosen": -317.85296630859375, "logps/rejected": -562.4593505859375, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -0.4085533022880554, "rewards/margins": 3.9645700454711914, "rewards/rejected": -4.3731231689453125, "step": 5896 }, { "epoch": 0.92, "learning_rate": 9.822235593132424e-06, "logits/chosen": -2.7286503314971924, "logits/rejected": -2.6049861907958984, "logps/chosen": -535.8375854492188, "logps/rejected": -404.4140625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.674517035484314, "rewards/margins": 6.516293525695801, "rewards/rejected": -5.841776371002197, "step": 5897 }, { "epoch": 0.92, "learning_rate": 9.821502152601276e-06, "logits/chosen": -2.8068490028381348, "logits/rejected": -1.7274919748306274, "logps/chosen": -349.944580078125, "logps/rejected": -208.8834991455078, "loss": 3.5805, "rewards/accuracies": 0.5, "rewards/chosen": -4.794679641723633, "rewards/margins": 0.04597330093383789, "rewards/rejected": -4.840652942657471, "step": 5898 }, { "epoch": 0.92, "learning_rate": 9.820768712070128e-06, "logits/chosen": -2.752443790435791, "logits/rejected": -1.5190200805664062, "logps/chosen": -466.38104248046875, "logps/rejected": -378.1378479003906, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.687047004699707, "rewards/margins": 6.278258323669434, "rewards/rejected": -8.96530532836914, "step": 5899 }, { "epoch": 0.92, "learning_rate": 9.82003527153898e-06, "logits/chosen": -3.0968616008758545, "logits/rejected": -2.14457368850708, "logps/chosen": -298.4779968261719, "logps/rejected": -73.10110473632812, "loss": 1.6352, "rewards/accuracies": 0.5, "rewards/chosen": -2.742666482925415, "rewards/margins": 0.7781139612197876, "rewards/rejected": -3.520780563354492, "step": 5900 }, { "epoch": 0.92, "learning_rate": 9.819301831007833e-06, "logits/chosen": -2.1411361694335938, "logits/rejected": -2.7172365188598633, "logps/chosen": -296.18511962890625, "logps/rejected": -308.74212646484375, "loss": 3.7078, "rewards/accuracies": 0.5, "rewards/chosen": -3.491168260574341, "rewards/margins": -0.026542186737060547, "rewards/rejected": -3.4646260738372803, "step": 5901 }, { "epoch": 0.92, "learning_rate": 9.818568390476685e-06, "logits/chosen": -2.7762670516967773, "logits/rejected": -2.9350481033325195, "logps/chosen": -87.29942321777344, "logps/rejected": -169.45828247070312, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.21491661667823792, "rewards/margins": 5.222068786621094, "rewards/rejected": -5.436985492706299, "step": 5902 }, { "epoch": 0.92, "learning_rate": 9.817834949945537e-06, "logits/chosen": -2.7019174098968506, "logits/rejected": -1.5295352935791016, "logps/chosen": -481.4151611328125, "logps/rejected": -273.0523376464844, "loss": 0.8984, "rewards/accuracies": 0.5, "rewards/chosen": -0.45725634694099426, "rewards/margins": 2.289827823638916, "rewards/rejected": -2.747083902359009, "step": 5903 }, { "epoch": 0.92, "learning_rate": 9.817101509414389e-06, "logits/chosen": -1.9834301471710205, "logits/rejected": -2.996110200881958, "logps/chosen": -51.84602355957031, "logps/rejected": -174.14794921875, "loss": 0.3372, "rewards/accuracies": 1.0, "rewards/chosen": -2.113309144973755, "rewards/margins": 3.641756057739258, "rewards/rejected": -5.755065441131592, "step": 5904 }, { "epoch": 0.92, "learning_rate": 9.81636806888324e-06, "logits/chosen": -2.859952449798584, "logits/rejected": -3.089902877807617, "logps/chosen": -79.05742645263672, "logps/rejected": -330.2182922363281, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.18728503584861755, "rewards/margins": 6.064809322357178, "rewards/rejected": -6.252094268798828, "step": 5905 }, { "epoch": 0.92, "learning_rate": 9.815634628352092e-06, "logits/chosen": -2.7993698120117188, "logits/rejected": -2.5373449325561523, "logps/chosen": -112.18418884277344, "logps/rejected": -246.53143310546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.44676169753074646, "rewards/margins": 8.190561294555664, "rewards/rejected": -7.743799209594727, "step": 5906 }, { "epoch": 0.92, "learning_rate": 9.814901187820944e-06, "logits/chosen": -2.761686086654663, "logits/rejected": -3.1340367794036865, "logps/chosen": -123.87158966064453, "logps/rejected": -201.60787963867188, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.20598946511745453, "rewards/margins": 4.474984645843506, "rewards/rejected": -4.680974006652832, "step": 5907 }, { "epoch": 0.92, "learning_rate": 9.814167747289796e-06, "logits/chosen": -1.8715693950653076, "logits/rejected": -2.64459228515625, "logps/chosen": -92.08192443847656, "logps/rejected": -213.03839111328125, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": -1.8209779262542725, "rewards/margins": 4.478939533233643, "rewards/rejected": -6.299917221069336, "step": 5908 }, { "epoch": 0.92, "learning_rate": 9.813434306758648e-06, "logits/chosen": -1.6582955121994019, "logits/rejected": -2.871070146560669, "logps/chosen": -116.44801330566406, "logps/rejected": -376.44830322265625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.7216808795928955, "rewards/margins": 6.607758045196533, "rewards/rejected": -7.329439163208008, "step": 5909 }, { "epoch": 0.92, "learning_rate": 9.812700866227502e-06, "logits/chosen": -2.7232778072357178, "logits/rejected": -2.7950096130371094, "logps/chosen": -156.1868896484375, "logps/rejected": -143.141357421875, "loss": 3.3948, "rewards/accuracies": 0.5, "rewards/chosen": -4.068333625793457, "rewards/margins": -0.4234628677368164, "rewards/rejected": -3.6448707580566406, "step": 5910 }, { "epoch": 0.92, "learning_rate": 9.811967425696354e-06, "logits/chosen": -2.2735579013824463, "logits/rejected": -2.995009183883667, "logps/chosen": -187.33883666992188, "logps/rejected": -250.00668334960938, "loss": 2.4895, "rewards/accuracies": 0.5, "rewards/chosen": -2.7314047813415527, "rewards/margins": 0.1493539810180664, "rewards/rejected": -2.880758762359619, "step": 5911 }, { "epoch": 0.92, "learning_rate": 9.811233985165205e-06, "logits/chosen": -3.0482730865478516, "logits/rejected": -2.762601375579834, "logps/chosen": -310.7618408203125, "logps/rejected": -280.61865234375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": 0.09886935353279114, "rewards/margins": 6.076735496520996, "rewards/rejected": -5.977866172790527, "step": 5912 }, { "epoch": 0.92, "learning_rate": 9.810500544634057e-06, "logits/chosen": -2.978116750717163, "logits/rejected": -2.7605140209198, "logps/chosen": -225.84368896484375, "logps/rejected": -352.75201416015625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": 0.24649810791015625, "rewards/margins": 5.82125186920166, "rewards/rejected": -5.574753761291504, "step": 5913 }, { "epoch": 0.92, "learning_rate": 9.80976710410291e-06, "logits/chosen": -2.7727274894714355, "logits/rejected": -2.106658935546875, "logps/chosen": -221.7057342529297, "logps/rejected": -170.038330078125, "loss": 3.1516, "rewards/accuracies": 0.5, "rewards/chosen": -3.3236277103424072, "rewards/margins": -0.24712777137756348, "rewards/rejected": -3.0764999389648438, "step": 5914 }, { "epoch": 0.92, "learning_rate": 9.809033663571763e-06, "logits/chosen": -2.823192834854126, "logits/rejected": -2.9882564544677734, "logps/chosen": -247.66152954101562, "logps/rejected": -299.0149841308594, "loss": 3.1948, "rewards/accuracies": 0.5, "rewards/chosen": -3.227219581604004, "rewards/margins": -0.6576149463653564, "rewards/rejected": -2.5696046352386475, "step": 5915 }, { "epoch": 0.92, "learning_rate": 9.808300223040615e-06, "logits/chosen": -2.4755444526672363, "logits/rejected": -2.782132625579834, "logps/chosen": -56.01403045654297, "logps/rejected": -196.72779846191406, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 0.39147281646728516, "rewards/margins": 5.42067289352417, "rewards/rejected": -5.029200077056885, "step": 5916 }, { "epoch": 0.92, "learning_rate": 9.807566782509467e-06, "logits/chosen": -2.618413209915161, "logits/rejected": -2.8163435459136963, "logps/chosen": -109.95155334472656, "logps/rejected": -322.8287048339844, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.13165989518165588, "rewards/margins": 6.209076881408691, "rewards/rejected": -6.340736389160156, "step": 5917 }, { "epoch": 0.92, "learning_rate": 9.806833341978318e-06, "logits/chosen": -1.8871721029281616, "logits/rejected": -2.8967156410217285, "logps/chosen": -135.45474243164062, "logps/rejected": -379.497802734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.31844252347946167, "rewards/margins": 8.115928649902344, "rewards/rejected": -7.797486305236816, "step": 5918 }, { "epoch": 0.92, "learning_rate": 9.806099901447172e-06, "logits/chosen": -2.6273584365844727, "logits/rejected": -2.2772319316864014, "logps/chosen": -160.63800048828125, "logps/rejected": -169.65863037109375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.7830260992050171, "rewards/margins": 6.642900466918945, "rewards/rejected": -7.425926208496094, "step": 5919 }, { "epoch": 0.92, "learning_rate": 9.805366460916024e-06, "logits/chosen": -2.5842628479003906, "logits/rejected": -2.6907505989074707, "logps/chosen": -324.33038330078125, "logps/rejected": -309.8109130859375, "loss": 1.3879, "rewards/accuracies": 0.5, "rewards/chosen": -2.102208375930786, "rewards/margins": 1.5408124923706055, "rewards/rejected": -3.6430208683013916, "step": 5920 }, { "epoch": 0.92, "learning_rate": 9.804633020384876e-06, "logits/chosen": -3.294311046600342, "logits/rejected": -3.0658609867095947, "logps/chosen": -306.02886962890625, "logps/rejected": -167.43946838378906, "loss": 7.9138, "rewards/accuracies": 0.0, "rewards/chosen": -8.489166259765625, "rewards/margins": -7.913198471069336, "rewards/rejected": -0.5759682655334473, "step": 5921 }, { "epoch": 0.92, "learning_rate": 9.803899579853728e-06, "logits/chosen": -2.9881742000579834, "logits/rejected": -2.925741672515869, "logps/chosen": -151.51376342773438, "logps/rejected": -315.38446044921875, "loss": 1.6165, "rewards/accuracies": 0.5, "rewards/chosen": -1.5001245737075806, "rewards/margins": 0.884233832359314, "rewards/rejected": -2.3843584060668945, "step": 5922 }, { "epoch": 0.92, "learning_rate": 9.80316613932258e-06, "logits/chosen": -1.7646411657333374, "logits/rejected": -2.631314754486084, "logps/chosen": -134.36875915527344, "logps/rejected": -321.9687805175781, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.12125016003847122, "rewards/margins": 6.297275543212891, "rewards/rejected": -6.176025390625, "step": 5923 }, { "epoch": 0.92, "learning_rate": 9.802432698791431e-06, "logits/chosen": -2.1540379524230957, "logits/rejected": -2.799105167388916, "logps/chosen": -155.73895263671875, "logps/rejected": -265.5444030761719, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.13843536376953125, "rewards/margins": 5.524432182312012, "rewards/rejected": -5.3859968185424805, "step": 5924 }, { "epoch": 0.92, "learning_rate": 9.801699258260283e-06, "logits/chosen": -2.7643611431121826, "logits/rejected": -2.067331552505493, "logps/chosen": -394.788330078125, "logps/rejected": -196.0570831298828, "loss": 3.4008, "rewards/accuracies": 0.5, "rewards/chosen": -3.2457032203674316, "rewards/margins": -0.9933891296386719, "rewards/rejected": -2.2523140907287598, "step": 5925 }, { "epoch": 0.92, "learning_rate": 9.800965817729135e-06, "logits/chosen": -2.888495445251465, "logits/rejected": -2.6280596256256104, "logps/chosen": -347.6429748535156, "logps/rejected": -228.69728088378906, "loss": 0.0528, "rewards/accuracies": 1.0, "rewards/chosen": -0.9766906499862671, "rewards/margins": 3.4750094413757324, "rewards/rejected": -4.451700210571289, "step": 5926 }, { "epoch": 0.92, "learning_rate": 9.800232377197987e-06, "logits/chosen": -2.73754620552063, "logits/rejected": -2.707469940185547, "logps/chosen": -276.53094482421875, "logps/rejected": -242.45669555664062, "loss": 1.7209, "rewards/accuracies": 0.5, "rewards/chosen": -1.402722954750061, "rewards/margins": -0.6385787725448608, "rewards/rejected": -0.7641441822052002, "step": 5927 }, { "epoch": 0.92, "learning_rate": 9.79949893666684e-06, "logits/chosen": -2.331712007522583, "logits/rejected": -3.0126700401306152, "logps/chosen": -83.80297088623047, "logps/rejected": -251.71624755859375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.06083717197179794, "rewards/margins": 4.922732830047607, "rewards/rejected": -4.983570098876953, "step": 5928 }, { "epoch": 0.92, "learning_rate": 9.798765496135692e-06, "logits/chosen": -2.213536024093628, "logits/rejected": -2.690523862838745, "logps/chosen": -206.67352294921875, "logps/rejected": -190.94984436035156, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.5036071538925171, "rewards/margins": 4.3770856857299805, "rewards/rejected": -4.880692481994629, "step": 5929 }, { "epoch": 0.92, "learning_rate": 9.798032055604544e-06, "logits/chosen": -2.4689579010009766, "logits/rejected": -2.637238025665283, "logps/chosen": -52.41447067260742, "logps/rejected": -89.64048767089844, "loss": 1.1509, "rewards/accuracies": 0.5, "rewards/chosen": -1.7179162502288818, "rewards/margins": 2.370793342590332, "rewards/rejected": -4.088709354400635, "step": 5930 }, { "epoch": 0.92, "learning_rate": 9.797298615073396e-06, "logits/chosen": -2.784167766571045, "logits/rejected": -2.3890764713287354, "logps/chosen": -464.408935546875, "logps/rejected": -546.2000732421875, "loss": 2.3075, "rewards/accuracies": 0.5, "rewards/chosen": -2.1459596157073975, "rewards/margins": 0.44844675064086914, "rewards/rejected": -2.5944063663482666, "step": 5931 }, { "epoch": 0.92, "learning_rate": 9.796565174542248e-06, "logits/chosen": -1.5531615018844604, "logits/rejected": -2.9800686836242676, "logps/chosen": -88.71418762207031, "logps/rejected": -275.8977355957031, "loss": 2.9978, "rewards/accuracies": 0.5, "rewards/chosen": -4.325249195098877, "rewards/margins": -0.6692087650299072, "rewards/rejected": -3.6560401916503906, "step": 5932 }, { "epoch": 0.92, "learning_rate": 9.7958317340111e-06, "logits/chosen": -2.2772483825683594, "logits/rejected": -3.041018486022949, "logps/chosen": -77.44276428222656, "logps/rejected": -369.7331237792969, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.6854305267333984, "rewards/margins": 6.608421802520752, "rewards/rejected": -7.29385232925415, "step": 5933 }, { "epoch": 0.92, "learning_rate": 9.795098293479952e-06, "logits/chosen": -2.6374714374542236, "logits/rejected": -2.6744935512542725, "logps/chosen": -54.97539520263672, "logps/rejected": -180.63746643066406, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 0.4680103659629822, "rewards/margins": 5.40634822845459, "rewards/rejected": -4.938337802886963, "step": 5934 }, { "epoch": 0.92, "learning_rate": 9.794364852948804e-06, "logits/chosen": -2.671384811401367, "logits/rejected": -2.814465284347534, "logps/chosen": -140.3533172607422, "logps/rejected": -98.27708435058594, "loss": 2.3099, "rewards/accuracies": 0.5, "rewards/chosen": -2.4913792610168457, "rewards/margins": 0.5742888450622559, "rewards/rejected": -3.0656681060791016, "step": 5935 }, { "epoch": 0.92, "learning_rate": 9.793631412417656e-06, "logits/chosen": -2.607421636581421, "logits/rejected": -2.830436944961548, "logps/chosen": -121.26399230957031, "logps/rejected": -228.8399200439453, "loss": 0.9351, "rewards/accuracies": 0.5, "rewards/chosen": -0.6549506783485413, "rewards/margins": 1.3663955926895142, "rewards/rejected": -2.0213463306427, "step": 5936 }, { "epoch": 0.92, "learning_rate": 9.79289797188651e-06, "logits/chosen": -2.8301563262939453, "logits/rejected": -2.3692541122436523, "logps/chosen": -88.84881591796875, "logps/rejected": -82.62358856201172, "loss": 1.6144, "rewards/accuracies": 0.5, "rewards/chosen": -3.0263402462005615, "rewards/margins": 1.3709722757339478, "rewards/rejected": -4.397312641143799, "step": 5937 }, { "epoch": 0.92, "learning_rate": 9.792164531355361e-06, "logits/chosen": -2.058643341064453, "logits/rejected": -2.756500244140625, "logps/chosen": -156.0377960205078, "logps/rejected": -441.1250305175781, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.006015777587890625, "rewards/margins": 7.2681450843811035, "rewards/rejected": -7.274160861968994, "step": 5938 }, { "epoch": 0.92, "learning_rate": 9.791431090824213e-06, "logits/chosen": -2.465001106262207, "logits/rejected": -2.8562426567077637, "logps/chosen": -87.62974548339844, "logps/rejected": -350.3643493652344, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": -0.8176052570343018, "rewards/margins": 4.886648654937744, "rewards/rejected": -5.704253673553467, "step": 5939 }, { "epoch": 0.92, "learning_rate": 9.790697650293065e-06, "logits/chosen": -2.990557909011841, "logits/rejected": -3.139904499053955, "logps/chosen": -55.650081634521484, "logps/rejected": -155.69976806640625, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": 0.2315656840801239, "rewards/margins": 4.842414379119873, "rewards/rejected": -4.610848426818848, "step": 5940 }, { "epoch": 0.92, "learning_rate": 9.789964209761917e-06, "logits/chosen": -2.0020084381103516, "logits/rejected": -2.8977534770965576, "logps/chosen": -96.04757690429688, "logps/rejected": -397.0908203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.1003849059343338, "rewards/margins": 8.82911205291748, "rewards/rejected": -8.728727340698242, "step": 5941 }, { "epoch": 0.92, "learning_rate": 9.789230769230769e-06, "logits/chosen": -2.7545688152313232, "logits/rejected": -3.0109293460845947, "logps/chosen": -290.0139465332031, "logps/rejected": -468.5267639160156, "loss": 1.4057, "rewards/accuracies": 0.5, "rewards/chosen": -1.542399287223816, "rewards/margins": 2.5756213665008545, "rewards/rejected": -4.118020534515381, "step": 5942 }, { "epoch": 0.92, "learning_rate": 9.78849732869962e-06, "logits/chosen": -2.626537561416626, "logits/rejected": -2.368800401687622, "logps/chosen": -155.57772827148438, "logps/rejected": -244.86700439453125, "loss": 1.7142, "rewards/accuracies": 0.5, "rewards/chosen": -0.5985619425773621, "rewards/margins": 1.7315970659255981, "rewards/rejected": -2.3301589488983154, "step": 5943 }, { "epoch": 0.92, "learning_rate": 9.787763888168472e-06, "logits/chosen": -2.5118281841278076, "logits/rejected": -2.502856731414795, "logps/chosen": -86.21321868896484, "logps/rejected": -216.70538330078125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": 0.5853596329689026, "rewards/margins": 5.214427471160889, "rewards/rejected": -4.629067897796631, "step": 5944 }, { "epoch": 0.92, "learning_rate": 9.787030447637324e-06, "logits/chosen": -1.0795586109161377, "logits/rejected": -2.878235340118408, "logps/chosen": -51.29042053222656, "logps/rejected": -267.8008117675781, "loss": 0.1448, "rewards/accuracies": 1.0, "rewards/chosen": -2.180569887161255, "rewards/margins": 2.9201865196228027, "rewards/rejected": -5.1007561683654785, "step": 5945 }, { "epoch": 0.92, "learning_rate": 9.786297007106178e-06, "logits/chosen": -2.843198537826538, "logits/rejected": -2.940525770187378, "logps/chosen": -65.99488067626953, "logps/rejected": -209.49951171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.21610099077224731, "rewards/margins": 6.750235557556152, "rewards/rejected": -6.534134864807129, "step": 5946 }, { "epoch": 0.92, "learning_rate": 9.78556356657503e-06, "logits/chosen": -2.857391357421875, "logits/rejected": -2.230609655380249, "logps/chosen": -117.96223449707031, "logps/rejected": -167.38522338867188, "loss": 1.4284, "rewards/accuracies": 0.5, "rewards/chosen": -2.205791473388672, "rewards/margins": 1.5847563743591309, "rewards/rejected": -3.7905478477478027, "step": 5947 }, { "epoch": 0.93, "learning_rate": 9.784830126043882e-06, "logits/chosen": -2.8154196739196777, "logits/rejected": -2.5980021953582764, "logps/chosen": -89.08721160888672, "logps/rejected": -157.5062255859375, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -0.6209475994110107, "rewards/margins": 4.913195610046387, "rewards/rejected": -5.534142971038818, "step": 5948 }, { "epoch": 0.93, "learning_rate": 9.784096685512735e-06, "logits/chosen": -1.9509625434875488, "logits/rejected": -3.0261471271514893, "logps/chosen": -524.7444458007812, "logps/rejected": -651.1583251953125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": 0.394998162984848, "rewards/margins": 5.039156913757324, "rewards/rejected": -4.644159317016602, "step": 5949 }, { "epoch": 0.93, "learning_rate": 9.783363244981587e-06, "logits/chosen": -2.7529850006103516, "logits/rejected": -2.751006841659546, "logps/chosen": -148.92056274414062, "logps/rejected": -99.52916717529297, "loss": 1.6078, "rewards/accuracies": 0.5, "rewards/chosen": -1.1481964588165283, "rewards/margins": 0.4304933547973633, "rewards/rejected": -1.578689694404602, "step": 5950 }, { "epoch": 0.93, "learning_rate": 9.782629804450439e-06, "logits/chosen": -2.0855820178985596, "logits/rejected": -2.9783928394317627, "logps/chosen": -137.01637268066406, "logps/rejected": -380.2427978515625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.5524929165840149, "rewards/margins": 5.557667255401611, "rewards/rejected": -6.110159873962402, "step": 5951 }, { "epoch": 0.93, "learning_rate": 9.78189636391929e-06, "logits/chosen": -1.5037682056427002, "logits/rejected": -2.73868989944458, "logps/chosen": -92.3816909790039, "logps/rejected": -384.5783386230469, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -0.7676775455474854, "rewards/margins": 5.345660209655762, "rewards/rejected": -6.113337993621826, "step": 5952 }, { "epoch": 0.93, "learning_rate": 9.781162923388143e-06, "logits/chosen": -1.7887247800827026, "logits/rejected": -2.4978349208831787, "logps/chosen": -167.31015014648438, "logps/rejected": -439.30682373046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.36865463852882385, "rewards/margins": 7.242341995239258, "rewards/rejected": -7.610996246337891, "step": 5953 }, { "epoch": 0.93, "learning_rate": 9.780429482856996e-06, "logits/chosen": -2.0927670001983643, "logits/rejected": -2.8969318866729736, "logps/chosen": -136.35641479492188, "logps/rejected": -359.60552978515625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1559348106384277, "rewards/margins": 6.503807544708252, "rewards/rejected": -7.65974235534668, "step": 5954 }, { "epoch": 0.93, "learning_rate": 9.779696042325848e-06, "logits/chosen": -2.697352647781372, "logits/rejected": -3.1471402645111084, "logps/chosen": -34.84333801269531, "logps/rejected": -200.95423889160156, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": 0.030607610940933228, "rewards/margins": 5.128542423248291, "rewards/rejected": -5.097934722900391, "step": 5955 }, { "epoch": 0.93, "learning_rate": 9.7789626017947e-06, "logits/chosen": -1.8134397268295288, "logits/rejected": -2.932690143585205, "logps/chosen": -130.11207580566406, "logps/rejected": -350.8941650390625, "loss": 1.3531, "rewards/accuracies": 0.5, "rewards/chosen": -1.453892469406128, "rewards/margins": 1.8038086891174316, "rewards/rejected": -3.2577011585235596, "step": 5956 }, { "epoch": 0.93, "learning_rate": 9.778229161263552e-06, "logits/chosen": -2.7870850563049316, "logits/rejected": -2.4664859771728516, "logps/chosen": -468.97308349609375, "logps/rejected": -437.3613586425781, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.5405819416046143, "rewards/margins": 5.507871150970459, "rewards/rejected": -6.048452854156494, "step": 5957 }, { "epoch": 0.93, "learning_rate": 9.777495720732404e-06, "logits/chosen": -1.9648938179016113, "logits/rejected": -2.444347858428955, "logps/chosen": -109.48788452148438, "logps/rejected": -318.6376647949219, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.11180877685546875, "rewards/margins": 6.682311058044434, "rewards/rejected": -6.570502281188965, "step": 5958 }, { "epoch": 0.93, "learning_rate": 9.776762280201256e-06, "logits/chosen": -2.813659429550171, "logits/rejected": -1.7786437273025513, "logps/chosen": -173.44619750976562, "logps/rejected": -199.41473388671875, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -0.638562798500061, "rewards/margins": 3.9697442054748535, "rewards/rejected": -4.608306884765625, "step": 5959 }, { "epoch": 0.93, "learning_rate": 9.776028839670107e-06, "logits/chosen": -2.8681554794311523, "logits/rejected": -3.1000566482543945, "logps/chosen": -87.8526382446289, "logps/rejected": -244.37789916992188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.5385138988494873, "rewards/margins": 6.667133808135986, "rewards/rejected": -6.128620147705078, "step": 5960 }, { "epoch": 0.93, "learning_rate": 9.77529539913896e-06, "logits/chosen": -1.7504487037658691, "logits/rejected": -2.841867446899414, "logps/chosen": -147.50955200195312, "logps/rejected": -454.610595703125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.47115057706832886, "rewards/margins": 5.969686508178711, "rewards/rejected": -6.4408369064331055, "step": 5961 }, { "epoch": 0.93, "learning_rate": 9.774561958607811e-06, "logits/chosen": -2.728937864303589, "logits/rejected": -2.801591396331787, "logps/chosen": -195.95062255859375, "logps/rejected": -260.1732177734375, "loss": 0.0194, "rewards/accuracies": 1.0, "rewards/chosen": -0.16366805136203766, "rewards/margins": 4.936821937561035, "rewards/rejected": -5.100489616394043, "step": 5962 }, { "epoch": 0.93, "learning_rate": 9.773828518076665e-06, "logits/chosen": -3.023350238800049, "logits/rejected": -2.6590442657470703, "logps/chosen": -165.50608825683594, "logps/rejected": -94.63738250732422, "loss": 2.345, "rewards/accuracies": 0.5, "rewards/chosen": -3.020641803741455, "rewards/margins": 0.39790844917297363, "rewards/rejected": -3.4185502529144287, "step": 5963 }, { "epoch": 0.93, "learning_rate": 9.773095077545517e-06, "logits/chosen": -2.5337867736816406, "logits/rejected": -2.656794786453247, "logps/chosen": -279.4712219238281, "logps/rejected": -409.8286437988281, "loss": 0.8666, "rewards/accuracies": 0.5, "rewards/chosen": -1.6068817377090454, "rewards/margins": 1.7459903955459595, "rewards/rejected": -3.352872133255005, "step": 5964 }, { "epoch": 0.93, "learning_rate": 9.772361637014369e-06, "logits/chosen": -2.6005282402038574, "logits/rejected": -2.776601791381836, "logps/chosen": -298.0238037109375, "logps/rejected": -226.80982971191406, "loss": 4.4201, "rewards/accuracies": 0.5, "rewards/chosen": -3.725346565246582, "rewards/margins": -2.7214484214782715, "rewards/rejected": -1.0038982629776, "step": 5965 }, { "epoch": 0.93, "learning_rate": 9.77162819648322e-06, "logits/chosen": -1.6281731128692627, "logits/rejected": -2.7774882316589355, "logps/chosen": -215.91683959960938, "logps/rejected": -243.64523315429688, "loss": 2.1018, "rewards/accuracies": 0.5, "rewards/chosen": -2.2333261966705322, "rewards/margins": -1.2225995063781738, "rewards/rejected": -1.0107269287109375, "step": 5966 }, { "epoch": 0.93, "learning_rate": 9.770894755952072e-06, "logits/chosen": -2.9139533042907715, "logits/rejected": -1.2128863334655762, "logps/chosen": -198.13336181640625, "logps/rejected": -47.665557861328125, "loss": 2.861, "rewards/accuracies": 0.5, "rewards/chosen": -4.959743976593018, "rewards/margins": -1.8330187797546387, "rewards/rejected": -3.126725196838379, "step": 5967 }, { "epoch": 0.93, "learning_rate": 9.770161315420924e-06, "logits/chosen": -2.3053581714630127, "logits/rejected": -2.8887922763824463, "logps/chosen": -170.8548126220703, "logps/rejected": -379.9703369140625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.0108795166015625, "rewards/margins": 5.686230659484863, "rewards/rejected": -5.697110176086426, "step": 5968 }, { "epoch": 0.93, "learning_rate": 9.769427874889776e-06, "logits/chosen": -2.808882713317871, "logits/rejected": -1.9161603450775146, "logps/chosen": -272.21026611328125, "logps/rejected": -78.3144302368164, "loss": 0.6865, "rewards/accuracies": 0.5, "rewards/chosen": -0.9378036260604858, "rewards/margins": 0.8713608980178833, "rewards/rejected": -1.8091646432876587, "step": 5969 }, { "epoch": 0.93, "learning_rate": 9.768694434358628e-06, "logits/chosen": -2.032503843307495, "logits/rejected": -2.87442946434021, "logps/chosen": -60.64320373535156, "logps/rejected": -184.96514892578125, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -0.3267238736152649, "rewards/margins": 3.886904716491699, "rewards/rejected": -4.21362829208374, "step": 5970 }, { "epoch": 0.93, "learning_rate": 9.76796099382748e-06, "logits/chosen": -2.6660938262939453, "logits/rejected": -3.1121628284454346, "logps/chosen": -231.44241333007812, "logps/rejected": -423.1353759765625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -0.0345817506313324, "rewards/margins": 5.507485389709473, "rewards/rejected": -5.542067050933838, "step": 5971 }, { "epoch": 0.93, "learning_rate": 9.767227553296333e-06, "logits/chosen": -2.9070019721984863, "logits/rejected": -2.6850388050079346, "logps/chosen": -488.4499816894531, "logps/rejected": -227.23385620117188, "loss": 0.8857, "rewards/accuracies": 0.5, "rewards/chosen": -0.9113907217979431, "rewards/margins": 2.1093719005584717, "rewards/rejected": -3.0207626819610596, "step": 5972 }, { "epoch": 0.93, "learning_rate": 9.766494112765185e-06, "logits/chosen": -1.8178629875183105, "logits/rejected": -2.1235320568084717, "logps/chosen": -243.7624969482422, "logps/rejected": -461.7387390136719, "loss": 2.6398, "rewards/accuracies": 0.5, "rewards/chosen": -3.090688467025757, "rewards/margins": -0.08549737930297852, "rewards/rejected": -3.0051910877227783, "step": 5973 }, { "epoch": 0.93, "learning_rate": 9.765760672234037e-06, "logits/chosen": -2.8758342266082764, "logits/rejected": -2.80004620552063, "logps/chosen": -227.3087158203125, "logps/rejected": -181.1829833984375, "loss": 2.0578, "rewards/accuracies": 0.5, "rewards/chosen": -1.8711662292480469, "rewards/margins": 1.3628461360931396, "rewards/rejected": -3.2340123653411865, "step": 5974 }, { "epoch": 0.93, "learning_rate": 9.765027231702889e-06, "logits/chosen": -3.057039976119995, "logits/rejected": -2.8620834350585938, "logps/chosen": -277.2679443359375, "logps/rejected": -295.1532897949219, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.20010605454444885, "rewards/margins": 6.42777156829834, "rewards/rejected": -6.227665901184082, "step": 5975 }, { "epoch": 0.93, "learning_rate": 9.764293791171741e-06, "logits/chosen": -2.0705418586730957, "logits/rejected": -2.7390522956848145, "logps/chosen": -235.63992309570312, "logps/rejected": -334.51513671875, "loss": 2.4556, "rewards/accuracies": 0.5, "rewards/chosen": -2.8515167236328125, "rewards/margins": -0.7928191423416138, "rewards/rejected": -2.0586977005004883, "step": 5976 }, { "epoch": 0.93, "learning_rate": 9.763560350640593e-06, "logits/chosen": -2.0662829875946045, "logits/rejected": -2.898634672164917, "logps/chosen": -450.67620849609375, "logps/rejected": -510.00048828125, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -1.1382156610488892, "rewards/margins": 4.353172302246094, "rewards/rejected": -5.491387844085693, "step": 5977 }, { "epoch": 0.93, "learning_rate": 9.762826910109445e-06, "logits/chosen": -1.4345873594284058, "logits/rejected": -2.694631814956665, "logps/chosen": -73.79423522949219, "logps/rejected": -283.0321960449219, "loss": 1.1247, "rewards/accuracies": 0.5, "rewards/chosen": -1.9156346321105957, "rewards/margins": 1.4464020729064941, "rewards/rejected": -3.36203670501709, "step": 5978 }, { "epoch": 0.93, "learning_rate": 9.762093469578297e-06, "logits/chosen": -3.034761667251587, "logits/rejected": -2.8123252391815186, "logps/chosen": -188.09288024902344, "logps/rejected": -127.59403991699219, "loss": 0.5473, "rewards/accuracies": 0.5, "rewards/chosen": -1.45538330078125, "rewards/margins": 1.3134698867797852, "rewards/rejected": -2.768853187561035, "step": 5979 }, { "epoch": 0.93, "learning_rate": 9.761360029047148e-06, "logits/chosen": -1.7658257484436035, "logits/rejected": -2.892427682876587, "logps/chosen": -124.25245666503906, "logps/rejected": -292.1758728027344, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -0.1809220314025879, "rewards/margins": 4.0830206871032715, "rewards/rejected": -4.263942718505859, "step": 5980 }, { "epoch": 0.93, "learning_rate": 9.760626588516002e-06, "logits/chosen": -2.3134958744049072, "logits/rejected": -2.711066484451294, "logps/chosen": -40.58247375488281, "logps/rejected": -156.07737731933594, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.1879962980747223, "rewards/margins": 5.10285758972168, "rewards/rejected": -5.290853977203369, "step": 5981 }, { "epoch": 0.93, "learning_rate": 9.759893147984854e-06, "logits/chosen": -3.048168420791626, "logits/rejected": -2.9199867248535156, "logps/chosen": -212.10398864746094, "logps/rejected": -197.1034698486328, "loss": 0.0362, "rewards/accuracies": 1.0, "rewards/chosen": -1.829559326171875, "rewards/margins": 3.8662610054016113, "rewards/rejected": -5.695820331573486, "step": 5982 }, { "epoch": 0.93, "learning_rate": 9.759159707453707e-06, "logits/chosen": -3.099108934402466, "logits/rejected": -2.32734751701355, "logps/chosen": -406.7092590332031, "logps/rejected": -323.2383117675781, "loss": 0.1267, "rewards/accuracies": 1.0, "rewards/chosen": 0.077880859375, "rewards/margins": 4.123476505279541, "rewards/rejected": -4.045595645904541, "step": 5983 }, { "epoch": 0.93, "learning_rate": 9.75842626692256e-06, "logits/chosen": -2.589930534362793, "logits/rejected": -2.531714916229248, "logps/chosen": -104.55790710449219, "logps/rejected": -90.95849609375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -1.337900161743164, "rewards/margins": 4.349206447601318, "rewards/rejected": -5.687106609344482, "step": 5984 }, { "epoch": 0.93, "learning_rate": 9.757692826391411e-06, "logits/chosen": -2.6475346088409424, "logits/rejected": -3.193235397338867, "logps/chosen": -302.9208984375, "logps/rejected": -452.11163330078125, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -0.9283279180526733, "rewards/margins": 4.356990814208984, "rewards/rejected": -5.285318851470947, "step": 5985 }, { "epoch": 0.93, "learning_rate": 9.756959385860263e-06, "logits/chosen": -2.289706230163574, "logits/rejected": -2.9291677474975586, "logps/chosen": -137.0531768798828, "logps/rejected": -260.6514892578125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.013242334127426147, "rewards/margins": 6.134723663330078, "rewards/rejected": -6.121481418609619, "step": 5986 }, { "epoch": 0.93, "learning_rate": 9.756225945329115e-06, "logits/chosen": -1.759676218032837, "logits/rejected": -2.7058403491973877, "logps/chosen": -132.360595703125, "logps/rejected": -301.6399230957031, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.29484596848487854, "rewards/margins": 6.207351207733154, "rewards/rejected": -6.502197265625, "step": 5987 }, { "epoch": 0.93, "learning_rate": 9.755492504797967e-06, "logits/chosen": -2.708400249481201, "logits/rejected": -2.537337303161621, "logps/chosen": -482.7751770019531, "logps/rejected": -557.919921875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.6632965207099915, "rewards/margins": 5.941211223602295, "rewards/rejected": -5.277914524078369, "step": 5988 }, { "epoch": 0.93, "learning_rate": 9.754759064266819e-06, "logits/chosen": -3.2732815742492676, "logits/rejected": -3.110687494277954, "logps/chosen": -468.02752685546875, "logps/rejected": -354.87109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.010974138975143433, "rewards/margins": 6.614731788635254, "rewards/rejected": -6.603757381439209, "step": 5989 }, { "epoch": 0.93, "learning_rate": 9.754025623735672e-06, "logits/chosen": -2.8830692768096924, "logits/rejected": -3.0177395343780518, "logps/chosen": -91.34038543701172, "logps/rejected": -268.43353271484375, "loss": 2.4063, "rewards/accuracies": 0.5, "rewards/chosen": -1.6759190559387207, "rewards/margins": 1.7045328617095947, "rewards/rejected": -3.3804516792297363, "step": 5990 }, { "epoch": 0.93, "learning_rate": 9.753292183204524e-06, "logits/chosen": -1.1717828512191772, "logits/rejected": -2.83961820602417, "logps/chosen": -151.57461547851562, "logps/rejected": -328.38104248046875, "loss": 0.7462, "rewards/accuracies": 0.5, "rewards/chosen": -1.55227792263031, "rewards/margins": 1.3412387371063232, "rewards/rejected": -2.893516778945923, "step": 5991 }, { "epoch": 0.93, "learning_rate": 9.752558742673376e-06, "logits/chosen": -3.166393280029297, "logits/rejected": -2.3331356048583984, "logps/chosen": -564.3775634765625, "logps/rejected": -254.31333923339844, "loss": 0.9055, "rewards/accuracies": 0.5, "rewards/chosen": -2.669766664505005, "rewards/margins": 1.4014370441436768, "rewards/rejected": -4.071203708648682, "step": 5992 }, { "epoch": 0.93, "learning_rate": 9.751825302142228e-06, "logits/chosen": -2.9230334758758545, "logits/rejected": -3.017411947250366, "logps/chosen": -950.7835693359375, "logps/rejected": -684.0194091796875, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.6665436029434204, "rewards/margins": 4.195953369140625, "rewards/rejected": -4.862497329711914, "step": 5993 }, { "epoch": 0.93, "learning_rate": 9.75109186161108e-06, "logits/chosen": -3.2480928897857666, "logits/rejected": -2.684704303741455, "logps/chosen": -330.5703430175781, "logps/rejected": -168.6365966796875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.6026817560195923, "rewards/margins": 6.006203651428223, "rewards/rejected": -6.608885288238525, "step": 5994 }, { "epoch": 0.93, "learning_rate": 9.750358421079932e-06, "logits/chosen": -2.7010715007781982, "logits/rejected": -2.940505266189575, "logps/chosen": -63.38482666015625, "logps/rejected": -170.61029052734375, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -0.35760974884033203, "rewards/margins": 3.650319814682007, "rewards/rejected": -4.007929801940918, "step": 5995 }, { "epoch": 0.93, "learning_rate": 9.749624980548784e-06, "logits/chosen": -2.264373302459717, "logits/rejected": -2.681232452392578, "logps/chosen": -223.2091064453125, "logps/rejected": -260.09576416015625, "loss": 2.0227, "rewards/accuracies": 0.5, "rewards/chosen": -2.836545705795288, "rewards/margins": 0.6135947704315186, "rewards/rejected": -3.4501404762268066, "step": 5996 }, { "epoch": 0.93, "learning_rate": 9.748891540017635e-06, "logits/chosen": -2.8993418216705322, "logits/rejected": -3.0704755783081055, "logps/chosen": -103.47695922851562, "logps/rejected": -191.97711181640625, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -2.380236864089966, "rewards/margins": 2.934568405151367, "rewards/rejected": -5.314805030822754, "step": 5997 }, { "epoch": 0.93, "learning_rate": 9.748158099486487e-06, "logits/chosen": -2.549968957901001, "logits/rejected": -2.9501609802246094, "logps/chosen": -146.00460815429688, "logps/rejected": -221.37234497070312, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.6900771856307983, "rewards/margins": 4.328671932220459, "rewards/rejected": -5.018749237060547, "step": 5998 }, { "epoch": 0.93, "learning_rate": 9.747424658955341e-06, "logits/chosen": -2.155402660369873, "logits/rejected": -3.0317344665527344, "logps/chosen": -97.88345336914062, "logps/rejected": -385.00018310546875, "loss": 0.2247, "rewards/accuracies": 1.0, "rewards/chosen": -0.24064579606056213, "rewards/margins": 3.6899373531341553, "rewards/rejected": -3.9305832386016846, "step": 5999 }, { "epoch": 0.93, "learning_rate": 9.746691218424193e-06, "logits/chosen": -2.3195736408233643, "logits/rejected": -2.765439987182617, "logps/chosen": -88.08700561523438, "logps/rejected": -194.5843963623047, "loss": 1.4647, "rewards/accuracies": 0.5, "rewards/chosen": -4.490273475646973, "rewards/margins": -0.15176868438720703, "rewards/rejected": -4.338504791259766, "step": 6000 }, { "epoch": 0.93, "learning_rate": 9.745957777893045e-06, "logits/chosen": -0.9395917057991028, "logits/rejected": -1.5323741436004639, "logps/chosen": -236.51296997070312, "logps/rejected": -510.9094543457031, "loss": 1.6692, "rewards/accuracies": 0.5, "rewards/chosen": -2.626020908355713, "rewards/margins": 2.261693000793457, "rewards/rejected": -4.88771390914917, "step": 6001 }, { "epoch": 0.93, "learning_rate": 9.745224337361896e-06, "logits/chosen": -1.27373206615448, "logits/rejected": -2.7605178356170654, "logps/chosen": -77.91552734375, "logps/rejected": -320.4559020996094, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.6936362981796265, "rewards/margins": 5.21239709854126, "rewards/rejected": -5.906033515930176, "step": 6002 }, { "epoch": 0.93, "learning_rate": 9.744490896830748e-06, "logits/chosen": -2.9284439086914062, "logits/rejected": -2.5067906379699707, "logps/chosen": -97.77416229248047, "logps/rejected": -90.0988540649414, "loss": 1.3846, "rewards/accuracies": 0.5, "rewards/chosen": -1.8247066736221313, "rewards/margins": 1.1860170364379883, "rewards/rejected": -3.01072359085083, "step": 6003 }, { "epoch": 0.93, "learning_rate": 9.7437574562996e-06, "logits/chosen": -2.9279983043670654, "logits/rejected": -2.4677019119262695, "logps/chosen": -157.3133544921875, "logps/rejected": -215.7464599609375, "loss": 2.5112, "rewards/accuracies": 0.5, "rewards/chosen": -2.4755213260650635, "rewards/margins": 1.6122794151306152, "rewards/rejected": -4.0878005027771, "step": 6004 }, { "epoch": 0.93, "learning_rate": 9.743024015768452e-06, "logits/chosen": -2.120518207550049, "logits/rejected": -2.9314467906951904, "logps/chosen": -29.869352340698242, "logps/rejected": -175.2830810546875, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -1.4029828310012817, "rewards/margins": 2.745149612426758, "rewards/rejected": -4.14813232421875, "step": 6005 }, { "epoch": 0.93, "learning_rate": 9.742290575237304e-06, "logits/chosen": -2.5232367515563965, "logits/rejected": -2.6740851402282715, "logps/chosen": -295.85968017578125, "logps/rejected": -305.65289306640625, "loss": 0.1252, "rewards/accuracies": 1.0, "rewards/chosen": -1.1969105005264282, "rewards/margins": 3.371799945831299, "rewards/rejected": -4.5687103271484375, "step": 6006 }, { "epoch": 0.93, "learning_rate": 9.741557134706156e-06, "logits/chosen": -2.7234456539154053, "logits/rejected": -3.2065823078155518, "logps/chosen": -38.62123107910156, "logps/rejected": -159.93777465820312, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.2674809694290161, "rewards/margins": 5.362964630126953, "rewards/rejected": -6.630445957183838, "step": 6007 }, { "epoch": 0.93, "learning_rate": 9.74082369417501e-06, "logits/chosen": -2.782341241836548, "logits/rejected": -2.700805902481079, "logps/chosen": -174.71133422851562, "logps/rejected": -193.17681884765625, "loss": 0.8143, "rewards/accuracies": 0.5, "rewards/chosen": -1.1636933088302612, "rewards/margins": 1.8055455684661865, "rewards/rejected": -2.969238758087158, "step": 6008 }, { "epoch": 0.93, "learning_rate": 9.740090253643861e-06, "logits/chosen": -2.755894660949707, "logits/rejected": -2.870494842529297, "logps/chosen": -75.76081848144531, "logps/rejected": -267.0135498046875, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": -0.93560391664505, "rewards/margins": 5.571240425109863, "rewards/rejected": -6.506844520568848, "step": 6009 }, { "epoch": 0.93, "learning_rate": 9.739356813112713e-06, "logits/chosen": -2.9107234477996826, "logits/rejected": -3.233158588409424, "logps/chosen": -523.95068359375, "logps/rejected": -771.60693359375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.5476135611534119, "rewards/margins": 6.7933430671691895, "rewards/rejected": -7.340956687927246, "step": 6010 }, { "epoch": 0.93, "learning_rate": 9.738623372581565e-06, "logits/chosen": -2.7254836559295654, "logits/rejected": -2.896296739578247, "logps/chosen": -56.510169982910156, "logps/rejected": -197.07118225097656, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.5239801406860352, "rewards/margins": 4.69000768661499, "rewards/rejected": -5.213987827301025, "step": 6011 }, { "epoch": 0.93, "learning_rate": 9.737889932050417e-06, "logits/chosen": -3.19223690032959, "logits/rejected": -2.5980632305145264, "logps/chosen": -937.078369140625, "logps/rejected": -424.0716552734375, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": -1.4025787115097046, "rewards/margins": 3.485088348388672, "rewards/rejected": -4.887667179107666, "step": 6012 }, { "epoch": 0.94, "learning_rate": 9.737156491519269e-06, "logits/chosen": -2.7458560466766357, "logits/rejected": -3.2122230529785156, "logps/chosen": -169.0197296142578, "logps/rejected": -372.9054870605469, "loss": 0.2199, "rewards/accuracies": 1.0, "rewards/chosen": -3.0336670875549316, "rewards/margins": 3.2911529541015625, "rewards/rejected": -6.324820041656494, "step": 6013 }, { "epoch": 0.94, "learning_rate": 9.73642305098812e-06, "logits/chosen": -3.189643621444702, "logits/rejected": -2.97114634513855, "logps/chosen": -201.80014038085938, "logps/rejected": -313.6196594238281, "loss": 1.9865, "rewards/accuracies": 0.5, "rewards/chosen": -2.0933828353881836, "rewards/margins": 2.692023754119873, "rewards/rejected": -4.785406589508057, "step": 6014 }, { "epoch": 0.94, "learning_rate": 9.735689610456974e-06, "logits/chosen": -2.7992289066314697, "logits/rejected": -3.009295701980591, "logps/chosen": -96.08097839355469, "logps/rejected": -162.26739501953125, "loss": 0.2924, "rewards/accuracies": 1.0, "rewards/chosen": -2.2728216648101807, "rewards/margins": 3.2923288345336914, "rewards/rejected": -5.565150260925293, "step": 6015 }, { "epoch": 0.94, "learning_rate": 9.734956169925826e-06, "logits/chosen": -2.4625773429870605, "logits/rejected": -3.03296160697937, "logps/chosen": -141.54331970214844, "logps/rejected": -340.86199951171875, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -1.239988923072815, "rewards/margins": 3.7271759510040283, "rewards/rejected": -4.967164993286133, "step": 6016 }, { "epoch": 0.94, "learning_rate": 9.73422272939468e-06, "logits/chosen": -1.8635213375091553, "logits/rejected": -2.8965866565704346, "logps/chosen": -114.3121337890625, "logps/rejected": -222.0189666748047, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 0.22323112189769745, "rewards/margins": 4.594725608825684, "rewards/rejected": -4.371494293212891, "step": 6017 }, { "epoch": 0.94, "learning_rate": 9.733489288863532e-06, "logits/chosen": -2.9406073093414307, "logits/rejected": -2.493776321411133, "logps/chosen": -697.4640502929688, "logps/rejected": -374.2264709472656, "loss": 1.688, "rewards/accuracies": 0.5, "rewards/chosen": -1.986803412437439, "rewards/margins": 0.7138063907623291, "rewards/rejected": -2.7006099224090576, "step": 6018 }, { "epoch": 0.94, "learning_rate": 9.732755848332384e-06, "logits/chosen": -2.628406286239624, "logits/rejected": -2.9764745235443115, "logps/chosen": -278.58642578125, "logps/rejected": -209.84686279296875, "loss": 0.0752, "rewards/accuracies": 1.0, "rewards/chosen": -1.6246689558029175, "rewards/margins": 4.125979900360107, "rewards/rejected": -5.7506489753723145, "step": 6019 }, { "epoch": 0.94, "learning_rate": 9.732022407801235e-06, "logits/chosen": -2.6060407161712646, "logits/rejected": -2.987964153289795, "logps/chosen": -858.101318359375, "logps/rejected": -649.107177734375, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -1.4072265625, "rewards/margins": 3.855360507965088, "rewards/rejected": -5.262587070465088, "step": 6020 }, { "epoch": 0.94, "learning_rate": 9.731288967270087e-06, "logits/chosen": -2.718424081802368, "logits/rejected": -2.0505218505859375, "logps/chosen": -396.3445739746094, "logps/rejected": -375.74652099609375, "loss": 2.2058, "rewards/accuracies": 0.5, "rewards/chosen": -2.63552188873291, "rewards/margins": 0.9289677143096924, "rewards/rejected": -3.5644896030426025, "step": 6021 }, { "epoch": 0.94, "learning_rate": 9.730555526738939e-06, "logits/chosen": -0.7480267882347107, "logits/rejected": -2.889075756072998, "logps/chosen": -84.7340087890625, "logps/rejected": -669.2264404296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.001205563545227, "rewards/margins": 7.308752059936523, "rewards/rejected": -8.309957504272461, "step": 6022 }, { "epoch": 0.94, "learning_rate": 9.729822086207791e-06, "logits/chosen": -3.168656587600708, "logits/rejected": -3.261091947555542, "logps/chosen": -567.1895751953125, "logps/rejected": -446.3121337890625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.5584484338760376, "rewards/margins": 5.911887168884277, "rewards/rejected": -6.470335483551025, "step": 6023 }, { "epoch": 0.94, "learning_rate": 9.729088645676643e-06, "logits/chosen": -2.996480703353882, "logits/rejected": -2.3191747665405273, "logps/chosen": -676.730712890625, "logps/rejected": -468.4808349609375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.4389235973358154, "rewards/margins": 6.220059394836426, "rewards/rejected": -7.658982753753662, "step": 6024 }, { "epoch": 0.94, "learning_rate": 9.728355205145495e-06, "logits/chosen": -2.7948222160339355, "logits/rejected": -2.9156901836395264, "logps/chosen": -417.732421875, "logps/rejected": -588.1542358398438, "loss": 1.9087, "rewards/accuracies": 0.5, "rewards/chosen": -3.529674530029297, "rewards/margins": -0.2561606168746948, "rewards/rejected": -3.2735137939453125, "step": 6025 }, { "epoch": 0.94, "learning_rate": 9.727621764614348e-06, "logits/chosen": -2.25587797164917, "logits/rejected": -2.425307273864746, "logps/chosen": -144.31671142578125, "logps/rejected": -218.71109008789062, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.1936393976211548, "rewards/margins": 4.4554266929626465, "rewards/rejected": -5.649065971374512, "step": 6026 }, { "epoch": 0.94, "learning_rate": 9.7268883240832e-06, "logits/chosen": -2.7524921894073486, "logits/rejected": -3.0504043102264404, "logps/chosen": -40.950103759765625, "logps/rejected": -167.10501098632812, "loss": 0.0427, "rewards/accuracies": 1.0, "rewards/chosen": -0.9086178541183472, "rewards/margins": 3.139894485473633, "rewards/rejected": -4.0485124588012695, "step": 6027 }, { "epoch": 0.94, "learning_rate": 9.726154883552052e-06, "logits/chosen": -3.049666404724121, "logits/rejected": -3.069657802581787, "logps/chosen": -169.61866760253906, "logps/rejected": -209.88845825195312, "loss": 1.3781, "rewards/accuracies": 0.5, "rewards/chosen": -2.5439841747283936, "rewards/margins": 1.0671892166137695, "rewards/rejected": -3.611173391342163, "step": 6028 }, { "epoch": 0.94, "learning_rate": 9.725421443020904e-06, "logits/chosen": -2.403866767883301, "logits/rejected": -2.8476645946502686, "logps/chosen": -470.2266540527344, "logps/rejected": -534.8538818359375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.7281837463378906, "rewards/margins": 5.097951412200928, "rewards/rejected": -5.826135635375977, "step": 6029 }, { "epoch": 0.94, "learning_rate": 9.724688002489756e-06, "logits/chosen": -2.0600757598876953, "logits/rejected": -2.7670984268188477, "logps/chosen": -61.842857360839844, "logps/rejected": -422.0981140136719, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -1.1802282333374023, "rewards/margins": 4.961802005767822, "rewards/rejected": -6.142030239105225, "step": 6030 }, { "epoch": 0.94, "learning_rate": 9.723954561958608e-06, "logits/chosen": -2.353107452392578, "logits/rejected": -2.9541451930999756, "logps/chosen": -329.5281982421875, "logps/rejected": -631.5637817382812, "loss": 2.0095, "rewards/accuracies": 0.5, "rewards/chosen": -2.139594316482544, "rewards/margins": 1.3739922046661377, "rewards/rejected": -3.5135865211486816, "step": 6031 }, { "epoch": 0.94, "learning_rate": 9.72322112142746e-06, "logits/chosen": -2.2939937114715576, "logits/rejected": -2.4653120040893555, "logps/chosen": -234.9615936279297, "logps/rejected": -244.21786499023438, "loss": 1.4961, "rewards/accuracies": 0.5, "rewards/chosen": -2.2381255626678467, "rewards/margins": 2.7757482528686523, "rewards/rejected": -5.01387357711792, "step": 6032 }, { "epoch": 0.94, "learning_rate": 9.722487680896311e-06, "logits/chosen": -2.131906270980835, "logits/rejected": -2.6162846088409424, "logps/chosen": -197.7191925048828, "logps/rejected": -195.17930603027344, "loss": 0.4295, "rewards/accuracies": 0.5, "rewards/chosen": -2.955261468887329, "rewards/margins": 2.6235616207122803, "rewards/rejected": -5.578823089599609, "step": 6033 }, { "epoch": 0.94, "learning_rate": 9.721754240365163e-06, "logits/chosen": -2.5504815578460693, "logits/rejected": -2.9497551918029785, "logps/chosen": -73.391845703125, "logps/rejected": -277.4831237792969, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.050605297088623, "rewards/margins": 4.87738037109375, "rewards/rejected": -5.927985191345215, "step": 6034 }, { "epoch": 0.94, "learning_rate": 9.721020799834017e-06, "logits/chosen": -2.6265153884887695, "logits/rejected": -3.0655086040496826, "logps/chosen": -224.0896759033203, "logps/rejected": -354.59783935546875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.40549010038375854, "rewards/margins": 5.735757350921631, "rewards/rejected": -6.141247749328613, "step": 6035 }, { "epoch": 0.94, "learning_rate": 9.720287359302869e-06, "logits/chosen": -2.1363236904144287, "logits/rejected": -2.7525806427001953, "logps/chosen": -272.8103942871094, "logps/rejected": -496.66229248046875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -1.2775322198867798, "rewards/margins": 4.063089847564697, "rewards/rejected": -5.3406219482421875, "step": 6036 }, { "epoch": 0.94, "learning_rate": 9.71955391877172e-06, "logits/chosen": -2.156111240386963, "logits/rejected": -2.8827502727508545, "logps/chosen": -75.42617797851562, "logps/rejected": -420.85894775390625, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -0.5760582089424133, "rewards/margins": 5.417724132537842, "rewards/rejected": -5.993782043457031, "step": 6037 }, { "epoch": 0.94, "learning_rate": 9.718820478240573e-06, "logits/chosen": -2.228055000305176, "logits/rejected": -2.736175537109375, "logps/chosen": -209.53912353515625, "logps/rejected": -283.24530029296875, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.5767127871513367, "rewards/margins": 5.2314581871032715, "rewards/rejected": -5.808171272277832, "step": 6038 }, { "epoch": 0.94, "learning_rate": 9.718087037709424e-06, "logits/chosen": -1.8223085403442383, "logits/rejected": -2.56345796585083, "logps/chosen": -145.67501831054688, "logps/rejected": -387.81500244140625, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -0.051952362060546875, "rewards/margins": 6.104665756225586, "rewards/rejected": -6.156618118286133, "step": 6039 }, { "epoch": 0.94, "learning_rate": 9.717353597178276e-06, "logits/chosen": -2.4075143337249756, "logits/rejected": -2.976701021194458, "logps/chosen": -370.5090637207031, "logps/rejected": -545.433349609375, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": -1.5518242120742798, "rewards/margins": 4.319197654724121, "rewards/rejected": -5.8710222244262695, "step": 6040 }, { "epoch": 0.94, "learning_rate": 9.716620156647128e-06, "logits/chosen": -2.8506221771240234, "logits/rejected": -1.7136914730072021, "logps/chosen": -196.9199981689453, "logps/rejected": -126.04843139648438, "loss": 0.043, "rewards/accuracies": 1.0, "rewards/chosen": -1.7213791608810425, "rewards/margins": 4.967572212219238, "rewards/rejected": -6.688951015472412, "step": 6041 }, { "epoch": 0.94, "learning_rate": 9.71588671611598e-06, "logits/chosen": -2.9229323863983154, "logits/rejected": -2.687403678894043, "logps/chosen": -121.84367370605469, "logps/rejected": -214.10389709472656, "loss": 2.3716, "rewards/accuracies": 0.0, "rewards/chosen": -4.279600143432617, "rewards/margins": -2.0180978775024414, "rewards/rejected": -2.261502265930176, "step": 6042 }, { "epoch": 0.94, "learning_rate": 9.715153275584832e-06, "logits/chosen": -2.0172650814056396, "logits/rejected": -2.7070891857147217, "logps/chosen": -173.4926300048828, "logps/rejected": -301.4399719238281, "loss": 1.4679, "rewards/accuracies": 0.5, "rewards/chosen": -2.4979748725891113, "rewards/margins": 0.42301011085510254, "rewards/rejected": -2.920984983444214, "step": 6043 }, { "epoch": 0.94, "learning_rate": 9.714419835053686e-06, "logits/chosen": -2.4638028144836426, "logits/rejected": -2.9419662952423096, "logps/chosen": -320.35040283203125, "logps/rejected": -463.7857666015625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.5639923214912415, "rewards/margins": 6.21516752243042, "rewards/rejected": -6.7791595458984375, "step": 6044 }, { "epoch": 0.94, "learning_rate": 9.713686394522537e-06, "logits/chosen": -1.321368932723999, "logits/rejected": -2.6849405765533447, "logps/chosen": -92.29178619384766, "logps/rejected": -365.94024658203125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.23054391145706177, "rewards/margins": 5.875828742980957, "rewards/rejected": -6.106372356414795, "step": 6045 }, { "epoch": 0.94, "learning_rate": 9.71295295399139e-06, "logits/chosen": -1.7031015157699585, "logits/rejected": -2.650862455368042, "logps/chosen": -119.220703125, "logps/rejected": -368.3013916015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.30014514923095703, "rewards/margins": 7.218569755554199, "rewards/rejected": -7.518714904785156, "step": 6046 }, { "epoch": 0.94, "learning_rate": 9.712219513460241e-06, "logits/chosen": -3.0692100524902344, "logits/rejected": -3.217038154602051, "logps/chosen": -61.74945068359375, "logps/rejected": -115.44865417480469, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -4.007828235626221, "rewards/margins": 3.1615395545959473, "rewards/rejected": -7.169367790222168, "step": 6047 }, { "epoch": 0.94, "learning_rate": 9.711486072929093e-06, "logits/chosen": -2.7354886531829834, "logits/rejected": -2.8789188861846924, "logps/chosen": -171.22840881347656, "logps/rejected": -233.47561645507812, "loss": 2.3839, "rewards/accuracies": 0.5, "rewards/chosen": -2.761223793029785, "rewards/margins": 1.0421843528747559, "rewards/rejected": -3.803408145904541, "step": 6048 }, { "epoch": 0.94, "learning_rate": 9.710752632397947e-06, "logits/chosen": -2.9824469089508057, "logits/rejected": -2.5061442852020264, "logps/chosen": -654.1787109375, "logps/rejected": -572.9111938476562, "loss": 2.5821, "rewards/accuracies": 0.5, "rewards/chosen": -3.6897857189178467, "rewards/margins": -0.5220808982849121, "rewards/rejected": -3.1677048206329346, "step": 6049 }, { "epoch": 0.94, "learning_rate": 9.710019191866799e-06, "logits/chosen": -2.35689115524292, "logits/rejected": -2.925049066543579, "logps/chosen": -56.805152893066406, "logps/rejected": -246.03701782226562, "loss": 0.1474, "rewards/accuracies": 1.0, "rewards/chosen": -1.9817171096801758, "rewards/margins": 3.9960074424743652, "rewards/rejected": -5.977724552154541, "step": 6050 }, { "epoch": 0.94, "learning_rate": 9.70928575133565e-06, "logits/chosen": -2.9778316020965576, "logits/rejected": -2.6240246295928955, "logps/chosen": -155.46250915527344, "logps/rejected": -229.64031982421875, "loss": 1.6969, "rewards/accuracies": 0.5, "rewards/chosen": -2.5002267360687256, "rewards/margins": 2.3353934288024902, "rewards/rejected": -4.835620403289795, "step": 6051 }, { "epoch": 0.94, "learning_rate": 9.708552310804504e-06, "logits/chosen": -1.8867716789245605, "logits/rejected": -2.547473192214966, "logps/chosen": -94.119873046875, "logps/rejected": -307.913818359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.830082356929779, "rewards/margins": 6.671541213989258, "rewards/rejected": -7.501623630523682, "step": 6052 }, { "epoch": 0.94, "learning_rate": 9.707818870273356e-06, "logits/chosen": -1.5186644792556763, "logits/rejected": -2.9073586463928223, "logps/chosen": -156.99044799804688, "logps/rejected": -391.68878173828125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.08731690049171448, "rewards/margins": 6.1018524169921875, "rewards/rejected": -6.189169406890869, "step": 6053 }, { "epoch": 0.94, "learning_rate": 9.707085429742208e-06, "logits/chosen": -2.5751936435699463, "logits/rejected": -2.9789247512817383, "logps/chosen": -37.70049285888672, "logps/rejected": -156.74295043945312, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.1856565475463867, "rewards/margins": 5.696351051330566, "rewards/rejected": -7.882007598876953, "step": 6054 }, { "epoch": 0.94, "learning_rate": 9.70635198921106e-06, "logits/chosen": -2.721662998199463, "logits/rejected": -2.4231343269348145, "logps/chosen": -172.96343994140625, "logps/rejected": -237.34791564941406, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -1.129809856414795, "rewards/margins": 4.5663275718688965, "rewards/rejected": -5.696137428283691, "step": 6055 }, { "epoch": 0.94, "learning_rate": 9.705618548679911e-06, "logits/chosen": -2.768766164779663, "logits/rejected": -2.6724579334259033, "logps/chosen": -545.0537719726562, "logps/rejected": -426.0624084472656, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.0990092754364014, "rewards/margins": 6.373093605041504, "rewards/rejected": -7.472102642059326, "step": 6056 }, { "epoch": 0.94, "learning_rate": 9.704885108148763e-06, "logits/chosen": -0.585849940776825, "logits/rejected": -2.2383198738098145, "logps/chosen": -86.74835205078125, "logps/rejected": -302.3174743652344, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -2.232847213745117, "rewards/margins": 4.488295078277588, "rewards/rejected": -6.721142768859863, "step": 6057 }, { "epoch": 0.94, "learning_rate": 9.704151667617615e-06, "logits/chosen": -2.6823630332946777, "logits/rejected": -2.963012218475342, "logps/chosen": -388.35955810546875, "logps/rejected": -436.5512390136719, "loss": 2.5695, "rewards/accuracies": 0.5, "rewards/chosen": -2.895782470703125, "rewards/margins": 2.5597612857818604, "rewards/rejected": -5.455543518066406, "step": 6058 }, { "epoch": 0.94, "learning_rate": 9.703418227086467e-06, "logits/chosen": -2.249891996383667, "logits/rejected": -3.266918897628784, "logps/chosen": -49.202781677246094, "logps/rejected": -243.81539916992188, "loss": 0.0392, "rewards/accuracies": 1.0, "rewards/chosen": -0.626231849193573, "rewards/margins": 3.284428119659424, "rewards/rejected": -3.9106597900390625, "step": 6059 }, { "epoch": 0.94, "learning_rate": 9.702684786555319e-06, "logits/chosen": -3.167249917984009, "logits/rejected": -2.889784097671509, "logps/chosen": -205.0928192138672, "logps/rejected": -269.29815673828125, "loss": 2.8579, "rewards/accuracies": 0.5, "rewards/chosen": -3.1772327423095703, "rewards/margins": -0.6061077117919922, "rewards/rejected": -2.571125030517578, "step": 6060 }, { "epoch": 0.94, "learning_rate": 9.701951346024173e-06, "logits/chosen": -2.171529531478882, "logits/rejected": -3.1102781295776367, "logps/chosen": -182.39639282226562, "logps/rejected": -306.7544250488281, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.98526531457901, "rewards/margins": 5.0567426681518555, "rewards/rejected": -6.0420074462890625, "step": 6061 }, { "epoch": 0.94, "learning_rate": 9.701217905493024e-06, "logits/chosen": -2.489985704421997, "logits/rejected": -3.1852049827575684, "logps/chosen": -86.53054809570312, "logps/rejected": -227.82318115234375, "loss": 2.1726, "rewards/accuracies": 0.5, "rewards/chosen": -3.5960159301757812, "rewards/margins": -0.7491368055343628, "rewards/rejected": -2.846879243850708, "step": 6062 }, { "epoch": 0.94, "learning_rate": 9.700484464961876e-06, "logits/chosen": -2.3655738830566406, "logits/rejected": -3.101234197616577, "logps/chosen": -149.48516845703125, "logps/rejected": -303.71734619140625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.628913164138794, "rewards/margins": 6.278794288635254, "rewards/rejected": -6.907707214355469, "step": 6063 }, { "epoch": 0.94, "learning_rate": 9.699751024430728e-06, "logits/chosen": -2.888270378112793, "logits/rejected": -1.8850698471069336, "logps/chosen": -250.21348571777344, "logps/rejected": -181.57827758789062, "loss": 1.1374, "rewards/accuracies": 0.5, "rewards/chosen": -2.27304744720459, "rewards/margins": 1.3075635433197021, "rewards/rejected": -3.580610990524292, "step": 6064 }, { "epoch": 0.94, "learning_rate": 9.69901758389958e-06, "logits/chosen": -2.2015841007232666, "logits/rejected": -2.0998587608337402, "logps/chosen": -173.2357177734375, "logps/rejected": -277.3817138671875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.1047836542129517, "rewards/margins": 5.707655906677246, "rewards/rejected": -6.812439918518066, "step": 6065 }, { "epoch": 0.94, "learning_rate": 9.698284143368432e-06, "logits/chosen": -2.9275550842285156, "logits/rejected": -3.0895113945007324, "logps/chosen": -215.460693359375, "logps/rejected": -287.96905517578125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7338725924491882, "rewards/margins": 5.939472198486328, "rewards/rejected": -6.673344612121582, "step": 6066 }, { "epoch": 0.94, "learning_rate": 9.697550702837284e-06, "logits/chosen": -2.750253200531006, "logits/rejected": -2.1215391159057617, "logps/chosen": -722.7843017578125, "logps/rejected": -483.060302734375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.2958618104457855, "rewards/margins": 4.7890729904174805, "rewards/rejected": -5.084935188293457, "step": 6067 }, { "epoch": 0.94, "learning_rate": 9.696817262306136e-06, "logits/chosen": -2.7548892498016357, "logits/rejected": -3.04641056060791, "logps/chosen": -202.4203338623047, "logps/rejected": -262.0745849609375, "loss": 2.0669, "rewards/accuracies": 0.5, "rewards/chosen": -1.64129638671875, "rewards/margins": 1.6735076904296875, "rewards/rejected": -3.3148040771484375, "step": 6068 }, { "epoch": 0.94, "learning_rate": 9.696083821774988e-06, "logits/chosen": -2.816462755203247, "logits/rejected": -2.1956734657287598, "logps/chosen": -319.5123291015625, "logps/rejected": -158.75657653808594, "loss": 3.1716, "rewards/accuracies": 0.5, "rewards/chosen": -3.5800278186798096, "rewards/margins": -0.4724855422973633, "rewards/rejected": -3.1075422763824463, "step": 6069 }, { "epoch": 0.94, "learning_rate": 9.695350381243841e-06, "logits/chosen": -2.7628509998321533, "logits/rejected": -2.707331657409668, "logps/chosen": -111.29102325439453, "logps/rejected": -302.87261962890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9003852605819702, "rewards/margins": 6.957777976989746, "rewards/rejected": -7.858162879943848, "step": 6070 }, { "epoch": 0.94, "learning_rate": 9.694616940712693e-06, "logits/chosen": -1.6529442071914673, "logits/rejected": -2.927003860473633, "logps/chosen": -219.49285888671875, "logps/rejected": -613.465087890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.1530403196811676, "rewards/margins": 7.959949493408203, "rewards/rejected": -7.806909084320068, "step": 6071 }, { "epoch": 0.94, "learning_rate": 9.693883500181545e-06, "logits/chosen": -1.6683616638183594, "logits/rejected": -2.686387777328491, "logps/chosen": -82.9696044921875, "logps/rejected": -148.5960235595703, "loss": 0.955, "rewards/accuracies": 0.5, "rewards/chosen": -3.0167171955108643, "rewards/margins": 0.9887351989746094, "rewards/rejected": -4.0054521560668945, "step": 6072 }, { "epoch": 0.94, "learning_rate": 9.693150059650397e-06, "logits/chosen": -2.8574278354644775, "logits/rejected": -2.5043184757232666, "logps/chosen": -176.31201171875, "logps/rejected": -177.6110076904297, "loss": 2.1411, "rewards/accuracies": 0.5, "rewards/chosen": -2.4019744396209717, "rewards/margins": 1.5760869979858398, "rewards/rejected": -3.9780616760253906, "step": 6073 }, { "epoch": 0.94, "learning_rate": 9.692416619119249e-06, "logits/chosen": -2.4710919857025146, "logits/rejected": -0.5820761919021606, "logps/chosen": -452.6024475097656, "logps/rejected": -62.892608642578125, "loss": 6.0972, "rewards/accuracies": 0.5, "rewards/chosen": -7.1031951904296875, "rewards/margins": -4.894685745239258, "rewards/rejected": -2.208510160446167, "step": 6074 }, { "epoch": 0.94, "learning_rate": 9.6916831785881e-06, "logits/chosen": -2.4413039684295654, "logits/rejected": -2.4279627799987793, "logps/chosen": -247.07635498046875, "logps/rejected": -341.92279052734375, "loss": 2.4348, "rewards/accuracies": 0.5, "rewards/chosen": -2.776782512664795, "rewards/margins": -0.5929850339889526, "rewards/rejected": -2.1837973594665527, "step": 6075 }, { "epoch": 0.94, "learning_rate": 9.690949738056952e-06, "logits/chosen": -1.799545168876648, "logits/rejected": -2.8000223636627197, "logps/chosen": -202.16220092773438, "logps/rejected": -397.9342956542969, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9061124920845032, "rewards/margins": 5.7123703956604, "rewards/rejected": -6.618483066558838, "step": 6076 }, { "epoch": 0.95, "learning_rate": 9.690216297525804e-06, "logits/chosen": -2.9778246879577637, "logits/rejected": -2.3268356323242188, "logps/chosen": -667.467529296875, "logps/rejected": -527.8792724609375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.3032592833042145, "rewards/margins": 6.508200645446777, "rewards/rejected": -6.811459541320801, "step": 6077 }, { "epoch": 0.95, "learning_rate": 9.689482856994656e-06, "logits/chosen": -2.4130783081054688, "logits/rejected": -2.9837183952331543, "logps/chosen": -78.17118835449219, "logps/rejected": -83.572509765625, "loss": 2.5963, "rewards/accuracies": 0.5, "rewards/chosen": -2.7528529167175293, "rewards/margins": 0.332622766494751, "rewards/rejected": -3.0854756832122803, "step": 6078 }, { "epoch": 0.95, "learning_rate": 9.68874941646351e-06, "logits/chosen": -2.8140816688537598, "logits/rejected": -1.8309309482574463, "logps/chosen": -484.20941162109375, "logps/rejected": -277.45513916015625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.47110825777053833, "rewards/margins": 5.183437347412109, "rewards/rejected": -4.712328910827637, "step": 6079 }, { "epoch": 0.95, "learning_rate": 9.688015975932362e-06, "logits/chosen": -1.540380597114563, "logits/rejected": -2.7585701942443848, "logps/chosen": -103.95173645019531, "logps/rejected": -191.6322479248047, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.5697296261787415, "rewards/margins": 5.407052040100098, "rewards/rejected": -5.976781368255615, "step": 6080 }, { "epoch": 0.95, "learning_rate": 9.687282535401214e-06, "logits/chosen": -3.122358560562134, "logits/rejected": -2.669779062271118, "logps/chosen": -457.43798828125, "logps/rejected": -385.43951416015625, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -0.4882568418979645, "rewards/margins": 4.776208400726318, "rewards/rejected": -5.26446533203125, "step": 6081 }, { "epoch": 0.95, "learning_rate": 9.686549094870065e-06, "logits/chosen": -1.839307188987732, "logits/rejected": -2.876572847366333, "logps/chosen": -280.1961975097656, "logps/rejected": -427.0787353515625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5356219410896301, "rewards/margins": 5.041016578674316, "rewards/rejected": -5.576638698577881, "step": 6082 }, { "epoch": 0.95, "learning_rate": 9.685815654338917e-06, "logits/chosen": -1.1727265119552612, "logits/rejected": -2.7202301025390625, "logps/chosen": -105.30543518066406, "logps/rejected": -383.2020568847656, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.7198055386543274, "rewards/margins": 6.088079452514648, "rewards/rejected": -6.80788516998291, "step": 6083 }, { "epoch": 0.95, "learning_rate": 9.68508221380777e-06, "logits/chosen": -1.763796329498291, "logits/rejected": -2.805021047592163, "logps/chosen": -31.529563903808594, "logps/rejected": -202.0267791748047, "loss": 0.0901, "rewards/accuracies": 1.0, "rewards/chosen": -1.6056721210479736, "rewards/margins": 3.4937620162963867, "rewards/rejected": -5.099433898925781, "step": 6084 }, { "epoch": 0.95, "learning_rate": 9.684348773276623e-06, "logits/chosen": -2.843815565109253, "logits/rejected": -3.077934503555298, "logps/chosen": -221.9327392578125, "logps/rejected": -217.21478271484375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.49995651841163635, "rewards/margins": 5.393842697143555, "rewards/rejected": -5.893798828125, "step": 6085 }, { "epoch": 0.95, "learning_rate": 9.683615332745475e-06, "logits/chosen": -3.1945865154266357, "logits/rejected": -2.6714959144592285, "logps/chosen": -174.08309936523438, "logps/rejected": -92.77244567871094, "loss": 1.5485, "rewards/accuracies": 0.5, "rewards/chosen": -3.062056064605713, "rewards/margins": 0.5822916030883789, "rewards/rejected": -3.644347667694092, "step": 6086 }, { "epoch": 0.95, "learning_rate": 9.682881892214326e-06, "logits/chosen": -1.634342908859253, "logits/rejected": -2.3960964679718018, "logps/chosen": -97.58698272705078, "logps/rejected": -304.9745178222656, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.29177531599998474, "rewards/margins": 6.51603889465332, "rewards/rejected": -6.807814121246338, "step": 6087 }, { "epoch": 0.95, "learning_rate": 9.68214845168318e-06, "logits/chosen": -2.1835455894470215, "logits/rejected": -2.6717538833618164, "logps/chosen": -119.30897521972656, "logps/rejected": -260.1181335449219, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -0.6212224364280701, "rewards/margins": 5.406618118286133, "rewards/rejected": -6.027840614318848, "step": 6088 }, { "epoch": 0.95, "learning_rate": 9.681415011152032e-06, "logits/chosen": -1.805736780166626, "logits/rejected": -2.8456871509552, "logps/chosen": -112.09809875488281, "logps/rejected": -409.81597900390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.36172905564308167, "rewards/margins": 7.539512634277344, "rewards/rejected": -7.901241302490234, "step": 6089 }, { "epoch": 0.95, "learning_rate": 9.680681570620884e-06, "logits/chosen": -2.6805012226104736, "logits/rejected": -2.3660433292388916, "logps/chosen": -329.769287109375, "logps/rejected": -293.86669921875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.7526405453681946, "rewards/margins": 4.624634742736816, "rewards/rejected": -5.377274990081787, "step": 6090 }, { "epoch": 0.95, "learning_rate": 9.679948130089736e-06, "logits/chosen": -3.0975420475006104, "logits/rejected": -2.574384927749634, "logps/chosen": -479.80621337890625, "logps/rejected": -293.3150939941406, "loss": 2.1817, "rewards/accuracies": 0.5, "rewards/chosen": -3.3463387489318848, "rewards/margins": -0.9425629377365112, "rewards/rejected": -2.403775930404663, "step": 6091 }, { "epoch": 0.95, "learning_rate": 9.679214689558588e-06, "logits/chosen": -2.6714537143707275, "logits/rejected": -2.824033498764038, "logps/chosen": -119.03985595703125, "logps/rejected": -174.20263671875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.4152502417564392, "rewards/margins": 5.300081253051758, "rewards/rejected": -5.71533203125, "step": 6092 }, { "epoch": 0.95, "learning_rate": 9.67848124902744e-06, "logits/chosen": -1.3806174993515015, "logits/rejected": -2.654770851135254, "logps/chosen": -180.9877166748047, "logps/rejected": -535.4418334960938, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.5484275817871094, "rewards/margins": 7.877928733825684, "rewards/rejected": -8.426356315612793, "step": 6093 }, { "epoch": 0.95, "learning_rate": 9.677747808496291e-06, "logits/chosen": -2.666785717010498, "logits/rejected": -2.9667608737945557, "logps/chosen": -37.05214309692383, "logps/rejected": -195.61000061035156, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -1.2646459341049194, "rewards/margins": 4.013301849365234, "rewards/rejected": -5.277947902679443, "step": 6094 }, { "epoch": 0.95, "learning_rate": 9.677014367965143e-06, "logits/chosen": -1.857714295387268, "logits/rejected": -2.670057535171509, "logps/chosen": -87.40122985839844, "logps/rejected": -127.31420135498047, "loss": 0.0955, "rewards/accuracies": 1.0, "rewards/chosen": -1.2682327032089233, "rewards/margins": 2.6916005611419678, "rewards/rejected": -3.9598331451416016, "step": 6095 }, { "epoch": 0.95, "learning_rate": 9.676280927433995e-06, "logits/chosen": -2.5277061462402344, "logits/rejected": -3.2204408645629883, "logps/chosen": -201.79315185546875, "logps/rejected": -445.290771484375, "loss": 0.0984, "rewards/accuracies": 1.0, "rewards/chosen": -1.7856528759002686, "rewards/margins": 5.081914901733398, "rewards/rejected": -6.867568016052246, "step": 6096 }, { "epoch": 0.95, "learning_rate": 9.675547486902849e-06, "logits/chosen": -3.1322126388549805, "logits/rejected": -3.2385480403900146, "logps/chosen": -99.96383666992188, "logps/rejected": -91.28971099853516, "loss": 1.1627, "rewards/accuracies": 0.5, "rewards/chosen": -3.026759147644043, "rewards/margins": 0.8802413940429688, "rewards/rejected": -3.9070005416870117, "step": 6097 }, { "epoch": 0.95, "learning_rate": 9.6748140463717e-06, "logits/chosen": -2.023212194442749, "logits/rejected": -2.8178067207336426, "logps/chosen": -110.1508560180664, "logps/rejected": -430.87322998046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7770683169364929, "rewards/margins": 6.614614486694336, "rewards/rejected": -7.3916826248168945, "step": 6098 }, { "epoch": 0.95, "learning_rate": 9.674080605840552e-06, "logits/chosen": -2.138824224472046, "logits/rejected": -2.887281894683838, "logps/chosen": -559.5154418945312, "logps/rejected": -739.838623046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0289678573608398, "rewards/margins": 6.942217826843262, "rewards/rejected": -7.971185684204102, "step": 6099 }, { "epoch": 0.95, "learning_rate": 9.673347165309404e-06, "logits/chosen": -1.2787710428237915, "logits/rejected": -2.7725281715393066, "logps/chosen": -50.49164962768555, "logps/rejected": -375.3931884765625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.9081984162330627, "rewards/margins": 6.501837730407715, "rewards/rejected": -7.410036087036133, "step": 6100 }, { "epoch": 0.95, "learning_rate": 9.672613724778256e-06, "logits/chosen": -2.309603214263916, "logits/rejected": -2.751565456390381, "logps/chosen": -252.95352172851562, "logps/rejected": -278.6875, "loss": 5.4961, "rewards/accuracies": 0.0, "rewards/chosen": -6.574320316314697, "rewards/margins": -5.49200439453125, "rewards/rejected": -1.0823161602020264, "step": 6101 }, { "epoch": 0.95, "learning_rate": 9.671880284247108e-06, "logits/chosen": -2.577855348587036, "logits/rejected": -3.166327714920044, "logps/chosen": -264.2305603027344, "logps/rejected": -309.48077392578125, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -1.9791374206542969, "rewards/margins": 4.73569917678833, "rewards/rejected": -6.714837074279785, "step": 6102 }, { "epoch": 0.95, "learning_rate": 9.67114684371596e-06, "logits/chosen": -2.7193076610565186, "logits/rejected": -2.899186849594116, "logps/chosen": -61.07892990112305, "logps/rejected": -100.74250030517578, "loss": 0.1057, "rewards/accuracies": 1.0, "rewards/chosen": -0.12673501670360565, "rewards/margins": 3.1227893829345703, "rewards/rejected": -3.2495243549346924, "step": 6103 }, { "epoch": 0.95, "learning_rate": 9.670413403184812e-06, "logits/chosen": -2.416468858718872, "logits/rejected": -2.6050467491149902, "logps/chosen": -217.117431640625, "logps/rejected": -333.21240234375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.5029106140136719, "rewards/margins": 7.148639678955078, "rewards/rejected": -7.65155029296875, "step": 6104 }, { "epoch": 0.95, "learning_rate": 9.669679962653664e-06, "logits/chosen": -1.5352001190185547, "logits/rejected": -2.574475049972534, "logps/chosen": -131.68382263183594, "logps/rejected": -229.8292694091797, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.21337050199508667, "rewards/margins": 7.458530902862549, "rewards/rejected": -7.671901702880859, "step": 6105 }, { "epoch": 0.95, "learning_rate": 9.668946522122517e-06, "logits/chosen": -1.7444311380386353, "logits/rejected": -2.8695530891418457, "logps/chosen": -74.919677734375, "logps/rejected": -261.9267578125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.28448474407196045, "rewards/margins": 4.748131275177002, "rewards/rejected": -5.032615661621094, "step": 6106 }, { "epoch": 0.95, "learning_rate": 9.668213081591369e-06, "logits/chosen": -2.4234673976898193, "logits/rejected": -1.8447833061218262, "logps/chosen": -367.3833923339844, "logps/rejected": -487.3018798828125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.5172080993652344, "rewards/margins": 5.598773002624512, "rewards/rejected": -7.115981101989746, "step": 6107 }, { "epoch": 0.95, "learning_rate": 9.667479641060221e-06, "logits/chosen": -0.9214006066322327, "logits/rejected": -2.607511043548584, "logps/chosen": -60.53500747680664, "logps/rejected": -601.300048828125, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.5639377236366272, "rewards/margins": 6.686295509338379, "rewards/rejected": -7.250232696533203, "step": 6108 }, { "epoch": 0.95, "learning_rate": 9.666746200529073e-06, "logits/chosen": -2.836087942123413, "logits/rejected": -2.7079644203186035, "logps/chosen": -214.1885986328125, "logps/rejected": -252.87994384765625, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/chosen": -1.159070611000061, "rewards/margins": 4.702702522277832, "rewards/rejected": -5.8617730140686035, "step": 6109 }, { "epoch": 0.95, "learning_rate": 9.666012759997925e-06, "logits/chosen": -2.7747442722320557, "logits/rejected": -1.6523734331130981, "logps/chosen": -368.0215148925781, "logps/rejected": -342.2553405761719, "loss": 0.0491, "rewards/accuracies": 1.0, "rewards/chosen": -1.4755226373672485, "rewards/margins": 5.394911766052246, "rewards/rejected": -6.870434761047363, "step": 6110 }, { "epoch": 0.95, "learning_rate": 9.665279319466777e-06, "logits/chosen": -2.8506834506988525, "logits/rejected": -2.7735157012939453, "logps/chosen": -418.4374694824219, "logps/rejected": -347.7838134765625, "loss": 3.1936, "rewards/accuracies": 0.5, "rewards/chosen": -3.1400084495544434, "rewards/margins": -1.233067274093628, "rewards/rejected": -1.9069411754608154, "step": 6111 }, { "epoch": 0.95, "learning_rate": 9.664545878935629e-06, "logits/chosen": -1.7665796279907227, "logits/rejected": -2.875670909881592, "logps/chosen": -237.5020751953125, "logps/rejected": -638.4476318359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5455581545829773, "rewards/margins": 6.875056266784668, "rewards/rejected": -7.420614242553711, "step": 6112 }, { "epoch": 0.95, "learning_rate": 9.66381243840448e-06, "logits/chosen": -2.759603500366211, "logits/rejected": -1.861368179321289, "logps/chosen": -186.89974975585938, "logps/rejected": -175.900146484375, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -0.9392250180244446, "rewards/margins": 4.498722076416016, "rewards/rejected": -5.4379472732543945, "step": 6113 }, { "epoch": 0.95, "learning_rate": 9.663078997873332e-06, "logits/chosen": -1.159228801727295, "logits/rejected": -2.8099706172943115, "logps/chosen": -196.01846313476562, "logps/rejected": -758.4595336914062, "loss": 1.152, "rewards/accuracies": 0.5, "rewards/chosen": -2.1997947692871094, "rewards/margins": 1.6933648586273193, "rewards/rejected": -3.8931596279144287, "step": 6114 }, { "epoch": 0.95, "learning_rate": 9.662345557342186e-06, "logits/chosen": -2.5993030071258545, "logits/rejected": -2.668851852416992, "logps/chosen": -282.42962646484375, "logps/rejected": -396.265380859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7218043804168701, "rewards/margins": 6.926084041595459, "rewards/rejected": -7.64788818359375, "step": 6115 }, { "epoch": 0.95, "learning_rate": 9.661612116811038e-06, "logits/chosen": -2.6371960639953613, "logits/rejected": -2.8791399002075195, "logps/chosen": -258.4276428222656, "logps/rejected": -377.09173583984375, "loss": 0.5421, "rewards/accuracies": 0.5, "rewards/chosen": -0.7460883855819702, "rewards/margins": 4.178832530975342, "rewards/rejected": -4.924921035766602, "step": 6116 }, { "epoch": 0.95, "learning_rate": 9.66087867627989e-06, "logits/chosen": -1.4917254447937012, "logits/rejected": -2.6159777641296387, "logps/chosen": -143.97938537597656, "logps/rejected": -408.0995788574219, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3255783319473267, "rewards/margins": 6.923844337463379, "rewards/rejected": -8.249423027038574, "step": 6117 }, { "epoch": 0.95, "learning_rate": 9.660145235748743e-06, "logits/chosen": -1.982509970664978, "logits/rejected": -2.8121986389160156, "logps/chosen": -242.9597625732422, "logps/rejected": -266.3782958984375, "loss": 1.6383, "rewards/accuracies": 0.5, "rewards/chosen": -2.2128021717071533, "rewards/margins": 1.0066673755645752, "rewards/rejected": -3.2194695472717285, "step": 6118 }, { "epoch": 0.95, "learning_rate": 9.659411795217595e-06, "logits/chosen": -2.194807529449463, "logits/rejected": -2.5998711585998535, "logps/chosen": -251.6197967529297, "logps/rejected": -285.7749328613281, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9251174926757812, "rewards/margins": 6.985745429992676, "rewards/rejected": -7.910862922668457, "step": 6119 }, { "epoch": 0.95, "learning_rate": 9.658678354686447e-06, "logits/chosen": -2.29935359954834, "logits/rejected": -2.9173598289489746, "logps/chosen": -146.40415954589844, "logps/rejected": -180.14768981933594, "loss": 1.6532, "rewards/accuracies": 0.5, "rewards/chosen": -1.9456188678741455, "rewards/margins": 0.9783586263656616, "rewards/rejected": -2.9239776134490967, "step": 6120 }, { "epoch": 0.95, "learning_rate": 9.657944914155299e-06, "logits/chosen": -2.744760751724243, "logits/rejected": -3.0409774780273438, "logps/chosen": -25.162994384765625, "logps/rejected": -137.64720153808594, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.297293484210968, "rewards/margins": 4.776354789733887, "rewards/rejected": -5.073647975921631, "step": 6121 }, { "epoch": 0.95, "learning_rate": 9.65721147362415e-06, "logits/chosen": -2.7647178173065186, "logits/rejected": -2.535762310028076, "logps/chosen": -78.5840835571289, "logps/rejected": -128.91220092773438, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": 0.20431843400001526, "rewards/margins": 5.674272537231445, "rewards/rejected": -5.469954013824463, "step": 6122 }, { "epoch": 0.95, "learning_rate": 9.656478033093003e-06, "logits/chosen": -2.2161056995391846, "logits/rejected": -3.186903953552246, "logps/chosen": -94.0877456665039, "logps/rejected": -164.1233673095703, "loss": 0.044, "rewards/accuracies": 1.0, "rewards/chosen": -0.4806045591831207, "rewards/margins": 3.2147843837738037, "rewards/rejected": -3.6953887939453125, "step": 6123 }, { "epoch": 0.95, "learning_rate": 9.655744592561856e-06, "logits/chosen": -1.188610315322876, "logits/rejected": -3.1337592601776123, "logps/chosen": -125.11599731445312, "logps/rejected": -597.9832763671875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.8407118320465088, "rewards/margins": 7.151656150817871, "rewards/rejected": -7.992367744445801, "step": 6124 }, { "epoch": 0.95, "learning_rate": 9.655011152030708e-06, "logits/chosen": -2.2578673362731934, "logits/rejected": -3.183837413787842, "logps/chosen": -325.4646911621094, "logps/rejected": -405.99420166015625, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": -1.395459532737732, "rewards/margins": 3.413126230239868, "rewards/rejected": -4.808586120605469, "step": 6125 }, { "epoch": 0.95, "learning_rate": 9.65427771149956e-06, "logits/chosen": -2.509230375289917, "logits/rejected": -2.963853359222412, "logps/chosen": -271.10101318359375, "logps/rejected": -387.2508850097656, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.509467363357544, "rewards/margins": 5.851903438568115, "rewards/rejected": -6.361371040344238, "step": 6126 }, { "epoch": 0.95, "learning_rate": 9.653544270968412e-06, "logits/chosen": -2.7268409729003906, "logits/rejected": -3.001298427581787, "logps/chosen": -34.486427307128906, "logps/rejected": -200.1953125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.012002289295196533, "rewards/margins": 5.8004536628723145, "rewards/rejected": -5.788451194763184, "step": 6127 }, { "epoch": 0.95, "learning_rate": 9.652810830437264e-06, "logits/chosen": -2.1033270359039307, "logits/rejected": -2.9640541076660156, "logps/chosen": -81.99730682373047, "logps/rejected": -293.25115966796875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.5595948100090027, "rewards/margins": 5.820719242095947, "rewards/rejected": -6.380313873291016, "step": 6128 }, { "epoch": 0.95, "learning_rate": 9.652077389906116e-06, "logits/chosen": -2.486069440841675, "logits/rejected": -0.9693480730056763, "logps/chosen": -495.2683410644531, "logps/rejected": -282.5794982910156, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": 0.02019958198070526, "rewards/margins": 4.060436248779297, "rewards/rejected": -4.040236473083496, "step": 6129 }, { "epoch": 0.95, "learning_rate": 9.651343949374967e-06, "logits/chosen": -2.7100560665130615, "logits/rejected": -2.6563220024108887, "logps/chosen": -272.2699279785156, "logps/rejected": -264.55279541015625, "loss": 3.0803, "rewards/accuracies": 0.5, "rewards/chosen": -3.4399094581604004, "rewards/margins": -0.25235819816589355, "rewards/rejected": -3.187551259994507, "step": 6130 }, { "epoch": 0.95, "learning_rate": 9.65061050884382e-06, "logits/chosen": -2.3082222938537598, "logits/rejected": -2.299978256225586, "logps/chosen": -120.71739959716797, "logps/rejected": -159.67694091796875, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -0.7863609194755554, "rewards/margins": 5.631180763244629, "rewards/rejected": -6.41754150390625, "step": 6131 }, { "epoch": 0.95, "learning_rate": 9.649877068312671e-06, "logits/chosen": -2.321441411972046, "logits/rejected": -2.6288211345672607, "logps/chosen": -119.1043930053711, "logps/rejected": -234.52879333496094, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.05127754807472229, "rewards/margins": 4.564844131469727, "rewards/rejected": -4.616121768951416, "step": 6132 }, { "epoch": 0.95, "learning_rate": 9.649143627781525e-06, "logits/chosen": -2.5285959243774414, "logits/rejected": -2.8170809745788574, "logps/chosen": -81.00061798095703, "logps/rejected": -228.6878662109375, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -0.16426125168800354, "rewards/margins": 5.448716163635254, "rewards/rejected": -5.612977981567383, "step": 6133 }, { "epoch": 0.95, "learning_rate": 9.648410187250377e-06, "logits/chosen": -2.628355026245117, "logits/rejected": -2.610187292098999, "logps/chosen": -94.21052551269531, "logps/rejected": -145.92843627929688, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": -0.9013261795043945, "rewards/margins": 3.0341382026672363, "rewards/rejected": -3.935464382171631, "step": 6134 }, { "epoch": 0.95, "learning_rate": 9.647676746719228e-06, "logits/chosen": -2.1169967651367188, "logits/rejected": -2.988591194152832, "logps/chosen": -148.7418212890625, "logps/rejected": -329.56072998046875, "loss": 1.3048, "rewards/accuracies": 0.5, "rewards/chosen": -2.7650070190429688, "rewards/margins": 1.1538411378860474, "rewards/rejected": -3.9188480377197266, "step": 6135 }, { "epoch": 0.95, "learning_rate": 9.64694330618808e-06, "logits/chosen": -2.438244342803955, "logits/rejected": -1.3599817752838135, "logps/chosen": -169.73597717285156, "logps/rejected": -62.654205322265625, "loss": 2.3241, "rewards/accuracies": 0.5, "rewards/chosen": -2.767303466796875, "rewards/margins": -0.9148144721984863, "rewards/rejected": -1.8524889945983887, "step": 6136 }, { "epoch": 0.95, "learning_rate": 9.646209865656932e-06, "logits/chosen": -2.889878749847412, "logits/rejected": -3.2812907695770264, "logps/chosen": -89.90213012695312, "logps/rejected": -178.82757568359375, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -1.0689334869384766, "rewards/margins": 3.848233461380005, "rewards/rejected": -4.917166709899902, "step": 6137 }, { "epoch": 0.95, "learning_rate": 9.645476425125784e-06, "logits/chosen": -3.028128147125244, "logits/rejected": -2.608736276626587, "logps/chosen": -200.19558715820312, "logps/rejected": -218.40908813476562, "loss": 0.6194, "rewards/accuracies": 0.5, "rewards/chosen": -1.3943150043487549, "rewards/margins": 3.5406064987182617, "rewards/rejected": -4.9349212646484375, "step": 6138 }, { "epoch": 0.95, "learning_rate": 9.644742984594636e-06, "logits/chosen": -2.4650347232818604, "logits/rejected": -3.064038038253784, "logps/chosen": -43.42454147338867, "logps/rejected": -187.5340576171875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": 0.08686552941799164, "rewards/margins": 4.652032375335693, "rewards/rejected": -4.56516695022583, "step": 6139 }, { "epoch": 0.95, "learning_rate": 9.644009544063488e-06, "logits/chosen": -2.9690864086151123, "logits/rejected": -3.164586067199707, "logps/chosen": -238.29388427734375, "logps/rejected": -321.8786315917969, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.24025383591651917, "rewards/margins": 6.72229528427124, "rewards/rejected": -6.962549209594727, "step": 6140 }, { "epoch": 0.96, "learning_rate": 9.643276103532341e-06, "logits/chosen": -2.523195505142212, "logits/rejected": -3.036421775817871, "logps/chosen": -352.8623046875, "logps/rejected": -367.84344482421875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.07930970937013626, "rewards/margins": 6.322044372558594, "rewards/rejected": -6.4013543128967285, "step": 6141 }, { "epoch": 0.96, "learning_rate": 9.642542663001193e-06, "logits/chosen": -1.5234529972076416, "logits/rejected": -2.7686405181884766, "logps/chosen": -127.69261932373047, "logps/rejected": -352.37255859375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.49729442596435547, "rewards/margins": 6.244805335998535, "rewards/rejected": -6.742099761962891, "step": 6142 }, { "epoch": 0.96, "learning_rate": 9.641809222470045e-06, "logits/chosen": -2.6518640518188477, "logits/rejected": -1.8438810110092163, "logps/chosen": -446.9171142578125, "logps/rejected": -284.9561767578125, "loss": 0.0912, "rewards/accuracies": 1.0, "rewards/chosen": -1.82247793674469, "rewards/margins": 4.0419464111328125, "rewards/rejected": -5.864424705505371, "step": 6143 }, { "epoch": 0.96, "learning_rate": 9.641075781938897e-06, "logits/chosen": -1.4710967540740967, "logits/rejected": -2.948174476623535, "logps/chosen": -374.49462890625, "logps/rejected": -487.46185302734375, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.4480239748954773, "rewards/margins": 4.355734348297119, "rewards/rejected": -4.80375862121582, "step": 6144 }, { "epoch": 0.96, "learning_rate": 9.640342341407749e-06, "logits/chosen": -2.7720303535461426, "logits/rejected": -2.5483946800231934, "logps/chosen": -864.14599609375, "logps/rejected": -544.5487670898438, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": -1.877742052078247, "rewards/margins": 4.62506103515625, "rewards/rejected": -6.502802848815918, "step": 6145 }, { "epoch": 0.96, "learning_rate": 9.6396089008766e-06, "logits/chosen": -1.6697181463241577, "logits/rejected": -3.0363214015960693, "logps/chosen": -74.60227966308594, "logps/rejected": -407.86126708984375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.6750543713569641, "rewards/margins": 6.136927127838135, "rewards/rejected": -6.811981201171875, "step": 6146 }, { "epoch": 0.96, "learning_rate": 9.638875460345453e-06, "logits/chosen": -2.8355143070220947, "logits/rejected": -1.681128978729248, "logps/chosen": -227.38656616210938, "logps/rejected": -66.43952941894531, "loss": 1.058, "rewards/accuracies": 0.5, "rewards/chosen": -2.412541151046753, "rewards/margins": -0.12362706661224365, "rewards/rejected": -2.288914203643799, "step": 6147 }, { "epoch": 0.96, "learning_rate": 9.638142019814305e-06, "logits/chosen": -1.829878330230713, "logits/rejected": -2.825251340866089, "logps/chosen": -88.7238998413086, "logps/rejected": -349.06121826171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.1788894683122635, "rewards/margins": 6.9618940353393555, "rewards/rejected": -7.140783786773682, "step": 6148 }, { "epoch": 0.96, "learning_rate": 9.637408579283156e-06, "logits/chosen": -1.180103063583374, "logits/rejected": -2.875006914138794, "logps/chosen": -43.68769836425781, "logps/rejected": -238.96572875976562, "loss": 0.0726, "rewards/accuracies": 1.0, "rewards/chosen": -0.9774819016456604, "rewards/margins": 3.5125601291656494, "rewards/rejected": -4.490042209625244, "step": 6149 }, { "epoch": 0.96, "learning_rate": 9.63667513875201e-06, "logits/chosen": -2.8024611473083496, "logits/rejected": -2.9974052906036377, "logps/chosen": -375.5470275878906, "logps/rejected": -485.7466125488281, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": -0.925297737121582, "rewards/margins": 4.861909866333008, "rewards/rejected": -5.787208080291748, "step": 6150 }, { "epoch": 0.96, "learning_rate": 9.635941698220862e-06, "logits/chosen": -2.184906244277954, "logits/rejected": -2.7615649700164795, "logps/chosen": -48.86031723022461, "logps/rejected": -158.65460205078125, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 0.05741921067237854, "rewards/margins": 5.858107089996338, "rewards/rejected": -5.80068826675415, "step": 6151 }, { "epoch": 0.96, "learning_rate": 9.635208257689716e-06, "logits/chosen": -1.8332502841949463, "logits/rejected": -2.794776439666748, "logps/chosen": -212.51072692871094, "logps/rejected": -304.16058349609375, "loss": 2.7244, "rewards/accuracies": 0.5, "rewards/chosen": -2.8189964294433594, "rewards/margins": -1.044848084449768, "rewards/rejected": -1.7741484642028809, "step": 6152 }, { "epoch": 0.96, "learning_rate": 9.634474817158567e-06, "logits/chosen": -1.9353212118148804, "logits/rejected": -2.785959482192993, "logps/chosen": -164.52056884765625, "logps/rejected": -400.6906433105469, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.1352977752685547, "rewards/margins": 7.196352958679199, "rewards/rejected": -7.331650733947754, "step": 6153 }, { "epoch": 0.96, "learning_rate": 9.63374137662742e-06, "logits/chosen": -2.667257070541382, "logits/rejected": -2.923598051071167, "logps/chosen": -158.07920837402344, "logps/rejected": -273.4339294433594, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": -0.17199860513210297, "rewards/margins": 4.968984603881836, "rewards/rejected": -5.1409831047058105, "step": 6154 }, { "epoch": 0.96, "learning_rate": 9.633007936096271e-06, "logits/chosen": -2.743515729904175, "logits/rejected": -2.5066072940826416, "logps/chosen": -59.638084411621094, "logps/rejected": -242.9805908203125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": 0.21098214387893677, "rewards/margins": 7.321846008300781, "rewards/rejected": -7.110864162445068, "step": 6155 }, { "epoch": 0.96, "learning_rate": 9.632274495565123e-06, "logits/chosen": -1.7620131969451904, "logits/rejected": -2.8339879512786865, "logps/chosen": -171.87046813964844, "logps/rejected": -507.45123291015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.2989000380039215, "rewards/margins": 7.784428596496582, "rewards/rejected": -8.083328247070312, "step": 6156 }, { "epoch": 0.96, "learning_rate": 9.631541055033975e-06, "logits/chosen": -2.3796706199645996, "logits/rejected": -2.7691895961761475, "logps/chosen": -234.33299255371094, "logps/rejected": -430.53668212890625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.36253243684768677, "rewards/margins": 6.992159366607666, "rewards/rejected": -7.354691505432129, "step": 6157 }, { "epoch": 0.96, "learning_rate": 9.630807614502827e-06, "logits/chosen": -1.9172412157058716, "logits/rejected": -2.858023166656494, "logps/chosen": -318.2041015625, "logps/rejected": -562.7210083007812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 1.2032750844955444, "rewards/margins": 6.935046195983887, "rewards/rejected": -5.731770992279053, "step": 6158 }, { "epoch": 0.96, "learning_rate": 9.63007417397168e-06, "logits/chosen": -3.0980947017669678, "logits/rejected": -2.587129831314087, "logps/chosen": -200.73684692382812, "logps/rejected": -44.772396087646484, "loss": 1.878, "rewards/accuracies": 0.0, "rewards/chosen": -3.5568687915802, "rewards/margins": -1.6224801540374756, "rewards/rejected": -1.9343886375427246, "step": 6159 }, { "epoch": 0.96, "learning_rate": 9.629340733440532e-06, "logits/chosen": -2.0352962017059326, "logits/rejected": -3.1645870208740234, "logps/chosen": -103.993408203125, "logps/rejected": -430.44586181640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.14012594521045685, "rewards/margins": 7.500014305114746, "rewards/rejected": -7.359888076782227, "step": 6160 }, { "epoch": 0.96, "learning_rate": 9.628607292909384e-06, "logits/chosen": -1.2466957569122314, "logits/rejected": -2.4474339485168457, "logps/chosen": -153.6005096435547, "logps/rejected": -586.8013916015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.4898356795310974, "rewards/margins": 9.739175796508789, "rewards/rejected": -10.229011535644531, "step": 6161 }, { "epoch": 0.96, "learning_rate": 9.627873852378236e-06, "logits/chosen": -2.433058977127075, "logits/rejected": -2.988995313644409, "logps/chosen": -94.92890167236328, "logps/rejected": -279.38409423828125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.048639681190252304, "rewards/margins": 5.598031520843506, "rewards/rejected": -5.646671295166016, "step": 6162 }, { "epoch": 0.96, "learning_rate": 9.627140411847088e-06, "logits/chosen": -2.3902809619903564, "logits/rejected": -2.6310224533081055, "logps/chosen": -66.29573059082031, "logps/rejected": -100.57252502441406, "loss": 0.8507, "rewards/accuracies": 0.5, "rewards/chosen": -2.040120840072632, "rewards/margins": 2.3147597312927246, "rewards/rejected": -4.354880332946777, "step": 6163 }, { "epoch": 0.96, "learning_rate": 9.62640697131594e-06, "logits/chosen": -2.876013994216919, "logits/rejected": -1.218023657798767, "logps/chosen": -405.81402587890625, "logps/rejected": -183.5952606201172, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": 0.2748333215713501, "rewards/margins": 4.57865571975708, "rewards/rejected": -4.3038225173950195, "step": 6164 }, { "epoch": 0.96, "learning_rate": 9.625673530784792e-06, "logits/chosen": -1.1262463331222534, "logits/rejected": -2.4228744506835938, "logps/chosen": -223.34861755371094, "logps/rejected": -523.6932373046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8245170712471008, "rewards/margins": 8.933599472045898, "rewards/rejected": -9.758116722106934, "step": 6165 }, { "epoch": 0.96, "learning_rate": 9.624940090253643e-06, "logits/chosen": -2.4508190155029297, "logits/rejected": -2.7305679321289062, "logps/chosen": -120.42750549316406, "logps/rejected": -203.13148498535156, "loss": 0.3999, "rewards/accuracies": 0.5, "rewards/chosen": 0.04753074049949646, "rewards/margins": 2.5158395767211914, "rewards/rejected": -2.468308687210083, "step": 6166 }, { "epoch": 0.96, "learning_rate": 9.624206649722495e-06, "logits/chosen": -2.041907548904419, "logits/rejected": -2.7750027179718018, "logps/chosen": -147.72402954101562, "logps/rejected": -351.85125732421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.07097821682691574, "rewards/margins": 8.45300006866455, "rewards/rejected": -8.3820219039917, "step": 6167 }, { "epoch": 0.96, "learning_rate": 9.623473209191349e-06, "logits/chosen": -2.768906354904175, "logits/rejected": -3.059400796890259, "logps/chosen": -52.4947395324707, "logps/rejected": -155.06808471679688, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.5791950821876526, "rewards/margins": 4.745922088623047, "rewards/rejected": -5.325117111206055, "step": 6168 }, { "epoch": 0.96, "learning_rate": 9.6227397686602e-06, "logits/chosen": -2.534224271774292, "logits/rejected": -2.8600525856018066, "logps/chosen": -261.8709716796875, "logps/rejected": -353.995849609375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": 0.5270569324493408, "rewards/margins": 6.839177131652832, "rewards/rejected": -6.312119960784912, "step": 6169 }, { "epoch": 0.96, "learning_rate": 9.622006328129053e-06, "logits/chosen": -1.4754374027252197, "logits/rejected": -3.21734356880188, "logps/chosen": -54.18136215209961, "logps/rejected": -399.84429931640625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.16325554251670837, "rewards/margins": 7.567096710205078, "rewards/rejected": -7.730352401733398, "step": 6170 }, { "epoch": 0.96, "learning_rate": 9.621272887597905e-06, "logits/chosen": -2.904757499694824, "logits/rejected": -3.0705204010009766, "logps/chosen": -133.3963165283203, "logps/rejected": -235.34426879882812, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.4614672064781189, "rewards/margins": 6.194982051849365, "rewards/rejected": -6.656449317932129, "step": 6171 }, { "epoch": 0.96, "learning_rate": 9.620539447066756e-06, "logits/chosen": -2.7331624031066895, "logits/rejected": -2.7104239463806152, "logps/chosen": -73.40277099609375, "logps/rejected": -384.8343811035156, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.01255936548113823, "rewards/margins": 6.775153636932373, "rewards/rejected": -6.787713050842285, "step": 6172 }, { "epoch": 0.96, "learning_rate": 9.619806006535608e-06, "logits/chosen": -2.512638568878174, "logits/rejected": -2.9636764526367188, "logps/chosen": -29.349910736083984, "logps/rejected": -179.7412567138672, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -0.36977919936180115, "rewards/margins": 4.955883026123047, "rewards/rejected": -5.325662136077881, "step": 6173 }, { "epoch": 0.96, "learning_rate": 9.61907256600446e-06, "logits/chosen": -2.640489339828491, "logits/rejected": -1.6815763711929321, "logps/chosen": -222.52906799316406, "logps/rejected": -215.0687255859375, "loss": 3.8743, "rewards/accuracies": 0.5, "rewards/chosen": -3.4605765342712402, "rewards/margins": -1.3586704730987549, "rewards/rejected": -2.1019060611724854, "step": 6174 }, { "epoch": 0.96, "learning_rate": 9.618339125473312e-06, "logits/chosen": -1.412093162536621, "logits/rejected": -2.8355250358581543, "logps/chosen": -49.53288650512695, "logps/rejected": -372.0345153808594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9984791278839111, "rewards/margins": 8.972147941589355, "rewards/rejected": -7.973669052124023, "step": 6175 }, { "epoch": 0.96, "learning_rate": 9.617605684942164e-06, "logits/chosen": -0.7988730072975159, "logits/rejected": -2.5899078845977783, "logps/chosen": -36.8182258605957, "logps/rejected": -343.52947998046875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.9094201326370239, "rewards/margins": 5.71535587310791, "rewards/rejected": -6.6247758865356445, "step": 6176 }, { "epoch": 0.96, "learning_rate": 9.616872244411018e-06, "logits/chosen": -3.0014195442199707, "logits/rejected": -0.7819682359695435, "logps/chosen": -927.798095703125, "logps/rejected": -252.64027404785156, "loss": 2.3498, "rewards/accuracies": 0.5, "rewards/chosen": -2.3568434715270996, "rewards/margins": 0.09742474555969238, "rewards/rejected": -2.454268217086792, "step": 6177 }, { "epoch": 0.96, "learning_rate": 9.61613880387987e-06, "logits/chosen": -1.43494713306427, "logits/rejected": -2.214524745941162, "logps/chosen": -413.703857421875, "logps/rejected": -624.0146484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8131850957870483, "rewards/margins": 9.096818923950195, "rewards/rejected": -9.910003662109375, "step": 6178 }, { "epoch": 0.96, "learning_rate": 9.615405363348721e-06, "logits/chosen": -2.144955635070801, "logits/rejected": -2.972364902496338, "logps/chosen": -100.56114196777344, "logps/rejected": -299.47821044921875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.45584574341773987, "rewards/margins": 5.8904619216918945, "rewards/rejected": -5.4346160888671875, "step": 6179 }, { "epoch": 0.96, "learning_rate": 9.614671922817573e-06, "logits/chosen": -1.3690378665924072, "logits/rejected": -2.3015403747558594, "logps/chosen": -143.81460571289062, "logps/rejected": -368.0129699707031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.02692107856273651, "rewards/margins": 9.619966506958008, "rewards/rejected": -9.593046188354492, "step": 6180 }, { "epoch": 0.96, "learning_rate": 9.613938482286425e-06, "logits/chosen": -2.946549415588379, "logits/rejected": -2.491321325302124, "logps/chosen": -414.8710632324219, "logps/rejected": -362.76812744140625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7630348205566406, "rewards/margins": 5.771768569946289, "rewards/rejected": -6.53480339050293, "step": 6181 }, { "epoch": 0.96, "learning_rate": 9.613205041755277e-06, "logits/chosen": -1.883713722229004, "logits/rejected": -2.978358507156372, "logps/chosen": -151.85511779785156, "logps/rejected": -305.0532531738281, "loss": 2.6385, "rewards/accuracies": 0.5, "rewards/chosen": -3.3996078968048096, "rewards/margins": 1.738215446472168, "rewards/rejected": -5.137823581695557, "step": 6182 }, { "epoch": 0.96, "learning_rate": 9.612471601224129e-06, "logits/chosen": -2.939507484436035, "logits/rejected": -2.130809783935547, "logps/chosen": -618.842529296875, "logps/rejected": -306.56781005859375, "loss": 2.7807, "rewards/accuracies": 0.5, "rewards/chosen": -2.4612855911254883, "rewards/margins": -0.8324716091156006, "rewards/rejected": -1.6288139820098877, "step": 6183 }, { "epoch": 0.96, "learning_rate": 9.611738160692982e-06, "logits/chosen": -3.0001699924468994, "logits/rejected": -2.812242269515991, "logps/chosen": -562.42431640625, "logps/rejected": -524.3187255859375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -0.6431518793106079, "rewards/margins": 6.638419151306152, "rewards/rejected": -7.2815704345703125, "step": 6184 }, { "epoch": 0.96, "learning_rate": 9.611004720161834e-06, "logits/chosen": -2.2092785835266113, "logits/rejected": -2.47491717338562, "logps/chosen": -505.0860290527344, "logps/rejected": -510.59716796875, "loss": 3.2316, "rewards/accuracies": 0.5, "rewards/chosen": -2.687042236328125, "rewards/margins": 2.087000846862793, "rewards/rejected": -4.774043560028076, "step": 6185 }, { "epoch": 0.96, "learning_rate": 9.610271279630688e-06, "logits/chosen": -2.383650779724121, "logits/rejected": -2.5713725090026855, "logps/chosen": -94.3414306640625, "logps/rejected": -246.9678955078125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": 0.06690063327550888, "rewards/margins": 7.531926155090332, "rewards/rejected": -7.465025424957275, "step": 6186 }, { "epoch": 0.96, "learning_rate": 9.60953783909954e-06, "logits/chosen": -3.174330234527588, "logits/rejected": -3.2430260181427, "logps/chosen": -244.04136657714844, "logps/rejected": -433.71728515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.046892549842596054, "rewards/margins": 9.167854309082031, "rewards/rejected": -9.120962142944336, "step": 6187 }, { "epoch": 0.96, "learning_rate": 9.608804398568392e-06, "logits/chosen": -2.792316436767578, "logits/rejected": -2.782371997833252, "logps/chosen": -174.244384765625, "logps/rejected": -97.69284057617188, "loss": 2.3831, "rewards/accuracies": 0.5, "rewards/chosen": -2.5310986042022705, "rewards/margins": 1.020113229751587, "rewards/rejected": -3.5512118339538574, "step": 6188 }, { "epoch": 0.96, "learning_rate": 9.608070958037243e-06, "logits/chosen": -2.2610926628112793, "logits/rejected": -2.687776565551758, "logps/chosen": -136.83740234375, "logps/rejected": -230.9487762451172, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 0.7388905882835388, "rewards/margins": 5.525484561920166, "rewards/rejected": -4.786593914031982, "step": 6189 }, { "epoch": 0.96, "learning_rate": 9.607337517506095e-06, "logits/chosen": -2.8193135261535645, "logits/rejected": -2.0050463676452637, "logps/chosen": -448.61767578125, "logps/rejected": -270.9756164550781, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 0.5278980731964111, "rewards/margins": 4.628643035888672, "rewards/rejected": -4.100744724273682, "step": 6190 }, { "epoch": 0.96, "learning_rate": 9.606604076974947e-06, "logits/chosen": -1.6446318626403809, "logits/rejected": -2.8287124633789062, "logps/chosen": -470.67120361328125, "logps/rejected": -545.6779174804688, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": -0.2995254695415497, "rewards/margins": 3.2556862831115723, "rewards/rejected": -3.5552115440368652, "step": 6191 }, { "epoch": 0.96, "learning_rate": 9.605870636443799e-06, "logits/chosen": -2.854572296142578, "logits/rejected": -3.0734338760375977, "logps/chosen": -78.84596252441406, "logps/rejected": -320.77978515625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.1055097579956055, "rewards/margins": 7.827588081359863, "rewards/rejected": -8.933097839355469, "step": 6192 }, { "epoch": 0.96, "learning_rate": 9.605137195912651e-06, "logits/chosen": -3.040191411972046, "logits/rejected": -2.590310573577881, "logps/chosen": -295.12030029296875, "logps/rejected": -87.6013412475586, "loss": 6.5424, "rewards/accuracies": 0.0, "rewards/chosen": -7.076851844787598, "rewards/margins": -6.538744926452637, "rewards/rejected": -0.5381061434745789, "step": 6193 }, { "epoch": 0.96, "learning_rate": 9.604403755381503e-06, "logits/chosen": -2.1468703746795654, "logits/rejected": -3.159616231918335, "logps/chosen": -304.9324645996094, "logps/rejected": -509.3553771972656, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -1.068185806274414, "rewards/margins": 4.754243850708008, "rewards/rejected": -5.822429656982422, "step": 6194 }, { "epoch": 0.96, "learning_rate": 9.603670314850356e-06, "logits/chosen": -2.942042827606201, "logits/rejected": -2.0524113178253174, "logps/chosen": -174.34202575683594, "logps/rejected": -157.81930541992188, "loss": 4.4891, "rewards/accuracies": 0.5, "rewards/chosen": -3.9571173191070557, "rewards/margins": -1.598790168762207, "rewards/rejected": -2.3583271503448486, "step": 6195 }, { "epoch": 0.96, "learning_rate": 9.602936874319208e-06, "logits/chosen": -1.3647394180297852, "logits/rejected": -2.9154090881347656, "logps/chosen": -20.22336196899414, "logps/rejected": -266.41900634765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.01877489686012268, "rewards/margins": 7.71152925491333, "rewards/rejected": -7.730303764343262, "step": 6196 }, { "epoch": 0.96, "learning_rate": 9.60220343378806e-06, "logits/chosen": -2.259005069732666, "logits/rejected": -2.6178410053253174, "logps/chosen": -218.0055389404297, "logps/rejected": -501.25384521484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.03653869032859802, "rewards/margins": 7.645768642425537, "rewards/rejected": -7.609230041503906, "step": 6197 }, { "epoch": 0.96, "learning_rate": 9.601469993256912e-06, "logits/chosen": -2.0004982948303223, "logits/rejected": -2.7611751556396484, "logps/chosen": -19.9906005859375, "logps/rejected": -253.07174682617188, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.209268257021904, "rewards/margins": 6.250237941741943, "rewards/rejected": -6.459506034851074, "step": 6198 }, { "epoch": 0.96, "learning_rate": 9.600736552725764e-06, "logits/chosen": -1.9054429531097412, "logits/rejected": -2.898592472076416, "logps/chosen": -51.01580047607422, "logps/rejected": -189.10122680664062, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -0.11535787582397461, "rewards/margins": 4.7143235206604, "rewards/rejected": -4.829681396484375, "step": 6199 }, { "epoch": 0.96, "learning_rate": 9.600003112194616e-06, "logits/chosen": -1.391115665435791, "logits/rejected": -2.9140896797180176, "logps/chosen": -160.16903686523438, "logps/rejected": -507.57110595703125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.1985553801059723, "rewards/margins": 5.436120510101318, "rewards/rejected": -5.634675979614258, "step": 6200 }, { "epoch": 0.96, "learning_rate": 9.599269671663468e-06, "logits/chosen": -2.0658366680145264, "logits/rejected": -2.705101490020752, "logps/chosen": -195.94888305664062, "logps/rejected": -173.16458129882812, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/chosen": -2.2616701126098633, "rewards/margins": 3.2794265747070312, "rewards/rejected": -5.5410966873168945, "step": 6201 }, { "epoch": 0.96, "learning_rate": 9.59853623113232e-06, "logits/chosen": -2.560519218444824, "logits/rejected": -2.998885154724121, "logps/chosen": -72.44330596923828, "logps/rejected": -218.89715576171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.04256439581513405, "rewards/margins": 6.562335968017578, "rewards/rejected": -6.604900360107422, "step": 6202 }, { "epoch": 0.96, "learning_rate": 9.597802790601171e-06, "logits/chosen": -3.3156092166900635, "logits/rejected": -2.670438528060913, "logps/chosen": -268.3734436035156, "logps/rejected": -156.18093872070312, "loss": 2.1813, "rewards/accuracies": 0.5, "rewards/chosen": -2.663663387298584, "rewards/margins": 0.2594902515411377, "rewards/rejected": -2.9231536388397217, "step": 6203 }, { "epoch": 0.96, "learning_rate": 9.597069350070025e-06, "logits/chosen": -1.5000861883163452, "logits/rejected": -2.800874710083008, "logps/chosen": -89.14373779296875, "logps/rejected": -290.2662353515625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5011104345321655, "rewards/margins": 5.732787609100342, "rewards/rejected": -6.233898162841797, "step": 6204 }, { "epoch": 0.97, "learning_rate": 9.596335909538877e-06, "logits/chosen": -3.0215115547180176, "logits/rejected": -2.2872254848480225, "logps/chosen": -451.09124755859375, "logps/rejected": -309.3572082519531, "loss": 2.0875, "rewards/accuracies": 0.5, "rewards/chosen": -2.1683366298675537, "rewards/margins": -0.6713426113128662, "rewards/rejected": -1.4969940185546875, "step": 6205 }, { "epoch": 0.97, "learning_rate": 9.595602469007729e-06, "logits/chosen": -1.1174733638763428, "logits/rejected": -2.791579484939575, "logps/chosen": -26.823726654052734, "logps/rejected": -208.85491943359375, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -0.504841148853302, "rewards/margins": 3.4445242881774902, "rewards/rejected": -3.9493656158447266, "step": 6206 }, { "epoch": 0.97, "learning_rate": 9.59486902847658e-06, "logits/chosen": -2.3824539184570312, "logits/rejected": -3.184464931488037, "logps/chosen": -168.84576416015625, "logps/rejected": -383.7837219238281, "loss": 0.8785, "rewards/accuracies": 0.5, "rewards/chosen": -0.9359886646270752, "rewards/margins": 2.952808380126953, "rewards/rejected": -3.8887970447540283, "step": 6207 }, { "epoch": 0.97, "learning_rate": 9.594135587945433e-06, "logits/chosen": -1.2257977724075317, "logits/rejected": -2.262564182281494, "logps/chosen": -189.55108642578125, "logps/rejected": -299.6994323730469, "loss": 3.0467, "rewards/accuracies": 0.5, "rewards/chosen": -2.814896821975708, "rewards/margins": 1.196298360824585, "rewards/rejected": -4.011195659637451, "step": 6208 }, { "epoch": 0.97, "learning_rate": 9.593402147414284e-06, "logits/chosen": -2.428290367126465, "logits/rejected": -3.221618413925171, "logps/chosen": -123.84809875488281, "logps/rejected": -317.0482482910156, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.2694316804409027, "rewards/margins": 5.803444862365723, "rewards/rejected": -6.0728759765625, "step": 6209 }, { "epoch": 0.97, "learning_rate": 9.592668706883136e-06, "logits/chosen": -1.1974437236785889, "logits/rejected": -2.678617477416992, "logps/chosen": -51.82023620605469, "logps/rejected": -247.1780242919922, "loss": 1.5398, "rewards/accuracies": 0.5, "rewards/chosen": 0.35588839650154114, "rewards/margins": 2.387275218963623, "rewards/rejected": -2.031386613845825, "step": 6210 }, { "epoch": 0.97, "learning_rate": 9.591935266351988e-06, "logits/chosen": -2.8985085487365723, "logits/rejected": -2.6869962215423584, "logps/chosen": -850.5579833984375, "logps/rejected": -640.1892700195312, "loss": 2.526, "rewards/accuracies": 0.5, "rewards/chosen": -2.563978672027588, "rewards/margins": 0.6154861450195312, "rewards/rejected": -3.179464817047119, "step": 6211 }, { "epoch": 0.97, "learning_rate": 9.59120182582084e-06, "logits/chosen": -2.297940969467163, "logits/rejected": -2.705402374267578, "logps/chosen": -238.87142944335938, "logps/rejected": -206.28250122070312, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -0.4039852023124695, "rewards/margins": 3.482609748840332, "rewards/rejected": -3.886594772338867, "step": 6212 }, { "epoch": 0.97, "learning_rate": 9.590468385289694e-06, "logits/chosen": -2.3064839839935303, "logits/rejected": -2.7828941345214844, "logps/chosen": -219.83963012695312, "logps/rejected": -323.76123046875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.2289491891860962, "rewards/margins": 4.611095428466797, "rewards/rejected": -4.38214635848999, "step": 6213 }, { "epoch": 0.97, "learning_rate": 9.589734944758546e-06, "logits/chosen": -2.0120372772216797, "logits/rejected": -2.8521666526794434, "logps/chosen": -199.52804565429688, "logps/rejected": -363.22662353515625, "loss": 1.582, "rewards/accuracies": 0.5, "rewards/chosen": -0.3144546151161194, "rewards/margins": 3.505801200866699, "rewards/rejected": -3.8202555179595947, "step": 6214 }, { "epoch": 0.97, "learning_rate": 9.589001504227397e-06, "logits/chosen": -2.558736801147461, "logits/rejected": -2.9708974361419678, "logps/chosen": -215.01492309570312, "logps/rejected": -332.86114501953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.5675468444824219, "rewards/margins": 6.198528289794922, "rewards/rejected": -5.6309814453125, "step": 6215 }, { "epoch": 0.97, "learning_rate": 9.58826806369625e-06, "logits/chosen": -2.924605369567871, "logits/rejected": -2.5740435123443604, "logps/chosen": -438.71240234375, "logps/rejected": -333.545654296875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.6139556765556335, "rewards/margins": 5.045104026794434, "rewards/rejected": -4.431148529052734, "step": 6216 }, { "epoch": 0.97, "learning_rate": 9.587534623165101e-06, "logits/chosen": -2.6010544300079346, "logits/rejected": -2.8360304832458496, "logps/chosen": -161.81826782226562, "logps/rejected": -225.08270263671875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.0894954651594162, "rewards/margins": 4.793033599853516, "rewards/rejected": -4.882528781890869, "step": 6217 }, { "epoch": 0.97, "learning_rate": 9.586801182633955e-06, "logits/chosen": -2.9461143016815186, "logits/rejected": -2.0790441036224365, "logps/chosen": -302.48455810546875, "logps/rejected": -125.942138671875, "loss": 3.5007, "rewards/accuracies": 0.5, "rewards/chosen": -2.7475693225860596, "rewards/margins": -1.3122427463531494, "rewards/rejected": -1.435326337814331, "step": 6218 }, { "epoch": 0.97, "learning_rate": 9.586067742102807e-06, "logits/chosen": -2.820385456085205, "logits/rejected": -1.8819719552993774, "logps/chosen": -123.05520629882812, "logps/rejected": -16.783721923828125, "loss": 2.7127, "rewards/accuracies": 0.0, "rewards/chosen": -2.486788272857666, "rewards/margins": -2.552755832672119, "rewards/rejected": 0.06596766412258148, "step": 6219 }, { "epoch": 0.97, "learning_rate": 9.585334301571658e-06, "logits/chosen": -3.1887779235839844, "logits/rejected": -3.02856707572937, "logps/chosen": -96.57445526123047, "logps/rejected": -86.59585571289062, "loss": 2.523, "rewards/accuracies": 0.5, "rewards/chosen": -1.5858573913574219, "rewards/margins": -0.35785913467407227, "rewards/rejected": -1.2279982566833496, "step": 6220 }, { "epoch": 0.97, "learning_rate": 9.58460086104051e-06, "logits/chosen": -3.1431047916412354, "logits/rejected": -2.7564361095428467, "logps/chosen": -313.62841796875, "logps/rejected": -456.02386474609375, "loss": 1.5704, "rewards/accuracies": 0.5, "rewards/chosen": -2.0416243076324463, "rewards/margins": 0.23431265354156494, "rewards/rejected": -2.2759368419647217, "step": 6221 }, { "epoch": 0.97, "learning_rate": 9.583867420509364e-06, "logits/chosen": -2.7200851440429688, "logits/rejected": -2.0220234394073486, "logps/chosen": -415.40435791015625, "logps/rejected": -496.60394287109375, "loss": 2.3095, "rewards/accuracies": 0.5, "rewards/chosen": -1.7292202711105347, "rewards/margins": 0.9548954963684082, "rewards/rejected": -2.6841156482696533, "step": 6222 }, { "epoch": 0.97, "learning_rate": 9.583133979978216e-06, "logits/chosen": -2.711167097091675, "logits/rejected": -2.5128586292266846, "logps/chosen": -53.8497200012207, "logps/rejected": -163.27694702148438, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": 0.37630006670951843, "rewards/margins": 4.5555572509765625, "rewards/rejected": -4.179257392883301, "step": 6223 }, { "epoch": 0.97, "learning_rate": 9.582400539447068e-06, "logits/chosen": -1.364686369895935, "logits/rejected": -2.7502663135528564, "logps/chosen": -104.90660858154297, "logps/rejected": -545.2352294921875, "loss": 0.5793, "rewards/accuracies": 0.5, "rewards/chosen": -2.4384026527404785, "rewards/margins": 4.696839332580566, "rewards/rejected": -7.135241508483887, "step": 6224 }, { "epoch": 0.97, "learning_rate": 9.58166709891592e-06, "logits/chosen": -3.062361240386963, "logits/rejected": -2.086700439453125, "logps/chosen": -382.86883544921875, "logps/rejected": -211.37632751464844, "loss": 3.2365, "rewards/accuracies": 0.0, "rewards/chosen": -3.522914409637451, "rewards/margins": -3.1955935955047607, "rewards/rejected": -0.3273206949234009, "step": 6225 }, { "epoch": 0.97, "learning_rate": 9.580933658384771e-06, "logits/chosen": -2.494206428527832, "logits/rejected": -2.802155017852783, "logps/chosen": -86.93824768066406, "logps/rejected": -285.25958251953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.6204158067703247, "rewards/margins": 6.772459030151367, "rewards/rejected": -6.152043342590332, "step": 6226 }, { "epoch": 0.97, "learning_rate": 9.580200217853623e-06, "logits/chosen": -2.758876085281372, "logits/rejected": -2.1699655055999756, "logps/chosen": -96.96336364746094, "logps/rejected": -127.6331787109375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.5718387365341187, "rewards/margins": 5.396501541137695, "rewards/rejected": -4.824662685394287, "step": 6227 }, { "epoch": 0.97, "learning_rate": 9.579466777322475e-06, "logits/chosen": -2.7201104164123535, "logits/rejected": -2.8814425468444824, "logps/chosen": -176.2348175048828, "logps/rejected": -239.89186096191406, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 0.71990966796875, "rewards/margins": 5.780969619750977, "rewards/rejected": -5.061059951782227, "step": 6228 }, { "epoch": 0.97, "learning_rate": 9.578733336791327e-06, "logits/chosen": -2.469926357269287, "logits/rejected": -2.9782934188842773, "logps/chosen": -119.39543914794922, "logps/rejected": -376.19464111328125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": 0.2554527819156647, "rewards/margins": 6.421801567077637, "rewards/rejected": -6.166348457336426, "step": 6229 }, { "epoch": 0.97, "learning_rate": 9.577999896260179e-06, "logits/chosen": -2.965543508529663, "logits/rejected": -2.07104754447937, "logps/chosen": -298.6251220703125, "logps/rejected": -238.94622802734375, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -0.21572303771972656, "rewards/margins": 4.2700090408325195, "rewards/rejected": -4.485732078552246, "step": 6230 }, { "epoch": 0.97, "learning_rate": 9.577266455729033e-06, "logits/chosen": -2.8487496376037598, "logits/rejected": -2.4861602783203125, "logps/chosen": -566.4890747070312, "logps/rejected": -372.7986755371094, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.10355836153030396, "rewards/margins": 5.608241081237793, "rewards/rejected": -5.711799621582031, "step": 6231 }, { "epoch": 0.97, "learning_rate": 9.576533015197884e-06, "logits/chosen": -2.3735344409942627, "logits/rejected": -2.753835916519165, "logps/chosen": -258.829833984375, "logps/rejected": -253.6710205078125, "loss": 1.6416, "rewards/accuracies": 0.5, "rewards/chosen": 0.5052885413169861, "rewards/margins": 0.1411290168762207, "rewards/rejected": 0.3641594648361206, "step": 6232 }, { "epoch": 0.97, "learning_rate": 9.575799574666736e-06, "logits/chosen": -2.5737552642822266, "logits/rejected": -2.5110697746276855, "logps/chosen": -298.3368225097656, "logps/rejected": -349.8101806640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.18218308687210083, "rewards/margins": 9.033626556396484, "rewards/rejected": -8.85144329071045, "step": 6233 }, { "epoch": 0.97, "learning_rate": 9.575066134135588e-06, "logits/chosen": -2.847066879272461, "logits/rejected": -1.990869164466858, "logps/chosen": -218.5345458984375, "logps/rejected": -147.5896453857422, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": 1.0632301568984985, "rewards/margins": 6.095424652099609, "rewards/rejected": -5.0321946144104, "step": 6234 }, { "epoch": 0.97, "learning_rate": 9.57433269360444e-06, "logits/chosen": -2.5902957916259766, "logits/rejected": -2.402218818664551, "logps/chosen": -221.00555419921875, "logps/rejected": -302.375, "loss": 2.4603, "rewards/accuracies": 0.5, "rewards/chosen": -2.327911138534546, "rewards/margins": -0.10649681091308594, "rewards/rejected": -2.22141432762146, "step": 6235 }, { "epoch": 0.97, "learning_rate": 9.573599253073292e-06, "logits/chosen": -2.2776050567626953, "logits/rejected": -2.9711766242980957, "logps/chosen": -445.05584716796875, "logps/rejected": -438.46575927734375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.4428901672363281, "rewards/margins": 6.738068580627441, "rewards/rejected": -6.295178413391113, "step": 6236 }, { "epoch": 0.97, "learning_rate": 9.572865812542144e-06, "logits/chosen": -2.351957082748413, "logits/rejected": -2.675652503967285, "logps/chosen": -41.61209487915039, "logps/rejected": -129.4499969482422, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.5350133180618286, "rewards/margins": 5.6381611824035645, "rewards/rejected": -5.103147506713867, "step": 6237 }, { "epoch": 0.97, "learning_rate": 9.572132372010996e-06, "logits/chosen": -1.816841721534729, "logits/rejected": -2.934204578399658, "logps/chosen": -51.935298919677734, "logps/rejected": -322.94488525390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.5462117195129395, "rewards/margins": 9.035576820373535, "rewards/rejected": -8.489365577697754, "step": 6238 }, { "epoch": 0.97, "learning_rate": 9.57139893147985e-06, "logits/chosen": -2.3049187660217285, "logits/rejected": -2.7863502502441406, "logps/chosen": -353.8038635253906, "logps/rejected": -327.8482360839844, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -0.3440307676792145, "rewards/margins": 3.0080275535583496, "rewards/rejected": -3.3520584106445312, "step": 6239 }, { "epoch": 0.97, "learning_rate": 9.570665490948701e-06, "logits/chosen": -2.2579879760742188, "logits/rejected": -2.829129695892334, "logps/chosen": -258.1494140625, "logps/rejected": -426.56744384765625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.18399964272975922, "rewards/margins": 6.5301923751831055, "rewards/rejected": -6.7141923904418945, "step": 6240 }, { "epoch": 0.97, "learning_rate": 9.569932050417553e-06, "logits/chosen": -2.948814868927002, "logits/rejected": -2.5372610092163086, "logps/chosen": -138.4453582763672, "logps/rejected": -102.221435546875, "loss": 1.2671, "rewards/accuracies": 0.5, "rewards/chosen": -1.4007012844085693, "rewards/margins": 0.4571419954299927, "rewards/rejected": -1.857843279838562, "step": 6241 }, { "epoch": 0.97, "learning_rate": 9.569198609886405e-06, "logits/chosen": -2.6681532859802246, "logits/rejected": -3.1071929931640625, "logps/chosen": -115.41519165039062, "logps/rejected": -221.91513061523438, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 1.0492316484451294, "rewards/margins": 5.339356422424316, "rewards/rejected": -4.290124893188477, "step": 6242 }, { "epoch": 0.97, "learning_rate": 9.568465169355257e-06, "logits/chosen": -2.82309889793396, "logits/rejected": -2.4287166595458984, "logps/chosen": -277.63250732421875, "logps/rejected": -266.147705078125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.5478904843330383, "rewards/margins": 5.462343215942383, "rewards/rejected": -6.010233402252197, "step": 6243 }, { "epoch": 0.97, "learning_rate": 9.567731728824109e-06, "logits/chosen": -1.7414270639419556, "logits/rejected": -2.056220293045044, "logps/chosen": -310.41583251953125, "logps/rejected": -302.7808837890625, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -0.16147619485855103, "rewards/margins": 3.644184112548828, "rewards/rejected": -3.8056602478027344, "step": 6244 }, { "epoch": 0.97, "learning_rate": 9.56699828829296e-06, "logits/chosen": -2.848123788833618, "logits/rejected": -3.1253364086151123, "logps/chosen": -154.11199951171875, "logps/rejected": -224.41433715820312, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 0.5507427453994751, "rewards/margins": 6.447328567504883, "rewards/rejected": -5.896585941314697, "step": 6245 }, { "epoch": 0.97, "learning_rate": 9.566264847761812e-06, "logits/chosen": -2.8912549018859863, "logits/rejected": -2.9957327842712402, "logps/chosen": -41.89085388183594, "logps/rejected": -157.50164794921875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.5859649777412415, "rewards/margins": 6.074965476989746, "rewards/rejected": -5.48900032043457, "step": 6246 }, { "epoch": 0.97, "learning_rate": 9.565531407230664e-06, "logits/chosen": -2.7496628761291504, "logits/rejected": -2.715129852294922, "logps/chosen": -59.26285934448242, "logps/rejected": -393.7645568847656, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.48080921173095703, "rewards/margins": 5.273216247558594, "rewards/rejected": -4.792407035827637, "step": 6247 }, { "epoch": 0.97, "learning_rate": 9.564797966699518e-06, "logits/chosen": -1.3066436052322388, "logits/rejected": -2.4935710430145264, "logps/chosen": -50.26982116699219, "logps/rejected": -141.90023803710938, "loss": 2.1283, "rewards/accuracies": 0.5, "rewards/chosen": -1.9164133071899414, "rewards/margins": 0.65787672996521, "rewards/rejected": -2.5742900371551514, "step": 6248 }, { "epoch": 0.97, "learning_rate": 9.56406452616837e-06, "logits/chosen": -1.6952284574508667, "logits/rejected": -2.912860870361328, "logps/chosen": -45.4339599609375, "logps/rejected": -260.703369140625, "loss": 0.1181, "rewards/accuracies": 1.0, "rewards/chosen": -0.7549529075622559, "rewards/margins": 3.3142099380493164, "rewards/rejected": -4.069162845611572, "step": 6249 }, { "epoch": 0.97, "learning_rate": 9.563331085637222e-06, "logits/chosen": -2.2871692180633545, "logits/rejected": -2.933835506439209, "logps/chosen": -151.67398071289062, "logps/rejected": -325.2928466796875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 0.5354415774345398, "rewards/margins": 7.461771488189697, "rewards/rejected": -6.926329612731934, "step": 6250 }, { "epoch": 0.97, "learning_rate": 9.562597645106073e-06, "logits/chosen": -1.3442938327789307, "logits/rejected": -1.5233213901519775, "logps/chosen": -268.6041259765625, "logps/rejected": -118.39869689941406, "loss": 1.47, "rewards/accuracies": 0.5, "rewards/chosen": -2.973895311355591, "rewards/margins": 1.3360962867736816, "rewards/rejected": -4.309991836547852, "step": 6251 }, { "epoch": 0.97, "learning_rate": 9.561864204574927e-06, "logits/chosen": -2.29811954498291, "logits/rejected": -2.8288514614105225, "logps/chosen": -217.82498168945312, "logps/rejected": -181.51007080078125, "loss": 2.4616, "rewards/accuracies": 0.5, "rewards/chosen": -2.6351945400238037, "rewards/margins": 1.0629851818084717, "rewards/rejected": -3.6981797218322754, "step": 6252 }, { "epoch": 0.97, "learning_rate": 9.561130764043779e-06, "logits/chosen": -2.3019979000091553, "logits/rejected": -2.7445428371429443, "logps/chosen": -131.65127563476562, "logps/rejected": -161.10025024414062, "loss": 0.7268, "rewards/accuracies": 0.5, "rewards/chosen": -1.7385916709899902, "rewards/margins": 2.1608262062072754, "rewards/rejected": -3.8994178771972656, "step": 6253 }, { "epoch": 0.97, "learning_rate": 9.56039732351263e-06, "logits/chosen": -1.2580784559249878, "logits/rejected": -2.391002893447876, "logps/chosen": -54.210670471191406, "logps/rejected": -295.06268310546875, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": 0.49327123165130615, "rewards/margins": 5.342856407165527, "rewards/rejected": -4.849585056304932, "step": 6254 }, { "epoch": 0.97, "learning_rate": 9.559663882981483e-06, "logits/chosen": -2.6644909381866455, "logits/rejected": -2.873406171798706, "logps/chosen": -120.34095764160156, "logps/rejected": -138.04832458496094, "loss": 3.2106, "rewards/accuracies": 0.5, "rewards/chosen": -2.206413984298706, "rewards/margins": 0.5730042457580566, "rewards/rejected": -2.7794182300567627, "step": 6255 }, { "epoch": 0.97, "learning_rate": 9.558930442450335e-06, "logits/chosen": -2.0892281532287598, "logits/rejected": -2.714743137359619, "logps/chosen": -426.0549621582031, "logps/rejected": -688.579833984375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7132183313369751, "rewards/margins": 6.544251441955566, "rewards/rejected": -7.25747013092041, "step": 6256 }, { "epoch": 0.97, "learning_rate": 9.558197001919188e-06, "logits/chosen": -2.7049546241760254, "logits/rejected": -2.353573799133301, "logps/chosen": -299.29742431640625, "logps/rejected": -284.811767578125, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": 0.09219513833522797, "rewards/margins": 4.216672420501709, "rewards/rejected": -4.124477386474609, "step": 6257 }, { "epoch": 0.97, "learning_rate": 9.55746356138804e-06, "logits/chosen": -2.8158321380615234, "logits/rejected": -2.4518229961395264, "logps/chosen": -349.3868103027344, "logps/rejected": -315.224365234375, "loss": 0.68, "rewards/accuracies": 0.5, "rewards/chosen": -0.19333800673484802, "rewards/margins": 2.020174741744995, "rewards/rejected": -2.213512897491455, "step": 6258 }, { "epoch": 0.97, "learning_rate": 9.556730120856892e-06, "logits/chosen": -2.672205686569214, "logits/rejected": -2.831427812576294, "logps/chosen": -49.04267883300781, "logps/rejected": -179.615966796875, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": 0.7050316333770752, "rewards/margins": 5.096644401550293, "rewards/rejected": -4.391613006591797, "step": 6259 }, { "epoch": 0.97, "learning_rate": 9.555996680325744e-06, "logits/chosen": -3.041621685028076, "logits/rejected": -3.1786413192749023, "logps/chosen": -18.92930793762207, "logps/rejected": -175.82644653320312, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.8109999895095825, "rewards/margins": 5.313401222229004, "rewards/rejected": -4.502401351928711, "step": 6260 }, { "epoch": 0.97, "learning_rate": 9.555263239794596e-06, "logits/chosen": -2.8511769771575928, "logits/rejected": -1.9532065391540527, "logps/chosen": -429.50421142578125, "logps/rejected": -406.8714904785156, "loss": 3.0281, "rewards/accuracies": 0.5, "rewards/chosen": -2.760211229324341, "rewards/margins": -0.643770694732666, "rewards/rejected": -2.116440534591675, "step": 6261 }, { "epoch": 0.97, "learning_rate": 9.554529799263448e-06, "logits/chosen": -2.9909164905548096, "logits/rejected": -2.943552255630493, "logps/chosen": -438.87274169921875, "logps/rejected": -564.9078369140625, "loss": 2.8895, "rewards/accuracies": 0.5, "rewards/chosen": -2.3500754833221436, "rewards/margins": -0.6431968212127686, "rewards/rejected": -1.706878662109375, "step": 6262 }, { "epoch": 0.97, "learning_rate": 9.5537963587323e-06, "logits/chosen": -2.1161341667175293, "logits/rejected": -2.835574150085449, "logps/chosen": -264.0272521972656, "logps/rejected": -444.49444580078125, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.11399412155151367, "rewards/margins": 7.428476333618164, "rewards/rejected": -7.542470455169678, "step": 6263 }, { "epoch": 0.97, "learning_rate": 9.553062918201151e-06, "logits/chosen": -2.8076815605163574, "logits/rejected": -3.177905559539795, "logps/chosen": -408.41363525390625, "logps/rejected": -456.294921875, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": 0.7190209031105042, "rewards/margins": 5.336740016937256, "rewards/rejected": -4.617718696594238, "step": 6264 }, { "epoch": 0.97, "learning_rate": 9.552329477670003e-06, "logits/chosen": -0.7908903956413269, "logits/rejected": -1.081950306892395, "logps/chosen": -197.02099609375, "logps/rejected": -345.6508483886719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.7770099639892578, "rewards/margins": 8.438456535339355, "rewards/rejected": -7.661446571350098, "step": 6265 }, { "epoch": 0.97, "learning_rate": 9.551596037138857e-06, "logits/chosen": -2.0542805194854736, "logits/rejected": -3.0578818321228027, "logps/chosen": -91.57388305664062, "logps/rejected": -247.30307006835938, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 1.029673457145691, "rewards/margins": 6.530542850494385, "rewards/rejected": -5.500869274139404, "step": 6266 }, { "epoch": 0.97, "learning_rate": 9.550862596607709e-06, "logits/chosen": -1.707766056060791, "logits/rejected": -2.675924777984619, "logps/chosen": -116.61468505859375, "logps/rejected": -293.51513671875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.18008004128932953, "rewards/margins": 5.131270408630371, "rewards/rejected": -5.311350345611572, "step": 6267 }, { "epoch": 0.97, "learning_rate": 9.55012915607656e-06, "logits/chosen": -2.7842042446136475, "logits/rejected": -2.02517032623291, "logps/chosen": -232.85105895996094, "logps/rejected": -88.95230102539062, "loss": 3.8105, "rewards/accuracies": 0.0, "rewards/chosen": -3.4979255199432373, "rewards/margins": -3.7876222133636475, "rewards/rejected": 0.28969669342041016, "step": 6268 }, { "epoch": 0.97, "learning_rate": 9.549395715545412e-06, "logits/chosen": -2.320699691772461, "logits/rejected": -3.1983821392059326, "logps/chosen": -78.57925415039062, "logps/rejected": -381.49603271484375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 0.6717807054519653, "rewards/margins": 6.017419815063477, "rewards/rejected": -5.345639228820801, "step": 6269 }, { "epoch": 0.98, "learning_rate": 9.548662275014264e-06, "logits/chosen": -2.0754613876342773, "logits/rejected": -2.5697476863861084, "logps/chosen": -107.17799377441406, "logps/rejected": -159.40524291992188, "loss": 2.6128, "rewards/accuracies": 0.5, "rewards/chosen": -1.858249306678772, "rewards/margins": 0.36864137649536133, "rewards/rejected": -2.2268905639648438, "step": 6270 }, { "epoch": 0.98, "learning_rate": 9.547928834483116e-06, "logits/chosen": -2.7506043910980225, "logits/rejected": -2.9021143913269043, "logps/chosen": -62.783355712890625, "logps/rejected": -131.5442657470703, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": 0.04588586091995239, "rewards/margins": 4.749551773071289, "rewards/rejected": -4.703665733337402, "step": 6271 }, { "epoch": 0.98, "learning_rate": 9.547195393951968e-06, "logits/chosen": -2.1104583740234375, "logits/rejected": -2.8539199829101562, "logps/chosen": -408.6537170410156, "logps/rejected": -794.78759765625, "loss": 2.7785, "rewards/accuracies": 0.5, "rewards/chosen": -2.018233299255371, "rewards/margins": 0.4032166004180908, "rewards/rejected": -2.421450138092041, "step": 6272 }, { "epoch": 0.98, "learning_rate": 9.54646195342082e-06, "logits/chosen": -2.0598273277282715, "logits/rejected": -3.1103007793426514, "logps/chosen": -349.0176086425781, "logps/rejected": -410.3255615234375, "loss": 0.0695, "rewards/accuracies": 1.0, "rewards/chosen": -0.11231689900159836, "rewards/margins": 3.3989930152893066, "rewards/rejected": -3.5113096237182617, "step": 6273 }, { "epoch": 0.98, "learning_rate": 9.545728512889672e-06, "logits/chosen": -1.6928526163101196, "logits/rejected": -2.8828463554382324, "logps/chosen": -182.88925170898438, "logps/rejected": -461.2537841796875, "loss": 1.5882, "rewards/accuracies": 0.5, "rewards/chosen": -1.6672847270965576, "rewards/margins": -0.7746439576148987, "rewards/rejected": -0.8926407098770142, "step": 6274 }, { "epoch": 0.98, "learning_rate": 9.544995072358525e-06, "logits/chosen": -2.9844672679901123, "logits/rejected": -2.5583760738372803, "logps/chosen": -382.1125183105469, "logps/rejected": -245.25218200683594, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.21497344970703125, "rewards/margins": 5.625589370727539, "rewards/rejected": -5.410615921020508, "step": 6275 }, { "epoch": 0.98, "learning_rate": 9.544261631827377e-06, "logits/chosen": -2.943665027618408, "logits/rejected": -2.9853830337524414, "logps/chosen": -287.80426025390625, "logps/rejected": -274.152587890625, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.5774040222167969, "rewards/margins": 5.654262065887451, "rewards/rejected": -6.231666088104248, "step": 6276 }, { "epoch": 0.98, "learning_rate": 9.543528191296229e-06, "logits/chosen": -2.09281849861145, "logits/rejected": -2.796868085861206, "logps/chosen": -75.60553741455078, "logps/rejected": -153.63351440429688, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": 0.535595715045929, "rewards/margins": 3.336843967437744, "rewards/rejected": -2.80124831199646, "step": 6277 }, { "epoch": 0.98, "learning_rate": 9.542794750765081e-06, "logits/chosen": -3.0935988426208496, "logits/rejected": -2.6627037525177, "logps/chosen": -361.1808776855469, "logps/rejected": -250.38375854492188, "loss": 2.3789, "rewards/accuracies": 0.5, "rewards/chosen": -1.3073945045471191, "rewards/margins": 0.6177129745483398, "rewards/rejected": -1.925107479095459, "step": 6278 }, { "epoch": 0.98, "learning_rate": 9.542061310233933e-06, "logits/chosen": -2.2683517932891846, "logits/rejected": -3.0317513942718506, "logps/chosen": -262.5601806640625, "logps/rejected": -478.34442138671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.5186333060264587, "rewards/margins": 6.021352767944336, "rewards/rejected": -5.502718925476074, "step": 6279 }, { "epoch": 0.98, "learning_rate": 9.541327869702785e-06, "logits/chosen": -2.4715676307678223, "logits/rejected": -2.3476898670196533, "logps/chosen": -117.70292663574219, "logps/rejected": -97.95915985107422, "loss": 0.871, "rewards/accuracies": 0.5, "rewards/chosen": -0.8473573923110962, "rewards/margins": 1.4226399660110474, "rewards/rejected": -2.2699973583221436, "step": 6280 }, { "epoch": 0.98, "learning_rate": 9.540594429171637e-06, "logits/chosen": -2.960142135620117, "logits/rejected": -2.621641159057617, "logps/chosen": -386.41827392578125, "logps/rejected": -293.2673645019531, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": 0.11248242855072021, "rewards/margins": 5.774561405181885, "rewards/rejected": -5.662078857421875, "step": 6281 }, { "epoch": 0.98, "learning_rate": 9.539860988640488e-06, "logits/chosen": -1.8538777828216553, "logits/rejected": -2.7375705242156982, "logps/chosen": -79.2629623413086, "logps/rejected": -343.188232421875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.6880056858062744, "rewards/margins": 7.130714416503906, "rewards/rejected": -6.4427080154418945, "step": 6282 }, { "epoch": 0.98, "learning_rate": 9.53912754810934e-06, "logits/chosen": -2.145456552505493, "logits/rejected": -2.7710587978363037, "logps/chosen": -156.1843719482422, "logps/rejected": -197.33062744140625, "loss": 1.9738, "rewards/accuracies": 0.5, "rewards/chosen": -2.1213130950927734, "rewards/margins": 0.44179463386535645, "rewards/rejected": -2.56310772895813, "step": 6283 }, { "epoch": 0.98, "learning_rate": 9.538394107578194e-06, "logits/chosen": -1.8645298480987549, "logits/rejected": -2.7738029956817627, "logps/chosen": -72.38233184814453, "logps/rejected": -311.8740539550781, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": 0.263825923204422, "rewards/margins": 3.3873419761657715, "rewards/rejected": -3.123516082763672, "step": 6284 }, { "epoch": 0.98, "learning_rate": 9.537660667047046e-06, "logits/chosen": -3.130871057510376, "logits/rejected": -2.623166561126709, "logps/chosen": -258.26397705078125, "logps/rejected": -104.38475036621094, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": 1.7420436143875122, "rewards/margins": 5.482870578765869, "rewards/rejected": -3.7408270835876465, "step": 6285 }, { "epoch": 0.98, "learning_rate": 9.5369272265159e-06, "logits/chosen": -2.761831521987915, "logits/rejected": -2.521413803100586, "logps/chosen": -72.42809295654297, "logps/rejected": -106.89163970947266, "loss": 1.8293, "rewards/accuracies": 0.5, "rewards/chosen": -2.1602799892425537, "rewards/margins": 0.676926851272583, "rewards/rejected": -2.8372068405151367, "step": 6286 }, { "epoch": 0.98, "learning_rate": 9.536193785984751e-06, "logits/chosen": -1.211989164352417, "logits/rejected": -2.7170190811157227, "logps/chosen": -80.07954406738281, "logps/rejected": -409.0859375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": 0.27652496099472046, "rewards/margins": 5.927680492401123, "rewards/rejected": -5.651155471801758, "step": 6287 }, { "epoch": 0.98, "learning_rate": 9.535460345453603e-06, "logits/chosen": -2.550231456756592, "logits/rejected": -1.7774486541748047, "logps/chosen": -478.9141845703125, "logps/rejected": -207.04685974121094, "loss": 2.8231, "rewards/accuracies": 0.5, "rewards/chosen": -3.354762315750122, "rewards/margins": -0.18668866157531738, "rewards/rejected": -3.1680736541748047, "step": 6288 }, { "epoch": 0.98, "learning_rate": 9.534726904922455e-06, "logits/chosen": -1.577161431312561, "logits/rejected": -1.8054089546203613, "logps/chosen": -401.3210754394531, "logps/rejected": -399.9771728515625, "loss": 1.7682, "rewards/accuracies": 0.5, "rewards/chosen": -2.056715488433838, "rewards/margins": 1.8013916015625, "rewards/rejected": -3.858107089996338, "step": 6289 }, { "epoch": 0.98, "learning_rate": 9.533993464391307e-06, "logits/chosen": -2.8285882472991943, "logits/rejected": -2.665602922439575, "logps/chosen": -457.6638488769531, "logps/rejected": -712.3812255859375, "loss": 2.7758, "rewards/accuracies": 0.5, "rewards/chosen": -2.1675431728363037, "rewards/margins": 0.6065902709960938, "rewards/rejected": -2.7741334438323975, "step": 6290 }, { "epoch": 0.98, "learning_rate": 9.533260023860159e-06, "logits/chosen": -2.26051664352417, "logits/rejected": -3.0034420490264893, "logps/chosen": -128.57748413085938, "logps/rejected": -347.1737060546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 1.410488486289978, "rewards/margins": 6.608720779418945, "rewards/rejected": -5.198232650756836, "step": 6291 }, { "epoch": 0.98, "learning_rate": 9.53252658332901e-06, "logits/chosen": -2.8228790760040283, "logits/rejected": -2.305210828781128, "logps/chosen": -445.1676330566406, "logps/rejected": -316.4565734863281, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.0840842723846436, "rewards/margins": 5.374274253845215, "rewards/rejected": -6.4583587646484375, "step": 6292 }, { "epoch": 0.98, "learning_rate": 9.531793142797864e-06, "logits/chosen": -2.7180566787719727, "logits/rejected": -2.886643886566162, "logps/chosen": -99.80693054199219, "logps/rejected": -135.06422424316406, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -0.6808784604072571, "rewards/margins": 3.8123414516448975, "rewards/rejected": -4.49321985244751, "step": 6293 }, { "epoch": 0.98, "learning_rate": 9.531059702266716e-06, "logits/chosen": -3.148163318634033, "logits/rejected": -2.833608388900757, "logps/chosen": -506.342529296875, "logps/rejected": -446.6517333984375, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": 0.2866344451904297, "rewards/margins": 3.7023520469665527, "rewards/rejected": -3.415717363357544, "step": 6294 }, { "epoch": 0.98, "learning_rate": 9.530326261735568e-06, "logits/chosen": -2.6519572734832764, "logits/rejected": -2.985689878463745, "logps/chosen": -157.26199340820312, "logps/rejected": -146.50750732421875, "loss": 0.6565, "rewards/accuracies": 0.5, "rewards/chosen": -1.9951698780059814, "rewards/margins": 1.01422119140625, "rewards/rejected": -3.0093910694122314, "step": 6295 }, { "epoch": 0.98, "learning_rate": 9.52959282120442e-06, "logits/chosen": -2.9512555599212646, "logits/rejected": -2.7829208374023438, "logps/chosen": -163.7523651123047, "logps/rejected": -283.3206787109375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.656170666217804, "rewards/margins": 5.624556064605713, "rewards/rejected": -4.968385696411133, "step": 6296 }, { "epoch": 0.98, "learning_rate": 9.528859380673272e-06, "logits/chosen": -0.8840811252593994, "logits/rejected": -2.745897054672241, "logps/chosen": -92.12826538085938, "logps/rejected": -459.47222900390625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.27614516019821167, "rewards/margins": 5.854462623596191, "rewards/rejected": -5.578317642211914, "step": 6297 }, { "epoch": 0.98, "learning_rate": 9.528125940142124e-06, "logits/chosen": -2.738856792449951, "logits/rejected": -2.2442426681518555, "logps/chosen": -300.3815612792969, "logps/rejected": -312.6432189941406, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": 0.7632921934127808, "rewards/margins": 5.484286308288574, "rewards/rejected": -4.720993995666504, "step": 6298 }, { "epoch": 0.98, "learning_rate": 9.527392499610975e-06, "logits/chosen": -2.8165924549102783, "logits/rejected": -2.7685797214508057, "logps/chosen": -215.29141235351562, "logps/rejected": -281.3154296875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 0.17652054131031036, "rewards/margins": 5.061269283294678, "rewards/rejected": -4.884748935699463, "step": 6299 }, { "epoch": 0.98, "learning_rate": 9.526659059079827e-06, "logits/chosen": -2.7457823753356934, "logits/rejected": -1.3395187854766846, "logps/chosen": -254.9951171875, "logps/rejected": -154.09214782714844, "loss": 2.5654, "rewards/accuracies": 0.0, "rewards/chosen": -2.3754799365997314, "rewards/margins": -2.3550286293029785, "rewards/rejected": -0.020451359450817108, "step": 6300 }, { "epoch": 0.98, "learning_rate": 9.52592561854868e-06, "logits/chosen": -2.297546863555908, "logits/rejected": -3.0914535522460938, "logps/chosen": -363.8687744140625, "logps/rejected": -504.2030944824219, "loss": 0.1641, "rewards/accuracies": 1.0, "rewards/chosen": 0.0014137262478470802, "rewards/margins": 1.7913398742675781, "rewards/rejected": -1.7899261713027954, "step": 6301 }, { "epoch": 0.98, "learning_rate": 9.525192178017533e-06, "logits/chosen": -2.419396162033081, "logits/rejected": -3.282768964767456, "logps/chosen": -390.61834716796875, "logps/rejected": -378.748046875, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": 0.08518868684768677, "rewards/margins": 3.2317044734954834, "rewards/rejected": -3.1465158462524414, "step": 6302 }, { "epoch": 0.98, "learning_rate": 9.524458737486385e-06, "logits/chosen": -2.5659444332122803, "logits/rejected": -3.2562625408172607, "logps/chosen": -62.374412536621094, "logps/rejected": -395.51458740234375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": 0.532833993434906, "rewards/margins": 6.802215099334717, "rewards/rejected": -6.269380569458008, "step": 6303 }, { "epoch": 0.98, "learning_rate": 9.523725296955237e-06, "logits/chosen": -2.7809946537017822, "logits/rejected": -2.336379051208496, "logps/chosen": -592.58740234375, "logps/rejected": -403.5370178222656, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -1.2999787330627441, "rewards/margins": 4.153019905090332, "rewards/rejected": -5.452998161315918, "step": 6304 }, { "epoch": 0.98, "learning_rate": 9.522991856424088e-06, "logits/chosen": -3.1198337078094482, "logits/rejected": -2.7030868530273438, "logps/chosen": -434.875732421875, "logps/rejected": -344.65960693359375, "loss": 1.4928, "rewards/accuracies": 0.5, "rewards/chosen": -0.7748260498046875, "rewards/margins": 1.9416838884353638, "rewards/rejected": -2.716510057449341, "step": 6305 }, { "epoch": 0.98, "learning_rate": 9.52225841589294e-06, "logits/chosen": -2.9124131202697754, "logits/rejected": -3.0592267513275146, "logps/chosen": -240.9244384765625, "logps/rejected": -275.9859313964844, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.18938103318214417, "rewards/margins": 6.035560607910156, "rewards/rejected": -5.846179008483887, "step": 6306 }, { "epoch": 0.98, "learning_rate": 9.521524975361792e-06, "logits/chosen": -2.0020298957824707, "logits/rejected": -3.1296489238739014, "logps/chosen": -60.84025573730469, "logps/rejected": -303.3509521484375, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": 0.42430782318115234, "rewards/margins": 4.003092288970947, "rewards/rejected": -3.578784227371216, "step": 6307 }, { "epoch": 0.98, "learning_rate": 9.520791534830644e-06, "logits/chosen": -2.811777353286743, "logits/rejected": -1.361928939819336, "logps/chosen": -268.885498046875, "logps/rejected": -86.0907211303711, "loss": 1.8327, "rewards/accuracies": 0.5, "rewards/chosen": -2.109402656555176, "rewards/margins": -0.3506819009780884, "rewards/rejected": -1.758720874786377, "step": 6308 }, { "epoch": 0.98, "learning_rate": 9.520058094299496e-06, "logits/chosen": -2.0897154808044434, "logits/rejected": -3.0808751583099365, "logps/chosen": -348.1869201660156, "logps/rejected": -567.332763671875, "loss": 4.5537, "rewards/accuracies": 0.5, "rewards/chosen": -3.4714431762695312, "rewards/margins": -1.6404924392700195, "rewards/rejected": -1.8309509754180908, "step": 6309 }, { "epoch": 0.98, "learning_rate": 9.519324653768348e-06, "logits/chosen": -2.8441662788391113, "logits/rejected": -2.965668201446533, "logps/chosen": -38.27764129638672, "logps/rejected": -158.22567749023438, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": 0.4643077850341797, "rewards/margins": 4.814881324768066, "rewards/rejected": -4.350573539733887, "step": 6310 }, { "epoch": 0.98, "learning_rate": 9.518591213237201e-06, "logits/chosen": -2.5699446201324463, "logits/rejected": -2.494379758834839, "logps/chosen": -127.03509521484375, "logps/rejected": -376.23614501953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.7284786701202393, "rewards/margins": 8.759591102600098, "rewards/rejected": -8.031112670898438, "step": 6311 }, { "epoch": 0.98, "learning_rate": 9.517857772706053e-06, "logits/chosen": -3.0044186115264893, "logits/rejected": -3.204127073287964, "logps/chosen": -223.8752899169922, "logps/rejected": -201.27859497070312, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -1.246832251548767, "rewards/margins": 5.015280723571777, "rewards/rejected": -6.262113094329834, "step": 6312 }, { "epoch": 0.98, "learning_rate": 9.517124332174905e-06, "logits/chosen": -2.499873638153076, "logits/rejected": -2.656989097595215, "logps/chosen": -184.44943237304688, "logps/rejected": -215.09762573242188, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": -0.6002194881439209, "rewards/margins": 3.4583849906921387, "rewards/rejected": -4.0586042404174805, "step": 6313 }, { "epoch": 0.98, "learning_rate": 9.516390891643757e-06, "logits/chosen": -2.6781833171844482, "logits/rejected": -3.0289950370788574, "logps/chosen": -176.94004821777344, "logps/rejected": -261.5010986328125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.2835443615913391, "rewards/margins": 4.472202301025391, "rewards/rejected": -4.188658237457275, "step": 6314 }, { "epoch": 0.98, "learning_rate": 9.515657451112609e-06, "logits/chosen": -2.752108335494995, "logits/rejected": -2.2807931900024414, "logps/chosen": -279.8907775878906, "logps/rejected": -261.69769287109375, "loss": 2.2522, "rewards/accuracies": 0.5, "rewards/chosen": -1.668463110923767, "rewards/margins": 0.6132469177246094, "rewards/rejected": -2.281710147857666, "step": 6315 }, { "epoch": 0.98, "learning_rate": 9.51492401058146e-06, "logits/chosen": -2.256775379180908, "logits/rejected": -3.0942749977111816, "logps/chosen": -59.857032775878906, "logps/rejected": -280.9822998046875, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": 0.1649457961320877, "rewards/margins": 4.118638515472412, "rewards/rejected": -3.953692674636841, "step": 6316 }, { "epoch": 0.98, "learning_rate": 9.514190570050313e-06, "logits/chosen": -2.7724781036376953, "logits/rejected": -2.782813549041748, "logps/chosen": -206.0525360107422, "logps/rejected": -201.15728759765625, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -0.3186439871788025, "rewards/margins": 4.244647026062012, "rewards/rejected": -4.563291549682617, "step": 6317 }, { "epoch": 0.98, "learning_rate": 9.513457129519166e-06, "logits/chosen": -2.9538283348083496, "logits/rejected": -2.233367919921875, "logps/chosen": -201.67364501953125, "logps/rejected": -122.8611831665039, "loss": 0.1653, "rewards/accuracies": 1.0, "rewards/chosen": -1.6483856439590454, "rewards/margins": 1.7721149921417236, "rewards/rejected": -3.4205007553100586, "step": 6318 }, { "epoch": 0.98, "learning_rate": 9.512723688988018e-06, "logits/chosen": -2.418337106704712, "logits/rejected": -2.568406343460083, "logps/chosen": -126.68659973144531, "logps/rejected": -346.2350158691406, "loss": 2.5473, "rewards/accuracies": 0.5, "rewards/chosen": -2.347278356552124, "rewards/margins": 0.14110636711120605, "rewards/rejected": -2.48838472366333, "step": 6319 }, { "epoch": 0.98, "learning_rate": 9.511990248456872e-06, "logits/chosen": -3.0468060970306396, "logits/rejected": -2.977409601211548, "logps/chosen": -238.43580627441406, "logps/rejected": -256.9179992675781, "loss": 1.5193, "rewards/accuracies": 0.5, "rewards/chosen": -1.3903509378433228, "rewards/margins": 2.050489902496338, "rewards/rejected": -3.44084095954895, "step": 6320 }, { "epoch": 0.98, "learning_rate": 9.511256807925724e-06, "logits/chosen": -2.9899814128875732, "logits/rejected": -2.7803261280059814, "logps/chosen": -109.48478698730469, "logps/rejected": -66.88751220703125, "loss": 1.4559, "rewards/accuracies": 0.5, "rewards/chosen": -1.2548843622207642, "rewards/margins": 1.0656112432479858, "rewards/rejected": -2.32049560546875, "step": 6321 }, { "epoch": 0.98, "learning_rate": 9.510523367394575e-06, "logits/chosen": -2.4416990280151367, "logits/rejected": -2.709202289581299, "logps/chosen": -43.980491638183594, "logps/rejected": -137.206298828125, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -0.6510021686553955, "rewards/margins": 4.481945514678955, "rewards/rejected": -5.1329474449157715, "step": 6322 }, { "epoch": 0.98, "learning_rate": 9.509789926863427e-06, "logits/chosen": -2.374455451965332, "logits/rejected": -2.998368740081787, "logps/chosen": -93.33515930175781, "logps/rejected": -317.392333984375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.16057825088500977, "rewards/margins": 6.180262088775635, "rewards/rejected": -6.019683837890625, "step": 6323 }, { "epoch": 0.98, "learning_rate": 9.50905648633228e-06, "logits/chosen": -3.096161365509033, "logits/rejected": -2.907083034515381, "logps/chosen": -203.5160675048828, "logps/rejected": -132.92190551757812, "loss": 1.4328, "rewards/accuracies": 0.5, "rewards/chosen": -1.526491641998291, "rewards/margins": 1.4689866304397583, "rewards/rejected": -2.9954781532287598, "step": 6324 }, { "epoch": 0.98, "learning_rate": 9.508323045801131e-06, "logits/chosen": -2.2685916423797607, "logits/rejected": -1.6896798610687256, "logps/chosen": -428.0767822265625, "logps/rejected": -404.44964599609375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.3392486572265625, "rewards/margins": 5.202081680297852, "rewards/rejected": -5.541330337524414, "step": 6325 }, { "epoch": 0.98, "learning_rate": 9.507589605269983e-06, "logits/chosen": -2.291104555130005, "logits/rejected": -2.616462469100952, "logps/chosen": -39.87750244140625, "logps/rejected": -202.59368896484375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.7857439517974854, "rewards/margins": 6.946021556854248, "rewards/rejected": -7.7317657470703125, "step": 6326 }, { "epoch": 0.98, "learning_rate": 9.506856164738835e-06, "logits/chosen": -2.452667236328125, "logits/rejected": -3.024739980697632, "logps/chosen": -182.72579956054688, "logps/rejected": -331.984375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5763862729072571, "rewards/margins": 5.169477462768555, "rewards/rejected": -5.745863437652588, "step": 6327 }, { "epoch": 0.98, "learning_rate": 9.506122724207688e-06, "logits/chosen": -1.444880723953247, "logits/rejected": -2.110105514526367, "logps/chosen": -506.39825439453125, "logps/rejected": -345.899169921875, "loss": 0.2997, "rewards/accuracies": 1.0, "rewards/chosen": -2.1390953063964844, "rewards/margins": 3.0662641525268555, "rewards/rejected": -5.20535945892334, "step": 6328 }, { "epoch": 0.98, "learning_rate": 9.50538928367654e-06, "logits/chosen": -2.825212001800537, "logits/rejected": -2.4009060859680176, "logps/chosen": -440.1434326171875, "logps/rejected": -350.83941650390625, "loss": 2.9075, "rewards/accuracies": 0.5, "rewards/chosen": -2.5534424781799316, "rewards/margins": -1.2951219081878662, "rewards/rejected": -1.258320689201355, "step": 6329 }, { "epoch": 0.98, "learning_rate": 9.504655843145392e-06, "logits/chosen": -2.755919933319092, "logits/rejected": -1.559170126914978, "logps/chosen": -177.51486206054688, "logps/rejected": -88.04906463623047, "loss": 3.1332, "rewards/accuracies": 0.0, "rewards/chosen": -5.697429656982422, "rewards/margins": -2.9690980911254883, "rewards/rejected": -2.7283315658569336, "step": 6330 }, { "epoch": 0.98, "learning_rate": 9.503922402614244e-06, "logits/chosen": -2.9563028812408447, "logits/rejected": -1.1845060586929321, "logps/chosen": -283.1040954589844, "logps/rejected": -208.66891479492188, "loss": 1.8401, "rewards/accuracies": 0.5, "rewards/chosen": -3.0509629249572754, "rewards/margins": 2.5248985290527344, "rewards/rejected": -5.57586145401001, "step": 6331 }, { "epoch": 0.98, "learning_rate": 9.503188962083096e-06, "logits/chosen": -2.9289674758911133, "logits/rejected": -2.110623359680176, "logps/chosen": -284.7363586425781, "logps/rejected": -334.32122802734375, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -1.0582220554351807, "rewards/margins": 3.6415538787841797, "rewards/rejected": -4.699775695800781, "step": 6332 }, { "epoch": 0.98, "learning_rate": 9.502455521551948e-06, "logits/chosen": -2.403660774230957, "logits/rejected": -2.827873468399048, "logps/chosen": -125.53565216064453, "logps/rejected": -224.8395538330078, "loss": 2.6116, "rewards/accuracies": 0.5, "rewards/chosen": -2.6057770252227783, "rewards/margins": -0.38920092582702637, "rewards/rejected": -2.216576099395752, "step": 6333 }, { "epoch": 0.99, "learning_rate": 9.5017220810208e-06, "logits/chosen": -2.4048800468444824, "logits/rejected": -2.8020973205566406, "logps/chosen": -227.07135009765625, "logps/rejected": -486.62969970703125, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -0.20529861748218536, "rewards/margins": 4.309468746185303, "rewards/rejected": -4.514767646789551, "step": 6334 }, { "epoch": 0.99, "learning_rate": 9.500988640489652e-06, "logits/chosen": -1.583302617073059, "logits/rejected": -2.965721845626831, "logps/chosen": -16.747915267944336, "logps/rejected": -558.0245361328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.04053540527820587, "rewards/margins": 9.256412506103516, "rewards/rejected": -9.296948432922363, "step": 6335 }, { "epoch": 0.99, "learning_rate": 9.500255199958503e-06, "logits/chosen": -2.9399781227111816, "logits/rejected": -3.161783218383789, "logps/chosen": -30.811901092529297, "logps/rejected": -143.6268310546875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -1.4040011167526245, "rewards/margins": 5.305696487426758, "rewards/rejected": -6.709697723388672, "step": 6336 }, { "epoch": 0.99, "learning_rate": 9.499521759427357e-06, "logits/chosen": -2.7080700397491455, "logits/rejected": -2.653190851211548, "logps/chosen": -541.515869140625, "logps/rejected": -470.9824523925781, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.6916656494140625, "rewards/margins": 6.054335117340088, "rewards/rejected": -5.362669467926025, "step": 6337 }, { "epoch": 0.99, "learning_rate": 9.498788318896209e-06, "logits/chosen": -2.0294744968414307, "logits/rejected": -2.4157779216766357, "logps/chosen": -171.33868408203125, "logps/rejected": -256.7635498046875, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -1.9445552825927734, "rewards/margins": 4.289218425750732, "rewards/rejected": -6.233773708343506, "step": 6338 }, { "epoch": 0.99, "learning_rate": 9.49805487836506e-06, "logits/chosen": -2.7797930240631104, "logits/rejected": -2.7682852745056152, "logps/chosen": -308.6628112792969, "logps/rejected": -339.02178955078125, "loss": 0.1247, "rewards/accuracies": 1.0, "rewards/chosen": 0.20440179109573364, "rewards/margins": 3.4916529655456543, "rewards/rejected": -3.2872512340545654, "step": 6339 }, { "epoch": 0.99, "learning_rate": 9.497321437833913e-06, "logits/chosen": -2.7220630645751953, "logits/rejected": -2.4921135902404785, "logps/chosen": -434.22589111328125, "logps/rejected": -476.07763671875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.34028205275535583, "rewards/margins": 6.951352119445801, "rewards/rejected": -7.291633605957031, "step": 6340 }, { "epoch": 0.99, "learning_rate": 9.496587997302765e-06, "logits/chosen": -1.812071681022644, "logits/rejected": -2.8946964740753174, "logps/chosen": -141.85220336914062, "logps/rejected": -409.0164794921875, "loss": 0.1711, "rewards/accuracies": 1.0, "rewards/chosen": -0.39102476835250854, "rewards/margins": 2.5276246070861816, "rewards/rejected": -2.918649196624756, "step": 6341 }, { "epoch": 0.99, "learning_rate": 9.495854556771616e-06, "logits/chosen": -1.5717294216156006, "logits/rejected": -2.735077142715454, "logps/chosen": -77.69955444335938, "logps/rejected": -189.432861328125, "loss": 0.0752, "rewards/accuracies": 1.0, "rewards/chosen": -2.404301404953003, "rewards/margins": 2.775322914123535, "rewards/rejected": -5.179624557495117, "step": 6342 }, { "epoch": 0.99, "learning_rate": 9.495121116240468e-06, "logits/chosen": -2.961101770401001, "logits/rejected": -2.953155279159546, "logps/chosen": -675.208251953125, "logps/rejected": -501.98095703125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.5382652282714844, "rewards/margins": 6.238913536071777, "rewards/rejected": -5.700648307800293, "step": 6343 }, { "epoch": 0.99, "learning_rate": 9.49438767570932e-06, "logits/chosen": -2.192145824432373, "logits/rejected": -3.1250381469726562, "logps/chosen": -41.244075775146484, "logps/rejected": -371.9517517089844, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -2.2660698890686035, "rewards/margins": 3.2683234214782715, "rewards/rejected": -5.534393310546875, "step": 6344 }, { "epoch": 0.99, "learning_rate": 9.493654235178172e-06, "logits/chosen": -2.3657968044281006, "logits/rejected": -2.8234968185424805, "logps/chosen": -128.71714782714844, "logps/rejected": -186.90249633789062, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -2.1112060546875, "rewards/margins": 4.169078826904297, "rewards/rejected": -6.280284881591797, "step": 6345 }, { "epoch": 0.99, "learning_rate": 9.492920794647026e-06, "logits/chosen": -2.5996689796447754, "logits/rejected": -2.798295021057129, "logps/chosen": -481.599365234375, "logps/rejected": -586.2991333007812, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -2.4456288814544678, "rewards/margins": 4.8184309005737305, "rewards/rejected": -7.264059543609619, "step": 6346 }, { "epoch": 0.99, "learning_rate": 9.492187354115878e-06, "logits/chosen": -2.244257688522339, "logits/rejected": -2.5317349433898926, "logps/chosen": -168.4525604248047, "logps/rejected": -255.3515625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": 0.09326477348804474, "rewards/margins": 6.567885398864746, "rewards/rejected": -6.474620819091797, "step": 6347 }, { "epoch": 0.99, "learning_rate": 9.49145391358473e-06, "logits/chosen": -2.986934185028076, "logits/rejected": -2.986325979232788, "logps/chosen": -157.30752563476562, "logps/rejected": -250.26565551757812, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.2986259460449219, "rewards/margins": 5.177752494812012, "rewards/rejected": -5.476378440856934, "step": 6348 }, { "epoch": 0.99, "learning_rate": 9.490720473053581e-06, "logits/chosen": -2.404456853866577, "logits/rejected": -3.050600528717041, "logps/chosen": -34.21891784667969, "logps/rejected": -203.70556640625, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -0.31439295411109924, "rewards/margins": 4.391012668609619, "rewards/rejected": -4.705405235290527, "step": 6349 }, { "epoch": 0.99, "learning_rate": 9.489987032522433e-06, "logits/chosen": -2.1586639881134033, "logits/rejected": -3.0883984565734863, "logps/chosen": -93.72624206542969, "logps/rejected": -376.71893310546875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.17414437234401703, "rewards/margins": 5.612647533416748, "rewards/rejected": -5.786791801452637, "step": 6350 }, { "epoch": 0.99, "learning_rate": 9.489253591991285e-06, "logits/chosen": -3.00789737701416, "logits/rejected": -3.133237838745117, "logps/chosen": -188.8931427001953, "logps/rejected": -205.64666748046875, "loss": 0.2071, "rewards/accuracies": 1.0, "rewards/chosen": -1.601611614227295, "rewards/margins": 2.6715033054351807, "rewards/rejected": -4.273115158081055, "step": 6351 }, { "epoch": 0.99, "learning_rate": 9.488520151460139e-06, "logits/chosen": -2.5788087844848633, "logits/rejected": -3.026165723800659, "logps/chosen": -288.72088623046875, "logps/rejected": -278.192626953125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 0.22171708941459656, "rewards/margins": 4.789156436920166, "rewards/rejected": -4.567439556121826, "step": 6352 }, { "epoch": 0.99, "learning_rate": 9.48778671092899e-06, "logits/chosen": -1.76255464553833, "logits/rejected": -2.500870943069458, "logps/chosen": -174.8797149658203, "logps/rejected": -288.82208251953125, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.37954139709472656, "rewards/margins": 5.654779434204102, "rewards/rejected": -6.034320831298828, "step": 6353 }, { "epoch": 0.99, "learning_rate": 9.487053270397842e-06, "logits/chosen": -2.7325589656829834, "logits/rejected": -3.0809943675994873, "logps/chosen": -576.244384765625, "logps/rejected": -518.385986328125, "loss": 0.254, "rewards/accuracies": 1.0, "rewards/chosen": -0.676547646522522, "rewards/margins": 2.177983522415161, "rewards/rejected": -2.8545312881469727, "step": 6354 }, { "epoch": 0.99, "learning_rate": 9.486319829866696e-06, "logits/chosen": -1.3300294876098633, "logits/rejected": -3.0293209552764893, "logps/chosen": -50.064208984375, "logps/rejected": -215.73709106445312, "loss": 0.6431, "rewards/accuracies": 0.5, "rewards/chosen": -2.649404287338257, "rewards/margins": 1.5160133838653564, "rewards/rejected": -4.165417671203613, "step": 6355 }, { "epoch": 0.99, "learning_rate": 9.485586389335548e-06, "logits/chosen": -2.894846200942993, "logits/rejected": -2.6944708824157715, "logps/chosen": -218.76641845703125, "logps/rejected": -279.99639892578125, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.27638548612594604, "rewards/margins": 4.520939826965332, "rewards/rejected": -4.797325134277344, "step": 6356 }, { "epoch": 0.99, "learning_rate": 9.4848529488044e-06, "logits/chosen": -2.9541187286376953, "logits/rejected": -3.032827138900757, "logps/chosen": -223.29319763183594, "logps/rejected": -90.84794616699219, "loss": 2.2895, "rewards/accuracies": 0.5, "rewards/chosen": -2.416861057281494, "rewards/margins": 0.08333754539489746, "rewards/rejected": -2.5001986026763916, "step": 6357 }, { "epoch": 0.99, "learning_rate": 9.484119508273252e-06, "logits/chosen": -1.9302023649215698, "logits/rejected": -2.840200662612915, "logps/chosen": -160.78280639648438, "logps/rejected": -401.9106140136719, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.6030635833740234, "rewards/margins": 4.796421051025391, "rewards/rejected": -5.399484634399414, "step": 6358 }, { "epoch": 0.99, "learning_rate": 9.483386067742103e-06, "logits/chosen": -2.256561756134033, "logits/rejected": -2.823434591293335, "logps/chosen": -91.91886901855469, "logps/rejected": -152.8598175048828, "loss": 1.0687, "rewards/accuracies": 0.5, "rewards/chosen": -1.693535566329956, "rewards/margins": 2.2022368907928467, "rewards/rejected": -3.8957724571228027, "step": 6359 }, { "epoch": 0.99, "learning_rate": 9.482652627210955e-06, "logits/chosen": -1.4803087711334229, "logits/rejected": -2.77256178855896, "logps/chosen": -184.091552734375, "logps/rejected": -353.2793273925781, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.5180355310440063, "rewards/margins": 6.433420181274414, "rewards/rejected": -5.915384769439697, "step": 6360 }, { "epoch": 0.99, "learning_rate": 9.481919186679807e-06, "logits/chosen": -2.2663657665252686, "logits/rejected": -2.8026087284088135, "logps/chosen": -61.38298797607422, "logps/rejected": -193.823486328125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 0.4817765951156616, "rewards/margins": 4.729199409484863, "rewards/rejected": -4.24742317199707, "step": 6361 }, { "epoch": 0.99, "learning_rate": 9.481185746148659e-06, "logits/chosen": -1.2891207933425903, "logits/rejected": -2.850209951400757, "logps/chosen": -360.52734375, "logps/rejected": -477.56341552734375, "loss": 4.0589, "rewards/accuracies": 0.5, "rewards/chosen": -4.4856276512146, "rewards/margins": -0.25435638427734375, "rewards/rejected": -4.231271266937256, "step": 6362 }, { "epoch": 0.99, "learning_rate": 9.480452305617511e-06, "logits/chosen": -1.3731251955032349, "logits/rejected": -2.4230809211730957, "logps/chosen": -237.19061279296875, "logps/rejected": -370.4949951171875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.1224281787872314, "rewards/margins": 4.985126495361328, "rewards/rejected": -6.107554912567139, "step": 6363 }, { "epoch": 0.99, "learning_rate": 9.479718865086365e-06, "logits/chosen": -2.8963236808776855, "logits/rejected": -3.200763702392578, "logps/chosen": -128.9947509765625, "logps/rejected": -235.92469787597656, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": 0.44109421968460083, "rewards/margins": 4.884657859802246, "rewards/rejected": -4.443563938140869, "step": 6364 }, { "epoch": 0.99, "learning_rate": 9.478985424555216e-06, "logits/chosen": -2.668303966522217, "logits/rejected": -3.1247658729553223, "logps/chosen": -52.470909118652344, "logps/rejected": -533.2169799804688, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": -2.1825389862060547, "rewards/margins": 2.6190426349639893, "rewards/rejected": -4.801581382751465, "step": 6365 }, { "epoch": 0.99, "learning_rate": 9.478251984024068e-06, "logits/chosen": -2.6801774501800537, "logits/rejected": -2.7728466987609863, "logps/chosen": -542.5338745117188, "logps/rejected": -489.5095520019531, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.696587324142456, "rewards/margins": 7.157302379608154, "rewards/rejected": -7.853889465332031, "step": 6366 }, { "epoch": 0.99, "learning_rate": 9.47751854349292e-06, "logits/chosen": -3.3078699111938477, "logits/rejected": -2.374752998352051, "logps/chosen": -326.48199462890625, "logps/rejected": -83.52110290527344, "loss": 1.2593, "rewards/accuracies": 0.5, "rewards/chosen": -1.4724022150039673, "rewards/margins": 2.1307899951934814, "rewards/rejected": -3.6031923294067383, "step": 6367 }, { "epoch": 0.99, "learning_rate": 9.476785102961772e-06, "logits/chosen": -2.7546939849853516, "logits/rejected": -2.6067590713500977, "logps/chosen": -357.2867431640625, "logps/rejected": -250.45492553710938, "loss": 3.0971, "rewards/accuracies": 0.5, "rewards/chosen": -2.7906906604766846, "rewards/margins": 0.5554795265197754, "rewards/rejected": -3.34617018699646, "step": 6368 }, { "epoch": 0.99, "learning_rate": 9.476051662430624e-06, "logits/chosen": -2.0434844493865967, "logits/rejected": -2.741312026977539, "logps/chosen": -170.57199096679688, "logps/rejected": -368.91510009765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3078511953353882, "rewards/margins": 7.686553478240967, "rewards/rejected": -8.994404792785645, "step": 6369 }, { "epoch": 0.99, "learning_rate": 9.475318221899476e-06, "logits/chosen": -2.5389695167541504, "logits/rejected": -2.9969730377197266, "logps/chosen": -113.40045928955078, "logps/rejected": -201.88145446777344, "loss": 0.0306, "rewards/accuracies": 1.0, "rewards/chosen": -0.8255848288536072, "rewards/margins": 5.072974681854248, "rewards/rejected": -5.8985595703125, "step": 6370 }, { "epoch": 0.99, "learning_rate": 9.474584781368328e-06, "logits/chosen": -1.2767624855041504, "logits/rejected": -2.8646347522735596, "logps/chosen": -63.453826904296875, "logps/rejected": -428.2154846191406, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -0.18864715099334717, "rewards/margins": 6.328986167907715, "rewards/rejected": -6.517633438110352, "step": 6371 }, { "epoch": 0.99, "learning_rate": 9.47385134083718e-06, "logits/chosen": -2.803798198699951, "logits/rejected": -2.47297739982605, "logps/chosen": -439.2223205566406, "logps/rejected": -348.5816955566406, "loss": 0.7071, "rewards/accuracies": 0.5, "rewards/chosen": -3.0104966163635254, "rewards/margins": 1.9248459339141846, "rewards/rejected": -4.935342788696289, "step": 6372 }, { "epoch": 0.99, "learning_rate": 9.473117900306033e-06, "logits/chosen": -2.6859092712402344, "logits/rejected": -2.8789477348327637, "logps/chosen": -225.93748474121094, "logps/rejected": -494.64190673828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.8711823225021362, "rewards/margins": 7.062339782714844, "rewards/rejected": -6.191157341003418, "step": 6373 }, { "epoch": 0.99, "learning_rate": 9.472384459774885e-06, "logits/chosen": -2.7876737117767334, "logits/rejected": -2.841488838195801, "logps/chosen": -468.3126220703125, "logps/rejected": -483.9718017578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.16953660547733307, "rewards/margins": 6.440052032470703, "rewards/rejected": -6.270515441894531, "step": 6374 }, { "epoch": 0.99, "learning_rate": 9.471651019243737e-06, "logits/chosen": -2.2858688831329346, "logits/rejected": -3.0329113006591797, "logps/chosen": -109.04524230957031, "logps/rejected": -353.56878662109375, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.6005212664604187, "rewards/margins": 6.801263809204102, "rewards/rejected": -7.401785373687744, "step": 6375 }, { "epoch": 0.99, "learning_rate": 9.470917578712589e-06, "logits/chosen": -2.86437726020813, "logits/rejected": -3.230438709259033, "logps/chosen": -280.1476135253906, "logps/rejected": -474.866455078125, "loss": 2.3484, "rewards/accuracies": 0.5, "rewards/chosen": -2.646399974822998, "rewards/margins": 0.26495981216430664, "rewards/rejected": -2.9113597869873047, "step": 6376 }, { "epoch": 0.99, "learning_rate": 9.47018413818144e-06, "logits/chosen": -2.6935524940490723, "logits/rejected": -1.5558784008026123, "logps/chosen": -128.44851684570312, "logps/rejected": -106.77940368652344, "loss": 0.1811, "rewards/accuracies": 1.0, "rewards/chosen": -0.6912965178489685, "rewards/margins": 1.888607144355774, "rewards/rejected": -2.5799036026000977, "step": 6377 }, { "epoch": 0.99, "learning_rate": 9.469450697650293e-06, "logits/chosen": -2.7183899879455566, "logits/rejected": -2.6094768047332764, "logps/chosen": -178.63943481445312, "logps/rejected": -292.76806640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.158050537109375, "rewards/margins": 6.6301798820495605, "rewards/rejected": -6.7882304191589355, "step": 6378 }, { "epoch": 0.99, "learning_rate": 9.468717257119144e-06, "logits/chosen": -2.3889899253845215, "logits/rejected": -2.812959671020508, "logps/chosen": -79.54055786132812, "logps/rejected": -322.9461669921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.214078426361084, "rewards/margins": 9.454241752624512, "rewards/rejected": -10.668319702148438, "step": 6379 }, { "epoch": 0.99, "learning_rate": 9.467983816587996e-06, "logits/chosen": -2.7806055545806885, "logits/rejected": -2.215182065963745, "logps/chosen": -936.1661376953125, "logps/rejected": -602.474853515625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.18058627843856812, "rewards/margins": 6.2377214431762695, "rewards/rejected": -6.418307781219482, "step": 6380 }, { "epoch": 0.99, "learning_rate": 9.467250376056848e-06, "logits/chosen": -2.2169017791748047, "logits/rejected": -2.690566301345825, "logps/chosen": -287.2306213378906, "logps/rejected": -535.6944580078125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.7128961086273193, "rewards/margins": 7.490497589111328, "rewards/rejected": -9.203393936157227, "step": 6381 }, { "epoch": 0.99, "learning_rate": 9.466516935525702e-06, "logits/chosen": -1.764821171760559, "logits/rejected": -3.0476834774017334, "logps/chosen": -157.0392608642578, "logps/rejected": -506.46343994140625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.3475837707519531, "rewards/margins": 6.195427894592285, "rewards/rejected": -5.847844123840332, "step": 6382 }, { "epoch": 0.99, "learning_rate": 9.465783494994554e-06, "logits/chosen": -2.5643138885498047, "logits/rejected": -2.5668790340423584, "logps/chosen": -82.3090591430664, "logps/rejected": -223.8297119140625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.21732939779758453, "rewards/margins": 8.248271942138672, "rewards/rejected": -8.465601921081543, "step": 6383 }, { "epoch": 0.99, "learning_rate": 9.465050054463405e-06, "logits/chosen": -1.8684146404266357, "logits/rejected": -2.8523755073547363, "logps/chosen": -117.1521987915039, "logps/rejected": -436.2933349609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.18406334519386292, "rewards/margins": 8.607965469360352, "rewards/rejected": -8.42390251159668, "step": 6384 }, { "epoch": 0.99, "learning_rate": 9.464316613932257e-06, "logits/chosen": -2.7645747661590576, "logits/rejected": -2.009760856628418, "logps/chosen": -590.9769897460938, "logps/rejected": -658.65869140625, "loss": 3.2752, "rewards/accuracies": 0.5, "rewards/chosen": -4.334140300750732, "rewards/margins": 0.08739304542541504, "rewards/rejected": -4.421533107757568, "step": 6385 }, { "epoch": 0.99, "learning_rate": 9.463583173401111e-06, "logits/chosen": -2.3444082736968994, "logits/rejected": -2.9545302391052246, "logps/chosen": -369.6490478515625, "logps/rejected": -390.463134765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.06438294053077698, "rewards/margins": 6.323355674743652, "rewards/rejected": -6.387738227844238, "step": 6386 }, { "epoch": 0.99, "learning_rate": 9.462849732869963e-06, "logits/chosen": -2.0393636226654053, "logits/rejected": -2.965299606323242, "logps/chosen": -62.28980255126953, "logps/rejected": -393.81463623046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.5408040881156921, "rewards/margins": 7.119349479675293, "rewards/rejected": -6.578545570373535, "step": 6387 }, { "epoch": 0.99, "learning_rate": 9.462116292338815e-06, "logits/chosen": -1.1254730224609375, "logits/rejected": -2.7266182899475098, "logps/chosen": -71.57559967041016, "logps/rejected": -388.5589904785156, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.981596827507019, "rewards/margins": 7.050525665283203, "rewards/rejected": -8.032122611999512, "step": 6388 }, { "epoch": 0.99, "learning_rate": 9.461382851807667e-06, "logits/chosen": -2.7406747341156006, "logits/rejected": -1.7393090724945068, "logps/chosen": -482.8844909667969, "logps/rejected": -415.41168212890625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.9285446405410767, "rewards/margins": 5.593188285827637, "rewards/rejected": -6.521733283996582, "step": 6389 }, { "epoch": 0.99, "learning_rate": 9.460649411276518e-06, "logits/chosen": -1.8242613077163696, "logits/rejected": -2.9252254962921143, "logps/chosen": -154.9158172607422, "logps/rejected": -410.75836181640625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.9873554706573486, "rewards/margins": 6.18388032913208, "rewards/rejected": -7.171236038208008, "step": 6390 }, { "epoch": 0.99, "learning_rate": 9.459915970745372e-06, "logits/chosen": -1.9108607769012451, "logits/rejected": -2.961427927017212, "logps/chosen": -240.65321350097656, "logps/rejected": -366.71124267578125, "loss": 3.9849, "rewards/accuracies": 0.5, "rewards/chosen": -4.597729682922363, "rewards/margins": -0.873387336730957, "rewards/rejected": -3.7243423461914062, "step": 6391 }, { "epoch": 0.99, "learning_rate": 9.459182530214224e-06, "logits/chosen": -1.225571870803833, "logits/rejected": -2.812161922454834, "logps/chosen": -163.46810913085938, "logps/rejected": -380.3896789550781, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": 0.42173880338668823, "rewards/margins": 6.29026460647583, "rewards/rejected": -5.868525505065918, "step": 6392 }, { "epoch": 0.99, "learning_rate": 9.458449089683076e-06, "logits/chosen": -2.543699264526367, "logits/rejected": -2.976574182510376, "logps/chosen": -450.9535217285156, "logps/rejected": -539.6165161132812, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.2638435363769531, "rewards/margins": 5.024526596069336, "rewards/rejected": -5.288370132446289, "step": 6393 }, { "epoch": 0.99, "learning_rate": 9.457715649151928e-06, "logits/chosen": -2.461092948913574, "logits/rejected": -2.979182004928589, "logps/chosen": -203.9876708984375, "logps/rejected": -321.3392333984375, "loss": 0.0492, "rewards/accuracies": 1.0, "rewards/chosen": -0.04534295201301575, "rewards/margins": 4.571199417114258, "rewards/rejected": -4.616542339324951, "step": 6394 }, { "epoch": 0.99, "learning_rate": 9.45698220862078e-06, "logits/chosen": -2.003568410873413, "logits/rejected": -3.379240036010742, "logps/chosen": -270.2891540527344, "logps/rejected": -486.333251953125, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -0.17206305265426636, "rewards/margins": 3.281095504760742, "rewards/rejected": -3.4531586170196533, "step": 6395 }, { "epoch": 0.99, "learning_rate": 9.456248768089631e-06, "logits/chosen": -2.811570167541504, "logits/rejected": -2.2068612575531006, "logps/chosen": -724.0966796875, "logps/rejected": -460.3266906738281, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.763348400592804, "rewards/margins": 6.8327813148498535, "rewards/rejected": -6.069433212280273, "step": 6396 }, { "epoch": 0.99, "learning_rate": 9.455515327558483e-06, "logits/chosen": -1.5559614896774292, "logits/rejected": -2.6918787956237793, "logps/chosen": -66.64122009277344, "logps/rejected": -489.9674072265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9502258896827698, "rewards/margins": 7.954104900360107, "rewards/rejected": -8.90433120727539, "step": 6397 }, { "epoch": 1.0, "learning_rate": 9.454781887027335e-06, "logits/chosen": -1.7843520641326904, "logits/rejected": -3.007295846939087, "logps/chosen": -231.9278564453125, "logps/rejected": -286.09185791015625, "loss": 3.2511, "rewards/accuracies": 0.5, "rewards/chosen": -3.220909595489502, "rewards/margins": -1.2076053619384766, "rewards/rejected": -2.0133042335510254, "step": 6398 }, { "epoch": 1.0, "learning_rate": 9.454048446496187e-06, "logits/chosen": -2.302130699157715, "logits/rejected": -2.9101970195770264, "logps/chosen": -198.40362548828125, "logps/rejected": -489.5626220703125, "loss": 0.0929, "rewards/accuracies": 1.0, "rewards/chosen": -0.7327451705932617, "rewards/margins": 5.2608537673950195, "rewards/rejected": -5.993598937988281, "step": 6399 }, { "epoch": 1.0, "learning_rate": 9.45331500596504e-06, "logits/chosen": -2.504293203353882, "logits/rejected": -2.739838123321533, "logps/chosen": -152.1866455078125, "logps/rejected": -201.9833221435547, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.03322640061378479, "rewards/margins": 5.831544399261475, "rewards/rejected": -5.798317909240723, "step": 6400 }, { "epoch": 1.0, "learning_rate": 9.452581565433892e-06, "logits/chosen": -2.7671492099761963, "logits/rejected": -2.8171441555023193, "logps/chosen": -109.10488891601562, "logps/rejected": -217.93917846679688, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -0.149342343211174, "rewards/margins": 4.177072048187256, "rewards/rejected": -4.326414585113525, "step": 6401 }, { "epoch": 1.0, "learning_rate": 9.451848124902744e-06, "logits/chosen": -2.7666819095611572, "logits/rejected": -2.8339784145355225, "logps/chosen": -123.1579818725586, "logps/rejected": -291.3084716796875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.2871543765068054, "rewards/margins": 7.785916328430176, "rewards/rejected": -7.498762130737305, "step": 6402 }, { "epoch": 1.0, "learning_rate": 9.451114684371596e-06, "logits/chosen": -2.7733993530273438, "logits/rejected": -2.1246330738067627, "logps/chosen": -159.064453125, "logps/rejected": -108.61441040039062, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.7238103747367859, "rewards/margins": 4.11478328704834, "rewards/rejected": -4.83859395980835, "step": 6403 }, { "epoch": 1.0, "learning_rate": 9.450381243840448e-06, "logits/chosen": -2.851951837539673, "logits/rejected": -3.0483083724975586, "logps/chosen": -88.40383911132812, "logps/rejected": -188.75607299804688, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.3873022198677063, "rewards/margins": 6.012599468231201, "rewards/rejected": -6.399901390075684, "step": 6404 }, { "epoch": 1.0, "learning_rate": 9.4496478033093e-06, "logits/chosen": -2.791863441467285, "logits/rejected": -2.4391930103302, "logps/chosen": -158.4158935546875, "logps/rejected": -208.8173828125, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -1.1567821502685547, "rewards/margins": 5.197999477386475, "rewards/rejected": -6.354781150817871, "step": 6405 }, { "epoch": 1.0, "learning_rate": 9.448914362778152e-06, "logits/chosen": -3.0661239624023438, "logits/rejected": -2.41871976852417, "logps/chosen": -413.2983703613281, "logps/rejected": -341.839111328125, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 0.75124591588974, "rewards/margins": 6.458996772766113, "rewards/rejected": -5.7077507972717285, "step": 6406 }, { "epoch": 1.0, "learning_rate": 9.448180922247004e-06, "logits/chosen": -2.418192148208618, "logits/rejected": -3.005107879638672, "logps/chosen": -59.174076080322266, "logps/rejected": -278.629638671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.06586933881044388, "rewards/margins": 6.689864158630371, "rewards/rejected": -6.623994827270508, "step": 6407 }, { "epoch": 1.0, "learning_rate": 9.447447481715856e-06, "logits/chosen": -1.5875500440597534, "logits/rejected": -2.7711470127105713, "logps/chosen": -90.43795776367188, "logps/rejected": -174.39663696289062, "loss": 0.0768, "rewards/accuracies": 1.0, "rewards/chosen": -0.807330310344696, "rewards/margins": 4.054973602294922, "rewards/rejected": -4.862304210662842, "step": 6408 }, { "epoch": 1.0, "learning_rate": 9.44671404118471e-06, "logits/chosen": -2.5289876461029053, "logits/rejected": -2.8689541816711426, "logps/chosen": -211.6250457763672, "logps/rejected": -393.436279296875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.3198680579662323, "rewards/margins": 6.404741287231445, "rewards/rejected": -6.724609375, "step": 6409 }, { "epoch": 1.0, "learning_rate": 9.445980600653561e-06, "logits/chosen": -2.595318555831909, "logits/rejected": -2.6044552326202393, "logps/chosen": -415.09942626953125, "logps/rejected": -523.20068359375, "loss": 3.2999, "rewards/accuracies": 0.5, "rewards/chosen": -4.066481590270996, "rewards/margins": 1.123047113418579, "rewards/rejected": -5.189528465270996, "step": 6410 }, { "epoch": 1.0, "learning_rate": 9.445247160122413e-06, "logits/chosen": -2.733999729156494, "logits/rejected": -3.0737104415893555, "logps/chosen": -86.65914916992188, "logps/rejected": -295.872802734375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": 0.2268121838569641, "rewards/margins": 7.350934028625488, "rewards/rejected": -7.124122142791748, "step": 6411 }, { "epoch": 1.0, "learning_rate": 9.444513719591265e-06, "logits/chosen": -2.9344210624694824, "logits/rejected": -2.4264204502105713, "logps/chosen": -225.445556640625, "logps/rejected": -293.0316467285156, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.3514343500137329, "rewards/margins": 7.495573997497559, "rewards/rejected": -7.144139289855957, "step": 6412 }, { "epoch": 1.0, "learning_rate": 9.443780279060117e-06, "logits/chosen": -3.174802303314209, "logits/rejected": -1.9441910982131958, "logps/chosen": -517.0457763671875, "logps/rejected": -336.614013671875, "loss": 1.1089, "rewards/accuracies": 0.5, "rewards/chosen": -2.1112060546875, "rewards/margins": 3.8814034461975098, "rewards/rejected": -5.99260950088501, "step": 6413 }, { "epoch": 1.0, "learning_rate": 9.443046838528969e-06, "logits/chosen": -1.1981879472732544, "logits/rejected": -2.7361395359039307, "logps/chosen": -224.52005004882812, "logps/rejected": -583.0191650390625, "loss": 2.2466, "rewards/accuracies": 0.5, "rewards/chosen": -4.101116180419922, "rewards/margins": -0.6902663707733154, "rewards/rejected": -3.4108498096466064, "step": 6414 }, { "epoch": 1.0, "learning_rate": 9.44231339799782e-06, "logits/chosen": -1.9641733169555664, "logits/rejected": -2.641352415084839, "logps/chosen": -202.88519287109375, "logps/rejected": -363.4105224609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.26222383975982666, "rewards/margins": 8.998613357543945, "rewards/rejected": -8.73638916015625, "step": 6415 }, { "epoch": 1.0, "learning_rate": 9.441579957466672e-06, "logits/chosen": -2.7057383060455322, "logits/rejected": -2.831834077835083, "logps/chosen": -315.69134521484375, "logps/rejected": -372.5289306640625, "loss": 5.2053, "rewards/accuracies": 0.0, "rewards/chosen": -5.491107940673828, "rewards/margins": -5.19984245300293, "rewards/rejected": -0.29126548767089844, "step": 6416 }, { "epoch": 1.0, "learning_rate": 9.440846516935524e-06, "logits/chosen": -3.013533592224121, "logits/rejected": -2.8256804943084717, "logps/chosen": -135.044921875, "logps/rejected": -202.46841430664062, "loss": 0.8057, "rewards/accuracies": 0.5, "rewards/chosen": -0.3395198881626129, "rewards/margins": 2.2636876106262207, "rewards/rejected": -2.6032073497772217, "step": 6417 }, { "epoch": 1.0, "learning_rate": 9.440113076404378e-06, "logits/chosen": -2.740252733230591, "logits/rejected": -2.9744958877563477, "logps/chosen": -93.02531433105469, "logps/rejected": -245.8682403564453, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": 0.49228498339653015, "rewards/margins": 5.470251560211182, "rewards/rejected": -4.97796630859375, "step": 6418 }, { "epoch": 1.0, "learning_rate": 9.43937963587323e-06, "logits/chosen": -2.545041799545288, "logits/rejected": -2.3837757110595703, "logps/chosen": -252.4228515625, "logps/rejected": -529.47607421875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.1197446808218956, "rewards/margins": 7.330441474914551, "rewards/rejected": -7.450186252593994, "step": 6419 }, { "epoch": 1.0, "learning_rate": 9.438646195342083e-06, "logits/chosen": -2.859894037246704, "logits/rejected": -2.802771806716919, "logps/chosen": -145.43907165527344, "logps/rejected": -178.35523986816406, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -1.9002143144607544, "rewards/margins": 4.726293563842773, "rewards/rejected": -6.626507759094238, "step": 6420 }, { "epoch": 1.0, "learning_rate": 9.437912754810935e-06, "logits/chosen": -2.433095693588257, "logits/rejected": -2.804530382156372, "logps/chosen": -85.09847259521484, "logps/rejected": -441.6265563964844, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": 0.5730751156806946, "rewards/margins": 5.967005252838135, "rewards/rejected": -5.393930435180664, "step": 6421 }, { "epoch": 1.0, "learning_rate": 9.437179314279787e-06, "logits/chosen": -3.2899389266967773, "logits/rejected": -3.4108197689056396, "logps/chosen": -18.06341552734375, "logps/rejected": -115.59732818603516, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.4371159076690674, "rewards/margins": 6.196389198303223, "rewards/rejected": -6.633505344390869, "step": 6422 }, { "epoch": 1.0, "learning_rate": 9.436445873748639e-06, "logits/chosen": -2.389052391052246, "logits/rejected": -3.4109139442443848, "logps/chosen": -159.53176879882812, "logps/rejected": -481.72760009765625, "loss": 3.1933, "rewards/accuracies": 0.5, "rewards/chosen": -3.4314301013946533, "rewards/margins": 0.2515137195587158, "rewards/rejected": -3.682943820953369, "step": 6423 }, { "epoch": 1.0, "learning_rate": 9.43571243321749e-06, "logits/chosen": -2.2973780632019043, "logits/rejected": -2.7597649097442627, "logps/chosen": -446.2751159667969, "logps/rejected": -420.51007080078125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.03432007133960724, "rewards/margins": 6.136605739593506, "rewards/rejected": -6.102285861968994, "step": 6424 }, { "epoch": 1.0, "learning_rate": 9.434978992686343e-06, "logits/chosen": -2.0495007038116455, "logits/rejected": -2.9275312423706055, "logps/chosen": -92.44749450683594, "logps/rejected": -214.52899169921875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -0.7844904065132141, "rewards/margins": 5.47510290145874, "rewards/rejected": -6.2595930099487305, "step": 6425 }, { "epoch": 1.0, "learning_rate": 9.434245552155196e-06, "logits/chosen": -2.8758440017700195, "logits/rejected": -2.9969277381896973, "logps/chosen": -104.19629669189453, "logps/rejected": -286.3457946777344, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.8361194133758545, "rewards/margins": 6.546255111694336, "rewards/rejected": -8.38237476348877, "step": 6426 }, { "epoch": 1.0, "learning_rate": 9.433512111624048e-06, "logits/chosen": -3.0869076251983643, "logits/rejected": -2.8548295497894287, "logps/chosen": -198.33978271484375, "logps/rejected": -179.24969482421875, "loss": 4.4093, "rewards/accuracies": 0.5, "rewards/chosen": -3.9406003952026367, "rewards/margins": -0.7592456340789795, "rewards/rejected": -3.1813549995422363, "step": 6427 }, { "epoch": 1.0, "learning_rate": 9.4327786710929e-06, "logits/chosen": -2.8469583988189697, "logits/rejected": -3.147627592086792, "logps/chosen": -125.79165649414062, "logps/rejected": -280.20135498046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.16969034075737, "rewards/margins": 6.715364456176758, "rewards/rejected": -6.5456743240356445, "step": 6428 }, { "epoch": 1.0, "learning_rate": 9.432045230561752e-06, "logits/chosen": -2.181821823120117, "logits/rejected": -2.99910306930542, "logps/chosen": -133.8048553466797, "logps/rejected": -300.3674621582031, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.17209891974925995, "rewards/margins": 6.282405376434326, "rewards/rejected": -6.454504489898682, "step": 6429 }, { "epoch": 1.0, "learning_rate": 9.431311790030604e-06, "logits/chosen": -1.5205730199813843, "logits/rejected": -2.4095218181610107, "logps/chosen": -251.66622924804688, "logps/rejected": -462.23846435546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3129638731479645, "rewards/margins": 9.008252143859863, "rewards/rejected": -9.321215629577637, "step": 6430 }, { "epoch": 1.0, "learning_rate": 9.430578349499456e-06, "logits/chosen": -2.7224977016448975, "logits/rejected": -2.7398109436035156, "logps/chosen": -165.93125915527344, "logps/rejected": -298.349365234375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.5270717740058899, "rewards/margins": 6.22989559173584, "rewards/rejected": -5.702823638916016, "step": 6431 }, { "epoch": 1.0, "learning_rate": 9.429844908968307e-06, "logits/chosen": -2.320530652999878, "logits/rejected": -1.2279754877090454, "logps/chosen": -192.33827209472656, "logps/rejected": -114.3308334350586, "loss": 1.3997, "rewards/accuracies": 0.5, "rewards/chosen": -2.360149621963501, "rewards/margins": 1.5980147123336792, "rewards/rejected": -3.9581644535064697, "step": 6432 }, { "epoch": 1.0, "learning_rate": 9.42911146843716e-06, "logits/chosen": -1.9561189413070679, "logits/rejected": -3.1028242111206055, "logps/chosen": -118.37051391601562, "logps/rejected": -388.39239501953125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 0.8565727472305298, "rewards/margins": 6.552382946014404, "rewards/rejected": -5.695809841156006, "step": 6433 }, { "epoch": 1.0, "learning_rate": 9.428378027906011e-06, "logits/chosen": -1.8062397241592407, "logits/rejected": -2.682849884033203, "logps/chosen": -110.00700378417969, "logps/rejected": -283.4315185546875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.24983271956443787, "rewards/margins": 6.119985580444336, "rewards/rejected": -5.870152950286865, "step": 6434 }, { "epoch": 1.0, "learning_rate": 9.427644587374865e-06, "logits/chosen": -1.8159111738204956, "logits/rejected": -1.9808188676834106, "logps/chosen": -338.1456298828125, "logps/rejected": -370.9162902832031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.5052345395088196, "rewards/margins": 8.843502044677734, "rewards/rejected": -9.348736763000488, "step": 6435 }, { "epoch": 1.0, "learning_rate": 9.426911146843717e-06, "logits/chosen": -2.358459234237671, "logits/rejected": -2.7582178115844727, "logps/chosen": -77.75166320800781, "logps/rejected": -231.4300537109375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.7145462036132812, "rewards/margins": 7.512873649597168, "rewards/rejected": -6.7983269691467285, "step": 6436 }, { "epoch": 1.0, "learning_rate": 9.426177706312569e-06, "logits/chosen": -2.502105712890625, "logits/rejected": -2.843808650970459, "logps/chosen": -219.76087951660156, "logps/rejected": -348.52191162109375, "loss": 2.1226, "rewards/accuracies": 0.5, "rewards/chosen": -2.7341277599334717, "rewards/margins": 0.9959962368011475, "rewards/rejected": -3.730123996734619, "step": 6437 }, { "epoch": 1.0, "learning_rate": 9.42544426578142e-06, "logits/chosen": -2.588913917541504, "logits/rejected": -1.9536069631576538, "logps/chosen": -165.6246337890625, "logps/rejected": -235.9448699951172, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.2720138430595398, "rewards/margins": 6.753969192504883, "rewards/rejected": -6.481955528259277, "step": 6438 }, { "epoch": 1.0, "learning_rate": 9.424710825250272e-06, "logits/chosen": -2.0116162300109863, "logits/rejected": -2.7984347343444824, "logps/chosen": -304.7349548339844, "logps/rejected": -366.66229248046875, "loss": 0.2065, "rewards/accuracies": 1.0, "rewards/chosen": 0.38273167610168457, "rewards/margins": 4.248504638671875, "rewards/rejected": -3.8657732009887695, "step": 6439 }, { "epoch": 1.0, "learning_rate": 9.423977384719124e-06, "logits/chosen": -1.7829605340957642, "logits/rejected": -2.8037002086639404, "logps/chosen": -213.4761962890625, "logps/rejected": -288.7803955078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.9397461414337158, "rewards/margins": 7.000127792358398, "rewards/rejected": -6.060381889343262, "step": 6440 }, { "epoch": 1.0, "learning_rate": 9.423243944187976e-06, "logits/chosen": -2.7171177864074707, "logits/rejected": -2.5290372371673584, "logps/chosen": -282.7510986328125, "logps/rejected": -218.8739776611328, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.773236095905304, "rewards/margins": 6.500380516052246, "rewards/rejected": -5.727144718170166, "step": 6441 }, { "epoch": 1.0, "learning_rate": 9.422510503656828e-06, "logits/chosen": -2.3205764293670654, "logits/rejected": -2.671933650970459, "logps/chosen": -54.41868591308594, "logps/rejected": -192.93968200683594, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7204458117485046, "rewards/margins": 6.869984149932861, "rewards/rejected": -6.149538516998291, "step": 6442 }, { "epoch": 1.0, "learning_rate": 9.42177706312568e-06, "logits/chosen": -2.5974884033203125, "logits/rejected": -2.373264789581299, "logps/chosen": -230.4342498779297, "logps/rejected": -203.60781860351562, "loss": 1.3791, "rewards/accuracies": 0.5, "rewards/chosen": -2.2999939918518066, "rewards/margins": 3.2110209465026855, "rewards/rejected": -5.511014938354492, "step": 6443 }, { "epoch": 1.0, "learning_rate": 9.421043622594533e-06, "logits/chosen": -2.69357967376709, "logits/rejected": -0.8807059526443481, "logps/chosen": -302.9682922363281, "logps/rejected": -112.38985443115234, "loss": 0.5779, "rewards/accuracies": 0.5, "rewards/chosen": -1.570812702178955, "rewards/margins": 2.1557865142822266, "rewards/rejected": -3.7265992164611816, "step": 6444 }, { "epoch": 1.0, "learning_rate": 9.420310182063385e-06, "logits/chosen": -1.6478739976882935, "logits/rejected": -2.9971306324005127, "logps/chosen": -131.82577514648438, "logps/rejected": -364.2806396484375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.1423633098602295, "rewards/margins": 6.733123779296875, "rewards/rejected": -6.875487327575684, "step": 6445 }, { "epoch": 1.0, "learning_rate": 9.419576741532237e-06, "logits/chosen": -3.0589184761047363, "logits/rejected": -2.8421459197998047, "logps/chosen": -120.49807739257812, "logps/rejected": -192.1469268798828, "loss": 0.0818, "rewards/accuracies": 1.0, "rewards/chosen": -0.3808742165565491, "rewards/margins": 3.4333462715148926, "rewards/rejected": -3.814220428466797, "step": 6446 }, { "epoch": 1.0, "learning_rate": 9.418843301001089e-06, "logits/chosen": -2.2766895294189453, "logits/rejected": -3.003974199295044, "logps/chosen": -134.69407653808594, "logps/rejected": -248.9698486328125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.5570077300071716, "rewards/margins": 6.434547424316406, "rewards/rejected": -5.87753963470459, "step": 6447 }, { "epoch": 1.0, "learning_rate": 9.418109860469941e-06, "logits/chosen": -2.4163334369659424, "logits/rejected": -2.6415536403656006, "logps/chosen": -107.98335266113281, "logps/rejected": -194.97906494140625, "loss": 1.1081, "rewards/accuracies": 0.5, "rewards/chosen": -2.057323455810547, "rewards/margins": 3.7800838947296143, "rewards/rejected": -5.83740758895874, "step": 6448 }, { "epoch": 1.0, "learning_rate": 9.417376419938793e-06, "logits/chosen": -2.322720766067505, "logits/rejected": -2.445688486099243, "logps/chosen": -239.32354736328125, "logps/rejected": -134.78045654296875, "loss": 0.5839, "rewards/accuracies": 0.5, "rewards/chosen": -2.012019157409668, "rewards/margins": 2.852311611175537, "rewards/rejected": -4.864330768585205, "step": 6449 }, { "epoch": 1.0, "learning_rate": 9.416642979407645e-06, "logits/chosen": -1.8364344835281372, "logits/rejected": -2.7672104835510254, "logps/chosen": -55.989234924316406, "logps/rejected": -208.24148559570312, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.7901449203491211, "rewards/margins": 5.156436920166016, "rewards/rejected": -5.946582317352295, "step": 6450 }, { "epoch": 1.0, "learning_rate": 9.415909538876497e-06, "logits/chosen": -2.2724084854125977, "logits/rejected": -2.599360227584839, "logps/chosen": -83.6131591796875, "logps/rejected": -325.8908996582031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.39866599440574646, "rewards/margins": 8.126470565795898, "rewards/rejected": -7.727805137634277, "step": 6451 }, { "epoch": 1.0, "learning_rate": 9.415176098345348e-06, "logits/chosen": -2.1194379329681396, "logits/rejected": -2.7559421062469482, "logps/chosen": -111.25093078613281, "logps/rejected": -252.4169921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.4044281244277954, "rewards/margins": 7.0511393547058105, "rewards/rejected": -6.646711349487305, "step": 6452 }, { "epoch": 1.0, "learning_rate": 9.414442657814202e-06, "logits/chosen": -2.8680148124694824, "logits/rejected": -1.8243141174316406, "logps/chosen": -545.260498046875, "logps/rejected": -326.7100524902344, "loss": 0.0973, "rewards/accuracies": 1.0, "rewards/chosen": 0.241282656788826, "rewards/margins": 4.02716588973999, "rewards/rejected": -3.7858834266662598, "step": 6453 }, { "epoch": 1.0, "learning_rate": 9.413709217283054e-06, "logits/chosen": -2.823368787765503, "logits/rejected": -2.316448211669922, "logps/chosen": -661.1007080078125, "logps/rejected": -490.7674255371094, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.35277098417282104, "rewards/margins": 6.625694274902344, "rewards/rejected": -6.978465557098389, "step": 6454 }, { "epoch": 1.0, "learning_rate": 9.412975776751907e-06, "logits/chosen": -3.165660858154297, "logits/rejected": -2.5986294746398926, "logps/chosen": -348.5850524902344, "logps/rejected": -213.5021209716797, "loss": 0.7843, "rewards/accuracies": 0.5, "rewards/chosen": 1.0806090831756592, "rewards/margins": 3.498769521713257, "rewards/rejected": -2.4181602001190186, "step": 6455 }, { "epoch": 1.0, "learning_rate": 9.41224233622076e-06, "logits/chosen": -2.0982043743133545, "logits/rejected": -2.7960915565490723, "logps/chosen": -113.46131896972656, "logps/rejected": -162.8428955078125, "loss": 0.8998, "rewards/accuracies": 0.5, "rewards/chosen": -1.289263129234314, "rewards/margins": 1.7402241230010986, "rewards/rejected": -3.029487133026123, "step": 6456 }, { "epoch": 1.0, "learning_rate": 9.411508895689611e-06, "logits/chosen": -1.8812341690063477, "logits/rejected": -2.7645366191864014, "logps/chosen": -75.91175842285156, "logps/rejected": -297.7568054199219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.013689808547496796, "rewards/margins": 8.011754989624023, "rewards/rejected": -7.998064994812012, "step": 6457 }, { "epoch": 1.0, "learning_rate": 9.410775455158463e-06, "logits/chosen": -2.7438056468963623, "logits/rejected": -1.727716326713562, "logps/chosen": -561.7101440429688, "logps/rejected": -310.65155029296875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 0.4733695983886719, "rewards/margins": 5.144839286804199, "rewards/rejected": -4.671469688415527, "step": 6458 }, { "epoch": 1.0, "learning_rate": 9.410042014627315e-06, "logits/chosen": -1.7254825830459595, "logits/rejected": -2.8385393619537354, "logps/chosen": -96.0489273071289, "logps/rejected": -218.3013916015625, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": 0.20377464592456818, "rewards/margins": 4.692713737487793, "rewards/rejected": -4.48893928527832, "step": 6459 }, { "epoch": 1.0, "learning_rate": 9.409308574096167e-06, "logits/chosen": -1.923527717590332, "logits/rejected": -2.9246163368225098, "logps/chosen": -82.92958068847656, "logps/rejected": -292.061767578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.5739967823028564, "rewards/margins": 6.428507328033447, "rewards/rejected": -5.854510307312012, "step": 6460 }, { "epoch": 1.0, "learning_rate": 9.408575133565019e-06, "logits/chosen": -3.260873794555664, "logits/rejected": -2.8467600345611572, "logps/chosen": -389.5485534667969, "logps/rejected": -307.6119079589844, "loss": 0.0888, "rewards/accuracies": 1.0, "rewards/chosen": 1.3591461181640625, "rewards/margins": 5.762604713439941, "rewards/rejected": -4.403458595275879, "step": 6461 }, { "epoch": 1.0, "learning_rate": 9.407841693033872e-06, "logits/chosen": -1.7177510261535645, "logits/rejected": -2.951953172683716, "logps/chosen": -91.1543197631836, "logps/rejected": -448.0118713378906, "loss": 1.4989, "rewards/accuracies": 0.5, "rewards/chosen": -2.0792040824890137, "rewards/margins": 3.0026931762695312, "rewards/rejected": -5.081897258758545, "step": 6462 }, { "epoch": 1.01, "learning_rate": 9.407108252502724e-06, "logits/chosen": -2.6893606185913086, "logits/rejected": -2.491122245788574, "logps/chosen": -108.77723693847656, "logps/rejected": -119.94419860839844, "loss": 1.8133, "rewards/accuracies": 0.5, "rewards/chosen": -1.5158997774124146, "rewards/margins": 1.0079405307769775, "rewards/rejected": -2.5238404273986816, "step": 6463 }, { "epoch": 1.01, "learning_rate": 9.406374811971576e-06, "logits/chosen": -1.9182952642440796, "logits/rejected": -2.788892984390259, "logps/chosen": -118.30839538574219, "logps/rejected": -214.34573364257812, "loss": 0.243, "rewards/accuracies": 1.0, "rewards/chosen": -1.057848572731018, "rewards/margins": 3.3508501052856445, "rewards/rejected": -4.408698558807373, "step": 6464 }, { "epoch": 1.01, "learning_rate": 9.405641371440428e-06, "logits/chosen": -2.991286516189575, "logits/rejected": -2.660320520401001, "logps/chosen": -273.0075988769531, "logps/rejected": -187.70584106445312, "loss": 1.3491, "rewards/accuracies": 0.5, "rewards/chosen": -0.5868270397186279, "rewards/margins": 1.52590811252594, "rewards/rejected": -2.1127350330352783, "step": 6465 }, { "epoch": 1.01, "learning_rate": 9.40490793090928e-06, "logits/chosen": -2.6980626583099365, "logits/rejected": -2.4345102310180664, "logps/chosen": -294.51593017578125, "logps/rejected": -273.9934997558594, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 1.4457199573516846, "rewards/margins": 5.7393975257873535, "rewards/rejected": -4.29367733001709, "step": 6466 }, { "epoch": 1.01, "learning_rate": 9.404174490378132e-06, "logits/chosen": -3.0189085006713867, "logits/rejected": -3.232832670211792, "logps/chosen": -20.952804565429688, "logps/rejected": -155.14553833007812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.7200943827629089, "rewards/margins": 6.36679220199585, "rewards/rejected": -5.646697998046875, "step": 6467 }, { "epoch": 1.01, "learning_rate": 9.403441049846984e-06, "logits/chosen": -2.0175750255584717, "logits/rejected": -2.6662797927856445, "logps/chosen": -86.787109375, "logps/rejected": -200.4309539794922, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.6303531527519226, "rewards/margins": 7.033702850341797, "rewards/rejected": -6.40334939956665, "step": 6468 }, { "epoch": 1.01, "learning_rate": 9.402707609315835e-06, "logits/chosen": -2.540253162384033, "logits/rejected": -3.3688247203826904, "logps/chosen": -214.8235626220703, "logps/rejected": -524.4306640625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": 0.07534870505332947, "rewards/margins": 4.269331455230713, "rewards/rejected": -4.19398307800293, "step": 6469 }, { "epoch": 1.01, "learning_rate": 9.401974168784687e-06, "logits/chosen": -3.1298229694366455, "logits/rejected": -2.7087769508361816, "logps/chosen": -120.25060272216797, "logps/rejected": -72.28694915771484, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": 1.0699434280395508, "rewards/margins": 4.872804641723633, "rewards/rejected": -3.802861452102661, "step": 6470 }, { "epoch": 1.01, "learning_rate": 9.401240728253541e-06, "logits/chosen": -2.5037996768951416, "logits/rejected": -3.0972976684570312, "logps/chosen": -17.306976318359375, "logps/rejected": -300.04608154296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.7237410545349121, "rewards/margins": 9.117730140686035, "rewards/rejected": -8.393988609313965, "step": 6471 }, { "epoch": 1.01, "learning_rate": 9.400507287722393e-06, "logits/chosen": -1.8181742429733276, "logits/rejected": -2.6924967765808105, "logps/chosen": -279.52838134765625, "logps/rejected": -328.84710693359375, "loss": 0.2459, "rewards/accuracies": 1.0, "rewards/chosen": 0.2604629695415497, "rewards/margins": 2.892770767211914, "rewards/rejected": -2.632307767868042, "step": 6472 }, { "epoch": 1.01, "learning_rate": 9.399773847191245e-06, "logits/chosen": -2.685011148452759, "logits/rejected": -2.683056592941284, "logps/chosen": -471.89990234375, "logps/rejected": -737.0230102539062, "loss": 0.089, "rewards/accuracies": 1.0, "rewards/chosen": -0.8726215362548828, "rewards/margins": 3.956376314163208, "rewards/rejected": -4.82899808883667, "step": 6473 }, { "epoch": 1.01, "learning_rate": 9.399040406660097e-06, "logits/chosen": -2.6130788326263428, "logits/rejected": -2.128185749053955, "logps/chosen": -432.8673400878906, "logps/rejected": -303.1285400390625, "loss": 1.1811, "rewards/accuracies": 0.5, "rewards/chosen": -2.091665267944336, "rewards/margins": 0.7460741996765137, "rewards/rejected": -2.8377397060394287, "step": 6474 }, { "epoch": 1.01, "learning_rate": 9.398306966128948e-06, "logits/chosen": -2.803471803665161, "logits/rejected": -3.037195920944214, "logps/chosen": -44.91510772705078, "logps/rejected": -147.23114013671875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.26451385021209717, "rewards/margins": 5.849845886230469, "rewards/rejected": -6.1143598556518555, "step": 6475 }, { "epoch": 1.01, "learning_rate": 9.3975735255978e-06, "logits/chosen": -2.968080759048462, "logits/rejected": -2.5099918842315674, "logps/chosen": -165.27162170410156, "logps/rejected": -171.74420166015625, "loss": 1.2182, "rewards/accuracies": 0.5, "rewards/chosen": -1.779808521270752, "rewards/margins": 2.5290493965148926, "rewards/rejected": -4.308857440948486, "step": 6476 }, { "epoch": 1.01, "learning_rate": 9.396840085066652e-06, "logits/chosen": -1.374260425567627, "logits/rejected": -2.9842705726623535, "logps/chosen": -167.87197875976562, "logps/rejected": -573.3436279296875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.5506348013877869, "rewards/margins": 7.743962287902832, "rewards/rejected": -7.1933274269104, "step": 6477 }, { "epoch": 1.01, "learning_rate": 9.396106644535504e-06, "logits/chosen": -2.216318130493164, "logits/rejected": -3.1154839992523193, "logps/chosen": -425.60101318359375, "logps/rejected": -532.1575927734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.1571242809295654, "rewards/margins": 9.844589233398438, "rewards/rejected": -8.68746566772461, "step": 6478 }, { "epoch": 1.01, "learning_rate": 9.395373204004356e-06, "logits/chosen": -2.8517842292785645, "logits/rejected": -3.2222843170166016, "logps/chosen": -59.26390075683594, "logps/rejected": -289.6597900390625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -0.3155103921890259, "rewards/margins": 4.893994331359863, "rewards/rejected": -5.2095046043396, "step": 6479 }, { "epoch": 1.01, "learning_rate": 9.39463976347321e-06, "logits/chosen": -2.967452049255371, "logits/rejected": -2.4410524368286133, "logps/chosen": -788.0440063476562, "logps/rejected": -460.93365478515625, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -0.6411800980567932, "rewards/margins": 4.9808549880981445, "rewards/rejected": -5.622035026550293, "step": 6480 }, { "epoch": 1.01, "learning_rate": 9.393906322942061e-06, "logits/chosen": -2.9231157302856445, "logits/rejected": -1.6210471391677856, "logps/chosen": -455.8722839355469, "logps/rejected": -268.5141906738281, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.3210298418998718, "rewards/margins": 5.116674423217773, "rewards/rejected": -5.437704563140869, "step": 6481 }, { "epoch": 1.01, "learning_rate": 9.393172882410913e-06, "logits/chosen": -2.67918062210083, "logits/rejected": -3.005709648132324, "logps/chosen": -90.67607116699219, "logps/rejected": -273.9024658203125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": 0.8142738342285156, "rewards/margins": 5.571484565734863, "rewards/rejected": -4.757210731506348, "step": 6482 }, { "epoch": 1.01, "learning_rate": 9.392439441879765e-06, "logits/chosen": -2.1163415908813477, "logits/rejected": -2.935943841934204, "logps/chosen": -116.24922180175781, "logps/rejected": -279.2274169921875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.025600075721740723, "rewards/margins": 5.1533918380737305, "rewards/rejected": -5.178991794586182, "step": 6483 }, { "epoch": 1.01, "learning_rate": 9.391706001348617e-06, "logits/chosen": -2.718547821044922, "logits/rejected": -2.6295745372772217, "logps/chosen": -72.30878448486328, "logps/rejected": -160.04331970214844, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 0.2905588150024414, "rewards/margins": 5.583045959472656, "rewards/rejected": -5.292487144470215, "step": 6484 }, { "epoch": 1.01, "learning_rate": 9.390972560817469e-06, "logits/chosen": -2.4816083908081055, "logits/rejected": -1.2217316627502441, "logps/chosen": -246.74313354492188, "logps/rejected": -117.59539794921875, "loss": 0.0797, "rewards/accuracies": 1.0, "rewards/chosen": -0.17423170804977417, "rewards/margins": 4.362514972686768, "rewards/rejected": -4.536746978759766, "step": 6485 }, { "epoch": 1.01, "learning_rate": 9.39023912028632e-06, "logits/chosen": -2.673034429550171, "logits/rejected": -3.121029853820801, "logps/chosen": -366.373291015625, "logps/rejected": -398.679443359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.5963889360427856, "rewards/margins": 6.748689651489258, "rewards/rejected": -7.345078468322754, "step": 6486 }, { "epoch": 1.01, "learning_rate": 9.389505679755174e-06, "logits/chosen": -3.3498804569244385, "logits/rejected": -3.410324811935425, "logps/chosen": -63.215911865234375, "logps/rejected": -130.03158569335938, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.22065097093582153, "rewards/margins": 6.163349151611328, "rewards/rejected": -6.384000301361084, "step": 6487 }, { "epoch": 1.01, "learning_rate": 9.388772239224026e-06, "logits/chosen": -2.5259249210357666, "logits/rejected": -2.82993221282959, "logps/chosen": -81.49127960205078, "logps/rejected": -188.58026123046875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.32534128427505493, "rewards/margins": 6.455683708190918, "rewards/rejected": -6.13034200668335, "step": 6488 }, { "epoch": 1.01, "learning_rate": 9.38803879869288e-06, "logits/chosen": -1.9418864250183105, "logits/rejected": -2.9442174434661865, "logps/chosen": -46.71437072753906, "logps/rejected": -370.22967529296875, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -0.7631714344024658, "rewards/margins": 7.022189617156982, "rewards/rejected": -7.785361289978027, "step": 6489 }, { "epoch": 1.01, "learning_rate": 9.387305358161732e-06, "logits/chosen": -2.2839653491973877, "logits/rejected": -3.089252471923828, "logps/chosen": -362.7509765625, "logps/rejected": -425.1001281738281, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.2892009913921356, "rewards/margins": 5.771359443664551, "rewards/rejected": -5.482158660888672, "step": 6490 }, { "epoch": 1.01, "learning_rate": 9.386571917630584e-06, "logits/chosen": -2.7871344089508057, "logits/rejected": -1.848841667175293, "logps/chosen": -414.1451721191406, "logps/rejected": -352.88812255859375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": 0.30422210693359375, "rewards/margins": 5.1955718994140625, "rewards/rejected": -4.891349792480469, "step": 6491 }, { "epoch": 1.01, "learning_rate": 9.385838477099435e-06, "logits/chosen": -3.048029661178589, "logits/rejected": -2.807246685028076, "logps/chosen": -77.46803283691406, "logps/rejected": -83.35551452636719, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": 0.14409823715686798, "rewards/margins": 5.223755836486816, "rewards/rejected": -5.079657554626465, "step": 6492 }, { "epoch": 1.01, "learning_rate": 9.385105036568287e-06, "logits/chosen": -0.6853767037391663, "logits/rejected": -2.738424777984619, "logps/chosen": -118.3970718383789, "logps/rejected": -223.9546356201172, "loss": 1.3509, "rewards/accuracies": 0.5, "rewards/chosen": -3.0277974605560303, "rewards/margins": 1.3334360122680664, "rewards/rejected": -4.361233711242676, "step": 6493 }, { "epoch": 1.01, "learning_rate": 9.38437159603714e-06, "logits/chosen": -3.0767173767089844, "logits/rejected": -3.193953514099121, "logps/chosen": -450.2459411621094, "logps/rejected": -409.39892578125, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 0.037442028522491455, "rewards/margins": 4.362881660461426, "rewards/rejected": -4.325439453125, "step": 6494 }, { "epoch": 1.01, "learning_rate": 9.383638155505991e-06, "logits/chosen": -2.5549156665802, "logits/rejected": -2.459672689437866, "logps/chosen": -211.79380798339844, "logps/rejected": -279.12298583984375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3641689419746399, "rewards/margins": 5.336796283721924, "rewards/rejected": -4.97262716293335, "step": 6495 }, { "epoch": 1.01, "learning_rate": 9.382904714974843e-06, "logits/chosen": -2.893475294113159, "logits/rejected": -1.8520735502243042, "logps/chosen": -441.39141845703125, "logps/rejected": -238.6631622314453, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.04004326090216637, "rewards/margins": 5.338974952697754, "rewards/rejected": -5.379018783569336, "step": 6496 }, { "epoch": 1.01, "learning_rate": 9.382171274443695e-06, "logits/chosen": -1.9425946474075317, "logits/rejected": -2.8853344917297363, "logps/chosen": -53.988033294677734, "logps/rejected": -266.53619384765625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5490264892578125, "rewards/margins": 5.270938873291016, "rewards/rejected": -5.819965362548828, "step": 6497 }, { "epoch": 1.01, "learning_rate": 9.381437833912548e-06, "logits/chosen": -2.846309185028076, "logits/rejected": -3.2369561195373535, "logps/chosen": -155.9005126953125, "logps/rejected": -262.339111328125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -0.16523079574108124, "rewards/margins": 5.171846389770508, "rewards/rejected": -5.3370771408081055, "step": 6498 }, { "epoch": 1.01, "learning_rate": 9.3807043933814e-06, "logits/chosen": -2.7520875930786133, "logits/rejected": -2.496502161026001, "logps/chosen": -67.70870208740234, "logps/rejected": -173.71539306640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": 0.38454705476760864, "rewards/margins": 5.608114719390869, "rewards/rejected": -5.223567485809326, "step": 6499 }, { "epoch": 1.01, "learning_rate": 9.379970952850252e-06, "logits/chosen": -2.5222482681274414, "logits/rejected": -2.6987946033477783, "logps/chosen": -109.4291000366211, "logps/rejected": -122.89188385009766, "loss": 0.041, "rewards/accuracies": 1.0, "rewards/chosen": -1.0775206089019775, "rewards/margins": 3.8144266605377197, "rewards/rejected": -4.891947269439697, "step": 6500 }, { "epoch": 1.01, "learning_rate": 9.379237512319104e-06, "logits/chosen": -2.7606163024902344, "logits/rejected": -2.6885006427764893, "logps/chosen": -88.5189208984375, "logps/rejected": -193.522216796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.5525083541870117, "rewards/margins": 7.127553939819336, "rewards/rejected": -6.575045585632324, "step": 6501 }, { "epoch": 1.01, "learning_rate": 9.378504071787956e-06, "logits/chosen": -2.8225057125091553, "logits/rejected": -2.8238675594329834, "logps/chosen": -104.06980895996094, "logps/rejected": -264.2250061035156, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 1.060319185256958, "rewards/margins": 4.612357139587402, "rewards/rejected": -3.5520379543304443, "step": 6502 }, { "epoch": 1.01, "learning_rate": 9.377770631256808e-06, "logits/chosen": -3.023167133331299, "logits/rejected": -1.6847608089447021, "logps/chosen": -272.4935302734375, "logps/rejected": -142.47328186035156, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": 0.4648857116699219, "rewards/margins": 4.14240837097168, "rewards/rejected": -3.677522659301758, "step": 6503 }, { "epoch": 1.01, "learning_rate": 9.37703719072566e-06, "logits/chosen": -2.914351463317871, "logits/rejected": -2.584834098815918, "logps/chosen": -244.89434814453125, "logps/rejected": -178.03964233398438, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3595779836177826, "rewards/margins": 6.9151201248168945, "rewards/rejected": -6.5555419921875, "step": 6504 }, { "epoch": 1.01, "learning_rate": 9.376303750194512e-06, "logits/chosen": -2.5055572986602783, "logits/rejected": -3.264362096786499, "logps/chosen": -72.42201232910156, "logps/rejected": -314.7188720703125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 1.2085599899291992, "rewards/margins": 5.316021919250488, "rewards/rejected": -4.107461452484131, "step": 6505 }, { "epoch": 1.01, "learning_rate": 9.375570309663363e-06, "logits/chosen": -0.6025292873382568, "logits/rejected": -2.6884119510650635, "logps/chosen": -63.105384826660156, "logps/rejected": -422.86322021484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.24429798126220703, "rewards/margins": 7.283032417297363, "rewards/rejected": -7.52733039855957, "step": 6506 }, { "epoch": 1.01, "learning_rate": 9.374836869132217e-06, "logits/chosen": -3.20278000831604, "logits/rejected": -2.2926809787750244, "logps/chosen": -253.97735595703125, "logps/rejected": -145.10806274414062, "loss": 1.1795, "rewards/accuracies": 0.5, "rewards/chosen": -0.7206787467002869, "rewards/margins": 2.0289554595947266, "rewards/rejected": -2.749634027481079, "step": 6507 }, { "epoch": 1.01, "learning_rate": 9.374103428601069e-06, "logits/chosen": -1.5114926099777222, "logits/rejected": -2.5428342819213867, "logps/chosen": -239.71661376953125, "logps/rejected": -277.03631591796875, "loss": 0.2282, "rewards/accuracies": 1.0, "rewards/chosen": -0.29195019602775574, "rewards/margins": 3.4570655822753906, "rewards/rejected": -3.7490158081054688, "step": 6508 }, { "epoch": 1.01, "learning_rate": 9.37336998806992e-06, "logits/chosen": -2.771031618118286, "logits/rejected": -3.1116952896118164, "logps/chosen": -226.66744995117188, "logps/rejected": -337.78399658203125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.24818342924118042, "rewards/margins": 5.456722259521484, "rewards/rejected": -5.208539009094238, "step": 6509 }, { "epoch": 1.01, "learning_rate": 9.372636547538773e-06, "logits/chosen": -3.116668462753296, "logits/rejected": -2.0992088317871094, "logps/chosen": -224.6407012939453, "logps/rejected": -156.99249267578125, "loss": 0.1169, "rewards/accuracies": 1.0, "rewards/chosen": 0.2397758662700653, "rewards/margins": 3.145954132080078, "rewards/rejected": -2.9061784744262695, "step": 6510 }, { "epoch": 1.01, "learning_rate": 9.371903107007625e-06, "logits/chosen": -2.954190969467163, "logits/rejected": -3.0189363956451416, "logps/chosen": -249.43511962890625, "logps/rejected": -404.36474609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.1908545345067978, "rewards/margins": 7.355655670166016, "rewards/rejected": -7.164800643920898, "step": 6511 }, { "epoch": 1.01, "learning_rate": 9.371169666476476e-06, "logits/chosen": -2.2473604679107666, "logits/rejected": -3.1733691692352295, "logps/chosen": -57.43195343017578, "logps/rejected": -330.7915344238281, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 0.41552165150642395, "rewards/margins": 5.725608825683594, "rewards/rejected": -5.310087203979492, "step": 6512 }, { "epoch": 1.01, "learning_rate": 9.370436225945328e-06, "logits/chosen": -2.7266955375671387, "logits/rejected": -1.637585163116455, "logps/chosen": -389.109375, "logps/rejected": -240.82656860351562, "loss": 1.7918, "rewards/accuracies": 0.5, "rewards/chosen": -1.91413414478302, "rewards/margins": 1.0295056104660034, "rewards/rejected": -2.9436399936676025, "step": 6513 }, { "epoch": 1.01, "learning_rate": 9.36970278541418e-06, "logits/chosen": -1.2703174352645874, "logits/rejected": -3.001619577407837, "logps/chosen": -239.82887268066406, "logps/rejected": -707.6893920898438, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.5581817626953125, "rewards/margins": 5.054089546203613, "rewards/rejected": -5.612271308898926, "step": 6514 }, { "epoch": 1.01, "learning_rate": 9.368969344883034e-06, "logits/chosen": -2.997469902038574, "logits/rejected": -2.666343927383423, "logps/chosen": -257.9923400878906, "logps/rejected": -204.5237579345703, "loss": 0.8376, "rewards/accuracies": 0.5, "rewards/chosen": -2.19697642326355, "rewards/margins": 1.7197033166885376, "rewards/rejected": -3.916679859161377, "step": 6515 }, { "epoch": 1.01, "learning_rate": 9.368235904351886e-06, "logits/chosen": -2.761218786239624, "logits/rejected": -1.5025041103363037, "logps/chosen": -271.2209167480469, "logps/rejected": -165.94342041015625, "loss": 0.1447, "rewards/accuracies": 1.0, "rewards/chosen": 1.061649203300476, "rewards/margins": 2.4686667919158936, "rewards/rejected": -1.407017469406128, "step": 6516 }, { "epoch": 1.01, "learning_rate": 9.367502463820737e-06, "logits/chosen": -3.123544454574585, "logits/rejected": -2.451598644256592, "logps/chosen": -268.4739685058594, "logps/rejected": -47.54267120361328, "loss": 1.7674, "rewards/accuracies": 0.0, "rewards/chosen": -2.2128167152404785, "rewards/margins": -1.5801552534103394, "rewards/rejected": -0.6326614022254944, "step": 6517 }, { "epoch": 1.01, "learning_rate": 9.36676902328959e-06, "logits/chosen": -1.8242793083190918, "logits/rejected": -3.023045778274536, "logps/chosen": -40.812713623046875, "logps/rejected": -261.5724792480469, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": 0.7103336453437805, "rewards/margins": 4.698908805847168, "rewards/rejected": -3.988574981689453, "step": 6518 }, { "epoch": 1.01, "learning_rate": 9.366035582758441e-06, "logits/chosen": -2.4716224670410156, "logits/rejected": -2.517277479171753, "logps/chosen": -289.9310302734375, "logps/rejected": -215.48316955566406, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": 0.8581291437149048, "rewards/margins": 6.749052047729492, "rewards/rejected": -5.890922546386719, "step": 6519 }, { "epoch": 1.01, "learning_rate": 9.365302142227293e-06, "logits/chosen": -2.8270437717437744, "logits/rejected": -2.720477819442749, "logps/chosen": -241.0122833251953, "logps/rejected": -290.9190673828125, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8234251737594604, "rewards/margins": 4.630655288696289, "rewards/rejected": -5.454080581665039, "step": 6520 }, { "epoch": 1.01, "learning_rate": 9.364568701696147e-06, "logits/chosen": -2.975414752960205, "logits/rejected": -2.359747886657715, "logps/chosen": -471.5026550292969, "logps/rejected": -355.5003662109375, "loss": 0.4823, "rewards/accuracies": 0.5, "rewards/chosen": -0.34249573945999146, "rewards/margins": 2.0205469131469727, "rewards/rejected": -2.3630425930023193, "step": 6521 }, { "epoch": 1.01, "learning_rate": 9.363835261164999e-06, "logits/chosen": -2.1686863899230957, "logits/rejected": -2.8892300128936768, "logps/chosen": -357.58135986328125, "logps/rejected": -435.3922424316406, "loss": 0.1332, "rewards/accuracies": 1.0, "rewards/chosen": -0.47602006793022156, "rewards/margins": 2.7070131301879883, "rewards/rejected": -3.1830332279205322, "step": 6522 }, { "epoch": 1.01, "learning_rate": 9.36310182063385e-06, "logits/chosen": -2.3787193298339844, "logits/rejected": -2.9007418155670166, "logps/chosen": -494.9170837402344, "logps/rejected": -580.00634765625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.1430305540561676, "rewards/margins": 6.261165618896484, "rewards/rejected": -6.11813497543335, "step": 6523 }, { "epoch": 1.01, "learning_rate": 9.362368380102704e-06, "logits/chosen": -2.4591972827911377, "logits/rejected": -3.0632448196411133, "logps/chosen": -53.53263854980469, "logps/rejected": -239.5004119873047, "loss": 0.0666, "rewards/accuracies": 1.0, "rewards/chosen": -0.03858861327171326, "rewards/margins": 3.5755417346954346, "rewards/rejected": -3.6141304969787598, "step": 6524 }, { "epoch": 1.01, "learning_rate": 9.361634939571556e-06, "logits/chosen": -2.3471832275390625, "logits/rejected": -3.1029796600341797, "logps/chosen": -359.30023193359375, "logps/rejected": -488.962158203125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.8188425302505493, "rewards/margins": 5.779685020446777, "rewards/rejected": -6.598527431488037, "step": 6525 }, { "epoch": 1.01, "learning_rate": 9.360901499040408e-06, "logits/chosen": -2.9018046855926514, "logits/rejected": -2.8601105213165283, "logps/chosen": -109.24446105957031, "logps/rejected": -139.5761260986328, "loss": 0.8662, "rewards/accuracies": 0.5, "rewards/chosen": -2.7335987091064453, "rewards/margins": 0.5492331981658936, "rewards/rejected": -3.2828316688537598, "step": 6526 }, { "epoch": 1.02, "learning_rate": 9.36016805850926e-06, "logits/chosen": -1.4975359439849854, "logits/rejected": -2.524446964263916, "logps/chosen": -177.50648498535156, "logps/rejected": -329.8751525878906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.034498587250709534, "rewards/margins": 6.485095977783203, "rewards/rejected": -6.450597763061523, "step": 6527 }, { "epoch": 1.02, "learning_rate": 9.359434617978112e-06, "logits/chosen": -2.631171941757202, "logits/rejected": -2.936615228652954, "logps/chosen": -646.4609985351562, "logps/rejected": -602.0443115234375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": 0.6729942560195923, "rewards/margins": 5.447251319885254, "rewards/rejected": -4.774256706237793, "step": 6528 }, { "epoch": 1.02, "learning_rate": 9.358701177446963e-06, "logits/chosen": -2.8983683586120605, "logits/rejected": -2.714585304260254, "logps/chosen": -150.96029663085938, "logps/rejected": -215.06655883789062, "loss": 0.1445, "rewards/accuracies": 1.0, "rewards/chosen": -1.6692681312561035, "rewards/margins": 3.207594394683838, "rewards/rejected": -4.876862525939941, "step": 6529 }, { "epoch": 1.02, "learning_rate": 9.357967736915815e-06, "logits/chosen": -1.9185950756072998, "logits/rejected": -2.900085210800171, "logps/chosen": -40.73096466064453, "logps/rejected": -203.78443908691406, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": -2.3890511989593506, "rewards/margins": 3.1271815299987793, "rewards/rejected": -5.516232490539551, "step": 6530 }, { "epoch": 1.02, "learning_rate": 9.357234296384667e-06, "logits/chosen": -2.002502202987671, "logits/rejected": -2.750180244445801, "logps/chosen": -75.60542297363281, "logps/rejected": -236.9228057861328, "loss": 0.0957, "rewards/accuracies": 1.0, "rewards/chosen": -0.9714661836624146, "rewards/margins": 5.284121513366699, "rewards/rejected": -6.255587577819824, "step": 6531 }, { "epoch": 1.02, "learning_rate": 9.356500855853519e-06, "logits/chosen": -2.4772348403930664, "logits/rejected": -2.895127058029175, "logps/chosen": -129.4181671142578, "logps/rejected": -237.93919372558594, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 0.4536973834037781, "rewards/margins": 6.437206745147705, "rewards/rejected": -5.983509540557861, "step": 6532 }, { "epoch": 1.02, "learning_rate": 9.355767415322373e-06, "logits/chosen": -1.7291994094848633, "logits/rejected": -3.033050537109375, "logps/chosen": -40.576210021972656, "logps/rejected": -273.44287109375, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.1338812112808228, "rewards/margins": 5.017230033874512, "rewards/rejected": -6.151111602783203, "step": 6533 }, { "epoch": 1.02, "learning_rate": 9.355033974791224e-06, "logits/chosen": -2.8350675106048584, "logits/rejected": -2.1053621768951416, "logps/chosen": -296.8939514160156, "logps/rejected": -158.87515258789062, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": 0.7152011394500732, "rewards/margins": 4.025864601135254, "rewards/rejected": -3.3106632232666016, "step": 6534 }, { "epoch": 1.02, "learning_rate": 9.354300534260076e-06, "logits/chosen": -1.813612937927246, "logits/rejected": -2.8546504974365234, "logps/chosen": -106.89117431640625, "logps/rejected": -322.1214904785156, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.5552704930305481, "rewards/margins": 5.591684341430664, "rewards/rejected": -6.1469550132751465, "step": 6535 }, { "epoch": 1.02, "learning_rate": 9.353567093728928e-06, "logits/chosen": -1.8540711402893066, "logits/rejected": -2.7363245487213135, "logps/chosen": -146.59165954589844, "logps/rejected": -374.3111267089844, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -0.3057411313056946, "rewards/margins": 5.439023017883301, "rewards/rejected": -5.74476432800293, "step": 6536 }, { "epoch": 1.02, "learning_rate": 9.35283365319778e-06, "logits/chosen": -3.1252777576446533, "logits/rejected": -2.498914957046509, "logps/chosen": -575.622802734375, "logps/rejected": -307.42681884765625, "loss": 0.1579, "rewards/accuracies": 1.0, "rewards/chosen": -0.3392315208911896, "rewards/margins": 4.937223434448242, "rewards/rejected": -5.276454925537109, "step": 6537 }, { "epoch": 1.02, "learning_rate": 9.352100212666632e-06, "logits/chosen": -2.904470205307007, "logits/rejected": -2.319338798522949, "logps/chosen": -198.83937072753906, "logps/rejected": -185.34017944335938, "loss": 0.3363, "rewards/accuracies": 1.0, "rewards/chosen": -2.3994085788726807, "rewards/margins": 1.3118442296981812, "rewards/rejected": -3.7112526893615723, "step": 6538 }, { "epoch": 1.02, "learning_rate": 9.351366772135484e-06, "logits/chosen": -3.127434015274048, "logits/rejected": -3.049180269241333, "logps/chosen": -530.8932495117188, "logps/rejected": -424.02093505859375, "loss": 0.1521, "rewards/accuracies": 1.0, "rewards/chosen": -1.5116932392120361, "rewards/margins": 2.968379020690918, "rewards/rejected": -4.480072021484375, "step": 6539 }, { "epoch": 1.02, "learning_rate": 9.350633331604336e-06, "logits/chosen": -2.8680522441864014, "logits/rejected": -3.0512139797210693, "logps/chosen": -173.00570678710938, "logps/rejected": -289.33526611328125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.4333641231060028, "rewards/margins": 5.848516464233398, "rewards/rejected": -5.415152072906494, "step": 6540 }, { "epoch": 1.02, "learning_rate": 9.349899891073188e-06, "logits/chosen": -1.9249329566955566, "logits/rejected": -3.135416030883789, "logps/chosen": -75.75664520263672, "logps/rejected": -376.45111083984375, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -3.2647581100463867, "rewards/margins": 3.463963270187378, "rewards/rejected": -6.728721618652344, "step": 6541 }, { "epoch": 1.02, "learning_rate": 9.349166450542041e-06, "logits/chosen": -3.286708116531372, "logits/rejected": -3.248278856277466, "logps/chosen": -346.4733581542969, "logps/rejected": -467.2213439941406, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -1.336777687072754, "rewards/margins": 3.234563112258911, "rewards/rejected": -4.571341037750244, "step": 6542 }, { "epoch": 1.02, "learning_rate": 9.348433010010893e-06, "logits/chosen": -2.6858623027801514, "logits/rejected": -2.9051973819732666, "logps/chosen": -45.41156005859375, "logps/rejected": -171.090087890625, "loss": 0.4655, "rewards/accuracies": 0.5, "rewards/chosen": -2.9451370239257812, "rewards/margins": 3.170044183731079, "rewards/rejected": -6.115180969238281, "step": 6543 }, { "epoch": 1.02, "learning_rate": 9.347699569479745e-06, "logits/chosen": -2.913595676422119, "logits/rejected": -2.915250301361084, "logps/chosen": -151.6724853515625, "logps/rejected": -328.4832763671875, "loss": 0.2941, "rewards/accuracies": 1.0, "rewards/chosen": -1.4131240844726562, "rewards/margins": 4.275802135467529, "rewards/rejected": -5.6889262199401855, "step": 6544 }, { "epoch": 1.02, "learning_rate": 9.346966128948597e-06, "logits/chosen": -3.286451578140259, "logits/rejected": -3.3730885982513428, "logps/chosen": -63.71348571777344, "logps/rejected": -130.49534606933594, "loss": 2.5215, "rewards/accuracies": 0.5, "rewards/chosen": -2.8879153728485107, "rewards/margins": 0.36905336380004883, "rewards/rejected": -3.2569687366485596, "step": 6545 }, { "epoch": 1.02, "learning_rate": 9.346232688417449e-06, "logits/chosen": -2.7615103721618652, "logits/rejected": -3.014575242996216, "logps/chosen": -55.01437759399414, "logps/rejected": -124.02198791503906, "loss": 1.4369, "rewards/accuracies": 0.5, "rewards/chosen": -3.257556438446045, "rewards/margins": 2.178290367126465, "rewards/rejected": -5.43584680557251, "step": 6546 }, { "epoch": 1.02, "learning_rate": 9.3454992478863e-06, "logits/chosen": -2.964188814163208, "logits/rejected": -1.7911632061004639, "logps/chosen": -367.2317810058594, "logps/rejected": -267.74847412109375, "loss": 0.1312, "rewards/accuracies": 1.0, "rewards/chosen": -0.39898794889450073, "rewards/margins": 5.990176677703857, "rewards/rejected": -6.389164447784424, "step": 6547 }, { "epoch": 1.02, "learning_rate": 9.344765807355152e-06, "logits/chosen": -2.8449926376342773, "logits/rejected": -2.2393009662628174, "logps/chosen": -111.47004699707031, "logps/rejected": -144.03988647460938, "loss": 0.0521, "rewards/accuracies": 1.0, "rewards/chosen": -0.5806743502616882, "rewards/margins": 4.577581405639648, "rewards/rejected": -5.158255577087402, "step": 6548 }, { "epoch": 1.02, "learning_rate": 9.344032366824004e-06, "logits/chosen": -2.9288156032562256, "logits/rejected": -3.074549674987793, "logps/chosen": -66.16162109375, "logps/rejected": -206.84933471679688, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.18388310074806213, "rewards/margins": 5.025147914886475, "rewards/rejected": -5.209031105041504, "step": 6549 }, { "epoch": 1.02, "learning_rate": 9.343298926292856e-06, "logits/chosen": -2.7528250217437744, "logits/rejected": -2.238584518432617, "logps/chosen": -302.2441711425781, "logps/rejected": -241.39035034179688, "loss": 1.7738, "rewards/accuracies": 0.5, "rewards/chosen": -2.249467611312866, "rewards/margins": 2.105854034423828, "rewards/rejected": -4.355321884155273, "step": 6550 }, { "epoch": 1.02, "learning_rate": 9.34256548576171e-06, "logits/chosen": -2.9346187114715576, "logits/rejected": -2.8445379734039307, "logps/chosen": -377.93841552734375, "logps/rejected": -348.6746826171875, "loss": 2.4882, "rewards/accuracies": 0.5, "rewards/chosen": -3.40915846824646, "rewards/margins": 0.8319840431213379, "rewards/rejected": -4.241142272949219, "step": 6551 }, { "epoch": 1.02, "learning_rate": 9.341832045230562e-06, "logits/chosen": -2.45741868019104, "logits/rejected": -2.844456672668457, "logps/chosen": -58.77996063232422, "logps/rejected": -216.75657653808594, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.8306264877319336, "rewards/margins": 6.217665672302246, "rewards/rejected": -7.04829216003418, "step": 6552 }, { "epoch": 1.02, "learning_rate": 9.341098604699414e-06, "logits/chosen": -2.1850998401641846, "logits/rejected": -2.863132953643799, "logps/chosen": -122.15194702148438, "logps/rejected": -173.9501953125, "loss": 0.24, "rewards/accuracies": 1.0, "rewards/chosen": 0.6261608004570007, "rewards/margins": 2.6338863372802734, "rewards/rejected": -2.007725477218628, "step": 6553 }, { "epoch": 1.02, "learning_rate": 9.340365164168265e-06, "logits/chosen": -2.3770596981048584, "logits/rejected": -3.118271827697754, "logps/chosen": -88.94808959960938, "logps/rejected": -314.89111328125, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/chosen": -0.6185007095336914, "rewards/margins": 4.2196807861328125, "rewards/rejected": -4.838181495666504, "step": 6554 }, { "epoch": 1.02, "learning_rate": 9.339631723637119e-06, "logits/chosen": -2.865870475769043, "logits/rejected": -2.8950018882751465, "logps/chosen": -141.3245849609375, "logps/rejected": -225.23077392578125, "loss": 0.4414, "rewards/accuracies": 0.5, "rewards/chosen": -0.9683006405830383, "rewards/margins": 2.796513319015503, "rewards/rejected": -3.7648138999938965, "step": 6555 }, { "epoch": 1.02, "learning_rate": 9.338898283105971e-06, "logits/chosen": -2.640462636947632, "logits/rejected": -2.1644837856292725, "logps/chosen": -235.80984497070312, "logps/rejected": -311.50360107421875, "loss": 0.5201, "rewards/accuracies": 0.5, "rewards/chosen": -0.03928680717945099, "rewards/margins": 4.029549598693848, "rewards/rejected": -4.068836212158203, "step": 6556 }, { "epoch": 1.02, "learning_rate": 9.338164842574823e-06, "logits/chosen": -2.489314556121826, "logits/rejected": -1.1073710918426514, "logps/chosen": -350.97283935546875, "logps/rejected": -268.2496337890625, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": 0.12421111762523651, "rewards/margins": 4.254262924194336, "rewards/rejected": -4.130051612854004, "step": 6557 }, { "epoch": 1.02, "learning_rate": 9.337431402043675e-06, "logits/chosen": -2.3713371753692627, "logits/rejected": -2.842811107635498, "logps/chosen": -602.951171875, "logps/rejected": -440.7630310058594, "loss": 0.7278, "rewards/accuracies": 0.5, "rewards/chosen": -2.121800184249878, "rewards/margins": 1.705095648765564, "rewards/rejected": -3.8268959522247314, "step": 6558 }, { "epoch": 1.02, "learning_rate": 9.336697961512527e-06, "logits/chosen": -2.44201397895813, "logits/rejected": -2.547762870788574, "logps/chosen": -122.49461364746094, "logps/rejected": -129.57383728027344, "loss": 1.9106, "rewards/accuracies": 0.5, "rewards/chosen": -1.914036512374878, "rewards/margins": 0.5596346855163574, "rewards/rejected": -2.4736711978912354, "step": 6559 }, { "epoch": 1.02, "learning_rate": 9.33596452098138e-06, "logits/chosen": -2.689608335494995, "logits/rejected": -1.7703174352645874, "logps/chosen": -262.0298767089844, "logps/rejected": -228.24334716796875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": 0.7155567407608032, "rewards/margins": 4.774812698364258, "rewards/rejected": -4.059256076812744, "step": 6560 }, { "epoch": 1.02, "learning_rate": 9.335231080450232e-06, "logits/chosen": -1.9397341012954712, "logits/rejected": -2.7287731170654297, "logps/chosen": -82.57516479492188, "logps/rejected": -307.52838134765625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": 0.19017449021339417, "rewards/margins": 6.230595588684082, "rewards/rejected": -6.0404205322265625, "step": 6561 }, { "epoch": 1.02, "learning_rate": 9.334497639919084e-06, "logits/chosen": -2.890714406967163, "logits/rejected": -2.277068853378296, "logps/chosen": -148.708984375, "logps/rejected": -227.19503784179688, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.2808364927768707, "rewards/margins": 5.788910865783691, "rewards/rejected": -5.5080742835998535, "step": 6562 }, { "epoch": 1.02, "learning_rate": 9.333764199387936e-06, "logits/chosen": -2.873317003250122, "logits/rejected": -2.3959951400756836, "logps/chosen": -125.42913055419922, "logps/rejected": -121.15125274658203, "loss": 0.748, "rewards/accuracies": 0.5, "rewards/chosen": -1.686292290687561, "rewards/margins": 1.2451993227005005, "rewards/rejected": -2.9314916133880615, "step": 6563 }, { "epoch": 1.02, "learning_rate": 9.333030758856788e-06, "logits/chosen": -2.164860486984253, "logits/rejected": -2.765953302383423, "logps/chosen": -95.45655822753906, "logps/rejected": -329.79168701171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.11424046009778976, "rewards/margins": 6.3828630447387695, "rewards/rejected": -6.497103214263916, "step": 6564 }, { "epoch": 1.02, "learning_rate": 9.33229731832564e-06, "logits/chosen": -2.106998920440674, "logits/rejected": -2.9049644470214844, "logps/chosen": -162.73434448242188, "logps/rejected": -380.964111328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.012388229370117188, "rewards/margins": 6.822111129760742, "rewards/rejected": -6.809722900390625, "step": 6565 }, { "epoch": 1.02, "learning_rate": 9.331563877794491e-06, "logits/chosen": -2.318696975708008, "logits/rejected": -2.6519060134887695, "logps/chosen": -83.88402557373047, "logps/rejected": -237.7226104736328, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -0.23824608325958252, "rewards/margins": 4.229130744934082, "rewards/rejected": -4.467376708984375, "step": 6566 }, { "epoch": 1.02, "learning_rate": 9.330830437263343e-06, "logits/chosen": -2.747673511505127, "logits/rejected": -2.220700979232788, "logps/chosen": -97.90858459472656, "logps/rejected": -207.18807983398438, "loss": 0.3157, "rewards/accuracies": 1.0, "rewards/chosen": -1.7347235679626465, "rewards/margins": 4.425153732299805, "rewards/rejected": -6.159877300262451, "step": 6567 }, { "epoch": 1.02, "learning_rate": 9.330096996732195e-06, "logits/chosen": -2.316864490509033, "logits/rejected": -2.5658679008483887, "logps/chosen": -321.53515625, "logps/rejected": -297.567138671875, "loss": 0.7149, "rewards/accuracies": 0.5, "rewards/chosen": -1.0499496459960938, "rewards/margins": 2.8612184524536133, "rewards/rejected": -3.911167860031128, "step": 6568 }, { "epoch": 1.02, "learning_rate": 9.329363556201049e-06, "logits/chosen": -2.3846142292022705, "logits/rejected": -2.9730582237243652, "logps/chosen": -92.13426208496094, "logps/rejected": -285.4039001464844, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 0.42704200744628906, "rewards/margins": 5.413292407989502, "rewards/rejected": -4.986250400543213, "step": 6569 }, { "epoch": 1.02, "learning_rate": 9.3286301156699e-06, "logits/chosen": -1.9479987621307373, "logits/rejected": -3.317762851715088, "logps/chosen": -249.0779266357422, "logps/rejected": -500.5658264160156, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": 0.33066827058792114, "rewards/margins": 4.462214469909668, "rewards/rejected": -4.1315460205078125, "step": 6570 }, { "epoch": 1.02, "learning_rate": 9.327896675138752e-06, "logits/chosen": -2.8667125701904297, "logits/rejected": -2.9179959297180176, "logps/chosen": -25.227163314819336, "logps/rejected": -129.5423583984375, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -0.9502974152565002, "rewards/margins": 3.9352290630340576, "rewards/rejected": -4.885526657104492, "step": 6571 }, { "epoch": 1.02, "learning_rate": 9.327163234607604e-06, "logits/chosen": -3.097785711288452, "logits/rejected": -1.9818586111068726, "logps/chosen": -356.128173828125, "logps/rejected": -135.4677734375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": 0.5804443359375, "rewards/margins": 6.343472480773926, "rewards/rejected": -5.763028144836426, "step": 6572 }, { "epoch": 1.02, "learning_rate": 9.326429794076456e-06, "logits/chosen": -2.8976235389709473, "logits/rejected": -2.8539462089538574, "logps/chosen": -77.8357925415039, "logps/rejected": -161.46185302734375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.1109251081943512, "rewards/margins": 6.543486595153809, "rewards/rejected": -6.432561874389648, "step": 6573 }, { "epoch": 1.02, "learning_rate": 9.325696353545308e-06, "logits/chosen": -2.8966426849365234, "logits/rejected": -2.704549789428711, "logps/chosen": -120.624267578125, "logps/rejected": -33.47077560424805, "loss": 2.5192, "rewards/accuracies": 0.5, "rewards/chosen": -3.3714897632598877, "rewards/margins": -1.6052775382995605, "rewards/rejected": -1.7662121057510376, "step": 6574 }, { "epoch": 1.02, "learning_rate": 9.32496291301416e-06, "logits/chosen": -1.499539852142334, "logits/rejected": -2.8967320919036865, "logps/chosen": -140.65309143066406, "logps/rejected": -458.58489990234375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": 0.6911171078681946, "rewards/margins": 5.344033241271973, "rewards/rejected": -4.652915954589844, "step": 6575 }, { "epoch": 1.02, "learning_rate": 9.324229472483012e-06, "logits/chosen": -2.533486843109131, "logits/rejected": -2.884047746658325, "logps/chosen": -457.8139953613281, "logps/rejected": -501.092529296875, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -0.4097587466239929, "rewards/margins": 4.6112470626831055, "rewards/rejected": -5.021005630493164, "step": 6576 }, { "epoch": 1.02, "learning_rate": 9.323496031951864e-06, "logits/chosen": -2.2926557064056396, "logits/rejected": -2.952312707901001, "logps/chosen": -264.166015625, "logps/rejected": -368.33978271484375, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -0.32944488525390625, "rewards/margins": 4.5570878982543945, "rewards/rejected": -4.886532783508301, "step": 6577 }, { "epoch": 1.02, "learning_rate": 9.322762591420717e-06, "logits/chosen": -2.9532101154327393, "logits/rejected": -3.2203176021575928, "logps/chosen": -84.89906311035156, "logps/rejected": -228.66358947753906, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": 0.44348031282424927, "rewards/margins": 7.0314555168151855, "rewards/rejected": -6.587975025177002, "step": 6578 }, { "epoch": 1.02, "learning_rate": 9.32202915088957e-06, "logits/chosen": -3.2574360370635986, "logits/rejected": -3.183833122253418, "logps/chosen": -236.760986328125, "logps/rejected": -228.59536743164062, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/chosen": 0.41784679889678955, "rewards/margins": 4.6343231201171875, "rewards/rejected": -4.2164764404296875, "step": 6579 }, { "epoch": 1.02, "learning_rate": 9.321295710358421e-06, "logits/chosen": -2.7068090438842773, "logits/rejected": -3.1477572917938232, "logps/chosen": -374.6852111816406, "logps/rejected": -628.3876342773438, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.48312908411026, "rewards/margins": 6.9809064865112305, "rewards/rejected": -7.4640350341796875, "step": 6580 }, { "epoch": 1.02, "learning_rate": 9.320562269827273e-06, "logits/chosen": -2.937833070755005, "logits/rejected": -2.907465934753418, "logps/chosen": -107.65577697753906, "logps/rejected": -222.57528686523438, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.1829644441604614, "rewards/margins": 5.114811897277832, "rewards/rejected": -6.297776222229004, "step": 6581 }, { "epoch": 1.02, "learning_rate": 9.319828829296125e-06, "logits/chosen": -1.2022157907485962, "logits/rejected": -2.7837743759155273, "logps/chosen": -93.10389709472656, "logps/rejected": -361.580810546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.1282401978969574, "rewards/margins": 6.869312286376953, "rewards/rejected": -6.741072177886963, "step": 6582 }, { "epoch": 1.02, "learning_rate": 9.319095388764977e-06, "logits/chosen": -2.701453447341919, "logits/rejected": -3.1313529014587402, "logps/chosen": -75.71363830566406, "logps/rejected": -201.9156494140625, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -0.2085496038198471, "rewards/margins": 4.151867389678955, "rewards/rejected": -4.360417366027832, "step": 6583 }, { "epoch": 1.02, "learning_rate": 9.318361948233829e-06, "logits/chosen": -1.9804645776748657, "logits/rejected": -2.8216934204101562, "logps/chosen": -247.2578887939453, "logps/rejected": -345.37835693359375, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": 0.6623215675354004, "rewards/margins": 4.896922588348389, "rewards/rejected": -4.234601020812988, "step": 6584 }, { "epoch": 1.02, "learning_rate": 9.31762850770268e-06, "logits/chosen": -2.766497850418091, "logits/rejected": -1.8836150169372559, "logps/chosen": -113.99955749511719, "logps/rejected": -125.09896850585938, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": 0.2788354754447937, "rewards/margins": 2.8951361179351807, "rewards/rejected": -2.616300582885742, "step": 6585 }, { "epoch": 1.02, "learning_rate": 9.316895067171532e-06, "logits/chosen": -2.990713119506836, "logits/rejected": -3.1515324115753174, "logps/chosen": -409.05413818359375, "logps/rejected": -446.8865966796875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -0.6425089240074158, "rewards/margins": 5.5163893699646, "rewards/rejected": -6.15889835357666, "step": 6586 }, { "epoch": 1.02, "learning_rate": 9.316161626640386e-06, "logits/chosen": -1.7660924196243286, "logits/rejected": -2.8679370880126953, "logps/chosen": -94.15818786621094, "logps/rejected": -189.95394897460938, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.9553054571151733, "rewards/margins": 5.366162300109863, "rewards/rejected": -4.4108567237854, "step": 6587 }, { "epoch": 1.02, "learning_rate": 9.315428186109238e-06, "logits/chosen": -3.3654232025146484, "logits/rejected": -2.515916109085083, "logps/chosen": -309.01177978515625, "logps/rejected": -74.86686706542969, "loss": 0.129, "rewards/accuracies": 1.0, "rewards/chosen": 0.3947429656982422, "rewards/margins": 4.305153846740723, "rewards/rejected": -3.9104106426239014, "step": 6588 }, { "epoch": 1.02, "learning_rate": 9.314694745578091e-06, "logits/chosen": -1.5490591526031494, "logits/rejected": -2.6081080436706543, "logps/chosen": -172.832763671875, "logps/rejected": -428.4647216796875, "loss": 0.2927, "rewards/accuracies": 1.0, "rewards/chosen": -2.7372283935546875, "rewards/margins": 3.1700310707092285, "rewards/rejected": -5.907259464263916, "step": 6589 }, { "epoch": 1.02, "learning_rate": 9.313961305046943e-06, "logits/chosen": -1.5405876636505127, "logits/rejected": -3.09765362739563, "logps/chosen": -59.519596099853516, "logps/rejected": -330.0615234375, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.29044267535209656, "rewards/margins": 3.708388566970825, "rewards/rejected": -3.4179458618164062, "step": 6590 }, { "epoch": 1.03, "learning_rate": 9.313227864515795e-06, "logits/chosen": -2.9625275135040283, "logits/rejected": -2.472374439239502, "logps/chosen": -420.23883056640625, "logps/rejected": -164.39476013183594, "loss": 0.6427, "rewards/accuracies": 0.5, "rewards/chosen": -1.6843719482421875, "rewards/margins": 1.4602882862091064, "rewards/rejected": -3.144660234451294, "step": 6591 }, { "epoch": 1.03, "learning_rate": 9.312494423984647e-06, "logits/chosen": -2.8356449604034424, "logits/rejected": -3.030402660369873, "logps/chosen": -83.70184326171875, "logps/rejected": -123.30133056640625, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -1.1361987590789795, "rewards/margins": 4.394708633422852, "rewards/rejected": -5.53090763092041, "step": 6592 }, { "epoch": 1.03, "learning_rate": 9.311760983453499e-06, "logits/chosen": -2.955181360244751, "logits/rejected": -2.6524393558502197, "logps/chosen": -309.9793701171875, "logps/rejected": -161.12042236328125, "loss": 0.07, "rewards/accuracies": 1.0, "rewards/chosen": 1.526824951171875, "rewards/margins": 4.874969005584717, "rewards/rejected": -3.3481438159942627, "step": 6593 }, { "epoch": 1.03, "learning_rate": 9.31102754292235e-06, "logits/chosen": -2.4431991577148438, "logits/rejected": -2.7971880435943604, "logps/chosen": -185.08462524414062, "logps/rejected": -311.98406982421875, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.8681972622871399, "rewards/margins": 4.988173961639404, "rewards/rejected": -5.8563714027404785, "step": 6594 }, { "epoch": 1.03, "learning_rate": 9.310294102391203e-06, "logits/chosen": -2.9793472290039062, "logits/rejected": -2.814600944519043, "logps/chosen": -458.5350341796875, "logps/rejected": -391.55767822265625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.45699846744537354, "rewards/margins": 5.984529495239258, "rewards/rejected": -5.527531623840332, "step": 6595 }, { "epoch": 1.03, "learning_rate": 9.309560661860056e-06, "logits/chosen": -2.7951629161834717, "logits/rejected": -2.1807689666748047, "logps/chosen": -442.7389831542969, "logps/rejected": -440.17291259765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.5452133417129517, "rewards/margins": 7.780046463012695, "rewards/rejected": -6.234832763671875, "step": 6596 }, { "epoch": 1.03, "learning_rate": 9.308827221328908e-06, "logits/chosen": -2.2502429485321045, "logits/rejected": -2.456747055053711, "logps/chosen": -771.7080688476562, "logps/rejected": -800.7744140625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.6274307370185852, "rewards/margins": 7.959153652191162, "rewards/rejected": -8.586584091186523, "step": 6597 }, { "epoch": 1.03, "learning_rate": 9.30809378079776e-06, "logits/chosen": -2.363710641860962, "logits/rejected": -2.9849812984466553, "logps/chosen": -127.89288330078125, "logps/rejected": -299.1376953125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": 0.7968825101852417, "rewards/margins": 5.930349349975586, "rewards/rejected": -5.133467197418213, "step": 6598 }, { "epoch": 1.03, "learning_rate": 9.307360340266612e-06, "logits/chosen": -2.900496244430542, "logits/rejected": -2.83866548538208, "logps/chosen": -484.05548095703125, "logps/rejected": -326.708984375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.06112021952867508, "rewards/margins": 5.445993423461914, "rewards/rejected": -5.507113456726074, "step": 6599 }, { "epoch": 1.03, "learning_rate": 9.306626899735464e-06, "logits/chosen": -0.9947602152824402, "logits/rejected": -2.72664737701416, "logps/chosen": -26.56601333618164, "logps/rejected": -226.3614501953125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -0.13587552309036255, "rewards/margins": 4.930380821228027, "rewards/rejected": -5.066256523132324, "step": 6600 }, { "epoch": 1.03, "learning_rate": 9.305893459204316e-06, "logits/chosen": -2.8933866024017334, "logits/rejected": -2.3869235515594482, "logps/chosen": -689.4097900390625, "logps/rejected": -483.3641357421875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0616683959960938, "rewards/margins": 5.965130805969238, "rewards/rejected": -7.026799201965332, "step": 6601 }, { "epoch": 1.03, "learning_rate": 9.305160018673167e-06, "logits/chosen": -2.5231051445007324, "logits/rejected": -2.0667386054992676, "logps/chosen": -193.956298828125, "logps/rejected": -304.1320495605469, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.32327574491500854, "rewards/margins": 4.8745436668396, "rewards/rejected": -5.197819709777832, "step": 6602 }, { "epoch": 1.03, "learning_rate": 9.30442657814202e-06, "logits/chosen": -2.8144755363464355, "logits/rejected": -2.55564022064209, "logps/chosen": -730.0656127929688, "logps/rejected": -556.0963134765625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": 1.0237396955490112, "rewards/margins": 5.476132392883301, "rewards/rejected": -4.452392578125, "step": 6603 }, { "epoch": 1.03, "learning_rate": 9.303693137610871e-06, "logits/chosen": -2.983368396759033, "logits/rejected": -2.5543429851531982, "logps/chosen": -157.68350219726562, "logps/rejected": -199.49984741210938, "loss": 0.322, "rewards/accuracies": 1.0, "rewards/chosen": -1.04778254032135, "rewards/margins": 3.5511691570281982, "rewards/rejected": -4.598951816558838, "step": 6604 }, { "epoch": 1.03, "learning_rate": 9.302959697079725e-06, "logits/chosen": -2.3860692977905273, "logits/rejected": -2.608761787414551, "logps/chosen": -210.09024047851562, "logps/rejected": -347.85284423828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 1.2202500104904175, "rewards/margins": 8.289971351623535, "rewards/rejected": -7.0697221755981445, "step": 6605 }, { "epoch": 1.03, "learning_rate": 9.302226256548577e-06, "logits/chosen": -1.1740145683288574, "logits/rejected": -2.8655741214752197, "logps/chosen": -41.31052780151367, "logps/rejected": -241.548828125, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -0.5974389910697937, "rewards/margins": 3.6786954402923584, "rewards/rejected": -4.276134490966797, "step": 6606 }, { "epoch": 1.03, "learning_rate": 9.301492816017429e-06, "logits/chosen": -2.600968599319458, "logits/rejected": -2.8171262741088867, "logps/chosen": -591.3073120117188, "logps/rejected": -512.4408569335938, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 1.2713249921798706, "rewards/margins": 7.677365303039551, "rewards/rejected": -6.406040191650391, "step": 6607 }, { "epoch": 1.03, "learning_rate": 9.30075937548628e-06, "logits/chosen": -2.7810964584350586, "logits/rejected": -2.1409313678741455, "logps/chosen": -176.4679412841797, "logps/rejected": -315.59124755859375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": 1.2471928596496582, "rewards/margins": 5.892575740814209, "rewards/rejected": -4.645382881164551, "step": 6608 }, { "epoch": 1.03, "learning_rate": 9.300025934955132e-06, "logits/chosen": -1.9422857761383057, "logits/rejected": -3.0886902809143066, "logps/chosen": -245.47813415527344, "logps/rejected": -268.6277160644531, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": 0.5586814880371094, "rewards/margins": 4.9926862716674805, "rewards/rejected": -4.434004783630371, "step": 6609 }, { "epoch": 1.03, "learning_rate": 9.299292494423984e-06, "logits/chosen": -2.7105305194854736, "logits/rejected": -2.9342610836029053, "logps/chosen": -396.5755310058594, "logps/rejected": -469.388427734375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.30634254217147827, "rewards/margins": 4.8751630783081055, "rewards/rejected": -5.181506156921387, "step": 6610 }, { "epoch": 1.03, "learning_rate": 9.298559053892836e-06, "logits/chosen": -1.9409759044647217, "logits/rejected": -3.1072587966918945, "logps/chosen": -99.4610366821289, "logps/rejected": -328.9677734375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.20749303698539734, "rewards/margins": 6.960687637329102, "rewards/rejected": -7.1681809425354, "step": 6611 }, { "epoch": 1.03, "learning_rate": 9.297825613361688e-06, "logits/chosen": -2.5182459354400635, "logits/rejected": -2.661917209625244, "logps/chosen": -95.43695831298828, "logps/rejected": -317.784423828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.8518047332763672, "rewards/margins": 7.784609794616699, "rewards/rejected": -6.932805061340332, "step": 6612 }, { "epoch": 1.03, "learning_rate": 9.297092172830542e-06, "logits/chosen": -2.888461112976074, "logits/rejected": -2.4984219074249268, "logps/chosen": -169.7793426513672, "logps/rejected": -165.5293731689453, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 0.6442798972129822, "rewards/margins": 4.134739398956299, "rewards/rejected": -3.490459442138672, "step": 6613 }, { "epoch": 1.03, "learning_rate": 9.296358732299393e-06, "logits/chosen": -2.710537910461426, "logits/rejected": -2.7974367141723633, "logps/chosen": -220.8245849609375, "logps/rejected": -181.84994506835938, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": 0.032749295234680176, "rewards/margins": 6.059667587280273, "rewards/rejected": -6.026918411254883, "step": 6614 }, { "epoch": 1.03, "learning_rate": 9.295625291768245e-06, "logits/chosen": -2.6838834285736084, "logits/rejected": -2.815117835998535, "logps/chosen": -45.25264358520508, "logps/rejected": -258.101806640625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6444779634475708, "rewards/margins": 5.055117607116699, "rewards/rejected": -5.6995954513549805, "step": 6615 }, { "epoch": 1.03, "learning_rate": 9.294891851237097e-06, "logits/chosen": -2.149735689163208, "logits/rejected": -3.138640880584717, "logps/chosen": -139.98245239257812, "logps/rejected": -368.2555236816406, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.43845710158348083, "rewards/margins": 6.052101135253906, "rewards/rejected": -6.490557670593262, "step": 6616 }, { "epoch": 1.03, "learning_rate": 9.294158410705949e-06, "logits/chosen": -2.8816254138946533, "logits/rejected": -3.1162819862365723, "logps/chosen": -200.7738494873047, "logps/rejected": -383.76251220703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2447715848684311, "rewards/margins": 7.094310760498047, "rewards/rejected": -7.339082717895508, "step": 6617 }, { "epoch": 1.03, "learning_rate": 9.293424970174801e-06, "logits/chosen": -2.69869327545166, "logits/rejected": -2.8153927326202393, "logps/chosen": -37.235595703125, "logps/rejected": -204.6527099609375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.31940993666648865, "rewards/margins": 6.063060283660889, "rewards/rejected": -6.38247013092041, "step": 6618 }, { "epoch": 1.03, "learning_rate": 9.292691529643653e-06, "logits/chosen": -2.2760026454925537, "logits/rejected": -2.911038398742676, "logps/chosen": -75.44841003417969, "logps/rejected": -148.20626831054688, "loss": 0.2425, "rewards/accuracies": 1.0, "rewards/chosen": -2.0299670696258545, "rewards/margins": 2.137822389602661, "rewards/rejected": -4.167789459228516, "step": 6619 }, { "epoch": 1.03, "learning_rate": 9.291958089112505e-06, "logits/chosen": -2.226377010345459, "logits/rejected": -3.0601773262023926, "logps/chosen": -150.7657470703125, "logps/rejected": -448.32659912109375, "loss": 0.045, "rewards/accuracies": 1.0, "rewards/chosen": -0.3019661009311676, "rewards/margins": 4.817362308502197, "rewards/rejected": -5.119328498840332, "step": 6620 }, { "epoch": 1.03, "learning_rate": 9.291224648581358e-06, "logits/chosen": -2.7161083221435547, "logits/rejected": -2.8848164081573486, "logps/chosen": -34.01164245605469, "logps/rejected": -209.6631317138672, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7284320592880249, "rewards/margins": 6.7879838943481445, "rewards/rejected": -7.516415596008301, "step": 6621 }, { "epoch": 1.03, "learning_rate": 9.29049120805021e-06, "logits/chosen": -2.3444643020629883, "logits/rejected": -2.8889353275299072, "logps/chosen": -51.83379364013672, "logps/rejected": -154.52474975585938, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.31319180130958557, "rewards/margins": 5.189347267150879, "rewards/rejected": -5.502538681030273, "step": 6622 }, { "epoch": 1.03, "learning_rate": 9.289757767519064e-06, "logits/chosen": -2.0968453884124756, "logits/rejected": -2.980621337890625, "logps/chosen": -219.72262573242188, "logps/rejected": -310.2593994140625, "loss": 0.0708, "rewards/accuracies": 1.0, "rewards/chosen": -0.24571934342384338, "rewards/margins": 3.3889968395233154, "rewards/rejected": -3.634716033935547, "step": 6623 }, { "epoch": 1.03, "learning_rate": 9.289024326987916e-06, "logits/chosen": -1.4715020656585693, "logits/rejected": -3.0003719329833984, "logps/chosen": -195.0545196533203, "logps/rejected": -404.24310302734375, "loss": 0.4342, "rewards/accuracies": 0.5, "rewards/chosen": -1.7607219219207764, "rewards/margins": 2.157867908477783, "rewards/rejected": -3.9185898303985596, "step": 6624 }, { "epoch": 1.03, "learning_rate": 9.288290886456767e-06, "logits/chosen": -2.654327630996704, "logits/rejected": -2.3946335315704346, "logps/chosen": -181.3650665283203, "logps/rejected": -314.293212890625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.15082359313964844, "rewards/margins": 7.287607192993164, "rewards/rejected": -7.136783599853516, "step": 6625 }, { "epoch": 1.03, "learning_rate": 9.28755744592562e-06, "logits/chosen": -1.2034317255020142, "logits/rejected": -2.9035732746124268, "logps/chosen": -32.960723876953125, "logps/rejected": -325.10418701171875, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": 0.13940975069999695, "rewards/margins": 5.59797477722168, "rewards/rejected": -5.458564758300781, "step": 6626 }, { "epoch": 1.03, "learning_rate": 9.286824005394471e-06, "logits/chosen": -2.348067045211792, "logits/rejected": -2.623141288757324, "logps/chosen": -568.4505004882812, "logps/rejected": -719.05029296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.4945339262485504, "rewards/margins": 9.99871826171875, "rewards/rejected": -9.504183769226074, "step": 6627 }, { "epoch": 1.03, "learning_rate": 9.286090564863323e-06, "logits/chosen": -2.4650285243988037, "logits/rejected": -2.7542548179626465, "logps/chosen": -97.56737518310547, "logps/rejected": -303.4834289550781, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.1961158812046051, "rewards/margins": 6.650332450866699, "rewards/rejected": -6.454216003417969, "step": 6628 }, { "epoch": 1.03, "learning_rate": 9.285357124332175e-06, "logits/chosen": -1.7559653520584106, "logits/rejected": -2.9749133586883545, "logps/chosen": -56.805633544921875, "logps/rejected": -204.10650634765625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": 0.11126299202442169, "rewards/margins": 5.354264736175537, "rewards/rejected": -5.243001937866211, "step": 6629 }, { "epoch": 1.03, "learning_rate": 9.284623683801027e-06, "logits/chosen": -2.4397315979003906, "logits/rejected": -2.931417226791382, "logps/chosen": -55.868438720703125, "logps/rejected": -286.80950927734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.19828176498413086, "rewards/margins": 7.114973545074463, "rewards/rejected": -6.916691780090332, "step": 6630 }, { "epoch": 1.03, "learning_rate": 9.28389024326988e-06, "logits/chosen": -2.3734357357025146, "logits/rejected": -3.1221113204956055, "logps/chosen": -181.7167510986328, "logps/rejected": -321.5322265625, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.175608828663826, "rewards/margins": 4.626044273376465, "rewards/rejected": -4.8016533851623535, "step": 6631 }, { "epoch": 1.03, "learning_rate": 9.283156802738732e-06, "logits/chosen": -2.002565383911133, "logits/rejected": -2.597919464111328, "logps/chosen": -182.7061767578125, "logps/rejected": -284.0997314453125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": 0.6657512784004211, "rewards/margins": 5.825571537017822, "rewards/rejected": -5.159820556640625, "step": 6632 }, { "epoch": 1.03, "learning_rate": 9.282423362207584e-06, "logits/chosen": -1.8057695627212524, "logits/rejected": -2.4022743701934814, "logps/chosen": -125.18128204345703, "logps/rejected": -160.40789794921875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.11906814575195312, "rewards/margins": 5.1326141357421875, "rewards/rejected": -5.251682281494141, "step": 6633 }, { "epoch": 1.03, "learning_rate": 9.281689921676436e-06, "logits/chosen": -2.883021831512451, "logits/rejected": -3.128246307373047, "logps/chosen": -276.69207763671875, "logps/rejected": -389.7623596191406, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -0.2793594300746918, "rewards/margins": 6.465924263000488, "rewards/rejected": -6.745283603668213, "step": 6634 }, { "epoch": 1.03, "learning_rate": 9.280956481145288e-06, "logits/chosen": -3.076352119445801, "logits/rejected": -3.2117857933044434, "logps/chosen": -546.92431640625, "logps/rejected": -500.52166748046875, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -0.24896393716335297, "rewards/margins": 4.118213176727295, "rewards/rejected": -4.3671770095825195, "step": 6635 }, { "epoch": 1.03, "learning_rate": 9.28022304061414e-06, "logits/chosen": -1.7312719821929932, "logits/rejected": -2.9041049480438232, "logps/chosen": -121.74679565429688, "logps/rejected": -386.0308837890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.21965372562408447, "rewards/margins": 8.572933197021484, "rewards/rejected": -8.353279113769531, "step": 6636 }, { "epoch": 1.03, "learning_rate": 9.279489600082992e-06, "logits/chosen": -2.79032301902771, "logits/rejected": -2.923564910888672, "logps/chosen": -442.1341552734375, "logps/rejected": -543.2169189453125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.19526594877243042, "rewards/margins": 6.411187171936035, "rewards/rejected": -6.215921401977539, "step": 6637 }, { "epoch": 1.03, "learning_rate": 9.278756159551844e-06, "logits/chosen": -2.9494171142578125, "logits/rejected": -3.2988812923431396, "logps/chosen": -149.73263549804688, "logps/rejected": -219.91629028320312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.709191620349884, "rewards/margins": 6.295529365539551, "rewards/rejected": -5.586338043212891, "step": 6638 }, { "epoch": 1.03, "learning_rate": 9.278022719020695e-06, "logits/chosen": -2.8281471729278564, "logits/rejected": -2.5512752532958984, "logps/chosen": -290.88427734375, "logps/rejected": -257.75872802734375, "loss": 2.2861, "rewards/accuracies": 0.5, "rewards/chosen": -1.3421334028244019, "rewards/margins": 1.407465934753418, "rewards/rejected": -2.7495994567871094, "step": 6639 }, { "epoch": 1.03, "learning_rate": 9.277289278489549e-06, "logits/chosen": -2.76816463470459, "logits/rejected": -2.907557725906372, "logps/chosen": -74.55387115478516, "logps/rejected": -193.30624389648438, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.3575472831726074, "rewards/margins": 6.070701599121094, "rewards/rejected": -7.428249359130859, "step": 6640 }, { "epoch": 1.03, "learning_rate": 9.276555837958401e-06, "logits/chosen": -2.3586647510528564, "logits/rejected": -3.1243464946746826, "logps/chosen": -438.75445556640625, "logps/rejected": -660.99072265625, "loss": 0.1778, "rewards/accuracies": 1.0, "rewards/chosen": -2.649402618408203, "rewards/margins": 4.245726585388184, "rewards/rejected": -6.895129203796387, "step": 6641 }, { "epoch": 1.03, "learning_rate": 9.275822397427253e-06, "logits/chosen": -2.624345541000366, "logits/rejected": -2.98526930809021, "logps/chosen": -147.83322143554688, "logps/rejected": -234.56809997558594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 1.7088623046875, "rewards/margins": 9.232815742492676, "rewards/rejected": -7.523953437805176, "step": 6642 }, { "epoch": 1.03, "learning_rate": 9.275088956896105e-06, "logits/chosen": -2.9926724433898926, "logits/rejected": -3.1149532794952393, "logps/chosen": -397.6295471191406, "logps/rejected": -415.71173095703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.5081159472465515, "rewards/margins": 7.363750457763672, "rewards/rejected": -6.8556342124938965, "step": 6643 }, { "epoch": 1.03, "learning_rate": 9.274355516364957e-06, "logits/chosen": -2.5468547344207764, "logits/rejected": -2.4769649505615234, "logps/chosen": -348.67425537109375, "logps/rejected": -280.0872497558594, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.5283053517341614, "rewards/margins": 7.201647758483887, "rewards/rejected": -7.729952812194824, "step": 6644 }, { "epoch": 1.03, "learning_rate": 9.273622075833808e-06, "logits/chosen": -2.7780468463897705, "logits/rejected": -2.9996042251586914, "logps/chosen": -76.59500122070312, "logps/rejected": -219.1971435546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.5187942981719971, "rewards/margins": 7.352025032043457, "rewards/rejected": -6.833230972290039, "step": 6645 }, { "epoch": 1.03, "learning_rate": 9.27288863530266e-06, "logits/chosen": -1.8820446729660034, "logits/rejected": -2.8048367500305176, "logps/chosen": -148.3323516845703, "logps/rejected": -334.8888244628906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.2883949279785156, "rewards/margins": 9.690656661987305, "rewards/rejected": -8.402262687683105, "step": 6646 }, { "epoch": 1.03, "learning_rate": 9.272155194771512e-06, "logits/chosen": -2.266331434249878, "logits/rejected": -3.014078378677368, "logps/chosen": -183.279541015625, "logps/rejected": -285.6649169921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.053614430129528046, "rewards/margins": 7.064023017883301, "rewards/rejected": -7.010408878326416, "step": 6647 }, { "epoch": 1.03, "learning_rate": 9.271421754240364e-06, "logits/chosen": -2.7388956546783447, "logits/rejected": -2.9124951362609863, "logps/chosen": -75.57542419433594, "logps/rejected": -170.6815948486328, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7511547207832336, "rewards/margins": 7.65559720993042, "rewards/rejected": -8.40675163269043, "step": 6648 }, { "epoch": 1.03, "learning_rate": 9.270688313709218e-06, "logits/chosen": -2.2298924922943115, "logits/rejected": -2.9968421459198, "logps/chosen": -154.50067138671875, "logps/rejected": -430.81591796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6818914413452148, "rewards/margins": 9.66819953918457, "rewards/rejected": -10.350090026855469, "step": 6649 }, { "epoch": 1.03, "learning_rate": 9.26995487317807e-06, "logits/chosen": -2.6258442401885986, "logits/rejected": -1.6477470397949219, "logps/chosen": -186.1959991455078, "logps/rejected": -72.48275756835938, "loss": 2.4656, "rewards/accuracies": 0.5, "rewards/chosen": -3.0319221019744873, "rewards/margins": -0.11758160591125488, "rewards/rejected": -2.9143404960632324, "step": 6650 }, { "epoch": 1.03, "learning_rate": 9.269221432646921e-06, "logits/chosen": -2.785179853439331, "logits/rejected": -2.058476448059082, "logps/chosen": -181.23162841796875, "logps/rejected": -204.67807006835938, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 1.1324485540390015, "rewards/margins": 7.033028602600098, "rewards/rejected": -5.900579929351807, "step": 6651 }, { "epoch": 1.03, "learning_rate": 9.268487992115773e-06, "logits/chosen": -2.0317654609680176, "logits/rejected": -2.867889404296875, "logps/chosen": -103.07987976074219, "logps/rejected": -259.6756591796875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.5029104351997375, "rewards/margins": 7.0181379318237305, "rewards/rejected": -6.515227794647217, "step": 6652 }, { "epoch": 1.03, "learning_rate": 9.267754551584625e-06, "logits/chosen": -1.8926795721054077, "logits/rejected": -2.678292751312256, "logps/chosen": -74.2012939453125, "logps/rejected": -143.3889617919922, "loss": 0.2285, "rewards/accuracies": 1.0, "rewards/chosen": -1.31832754611969, "rewards/margins": 4.198848724365234, "rewards/rejected": -5.517176151275635, "step": 6653 }, { "epoch": 1.03, "learning_rate": 9.267021111053477e-06, "logits/chosen": -1.3704142570495605, "logits/rejected": -2.529137372970581, "logps/chosen": -95.13796997070312, "logps/rejected": -200.32908630371094, "loss": 0.0705, "rewards/accuracies": 1.0, "rewards/chosen": -1.0915539264678955, "rewards/margins": 4.4749040603637695, "rewards/rejected": -5.566457748413086, "step": 6654 }, { "epoch": 1.03, "learning_rate": 9.26628767052233e-06, "logits/chosen": -2.9960665702819824, "logits/rejected": -2.927389621734619, "logps/chosen": -481.07958984375, "logps/rejected": -323.61102294921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.11857986450195312, "rewards/margins": 6.990527153015137, "rewards/rejected": -7.10910701751709, "step": 6655 }, { "epoch": 1.04, "learning_rate": 9.265554229991182e-06, "logits/chosen": -2.7788422107696533, "logits/rejected": -3.175417184829712, "logps/chosen": -89.03069305419922, "logps/rejected": -238.0709686279297, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": 0.16481085121631622, "rewards/margins": 7.2739176750183105, "rewards/rejected": -7.10910701751709, "step": 6656 }, { "epoch": 1.04, "learning_rate": 9.264820789460034e-06, "logits/chosen": -2.482384204864502, "logits/rejected": -2.4339118003845215, "logps/chosen": -346.2569580078125, "logps/rejected": -298.10455322265625, "loss": 1.0247, "rewards/accuracies": 0.5, "rewards/chosen": -2.0663812160491943, "rewards/margins": 3.50787091255188, "rewards/rejected": -5.574252128601074, "step": 6657 }, { "epoch": 1.04, "learning_rate": 9.264087348928888e-06, "logits/chosen": -3.084077835083008, "logits/rejected": -2.0957274436950684, "logps/chosen": -232.49501037597656, "logps/rejected": -129.3463592529297, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4620537757873535, "rewards/margins": 9.195068359375, "rewards/rejected": -7.733014106750488, "step": 6658 }, { "epoch": 1.04, "learning_rate": 9.26335390839774e-06, "logits/chosen": -1.610182285308838, "logits/rejected": -2.229210615158081, "logps/chosen": -138.45361328125, "logps/rejected": -337.4114685058594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.06381568312644958, "rewards/margins": 8.58470344543457, "rewards/rejected": -8.520888328552246, "step": 6659 }, { "epoch": 1.04, "learning_rate": 9.262620467866592e-06, "logits/chosen": -2.3962533473968506, "logits/rejected": -2.6936519145965576, "logps/chosen": -225.37388610839844, "logps/rejected": -248.712646484375, "loss": 2.6673, "rewards/accuracies": 0.5, "rewards/chosen": -3.711831569671631, "rewards/margins": 0.43144845962524414, "rewards/rejected": -4.143280029296875, "step": 6660 }, { "epoch": 1.04, "learning_rate": 9.261887027335444e-06, "logits/chosen": -2.9575700759887695, "logits/rejected": -1.8368223905563354, "logps/chosen": -315.373779296875, "logps/rejected": -172.95208740234375, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -1.1093918085098267, "rewards/margins": 3.924931526184082, "rewards/rejected": -5.034323215484619, "step": 6661 }, { "epoch": 1.04, "learning_rate": 9.261153586804295e-06, "logits/chosen": -2.7225799560546875, "logits/rejected": -3.0714399814605713, "logps/chosen": -293.7339782714844, "logps/rejected": -395.4256286621094, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -0.5310280323028564, "rewards/margins": 4.52631950378418, "rewards/rejected": -5.057347297668457, "step": 6662 }, { "epoch": 1.04, "learning_rate": 9.260420146273147e-06, "logits/chosen": -2.0724146366119385, "logits/rejected": -3.19044828414917, "logps/chosen": -170.8302764892578, "logps/rejected": -459.38275146484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5792381167411804, "rewards/margins": 7.33623743057251, "rewards/rejected": -7.915475845336914, "step": 6663 }, { "epoch": 1.04, "learning_rate": 9.259686705742e-06, "logits/chosen": -2.6599888801574707, "logits/rejected": -2.9679200649261475, "logps/chosen": -32.9966926574707, "logps/rejected": -137.43325805664062, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 0.050255488604307175, "rewards/margins": 6.217827796936035, "rewards/rejected": -6.167572021484375, "step": 6664 }, { "epoch": 1.04, "learning_rate": 9.258953265210851e-06, "logits/chosen": -2.746582269668579, "logits/rejected": -0.8185998201370239, "logps/chosen": -299.4638671875, "logps/rejected": -200.57913208007812, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 1.2524044513702393, "rewards/margins": 6.250556468963623, "rewards/rejected": -4.998151779174805, "step": 6665 }, { "epoch": 1.04, "learning_rate": 9.258219824679703e-06, "logits/chosen": -2.7344272136688232, "logits/rejected": -2.6900386810302734, "logps/chosen": -282.15850830078125, "logps/rejected": -404.20794677734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 1.6834297180175781, "rewards/margins": 6.56626033782959, "rewards/rejected": -4.882830619812012, "step": 6666 }, { "epoch": 1.04, "learning_rate": 9.257486384148556e-06, "logits/chosen": -2.913699150085449, "logits/rejected": -2.593137502670288, "logps/chosen": -345.478759765625, "logps/rejected": -507.37603759765625, "loss": 1.0588, "rewards/accuracies": 0.5, "rewards/chosen": -4.258247375488281, "rewards/margins": 3.1028199195861816, "rewards/rejected": -7.361066818237305, "step": 6667 }, { "epoch": 1.04, "learning_rate": 9.256752943617408e-06, "logits/chosen": -2.3689920902252197, "logits/rejected": -2.9950082302093506, "logps/chosen": -41.16992950439453, "logps/rejected": -188.45175170898438, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": 0.04040110111236572, "rewards/margins": 5.0606465339660645, "rewards/rejected": -5.02024507522583, "step": 6668 }, { "epoch": 1.04, "learning_rate": 9.25601950308626e-06, "logits/chosen": -1.6388963460922241, "logits/rejected": -2.798292398452759, "logps/chosen": -113.24417114257812, "logps/rejected": -276.4018249511719, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.35197561979293823, "rewards/margins": 7.283574104309082, "rewards/rejected": -7.635549545288086, "step": 6669 }, { "epoch": 1.04, "learning_rate": 9.255286062555112e-06, "logits/chosen": -3.1030068397521973, "logits/rejected": -2.416135787963867, "logps/chosen": -352.66107177734375, "logps/rejected": -193.3292236328125, "loss": 0.7563, "rewards/accuracies": 0.5, "rewards/chosen": -1.0355525016784668, "rewards/margins": 2.9536495208740234, "rewards/rejected": -3.9892022609710693, "step": 6670 }, { "epoch": 1.04, "learning_rate": 9.254552622023964e-06, "logits/chosen": -2.8241336345672607, "logits/rejected": -3.0606064796447754, "logps/chosen": -618.1246337890625, "logps/rejected": -640.1531982421875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.0120941400527954, "rewards/margins": 7.626626968383789, "rewards/rejected": -8.638721466064453, "step": 6671 }, { "epoch": 1.04, "learning_rate": 9.253819181492816e-06, "logits/chosen": -2.9355506896972656, "logits/rejected": -1.8385591506958008, "logps/chosen": -432.4985656738281, "logps/rejected": -229.76211547851562, "loss": 0.0684, "rewards/accuracies": 1.0, "rewards/chosen": -0.14369353652000427, "rewards/margins": 3.845247507095337, "rewards/rejected": -3.988941192626953, "step": 6672 }, { "epoch": 1.04, "learning_rate": 9.253085740961668e-06, "logits/chosen": -2.7138264179229736, "logits/rejected": -2.8070895671844482, "logps/chosen": -374.5801696777344, "logps/rejected": -489.77825927734375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": 0.7151825428009033, "rewards/margins": 6.7980804443359375, "rewards/rejected": -6.082898139953613, "step": 6673 }, { "epoch": 1.04, "learning_rate": 9.25235230043052e-06, "logits/chosen": -2.6658596992492676, "logits/rejected": -1.3996888399124146, "logps/chosen": -739.453125, "logps/rejected": -400.07220458984375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.00598926842212677, "rewards/margins": 6.952834129333496, "rewards/rejected": -6.946845054626465, "step": 6674 }, { "epoch": 1.04, "learning_rate": 9.251618859899372e-06, "logits/chosen": -2.9769413471221924, "logits/rejected": -2.9539237022399902, "logps/chosen": -136.26744079589844, "logps/rejected": -229.5143280029297, "loss": 0.2911, "rewards/accuracies": 1.0, "rewards/chosen": -0.7327869534492493, "rewards/margins": 3.4922080039978027, "rewards/rejected": -4.224995136260986, "step": 6675 }, { "epoch": 1.04, "learning_rate": 9.250885419368225e-06, "logits/chosen": -2.1333909034729004, "logits/rejected": -2.6826744079589844, "logps/chosen": -97.23348999023438, "logps/rejected": -327.8531494140625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.9699677228927612, "rewards/margins": 5.9839186668396, "rewards/rejected": -6.95388650894165, "step": 6676 }, { "epoch": 1.04, "learning_rate": 9.250151978837077e-06, "logits/chosen": -2.728313446044922, "logits/rejected": -2.885239362716675, "logps/chosen": -110.02552032470703, "logps/rejected": -252.69314575195312, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.944237470626831, "rewards/margins": 5.45457124710083, "rewards/rejected": -6.398808479309082, "step": 6677 }, { "epoch": 1.04, "learning_rate": 9.249418538305929e-06, "logits/chosen": -1.205161690711975, "logits/rejected": -2.815457582473755, "logps/chosen": -147.80555725097656, "logps/rejected": -564.3713989257812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.6021908521652222, "rewards/margins": 6.391269207000732, "rewards/rejected": -6.993460178375244, "step": 6678 }, { "epoch": 1.04, "learning_rate": 9.24868509777478e-06, "logits/chosen": -2.7169342041015625, "logits/rejected": -2.7954773902893066, "logps/chosen": -373.1188659667969, "logps/rejected": -391.7033996582031, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -1.7202346324920654, "rewards/margins": 5.164205074310303, "rewards/rejected": -6.884439945220947, "step": 6679 }, { "epoch": 1.04, "learning_rate": 9.247951657243633e-06, "logits/chosen": -2.7353994846343994, "logits/rejected": -2.754260540008545, "logps/chosen": -130.2852783203125, "logps/rejected": -166.88145446777344, "loss": 0.2855, "rewards/accuracies": 1.0, "rewards/chosen": -0.5874013900756836, "rewards/margins": 3.3694190979003906, "rewards/rejected": -3.956820487976074, "step": 6680 }, { "epoch": 1.04, "learning_rate": 9.247218216712484e-06, "logits/chosen": -2.7207789421081543, "logits/rejected": -3.177217960357666, "logps/chosen": -59.31235885620117, "logps/rejected": -272.88433837890625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.2514631450176239, "rewards/margins": 5.343789577484131, "rewards/rejected": -5.595252513885498, "step": 6681 }, { "epoch": 1.04, "learning_rate": 9.246484776181336e-06, "logits/chosen": -2.49444842338562, "logits/rejected": -2.821209192276001, "logps/chosen": -92.86734771728516, "logps/rejected": -367.49713134765625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.4920528531074524, "rewards/margins": 7.87790584564209, "rewards/rejected": -7.385852813720703, "step": 6682 }, { "epoch": 1.04, "learning_rate": 9.245751335650188e-06, "logits/chosen": -1.9421545267105103, "logits/rejected": -2.8313674926757812, "logps/chosen": -95.42047882080078, "logps/rejected": -408.6959228515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.4121875762939453, "rewards/margins": 7.554075241088867, "rewards/rejected": -7.141887187957764, "step": 6683 }, { "epoch": 1.04, "learning_rate": 9.24501789511904e-06, "logits/chosen": -3.166656732559204, "logits/rejected": -3.1146433353424072, "logps/chosen": -119.02969360351562, "logps/rejected": -131.62498474121094, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.3026435971260071, "rewards/margins": 6.359991550445557, "rewards/rejected": -6.057348251342773, "step": 6684 }, { "epoch": 1.04, "learning_rate": 9.244284454587894e-06, "logits/chosen": -2.978652238845825, "logits/rejected": -3.104675769805908, "logps/chosen": -104.25999450683594, "logps/rejected": -254.9298095703125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.6756746768951416, "rewards/margins": 6.245651721954346, "rewards/rejected": -7.921326637268066, "step": 6685 }, { "epoch": 1.04, "learning_rate": 9.243551014056746e-06, "logits/chosen": -2.164085865020752, "logits/rejected": -2.9160354137420654, "logps/chosen": -412.41412353515625, "logps/rejected": -595.9520874023438, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.5492817163467407, "rewards/margins": 5.47096061706543, "rewards/rejected": -6.020242691040039, "step": 6686 }, { "epoch": 1.04, "learning_rate": 9.242817573525597e-06, "logits/chosen": -1.2808475494384766, "logits/rejected": -2.869203805923462, "logps/chosen": -135.85659790039062, "logps/rejected": -388.5698547363281, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.7260586023330688, "rewards/margins": 5.151614189147949, "rewards/rejected": -5.8776726722717285, "step": 6687 }, { "epoch": 1.04, "learning_rate": 9.24208413299445e-06, "logits/chosen": -2.7892262935638428, "logits/rejected": -2.2935128211975098, "logps/chosen": -91.16846466064453, "logps/rejected": -118.98970794677734, "loss": 0.7505, "rewards/accuracies": 0.5, "rewards/chosen": -0.9142187237739563, "rewards/margins": 0.813414454460144, "rewards/rejected": -1.7276331186294556, "step": 6688 }, { "epoch": 1.04, "learning_rate": 9.241350692463303e-06, "logits/chosen": -1.1980019807815552, "logits/rejected": -3.2103943824768066, "logps/chosen": -45.582584381103516, "logps/rejected": -389.49456787109375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.061415433883667, "rewards/margins": 8.903153419494629, "rewards/rejected": -7.841737747192383, "step": 6689 }, { "epoch": 1.04, "learning_rate": 9.240617251932155e-06, "logits/chosen": -2.9594924449920654, "logits/rejected": -2.85418963432312, "logps/chosen": -201.27627563476562, "logps/rejected": -207.4976348876953, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.5212745666503906, "rewards/margins": 6.0099077224731445, "rewards/rejected": -5.488633155822754, "step": 6690 }, { "epoch": 1.04, "learning_rate": 9.239883811401007e-06, "logits/chosen": -2.7504005432128906, "logits/rejected": -2.895265579223633, "logps/chosen": -115.62291717529297, "logps/rejected": -248.14761352539062, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 0.38790369033813477, "rewards/margins": 5.917405128479004, "rewards/rejected": -5.529501438140869, "step": 6691 }, { "epoch": 1.04, "learning_rate": 9.239150370869859e-06, "logits/chosen": -2.128929853439331, "logits/rejected": -2.8985347747802734, "logps/chosen": -383.5751647949219, "logps/rejected": -479.1463623046875, "loss": 0.7889, "rewards/accuracies": 0.5, "rewards/chosen": -0.4176139831542969, "rewards/margins": 2.301313877105713, "rewards/rejected": -2.7189278602600098, "step": 6692 }, { "epoch": 1.04, "learning_rate": 9.23841693033871e-06, "logits/chosen": -2.810669422149658, "logits/rejected": -0.984828531742096, "logps/chosen": -290.76763916015625, "logps/rejected": -165.5632781982422, "loss": 1.5649, "rewards/accuracies": 0.5, "rewards/chosen": -2.630162239074707, "rewards/margins": 2.8405046463012695, "rewards/rejected": -5.470666408538818, "step": 6693 }, { "epoch": 1.04, "learning_rate": 9.237683489807564e-06, "logits/chosen": -2.314539670944214, "logits/rejected": -2.912277936935425, "logps/chosen": -106.55187225341797, "logps/rejected": -365.31597900390625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.0771836042404175, "rewards/margins": 5.638524055480957, "rewards/rejected": -6.715707302093506, "step": 6694 }, { "epoch": 1.04, "learning_rate": 9.236950049276416e-06, "logits/chosen": -2.9034838676452637, "logits/rejected": -3.0660665035247803, "logps/chosen": -286.3963317871094, "logps/rejected": -365.0886535644531, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.8267965316772461, "rewards/margins": 6.904539108276367, "rewards/rejected": -6.077742576599121, "step": 6695 }, { "epoch": 1.04, "learning_rate": 9.236216608745268e-06, "logits/chosen": -2.4374687671661377, "logits/rejected": -2.9954488277435303, "logps/chosen": -95.01632690429688, "logps/rejected": -240.6982421875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.03017711639404297, "rewards/margins": 5.9968719482421875, "rewards/rejected": -5.9666948318481445, "step": 6696 }, { "epoch": 1.04, "learning_rate": 9.23548316821412e-06, "logits/chosen": -2.3608648777008057, "logits/rejected": -2.915330171585083, "logps/chosen": -47.67132568359375, "logps/rejected": -234.80218505859375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.26464882493019104, "rewards/margins": 7.3903584480285645, "rewards/rejected": -7.655007362365723, "step": 6697 }, { "epoch": 1.04, "learning_rate": 9.234749727682971e-06, "logits/chosen": -1.741965413093567, "logits/rejected": -2.917086124420166, "logps/chosen": -69.11846923828125, "logps/rejected": -223.45936584472656, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": 0.05861702188849449, "rewards/margins": 5.081493377685547, "rewards/rejected": -5.022876739501953, "step": 6698 }, { "epoch": 1.04, "learning_rate": 9.234016287151823e-06, "logits/chosen": -2.862492322921753, "logits/rejected": -2.798943281173706, "logps/chosen": -451.68585205078125, "logps/rejected": -367.1889343261719, "loss": 1.2441, "rewards/accuracies": 0.5, "rewards/chosen": -2.3549585342407227, "rewards/margins": 2.9895148277282715, "rewards/rejected": -5.344473361968994, "step": 6699 }, { "epoch": 1.04, "learning_rate": 9.233282846620675e-06, "logits/chosen": -2.860902786254883, "logits/rejected": -1.9338194131851196, "logps/chosen": -447.88232421875, "logps/rejected": -341.99310302734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.5335269570350647, "rewards/margins": 6.415882110595703, "rewards/rejected": -6.949409008026123, "step": 6700 }, { "epoch": 1.04, "learning_rate": 9.232549406089527e-06, "logits/chosen": -2.964695692062378, "logits/rejected": -3.5039596557617188, "logps/chosen": -291.8798828125, "logps/rejected": -388.4267578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.2688308656215668, "rewards/margins": 7.396254539489746, "rewards/rejected": -7.6650848388671875, "step": 6701 }, { "epoch": 1.04, "learning_rate": 9.23181596555838e-06, "logits/chosen": -2.7030131816864014, "logits/rejected": -2.4143009185791016, "logps/chosen": -233.89093017578125, "logps/rejected": -241.8280487060547, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.0949063301086426, "rewards/margins": 5.676757335662842, "rewards/rejected": -6.771663665771484, "step": 6702 }, { "epoch": 1.04, "learning_rate": 9.231082525027233e-06, "logits/chosen": -2.6961898803710938, "logits/rejected": -1.7979323863983154, "logps/chosen": -724.7830810546875, "logps/rejected": -443.35546875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.670681893825531, "rewards/margins": 7.133949279785156, "rewards/rejected": -7.804631233215332, "step": 6703 }, { "epoch": 1.04, "learning_rate": 9.230349084496084e-06, "logits/chosen": -2.8377761840820312, "logits/rejected": -2.839357376098633, "logps/chosen": -86.24821472167969, "logps/rejected": -248.95849609375, "loss": 0.6855, "rewards/accuracies": 0.5, "rewards/chosen": -2.5502607822418213, "rewards/margins": 4.2270965576171875, "rewards/rejected": -6.777357578277588, "step": 6704 }, { "epoch": 1.04, "learning_rate": 9.229615643964936e-06, "logits/chosen": -2.8389992713928223, "logits/rejected": -2.6757922172546387, "logps/chosen": -105.92630004882812, "logps/rejected": -127.67108917236328, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.07370815426111221, "rewards/margins": 6.772082805633545, "rewards/rejected": -6.6983747482299805, "step": 6705 }, { "epoch": 1.04, "learning_rate": 9.228882203433788e-06, "logits/chosen": -2.6546900272369385, "logits/rejected": -3.151975393295288, "logps/chosen": -266.7767333984375, "logps/rejected": -334.13983154296875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 0.669464111328125, "rewards/margins": 6.410007476806641, "rewards/rejected": -5.740543365478516, "step": 6706 }, { "epoch": 1.04, "learning_rate": 9.22814876290264e-06, "logits/chosen": -2.9030637741088867, "logits/rejected": -2.3564467430114746, "logps/chosen": -420.2706604003906, "logps/rejected": -357.4556884765625, "loss": 0.1002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7095673084259033, "rewards/margins": 4.1112213134765625, "rewards/rejected": -5.820788383483887, "step": 6707 }, { "epoch": 1.04, "learning_rate": 9.227415322371492e-06, "logits/chosen": -1.3203035593032837, "logits/rejected": -1.2431254386901855, "logps/chosen": -247.71498107910156, "logps/rejected": -374.5785217285156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.41085049510002136, "rewards/margins": 8.641389846801758, "rewards/rejected": -8.230539321899414, "step": 6708 }, { "epoch": 1.04, "learning_rate": 9.226681881840344e-06, "logits/chosen": -2.6357839107513428, "logits/rejected": -2.879936456680298, "logps/chosen": -226.3934783935547, "logps/rejected": -569.97412109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.09672203660011292, "rewards/margins": 10.440427780151367, "rewards/rejected": -10.537149429321289, "step": 6709 }, { "epoch": 1.04, "learning_rate": 9.225948441309196e-06, "logits/chosen": -2.149792432785034, "logits/rejected": -2.899798631668091, "logps/chosen": -219.55389404296875, "logps/rejected": -340.3001708984375, "loss": 1.2647, "rewards/accuracies": 0.5, "rewards/chosen": -1.840074896812439, "rewards/margins": 1.3971643447875977, "rewards/rejected": -3.237239122390747, "step": 6710 }, { "epoch": 1.04, "learning_rate": 9.22521500077805e-06, "logits/chosen": -2.9127883911132812, "logits/rejected": -3.049978017807007, "logps/chosen": -246.3842010498047, "logps/rejected": -384.005126953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.7659851312637329, "rewards/margins": 6.979848384857178, "rewards/rejected": -6.213863372802734, "step": 6711 }, { "epoch": 1.04, "learning_rate": 9.224481560246901e-06, "logits/chosen": -2.797734260559082, "logits/rejected": -3.0192370414733887, "logps/chosen": -107.6083984375, "logps/rejected": -324.4321594238281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.11215034127235413, "rewards/margins": 8.119790077209473, "rewards/rejected": -8.00763988494873, "step": 6712 }, { "epoch": 1.04, "learning_rate": 9.223748119715753e-06, "logits/chosen": -2.176849603652954, "logits/rejected": -2.849717855453491, "logps/chosen": -138.6138153076172, "logps/rejected": -214.14913940429688, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": 0.7124775052070618, "rewards/margins": 4.34182596206665, "rewards/rejected": -3.6293485164642334, "step": 6713 }, { "epoch": 1.04, "learning_rate": 9.223014679184605e-06, "logits/chosen": -1.7703678607940674, "logits/rejected": -2.8192052841186523, "logps/chosen": -35.641136169433594, "logps/rejected": -287.06134033203125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -1.9301129579544067, "rewards/margins": 6.370182991027832, "rewards/rejected": -8.30029582977295, "step": 6714 }, { "epoch": 1.04, "learning_rate": 9.222281238653457e-06, "logits/chosen": -3.019697427749634, "logits/rejected": -2.279829502105713, "logps/chosen": -508.6005554199219, "logps/rejected": -402.6595764160156, "loss": 0.5082, "rewards/accuracies": 0.5, "rewards/chosen": -1.5155792236328125, "rewards/margins": 0.6132277846336365, "rewards/rejected": -2.1288070678710938, "step": 6715 }, { "epoch": 1.04, "learning_rate": 9.221547798122309e-06, "logits/chosen": -1.6263575553894043, "logits/rejected": -3.3292884826660156, "logps/chosen": -136.51284790039062, "logps/rejected": -364.81072998046875, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.4266822934150696, "rewards/margins": 6.048519134521484, "rewards/rejected": -6.475201606750488, "step": 6716 }, { "epoch": 1.04, "learning_rate": 9.22081435759116e-06, "logits/chosen": -2.7990734577178955, "logits/rejected": -1.1619417667388916, "logps/chosen": -302.85748291015625, "logps/rejected": -209.03114318847656, "loss": 0.2696, "rewards/accuracies": 1.0, "rewards/chosen": 0.6167858242988586, "rewards/margins": 3.395301580429077, "rewards/rejected": -2.7785158157348633, "step": 6717 }, { "epoch": 1.04, "learning_rate": 9.220080917060012e-06, "logits/chosen": -2.6069865226745605, "logits/rejected": -2.358593225479126, "logps/chosen": -657.3306884765625, "logps/rejected": -564.4181518554688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8323503732681274, "rewards/margins": 6.85338020324707, "rewards/rejected": -7.685730934143066, "step": 6718 }, { "epoch": 1.04, "learning_rate": 9.219347476528864e-06, "logits/chosen": -3.2984325885772705, "logits/rejected": -2.9362049102783203, "logps/chosen": -619.423828125, "logps/rejected": -316.1656494140625, "loss": 0.2481, "rewards/accuracies": 1.0, "rewards/chosen": -0.6999237537384033, "rewards/margins": 2.0905113220214844, "rewards/rejected": -2.7904350757598877, "step": 6719 }, { "epoch": 1.05, "learning_rate": 9.218614035997718e-06, "logits/chosen": -3.148163080215454, "logits/rejected": -2.8905251026153564, "logps/chosen": -274.3912353515625, "logps/rejected": -168.21484375, "loss": 1.054, "rewards/accuracies": 0.5, "rewards/chosen": -2.1279191970825195, "rewards/margins": 2.6282739639282227, "rewards/rejected": -4.756193161010742, "step": 6720 }, { "epoch": 1.05, "learning_rate": 9.21788059546657e-06, "logits/chosen": -2.5036191940307617, "logits/rejected": -2.8861641883850098, "logps/chosen": -184.73931884765625, "logps/rejected": -328.8447265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.6196678280830383, "rewards/margins": 6.89446496963501, "rewards/rejected": -7.514132499694824, "step": 6721 }, { "epoch": 1.05, "learning_rate": 9.217147154935422e-06, "logits/chosen": -3.037548303604126, "logits/rejected": -2.8683998584747314, "logps/chosen": -396.28631591796875, "logps/rejected": -280.2101135253906, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.23158417642116547, "rewards/margins": 5.884743690490723, "rewards/rejected": -6.11632776260376, "step": 6722 }, { "epoch": 1.05, "learning_rate": 9.216413714404275e-06, "logits/chosen": -2.7530155181884766, "logits/rejected": -2.0469582080841064, "logps/chosen": -150.56588745117188, "logps/rejected": -123.85142517089844, "loss": 0.5341, "rewards/accuracies": 0.5, "rewards/chosen": -1.9849720001220703, "rewards/margins": 1.599136471748352, "rewards/rejected": -3.584108352661133, "step": 6723 }, { "epoch": 1.05, "learning_rate": 9.215680273873127e-06, "logits/chosen": -2.978375196456909, "logits/rejected": -2.762897253036499, "logps/chosen": -733.135498046875, "logps/rejected": -532.0279541015625, "loss": 0.2445, "rewards/accuracies": 1.0, "rewards/chosen": -0.9847198724746704, "rewards/margins": 3.348585367202759, "rewards/rejected": -4.333305358886719, "step": 6724 }, { "epoch": 1.05, "learning_rate": 9.214946833341979e-06, "logits/chosen": -2.170818328857422, "logits/rejected": -2.966075897216797, "logps/chosen": -348.6521911621094, "logps/rejected": -566.4130859375, "loss": 0.2014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4590911865234375, "rewards/margins": 3.960601806640625, "rewards/rejected": -5.4196929931640625, "step": 6725 }, { "epoch": 1.05, "learning_rate": 9.214213392810831e-06, "logits/chosen": -2.1411077976226807, "logits/rejected": -2.4428532123565674, "logps/chosen": -64.44252014160156, "logps/rejected": -206.53367614746094, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.2438444197177887, "rewards/margins": 5.6638946533203125, "rewards/rejected": -5.907739162445068, "step": 6726 }, { "epoch": 1.05, "learning_rate": 9.213479952279683e-06, "logits/chosen": -3.0004265308380127, "logits/rejected": -2.948657512664795, "logps/chosen": -83.80974578857422, "logps/rejected": -169.49911499023438, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": 0.21403563022613525, "rewards/margins": 5.479195594787598, "rewards/rejected": -5.265159606933594, "step": 6727 }, { "epoch": 1.05, "learning_rate": 9.212746511748535e-06, "logits/chosen": -1.7164002656936646, "logits/rejected": -3.184391498565674, "logps/chosen": -50.55950164794922, "logps/rejected": -301.68463134765625, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -0.24513569474220276, "rewards/margins": 3.762515068054199, "rewards/rejected": -4.007650852203369, "step": 6728 }, { "epoch": 1.05, "learning_rate": 9.212013071217388e-06, "logits/chosen": -3.0800247192382812, "logits/rejected": -2.1922402381896973, "logps/chosen": -1159.3463134765625, "logps/rejected": -632.6280517578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1253631114959717, "rewards/margins": 6.332536697387695, "rewards/rejected": -7.457899570465088, "step": 6729 }, { "epoch": 1.05, "learning_rate": 9.21127963068624e-06, "logits/chosen": -1.4795914888381958, "logits/rejected": -3.071000814437866, "logps/chosen": -68.78158569335938, "logps/rejected": -300.1025390625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.4249569177627563, "rewards/margins": 4.377346515655518, "rewards/rejected": -5.802303314208984, "step": 6730 }, { "epoch": 1.05, "learning_rate": 9.210546190155092e-06, "logits/chosen": -2.6391730308532715, "logits/rejected": -2.235279083251953, "logps/chosen": -124.52566528320312, "logps/rejected": -148.6442108154297, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": 0.6675354242324829, "rewards/margins": 6.091301918029785, "rewards/rejected": -5.423766136169434, "step": 6731 }, { "epoch": 1.05, "learning_rate": 9.209812749623944e-06, "logits/chosen": -2.1717641353607178, "logits/rejected": -2.5689053535461426, "logps/chosen": -251.24737548828125, "logps/rejected": -360.31646728515625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -1.779119610786438, "rewards/margins": 4.967474460601807, "rewards/rejected": -6.746594429016113, "step": 6732 }, { "epoch": 1.05, "learning_rate": 9.209079309092796e-06, "logits/chosen": -2.1210198402404785, "logits/rejected": -2.45522141456604, "logps/chosen": -71.89574432373047, "logps/rejected": -195.4092559814453, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.27480798959732056, "rewards/margins": 5.266549110412598, "rewards/rejected": -5.541357040405273, "step": 6733 }, { "epoch": 1.05, "learning_rate": 9.208345868561648e-06, "logits/chosen": -2.4978365898132324, "logits/rejected": -3.1087706089019775, "logps/chosen": -238.53652954101562, "logps/rejected": -450.444580078125, "loss": 0.1413, "rewards/accuracies": 1.0, "rewards/chosen": -1.9994721412658691, "rewards/margins": 3.508985757827759, "rewards/rejected": -5.508458137512207, "step": 6734 }, { "epoch": 1.05, "learning_rate": 9.2076124280305e-06, "logits/chosen": -2.7691428661346436, "logits/rejected": -2.9231812953948975, "logps/chosen": -111.11634826660156, "logps/rejected": -138.99172973632812, "loss": 0.1227, "rewards/accuracies": 1.0, "rewards/chosen": -0.963651716709137, "rewards/margins": 3.9168736934661865, "rewards/rejected": -4.880525588989258, "step": 6735 }, { "epoch": 1.05, "learning_rate": 9.206878987499351e-06, "logits/chosen": -1.1172142028808594, "logits/rejected": -2.789964199066162, "logps/chosen": -175.13124084472656, "logps/rejected": -683.079345703125, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/chosen": -2.4398727416992188, "rewards/margins": 3.055267810821533, "rewards/rejected": -5.495140075683594, "step": 6736 }, { "epoch": 1.05, "learning_rate": 9.206145546968203e-06, "logits/chosen": -2.427563190460205, "logits/rejected": -2.9638662338256836, "logps/chosen": -223.4029541015625, "logps/rejected": -316.2154541015625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.2778762876987457, "rewards/margins": 6.128244876861572, "rewards/rejected": -5.850368499755859, "step": 6737 }, { "epoch": 1.05, "learning_rate": 9.205412106437057e-06, "logits/chosen": -2.4064717292785645, "logits/rejected": -2.6042168140411377, "logps/chosen": -174.82879638671875, "logps/rejected": -320.25018310546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.39598655700683594, "rewards/margins": 6.473450183868408, "rewards/rejected": -6.869436740875244, "step": 6738 }, { "epoch": 1.05, "learning_rate": 9.204678665905909e-06, "logits/chosen": -2.506540536880493, "logits/rejected": -2.900723695755005, "logps/chosen": -598.1015014648438, "logps/rejected": -710.9832153320312, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -1.7464828491210938, "rewards/margins": 6.4579267501831055, "rewards/rejected": -8.2044095993042, "step": 6739 }, { "epoch": 1.05, "learning_rate": 9.20394522537476e-06, "logits/chosen": -2.8981728553771973, "logits/rejected": -3.0346555709838867, "logps/chosen": -153.51271057128906, "logps/rejected": -246.247802734375, "loss": 1.037, "rewards/accuracies": 0.5, "rewards/chosen": -1.8527954816818237, "rewards/margins": 2.7973241806030273, "rewards/rejected": -4.650119304656982, "step": 6740 }, { "epoch": 1.05, "learning_rate": 9.203211784843612e-06, "logits/chosen": -3.301414966583252, "logits/rejected": -3.222437620162964, "logps/chosen": -307.99566650390625, "logps/rejected": -381.3663330078125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -2.011829137802124, "rewards/margins": 5.004033088684082, "rewards/rejected": -7.015862464904785, "step": 6741 }, { "epoch": 1.05, "learning_rate": 9.202478344312464e-06, "logits/chosen": -2.5535242557525635, "logits/rejected": -3.3019368648529053, "logps/chosen": -476.7216796875, "logps/rejected": -732.5402221679688, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.23307210206985474, "rewards/margins": 6.6735029220581055, "rewards/rejected": -6.9065752029418945, "step": 6742 }, { "epoch": 1.05, "learning_rate": 9.201744903781316e-06, "logits/chosen": -2.605079174041748, "logits/rejected": -3.001972198486328, "logps/chosen": -38.42949676513672, "logps/rejected": -196.38720703125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.37410154938697815, "rewards/margins": 6.428128242492676, "rewards/rejected": -6.0540266036987305, "step": 6743 }, { "epoch": 1.05, "learning_rate": 9.201011463250168e-06, "logits/chosen": -3.231344223022461, "logits/rejected": -3.050614833831787, "logps/chosen": -380.6370544433594, "logps/rejected": -239.03378295898438, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -1.0943882465362549, "rewards/margins": 2.9284512996673584, "rewards/rejected": -4.022839546203613, "step": 6744 }, { "epoch": 1.05, "learning_rate": 9.20027802271902e-06, "logits/chosen": -3.340097427368164, "logits/rejected": -2.4912538528442383, "logps/chosen": -427.0221252441406, "logps/rejected": -252.71963500976562, "loss": 1.0771, "rewards/accuracies": 0.5, "rewards/chosen": -2.4031991958618164, "rewards/margins": 1.2957459688186646, "rewards/rejected": -3.6989450454711914, "step": 6745 }, { "epoch": 1.05, "learning_rate": 9.199544582187872e-06, "logits/chosen": -2.0585155487060547, "logits/rejected": -2.860483407974243, "logps/chosen": -328.2843322753906, "logps/rejected": -534.2559814453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 0.23569336533546448, "rewards/margins": 6.997250556945801, "rewards/rejected": -6.761557102203369, "step": 6746 }, { "epoch": 1.05, "learning_rate": 9.198811141656725e-06, "logits/chosen": -2.269059181213379, "logits/rejected": -2.741929054260254, "logps/chosen": -98.3648681640625, "logps/rejected": -167.47105407714844, "loss": 1.2406, "rewards/accuracies": 0.5, "rewards/chosen": -2.906975507736206, "rewards/margins": 2.45143985748291, "rewards/rejected": -5.358415603637695, "step": 6747 }, { "epoch": 1.05, "learning_rate": 9.198077701125577e-06, "logits/chosen": -2.503791570663452, "logits/rejected": -3.040616512298584, "logps/chosen": -119.61412048339844, "logps/rejected": -335.80340576171875, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": -0.4462902247905731, "rewards/margins": 4.528039932250977, "rewards/rejected": -4.974330425262451, "step": 6748 }, { "epoch": 1.05, "learning_rate": 9.197344260594429e-06, "logits/chosen": -1.8977690935134888, "logits/rejected": -2.099246025085449, "logps/chosen": -137.7410888671875, "logps/rejected": -340.08990478515625, "loss": 0.1678, "rewards/accuracies": 1.0, "rewards/chosen": -2.5631136894226074, "rewards/margins": 5.624054908752441, "rewards/rejected": -8.18716812133789, "step": 6749 }, { "epoch": 1.05, "learning_rate": 9.196610820063281e-06, "logits/chosen": -2.928206443786621, "logits/rejected": -2.788398504257202, "logps/chosen": -171.9103240966797, "logps/rejected": -189.01393127441406, "loss": 1.0339, "rewards/accuracies": 0.5, "rewards/chosen": -2.7493386268615723, "rewards/margins": 2.146143674850464, "rewards/rejected": -4.895482063293457, "step": 6750 }, { "epoch": 1.05, "learning_rate": 9.195877379532133e-06, "logits/chosen": -2.559941530227661, "logits/rejected": -3.1872150897979736, "logps/chosen": -135.32400512695312, "logps/rejected": -395.69232177734375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5674343705177307, "rewards/margins": 7.251456260681152, "rewards/rejected": -7.818890571594238, "step": 6751 }, { "epoch": 1.05, "learning_rate": 9.195143939000985e-06, "logits/chosen": -2.6095659732818604, "logits/rejected": -2.2988834381103516, "logps/chosen": -49.314064025878906, "logps/rejected": -193.0865020751953, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.16779875755310059, "rewards/margins": 7.286527633666992, "rewards/rejected": -7.454326629638672, "step": 6752 }, { "epoch": 1.05, "learning_rate": 9.194410498469837e-06, "logits/chosen": -2.0056800842285156, "logits/rejected": -2.7489163875579834, "logps/chosen": -31.788530349731445, "logps/rejected": -206.02781677246094, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.0231853723526, "rewards/margins": 6.1070356369018555, "rewards/rejected": -7.130221366882324, "step": 6753 }, { "epoch": 1.05, "learning_rate": 9.193677057938689e-06, "logits/chosen": -2.8702354431152344, "logits/rejected": -2.867351531982422, "logps/chosen": -28.279951095581055, "logps/rejected": -234.86314392089844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.34301671385765076, "rewards/margins": 8.137147903442383, "rewards/rejected": -7.794131278991699, "step": 6754 }, { "epoch": 1.05, "learning_rate": 9.192943617407542e-06, "logits/chosen": -1.6075026988983154, "logits/rejected": -2.850267171859741, "logps/chosen": -216.85220336914062, "logps/rejected": -475.76751708984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5812252163887024, "rewards/margins": 7.866307735443115, "rewards/rejected": -8.447532653808594, "step": 6755 }, { "epoch": 1.05, "learning_rate": 9.192210176876394e-06, "logits/chosen": -2.9632112979888916, "logits/rejected": -2.8769845962524414, "logps/chosen": -57.031524658203125, "logps/rejected": -78.57368469238281, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.3728586435317993, "rewards/margins": 4.642704010009766, "rewards/rejected": -5.015562534332275, "step": 6756 }, { "epoch": 1.05, "learning_rate": 9.191476736345248e-06, "logits/chosen": -2.5429866313934326, "logits/rejected": -2.9092023372650146, "logps/chosen": -80.71021270751953, "logps/rejected": -309.0804443359375, "loss": 0.1505, "rewards/accuracies": 1.0, "rewards/chosen": -0.982745885848999, "rewards/margins": 2.292884349822998, "rewards/rejected": -3.275630235671997, "step": 6757 }, { "epoch": 1.05, "learning_rate": 9.1907432958141e-06, "logits/chosen": -2.2403459548950195, "logits/rejected": -2.8964016437530518, "logps/chosen": -179.97769165039062, "logps/rejected": -292.06781005859375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.1968742311000824, "rewards/margins": 5.595569610595703, "rewards/rejected": -5.792443752288818, "step": 6758 }, { "epoch": 1.05, "learning_rate": 9.190009855282951e-06, "logits/chosen": -2.337514877319336, "logits/rejected": -2.828223943710327, "logps/chosen": -278.9189758300781, "logps/rejected": -419.84600830078125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -1.0770745277404785, "rewards/margins": 6.245501518249512, "rewards/rejected": -7.322575569152832, "step": 6759 }, { "epoch": 1.05, "learning_rate": 9.189276414751803e-06, "logits/chosen": -2.002870798110962, "logits/rejected": -2.479464292526245, "logps/chosen": -100.40828704833984, "logps/rejected": -404.847900390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.23952274024486542, "rewards/margins": 8.414534568786621, "rewards/rejected": -8.654057502746582, "step": 6760 }, { "epoch": 1.05, "learning_rate": 9.188542974220655e-06, "logits/chosen": -2.86712384223938, "logits/rejected": -2.777587890625, "logps/chosen": -691.20556640625, "logps/rejected": -872.761962890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8409286737442017, "rewards/margins": 8.07364273071289, "rewards/rejected": -8.914570808410645, "step": 6761 }, { "epoch": 1.05, "learning_rate": 9.187809533689507e-06, "logits/chosen": -2.08490252494812, "logits/rejected": -2.498366594314575, "logps/chosen": -357.013916015625, "logps/rejected": -360.1902160644531, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.5635743141174316, "rewards/margins": 8.766733169555664, "rewards/rejected": -10.330307960510254, "step": 6762 }, { "epoch": 1.05, "learning_rate": 9.187076093158359e-06, "logits/chosen": -2.3702034950256348, "logits/rejected": -2.8473265171051025, "logps/chosen": -220.49700927734375, "logps/rejected": -346.21697998046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.752163052558899, "rewards/margins": 8.83314323425293, "rewards/rejected": -10.585306167602539, "step": 6763 }, { "epoch": 1.05, "learning_rate": 9.18634265262721e-06, "logits/chosen": -2.6972978115081787, "logits/rejected": -2.47774338722229, "logps/chosen": -153.0436553955078, "logps/rejected": -478.48345947265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.6700546145439148, "rewards/margins": 8.885385513305664, "rewards/rejected": -9.555439949035645, "step": 6764 }, { "epoch": 1.05, "learning_rate": 9.185609212096064e-06, "logits/chosen": -2.1046245098114014, "logits/rejected": -2.919154644012451, "logps/chosen": -89.63385009765625, "logps/rejected": -254.94854736328125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6781843304634094, "rewards/margins": 6.390167236328125, "rewards/rejected": -7.068351745605469, "step": 6765 }, { "epoch": 1.05, "learning_rate": 9.184875771564916e-06, "logits/chosen": -3.023530960083008, "logits/rejected": -2.6694765090942383, "logps/chosen": -147.9921112060547, "logps/rejected": -162.2762451171875, "loss": 0.2036, "rewards/accuracies": 1.0, "rewards/chosen": -1.0134525299072266, "rewards/margins": 3.738642692565918, "rewards/rejected": -4.7520952224731445, "step": 6766 }, { "epoch": 1.05, "learning_rate": 9.184142331033768e-06, "logits/chosen": -2.3390488624572754, "logits/rejected": -3.065464973449707, "logps/chosen": -85.35694885253906, "logps/rejected": -225.3107147216797, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.2140998840332031, "rewards/margins": 4.470977783203125, "rewards/rejected": -5.685077667236328, "step": 6767 }, { "epoch": 1.05, "learning_rate": 9.18340889050262e-06, "logits/chosen": -3.060209035873413, "logits/rejected": -2.911607265472412, "logps/chosen": -345.69720458984375, "logps/rejected": -481.3878173828125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7015068531036377, "rewards/margins": 7.932677268981934, "rewards/rejected": -8.634183883666992, "step": 6768 }, { "epoch": 1.05, "learning_rate": 9.182675449971472e-06, "logits/chosen": -2.8575637340545654, "logits/rejected": -2.811340093612671, "logps/chosen": -366.47967529296875, "logps/rejected": -427.13446044921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.11651074886322021, "rewards/margins": 9.008330345153809, "rewards/rejected": -9.124841690063477, "step": 6769 }, { "epoch": 1.05, "learning_rate": 9.181942009440324e-06, "logits/chosen": -2.7471606731414795, "logits/rejected": -1.678667426109314, "logps/chosen": -459.343994140625, "logps/rejected": -287.5123291015625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 0.9843207001686096, "rewards/margins": 7.894141674041748, "rewards/rejected": -6.909820556640625, "step": 6770 }, { "epoch": 1.05, "learning_rate": 9.181208568909176e-06, "logits/chosen": -3.329533338546753, "logits/rejected": -3.301276683807373, "logps/chosen": -134.47718811035156, "logps/rejected": -204.2303924560547, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -1.097627878189087, "rewards/margins": 3.786700963973999, "rewards/rejected": -4.884328842163086, "step": 6771 }, { "epoch": 1.05, "learning_rate": 9.180475128378027e-06, "logits/chosen": -2.247127056121826, "logits/rejected": -3.219881534576416, "logps/chosen": -128.2050323486328, "logps/rejected": -398.4897155761719, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -1.1154305934906006, "rewards/margins": 5.718506813049316, "rewards/rejected": -6.83393669128418, "step": 6772 }, { "epoch": 1.05, "learning_rate": 9.17974168784688e-06, "logits/chosen": -2.7967498302459717, "logits/rejected": -1.499488115310669, "logps/chosen": -377.4164123535156, "logps/rejected": -126.94911193847656, "loss": 0.2996, "rewards/accuracies": 1.0, "rewards/chosen": -0.394387423992157, "rewards/margins": 3.5609726905822754, "rewards/rejected": -3.955360174179077, "step": 6773 }, { "epoch": 1.05, "learning_rate": 9.179008247315733e-06, "logits/chosen": -2.384777069091797, "logits/rejected": -3.0222008228302, "logps/chosen": -61.19639205932617, "logps/rejected": -301.3419189453125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.2984660267829895, "rewards/margins": 7.534711837768555, "rewards/rejected": -7.83317756652832, "step": 6774 }, { "epoch": 1.05, "learning_rate": 9.178274806784585e-06, "logits/chosen": -2.8241114616394043, "logits/rejected": -2.7632713317871094, "logps/chosen": -226.2269744873047, "logps/rejected": -210.87881469726562, "loss": 0.6626, "rewards/accuracies": 0.5, "rewards/chosen": -2.042414903640747, "rewards/margins": 3.0001463890075684, "rewards/rejected": -5.042561054229736, "step": 6775 }, { "epoch": 1.05, "learning_rate": 9.177541366253437e-06, "logits/chosen": -2.7716939449310303, "logits/rejected": -1.011336326599121, "logps/chosen": -315.4851989746094, "logps/rejected": -151.44149780273438, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.5703901052474976, "rewards/margins": 6.703112602233887, "rewards/rejected": -6.132721900939941, "step": 6776 }, { "epoch": 1.05, "learning_rate": 9.176807925722289e-06, "logits/chosen": -0.9064706563949585, "logits/rejected": -2.214729070663452, "logps/chosen": -194.76089477539062, "logps/rejected": -605.7173461914062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7423187494277954, "rewards/margins": 9.1605806350708, "rewards/rejected": -9.902898788452148, "step": 6777 }, { "epoch": 1.05, "learning_rate": 9.17607448519114e-06, "logits/chosen": -2.8381335735321045, "logits/rejected": -2.0387160778045654, "logps/chosen": -152.7350616455078, "logps/rejected": -267.0041809082031, "loss": 0.2305, "rewards/accuracies": 1.0, "rewards/chosen": -1.5546398162841797, "rewards/margins": 3.945711851119995, "rewards/rejected": -5.500351905822754, "step": 6778 }, { "epoch": 1.05, "learning_rate": 9.175341044659992e-06, "logits/chosen": -0.8032784461975098, "logits/rejected": -2.7745611667633057, "logps/chosen": -115.04358673095703, "logps/rejected": -536.55810546875, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -0.9013693928718567, "rewards/margins": 4.181678295135498, "rewards/rejected": -5.083047866821289, "step": 6779 }, { "epoch": 1.05, "learning_rate": 9.174607604128844e-06, "logits/chosen": -3.418553352355957, "logits/rejected": -3.3452188968658447, "logps/chosen": -191.7882843017578, "logps/rejected": -219.0181884765625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -0.689440131187439, "rewards/margins": 4.654389381408691, "rewards/rejected": -5.343829154968262, "step": 6780 }, { "epoch": 1.05, "learning_rate": 9.173874163597696e-06, "logits/chosen": -1.5857194662094116, "logits/rejected": -2.125384569168091, "logps/chosen": -206.96612548828125, "logps/rejected": -297.2840576171875, "loss": 1.8889, "rewards/accuracies": 0.5, "rewards/chosen": -4.030893802642822, "rewards/margins": 1.836269497871399, "rewards/rejected": -5.867163181304932, "step": 6781 }, { "epoch": 1.05, "learning_rate": 9.173140723066548e-06, "logits/chosen": -3.309239387512207, "logits/rejected": -3.2890310287475586, "logps/chosen": -247.56350708007812, "logps/rejected": -228.4588623046875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": 0.021828457713127136, "rewards/margins": 6.00699520111084, "rewards/rejected": -5.985166549682617, "step": 6782 }, { "epoch": 1.05, "learning_rate": 9.172407282535401e-06, "logits/chosen": -2.390899896621704, "logits/rejected": -2.878708839416504, "logps/chosen": -143.57144165039062, "logps/rejected": -175.44366455078125, "loss": 0.2394, "rewards/accuracies": 1.0, "rewards/chosen": -1.1349501609802246, "rewards/margins": 3.1342267990112305, "rewards/rejected": -4.269177436828613, "step": 6783 }, { "epoch": 1.06, "learning_rate": 9.171673842004253e-06, "logits/chosen": -3.0763635635375977, "logits/rejected": -1.8885531425476074, "logps/chosen": -364.5415954589844, "logps/rejected": -264.9190673828125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.5012100338935852, "rewards/margins": 5.9699387550354, "rewards/rejected": -6.471148490905762, "step": 6784 }, { "epoch": 1.06, "learning_rate": 9.170940401473105e-06, "logits/chosen": -2.6024131774902344, "logits/rejected": -3.091494083404541, "logps/chosen": -447.41912841796875, "logps/rejected": -595.796630859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.11030122637748718, "rewards/margins": 8.520967483520508, "rewards/rejected": -8.410665512084961, "step": 6785 }, { "epoch": 1.06, "learning_rate": 9.170206960941957e-06, "logits/chosen": -3.0228898525238037, "logits/rejected": -2.325695276260376, "logps/chosen": -211.28680419921875, "logps/rejected": -155.2886505126953, "loss": 0.9242, "rewards/accuracies": 0.5, "rewards/chosen": -1.2438758611679077, "rewards/margins": 2.1895911693573, "rewards/rejected": -3.433467149734497, "step": 6786 }, { "epoch": 1.06, "learning_rate": 9.169473520410809e-06, "logits/chosen": -2.0148584842681885, "logits/rejected": -2.9667303562164307, "logps/chosen": -116.00464630126953, "logps/rejected": -402.37579345703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.22752150893211365, "rewards/margins": 7.395203590393066, "rewards/rejected": -7.622725009918213, "step": 6787 }, { "epoch": 1.06, "learning_rate": 9.168740079879661e-06, "logits/chosen": -2.6562483310699463, "logits/rejected": -2.9838056564331055, "logps/chosen": -32.96221160888672, "logps/rejected": -378.2315673828125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.14710941910743713, "rewards/margins": 6.554535388946533, "rewards/rejected": -6.7016448974609375, "step": 6788 }, { "epoch": 1.06, "learning_rate": 9.168006639348514e-06, "logits/chosen": -1.115230917930603, "logits/rejected": -1.6800765991210938, "logps/chosen": -104.37185668945312, "logps/rejected": -339.00830078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": 0.28307685256004333, "rewards/margins": 6.695842266082764, "rewards/rejected": -6.4127655029296875, "step": 6789 }, { "epoch": 1.06, "learning_rate": 9.167273198817366e-06, "logits/chosen": -1.6710665225982666, "logits/rejected": -2.8213860988616943, "logps/chosen": -67.9446029663086, "logps/rejected": -399.13397216796875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6362297534942627, "rewards/margins": 6.371225833892822, "rewards/rejected": -7.007455825805664, "step": 6790 }, { "epoch": 1.06, "learning_rate": 9.166539758286218e-06, "logits/chosen": -3.1979169845581055, "logits/rejected": -3.2702784538269043, "logps/chosen": -317.8800048828125, "logps/rejected": -201.18739318847656, "loss": 1.2983, "rewards/accuracies": 0.5, "rewards/chosen": -1.763675332069397, "rewards/margins": 2.409950017929077, "rewards/rejected": -4.173625469207764, "step": 6791 }, { "epoch": 1.06, "learning_rate": 9.165806317755072e-06, "logits/chosen": -2.6128201484680176, "logits/rejected": -2.893557548522949, "logps/chosen": -324.6717834472656, "logps/rejected": -462.57305908203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.8596519827842712, "rewards/margins": 8.391304016113281, "rewards/rejected": -7.531652450561523, "step": 6792 }, { "epoch": 1.06, "learning_rate": 9.165072877223924e-06, "logits/chosen": -3.009685754776001, "logits/rejected": -2.6255321502685547, "logps/chosen": -501.17425537109375, "logps/rejected": -326.4997863769531, "loss": 0.0664, "rewards/accuracies": 1.0, "rewards/chosen": -1.0596015453338623, "rewards/margins": 4.328678607940674, "rewards/rejected": -5.388279914855957, "step": 6793 }, { "epoch": 1.06, "learning_rate": 9.164339436692776e-06, "logits/chosen": -2.907458543777466, "logits/rejected": -2.269381046295166, "logps/chosen": -142.08328247070312, "logps/rejected": -113.1623306274414, "loss": 1.4658, "rewards/accuracies": 0.5, "rewards/chosen": -3.6124207973480225, "rewards/margins": -0.3884751796722412, "rewards/rejected": -3.2239456176757812, "step": 6794 }, { "epoch": 1.06, "learning_rate": 9.163605996161627e-06, "logits/chosen": -1.5373269319534302, "logits/rejected": -3.087141752243042, "logps/chosen": -120.50066375732422, "logps/rejected": -306.94085693359375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.053905487060546875, "rewards/margins": 6.562593936920166, "rewards/rejected": -6.616499423980713, "step": 6795 }, { "epoch": 1.06, "learning_rate": 9.16287255563048e-06, "logits/chosen": -1.5461068153381348, "logits/rejected": -2.9030821323394775, "logps/chosen": -128.03173828125, "logps/rejected": -350.0137939453125, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": 0.0021152496337890625, "rewards/margins": 5.620718002319336, "rewards/rejected": -5.618602752685547, "step": 6796 }, { "epoch": 1.06, "learning_rate": 9.162139115099331e-06, "logits/chosen": -3.0973243713378906, "logits/rejected": -2.2101047039031982, "logps/chosen": -251.3051300048828, "logps/rejected": -90.46804809570312, "loss": 0.891, "rewards/accuracies": 0.5, "rewards/chosen": -1.872774600982666, "rewards/margins": 0.7854995727539062, "rewards/rejected": -2.6582741737365723, "step": 6797 }, { "epoch": 1.06, "learning_rate": 9.161405674568183e-06, "logits/chosen": -2.8489248752593994, "logits/rejected": -3.231752872467041, "logps/chosen": -15.152177810668945, "logps/rejected": -135.22286987304688, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.5901240110397339, "rewards/margins": 6.6140899658203125, "rewards/rejected": -6.023965835571289, "step": 6798 }, { "epoch": 1.06, "learning_rate": 9.160672234037035e-06, "logits/chosen": -2.7385880947113037, "logits/rejected": -2.0575802326202393, "logps/chosen": -529.7547607421875, "logps/rejected": -317.8712158203125, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.3945119380950928, "rewards/margins": 5.284221649169922, "rewards/rejected": -4.88970947265625, "step": 6799 }, { "epoch": 1.06, "learning_rate": 9.159938793505888e-06, "logits/chosen": -1.6889294385910034, "logits/rejected": -3.074909210205078, "logps/chosen": -127.52845764160156, "logps/rejected": -335.17840576171875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.346168875694275, "rewards/margins": 5.474272727966309, "rewards/rejected": -6.820441246032715, "step": 6800 }, { "epoch": 1.06, "learning_rate": 9.15920535297474e-06, "logits/chosen": -3.069952964782715, "logits/rejected": -1.6906547546386719, "logps/chosen": -527.7053833007812, "logps/rejected": -237.57936096191406, "loss": 1.2178, "rewards/accuracies": 0.5, "rewards/chosen": -0.08154296875, "rewards/margins": 3.141700267791748, "rewards/rejected": -3.223243236541748, "step": 6801 }, { "epoch": 1.06, "learning_rate": 9.158471912443592e-06, "logits/chosen": -2.8498129844665527, "logits/rejected": -0.9227383732795715, "logps/chosen": -175.65536499023438, "logps/rejected": -89.33924865722656, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.29973697662353516, "rewards/margins": 5.109562397003174, "rewards/rejected": -5.409299373626709, "step": 6802 }, { "epoch": 1.06, "learning_rate": 9.157738471912444e-06, "logits/chosen": -2.698335886001587, "logits/rejected": -1.5202503204345703, "logps/chosen": -435.5853576660156, "logps/rejected": -314.8904113769531, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.4178438186645508, "rewards/margins": 6.713822364807129, "rewards/rejected": -7.13166618347168, "step": 6803 }, { "epoch": 1.06, "learning_rate": 9.157005031381296e-06, "logits/chosen": -3.1060032844543457, "logits/rejected": -2.5417418479919434, "logps/chosen": -148.8261260986328, "logps/rejected": -210.14093017578125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": 0.17813679575920105, "rewards/margins": 6.475156784057617, "rewards/rejected": -6.297019958496094, "step": 6804 }, { "epoch": 1.06, "learning_rate": 9.156271590850148e-06, "logits/chosen": -2.608285903930664, "logits/rejected": -2.159477472305298, "logps/chosen": -650.2266235351562, "logps/rejected": -397.37542724609375, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.8636009097099304, "rewards/margins": 4.578936576843262, "rewards/rejected": -5.442537784576416, "step": 6805 }, { "epoch": 1.06, "learning_rate": 9.155538150319e-06, "logits/chosen": -0.8129087090492249, "logits/rejected": -2.861001491546631, "logps/chosen": -101.25627899169922, "logps/rejected": -563.14501953125, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -2.8866028785705566, "rewards/margins": 4.6941633224487305, "rewards/rejected": -7.580766677856445, "step": 6806 }, { "epoch": 1.06, "learning_rate": 9.154804709787852e-06, "logits/chosen": -1.0856235027313232, "logits/rejected": -2.7845847606658936, "logps/chosen": -34.23378372192383, "logps/rejected": -231.63052368164062, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -0.3025330603122711, "rewards/margins": 4.680764198303223, "rewards/rejected": -4.983297348022461, "step": 6807 }, { "epoch": 1.06, "learning_rate": 9.154071269256704e-06, "logits/chosen": -1.4266948699951172, "logits/rejected": -3.0053138732910156, "logps/chosen": -239.74539184570312, "logps/rejected": -549.0365600585938, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.6655113697052002, "rewards/margins": 5.982094764709473, "rewards/rejected": -6.647605895996094, "step": 6808 }, { "epoch": 1.06, "learning_rate": 9.153337828725557e-06, "logits/chosen": -2.90136981010437, "logits/rejected": -1.9848130941390991, "logps/chosen": -442.344482421875, "logps/rejected": -303.8172607421875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.1093215942382812, "rewards/margins": 6.416111946105957, "rewards/rejected": -7.525433540344238, "step": 6809 }, { "epoch": 1.06, "learning_rate": 9.152604388194409e-06, "logits/chosen": -2.072009563446045, "logits/rejected": -2.8581418991088867, "logps/chosen": -21.625825881958008, "logps/rejected": -283.478515625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.3691341280937195, "rewards/margins": 7.073494911193848, "rewards/rejected": -7.442628860473633, "step": 6810 }, { "epoch": 1.06, "learning_rate": 9.15187094766326e-06, "logits/chosen": -2.3002283573150635, "logits/rejected": -3.0521068572998047, "logps/chosen": -85.87964630126953, "logps/rejected": -310.8349304199219, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": 0.07261256873607635, "rewards/margins": 6.478511810302734, "rewards/rejected": -6.4058990478515625, "step": 6811 }, { "epoch": 1.06, "learning_rate": 9.151137507132113e-06, "logits/chosen": -2.5297670364379883, "logits/rejected": -2.8224408626556396, "logps/chosen": -68.15463256835938, "logps/rejected": -260.95098876953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8230322599411011, "rewards/margins": 6.862070083618164, "rewards/rejected": -7.685102462768555, "step": 6812 }, { "epoch": 1.06, "learning_rate": 9.150404066600965e-06, "logits/chosen": -2.317974090576172, "logits/rejected": -2.728328227996826, "logps/chosen": -47.231590270996094, "logps/rejected": -149.44293212890625, "loss": 0.0863, "rewards/accuracies": 1.0, "rewards/chosen": -1.2991101741790771, "rewards/margins": 2.803537368774414, "rewards/rejected": -4.102647304534912, "step": 6813 }, { "epoch": 1.06, "learning_rate": 9.149670626069816e-06, "logits/chosen": -2.9522817134857178, "logits/rejected": -1.7457388639450073, "logps/chosen": -397.5103454589844, "logps/rejected": -247.8388671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.132232666015625, "rewards/margins": 8.086465835571289, "rewards/rejected": -7.954233646392822, "step": 6814 }, { "epoch": 1.06, "learning_rate": 9.148937185538668e-06, "logits/chosen": -2.8213915824890137, "logits/rejected": -1.7542322874069214, "logps/chosen": -151.75634765625, "logps/rejected": -168.26473999023438, "loss": 0.2685, "rewards/accuracies": 1.0, "rewards/chosen": -1.0984292030334473, "rewards/margins": 3.628333568572998, "rewards/rejected": -4.726762771606445, "step": 6815 }, { "epoch": 1.06, "learning_rate": 9.14820374500752e-06, "logits/chosen": -1.8699867725372314, "logits/rejected": -2.836482286453247, "logps/chosen": -89.91757202148438, "logps/rejected": -169.94297790527344, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.778477430343628, "rewards/margins": 4.361368656158447, "rewards/rejected": -6.139845848083496, "step": 6816 }, { "epoch": 1.06, "learning_rate": 9.147470304476372e-06, "logits/chosen": -2.259251832962036, "logits/rejected": -1.9822795391082764, "logps/chosen": -385.1243896484375, "logps/rejected": -392.3865661621094, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.233300805091858, "rewards/margins": 6.4355010986328125, "rewards/rejected": -7.668802261352539, "step": 6817 }, { "epoch": 1.06, "learning_rate": 9.146736863945226e-06, "logits/chosen": -2.8998520374298096, "logits/rejected": -2.814955949783325, "logps/chosen": -140.5594024658203, "logps/rejected": -189.14608764648438, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.4187972843647003, "rewards/margins": 6.548912048339844, "rewards/rejected": -6.967709541320801, "step": 6818 }, { "epoch": 1.06, "learning_rate": 9.146003423414078e-06, "logits/chosen": -2.8798933029174805, "logits/rejected": -2.6434245109558105, "logps/chosen": -166.04843139648438, "logps/rejected": -266.3155822753906, "loss": 0.0799, "rewards/accuracies": 1.0, "rewards/chosen": -1.3644325733184814, "rewards/margins": 4.888650894165039, "rewards/rejected": -6.253083229064941, "step": 6819 }, { "epoch": 1.06, "learning_rate": 9.14526998288293e-06, "logits/chosen": -2.3139498233795166, "logits/rejected": -2.966918468475342, "logps/chosen": -277.3852233886719, "logps/rejected": -448.800537109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.00441818218678236, "rewards/margins": 6.704631805419922, "rewards/rejected": -6.709050178527832, "step": 6820 }, { "epoch": 1.06, "learning_rate": 9.144536542351781e-06, "logits/chosen": -2.6514222621917725, "logits/rejected": -3.0539426803588867, "logps/chosen": -131.05433654785156, "logps/rejected": -274.0608825683594, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": 0.6685898303985596, "rewards/margins": 7.363403797149658, "rewards/rejected": -6.6948137283325195, "step": 6821 }, { "epoch": 1.06, "learning_rate": 9.143803101820633e-06, "logits/chosen": -1.9831351041793823, "logits/rejected": -2.742086410522461, "logps/chosen": -93.65617370605469, "logps/rejected": -208.19175720214844, "loss": 0.4442, "rewards/accuracies": 0.5, "rewards/chosen": -1.3421425819396973, "rewards/margins": 4.64160680770874, "rewards/rejected": -5.9837493896484375, "step": 6822 }, { "epoch": 1.06, "learning_rate": 9.143069661289485e-06, "logits/chosen": -2.4121928215026855, "logits/rejected": -2.552002191543579, "logps/chosen": -119.91549682617188, "logps/rejected": -289.3145446777344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1781948804855347, "rewards/margins": 7.52105712890625, "rewards/rejected": -8.699251174926758, "step": 6823 }, { "epoch": 1.06, "learning_rate": 9.142336220758339e-06, "logits/chosen": -1.5634723901748657, "logits/rejected": -2.576728343963623, "logps/chosen": -161.73751831054688, "logps/rejected": -618.5133666992188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7194286584854126, "rewards/margins": 9.397504806518555, "rewards/rejected": -10.11693286895752, "step": 6824 }, { "epoch": 1.06, "learning_rate": 9.14160278022719e-06, "logits/chosen": -3.2136802673339844, "logits/rejected": -3.0251777172088623, "logps/chosen": -160.24383544921875, "logps/rejected": -166.63414001464844, "loss": 1.0603, "rewards/accuracies": 0.5, "rewards/chosen": -3.2337698936462402, "rewards/margins": 2.3639137744903564, "rewards/rejected": -5.597683906555176, "step": 6825 }, { "epoch": 1.06, "learning_rate": 9.140869339696042e-06, "logits/chosen": -2.8048832416534424, "logits/rejected": -2.8316397666931152, "logps/chosen": -146.80819702148438, "logps/rejected": -173.2269287109375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.9321644306182861, "rewards/margins": 5.199001789093018, "rewards/rejected": -6.131166458129883, "step": 6826 }, { "epoch": 1.06, "learning_rate": 9.140135899164896e-06, "logits/chosen": -2.7296903133392334, "logits/rejected": -3.101142644882202, "logps/chosen": -45.98005676269531, "logps/rejected": -220.22244262695312, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.8109853267669678, "rewards/margins": 5.550752639770508, "rewards/rejected": -6.361738204956055, "step": 6827 }, { "epoch": 1.06, "learning_rate": 9.139402458633748e-06, "logits/chosen": -1.518201470375061, "logits/rejected": -3.008127450942993, "logps/chosen": -70.58787536621094, "logps/rejected": -325.4449462890625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -0.14095649123191833, "rewards/margins": 6.371803283691406, "rewards/rejected": -6.512759685516357, "step": 6828 }, { "epoch": 1.06, "learning_rate": 9.1386690181026e-06, "logits/chosen": -2.9013683795928955, "logits/rejected": -2.7853691577911377, "logps/chosen": -521.9310913085938, "logps/rejected": -572.996826171875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.14284628629684448, "rewards/margins": 6.478400230407715, "rewards/rejected": -6.621246337890625, "step": 6829 }, { "epoch": 1.06, "learning_rate": 9.137935577571452e-06, "logits/chosen": -2.3208439350128174, "logits/rejected": -3.0553858280181885, "logps/chosen": -86.67276000976562, "logps/rejected": -330.7423095703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.4853666424751282, "rewards/margins": 7.836617469787598, "rewards/rejected": -8.32198429107666, "step": 6830 }, { "epoch": 1.06, "learning_rate": 9.137202137040303e-06, "logits/chosen": -2.872825860977173, "logits/rejected": -2.250727653503418, "logps/chosen": -287.64739990234375, "logps/rejected": -260.03973388671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6497310400009155, "rewards/margins": 6.764298439025879, "rewards/rejected": -7.414029598236084, "step": 6831 }, { "epoch": 1.06, "learning_rate": 9.136468696509155e-06, "logits/chosen": -2.7158610820770264, "logits/rejected": -2.1945784091949463, "logps/chosen": -209.72811889648438, "logps/rejected": -216.0253143310547, "loss": 4.0019, "rewards/accuracies": 0.5, "rewards/chosen": -4.7274861335754395, "rewards/margins": -1.0073606967926025, "rewards/rejected": -3.720125198364258, "step": 6832 }, { "epoch": 1.06, "learning_rate": 9.135735255978007e-06, "logits/chosen": -2.485527992248535, "logits/rejected": -3.1254470348358154, "logps/chosen": -140.90040588378906, "logps/rejected": -413.6990661621094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.9448505640029907, "rewards/margins": 9.129619598388672, "rewards/rejected": -10.074470520019531, "step": 6833 }, { "epoch": 1.06, "learning_rate": 9.135001815446859e-06, "logits/chosen": -3.1612751483917236, "logits/rejected": -1.9216352701187134, "logps/chosen": -424.12457275390625, "logps/rejected": -212.92332458496094, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": 1.034555435180664, "rewards/margins": 6.214259147644043, "rewards/rejected": -5.179703712463379, "step": 6834 }, { "epoch": 1.06, "learning_rate": 9.134268374915711e-06, "logits/chosen": -2.814728021621704, "logits/rejected": -2.7033417224884033, "logps/chosen": -260.0104064941406, "logps/rejected": -524.8660278320312, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7747001647949219, "rewards/margins": 7.549909591674805, "rewards/rejected": -8.324609756469727, "step": 6835 }, { "epoch": 1.06, "learning_rate": 9.133534934384565e-06, "logits/chosen": -2.9743812084198, "logits/rejected": -3.1088368892669678, "logps/chosen": -363.04669189453125, "logps/rejected": -368.7402038574219, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": 0.620763897895813, "rewards/margins": 4.480156898498535, "rewards/rejected": -3.8593931198120117, "step": 6836 }, { "epoch": 1.06, "learning_rate": 9.132801493853416e-06, "logits/chosen": -1.7948912382125854, "logits/rejected": -2.86173152923584, "logps/chosen": -63.163516998291016, "logps/rejected": -352.3017272949219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1127281188964844, "rewards/margins": 8.43425464630127, "rewards/rejected": -9.546982765197754, "step": 6837 }, { "epoch": 1.06, "learning_rate": 9.132068053322268e-06, "logits/chosen": -1.9339054822921753, "logits/rejected": -2.602755069732666, "logps/chosen": -81.82138061523438, "logps/rejected": -192.7105255126953, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.986315131187439, "rewards/margins": 6.196799278259277, "rewards/rejected": -7.183114051818848, "step": 6838 }, { "epoch": 1.06, "learning_rate": 9.13133461279112e-06, "logits/chosen": -2.9015252590179443, "logits/rejected": -2.9904699325561523, "logps/chosen": -996.3606567382812, "logps/rejected": -540.5433349609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": 0.2668701410293579, "rewards/margins": 7.758054733276367, "rewards/rejected": -7.491185188293457, "step": 6839 }, { "epoch": 1.06, "learning_rate": 9.130601172259972e-06, "logits/chosen": -2.099547863006592, "logits/rejected": -2.7093570232391357, "logps/chosen": -140.11083984375, "logps/rejected": -277.0146484375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -0.9026672840118408, "rewards/margins": 5.427247047424316, "rewards/rejected": -6.329914093017578, "step": 6840 }, { "epoch": 1.06, "learning_rate": 9.129867731728824e-06, "logits/chosen": -2.75897216796875, "logits/rejected": -2.7245686054229736, "logps/chosen": -333.4842529296875, "logps/rejected": -457.2733154296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.38405075669288635, "rewards/margins": 9.982271194458008, "rewards/rejected": -9.598220825195312, "step": 6841 }, { "epoch": 1.06, "learning_rate": 9.129134291197676e-06, "logits/chosen": -3.0037918090820312, "logits/rejected": -2.899510383605957, "logps/chosen": -571.6348876953125, "logps/rejected": -520.388916015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.078648328781128, "rewards/margins": 6.756382942199707, "rewards/rejected": -8.835031509399414, "step": 6842 }, { "epoch": 1.06, "learning_rate": 9.128400850666528e-06, "logits/chosen": -0.9660692811012268, "logits/rejected": -2.5743958950042725, "logps/chosen": -109.82537841796875, "logps/rejected": -553.4686279296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.44083672761917114, "rewards/margins": 11.445372581481934, "rewards/rejected": -11.886209487915039, "step": 6843 }, { "epoch": 1.06, "learning_rate": 9.12766741013538e-06, "logits/chosen": -2.8370625972747803, "logits/rejected": -3.216491460800171, "logps/chosen": -168.94252014160156, "logps/rejected": -336.32952880859375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.10441654920578003, "rewards/margins": 7.128515243530273, "rewards/rejected": -7.0240983963012695, "step": 6844 }, { "epoch": 1.06, "learning_rate": 9.126933969604233e-06, "logits/chosen": -1.9404233694076538, "logits/rejected": -2.9842631816864014, "logps/chosen": -253.10662841796875, "logps/rejected": -501.0330810546875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -0.9396289587020874, "rewards/margins": 5.596453666687012, "rewards/rejected": -6.5360822677612305, "step": 6845 }, { "epoch": 1.06, "learning_rate": 9.126200529073085e-06, "logits/chosen": -1.718012809753418, "logits/rejected": -2.6485254764556885, "logps/chosen": -82.84354400634766, "logps/rejected": -280.7926025390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.1209239959716797, "rewards/margins": 7.838671684265137, "rewards/rejected": -8.959595680236816, "step": 6846 }, { "epoch": 1.06, "learning_rate": 9.125467088541937e-06, "logits/chosen": -2.1944005489349365, "logits/rejected": -2.814711093902588, "logps/chosen": -86.15032196044922, "logps/rejected": -210.5918426513672, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.17156563699245453, "rewards/margins": 6.155431747436523, "rewards/rejected": -6.32699728012085, "step": 6847 }, { "epoch": 1.07, "learning_rate": 9.124733648010789e-06, "logits/chosen": -1.49090576171875, "logits/rejected": -2.507951498031616, "logps/chosen": -161.64695739746094, "logps/rejected": -428.0706481933594, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.367010474205017, "rewards/margins": 7.3529744148254395, "rewards/rejected": -8.719985008239746, "step": 6848 }, { "epoch": 1.07, "learning_rate": 9.12400020747964e-06, "logits/chosen": -2.7142539024353027, "logits/rejected": -2.8165431022644043, "logps/chosen": -429.0859375, "logps/rejected": -562.3167114257812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.0005691051483154, "rewards/margins": 8.107513427734375, "rewards/rejected": -10.108081817626953, "step": 6849 }, { "epoch": 1.07, "learning_rate": 9.123266766948493e-06, "logits/chosen": -2.8057398796081543, "logits/rejected": -2.4373037815093994, "logps/chosen": -572.1681518554688, "logps/rejected": -450.6734313964844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2688575983047485, "rewards/margins": 7.382712364196777, "rewards/rejected": -8.651570320129395, "step": 6850 }, { "epoch": 1.07, "learning_rate": 9.122533326417344e-06, "logits/chosen": -3.18084716796875, "logits/rejected": -2.714003562927246, "logps/chosen": -555.1558227539062, "logps/rejected": -257.62603759765625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 1.6745874881744385, "rewards/margins": 9.16919231414795, "rewards/rejected": -7.494604587554932, "step": 6851 }, { "epoch": 1.07, "learning_rate": 9.121799885886196e-06, "logits/chosen": -1.9533369541168213, "logits/rejected": -2.9806101322174072, "logps/chosen": -308.5362548828125, "logps/rejected": -352.88134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.5980850458145142, "rewards/margins": 9.20740795135498, "rewards/rejected": -8.609323501586914, "step": 6852 }, { "epoch": 1.07, "learning_rate": 9.121066445355048e-06, "logits/chosen": -1.919702172279358, "logits/rejected": -2.7457973957061768, "logps/chosen": -77.25665283203125, "logps/rejected": -87.25808715820312, "loss": 0.7565, "rewards/accuracies": 0.5, "rewards/chosen": -2.140873432159424, "rewards/margins": 1.8818104267120361, "rewards/rejected": -4.022683620452881, "step": 6853 }, { "epoch": 1.07, "learning_rate": 9.120333004823902e-06, "logits/chosen": -2.7404398918151855, "logits/rejected": -3.0822672843933105, "logps/chosen": -78.82620239257812, "logps/rejected": -208.52828979492188, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": -0.6109588742256165, "rewards/margins": 3.2561593055725098, "rewards/rejected": -3.8671181201934814, "step": 6854 }, { "epoch": 1.07, "learning_rate": 9.119599564292754e-06, "logits/chosen": -1.063357949256897, "logits/rejected": -2.784658193588257, "logps/chosen": -240.08377075195312, "logps/rejected": -515.3271484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.4637348651885986, "rewards/margins": 7.985513687133789, "rewards/rejected": -9.449249267578125, "step": 6855 }, { "epoch": 1.07, "learning_rate": 9.118866123761606e-06, "logits/chosen": -2.5955698490142822, "logits/rejected": -2.9203720092773438, "logps/chosen": -367.93853759765625, "logps/rejected": -390.48602294921875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.872758388519287, "rewards/margins": 5.511591911315918, "rewards/rejected": -8.384350776672363, "step": 6856 }, { "epoch": 1.07, "learning_rate": 9.118132683230457e-06, "logits/chosen": -2.5558791160583496, "logits/rejected": -2.489245891571045, "logps/chosen": -136.35452270507812, "logps/rejected": -258.1489562988281, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -0.1910947859287262, "rewards/margins": 7.022790908813477, "rewards/rejected": -7.21388578414917, "step": 6857 }, { "epoch": 1.07, "learning_rate": 9.117399242699311e-06, "logits/chosen": -1.8714683055877686, "logits/rejected": -2.8585853576660156, "logps/chosen": -335.2877502441406, "logps/rejected": -615.8544921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1699203252792358, "rewards/margins": 7.764103889465332, "rewards/rejected": -8.934024810791016, "step": 6858 }, { "epoch": 1.07, "learning_rate": 9.116665802168163e-06, "logits/chosen": -2.9614052772521973, "logits/rejected": -2.264159679412842, "logps/chosen": -223.07171630859375, "logps/rejected": -116.52249908447266, "loss": 1.3291, "rewards/accuracies": 0.5, "rewards/chosen": -1.6648086309432983, "rewards/margins": 1.7502869367599487, "rewards/rejected": -3.415095567703247, "step": 6859 }, { "epoch": 1.07, "learning_rate": 9.115932361637015e-06, "logits/chosen": -2.713747262954712, "logits/rejected": -1.5625054836273193, "logps/chosen": -189.49240112304688, "logps/rejected": -156.63037109375, "loss": 0.3871, "rewards/accuracies": 0.5, "rewards/chosen": -1.5948255062103271, "rewards/margins": 3.585770606994629, "rewards/rejected": -5.180596351623535, "step": 6860 }, { "epoch": 1.07, "learning_rate": 9.115198921105867e-06, "logits/chosen": -1.5550057888031006, "logits/rejected": -2.8756306171417236, "logps/chosen": -67.31538391113281, "logps/rejected": -308.8199768066406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.15963248908519745, "rewards/margins": 7.316800594329834, "rewards/rejected": -7.476432800292969, "step": 6861 }, { "epoch": 1.07, "learning_rate": 9.114465480574718e-06, "logits/chosen": -2.9889795780181885, "logits/rejected": -1.302915096282959, "logps/chosen": -289.37261962890625, "logps/rejected": -156.7029266357422, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.8244331479072571, "rewards/margins": 6.301995277404785, "rewards/rejected": -5.477561950683594, "step": 6862 }, { "epoch": 1.07, "learning_rate": 9.113732040043572e-06, "logits/chosen": -2.242964506149292, "logits/rejected": -2.8635263442993164, "logps/chosen": -83.828125, "logps/rejected": -160.306884765625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.9628655314445496, "rewards/margins": 5.478411674499512, "rewards/rejected": -6.441277027130127, "step": 6863 }, { "epoch": 1.07, "learning_rate": 9.112998599512424e-06, "logits/chosen": -2.599276065826416, "logits/rejected": -2.697660207748413, "logps/chosen": -161.35195922851562, "logps/rejected": -197.2822265625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.8045206069946289, "rewards/margins": 5.645763397216797, "rewards/rejected": -6.450284004211426, "step": 6864 }, { "epoch": 1.07, "learning_rate": 9.112265158981276e-06, "logits/chosen": -1.7721688747406006, "logits/rejected": -2.706346273422241, "logps/chosen": -118.01261138916016, "logps/rejected": -323.9796142578125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.489129662513733, "rewards/margins": 6.403669834136963, "rewards/rejected": -7.892799377441406, "step": 6865 }, { "epoch": 1.07, "learning_rate": 9.111531718450128e-06, "logits/chosen": -2.8302714824676514, "logits/rejected": -3.1228067874908447, "logps/chosen": -299.61944580078125, "logps/rejected": -395.8865966796875, "loss": 0.2915, "rewards/accuracies": 1.0, "rewards/chosen": -0.8266708254814148, "rewards/margins": 4.529889106750488, "rewards/rejected": -5.356560230255127, "step": 6866 }, { "epoch": 1.07, "learning_rate": 9.11079827791898e-06, "logits/chosen": -0.8704456686973572, "logits/rejected": -2.7563796043395996, "logps/chosen": -59.4222412109375, "logps/rejected": -471.2919006347656, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -0.3714519441127777, "rewards/margins": 6.871031284332275, "rewards/rejected": -7.242483139038086, "step": 6867 }, { "epoch": 1.07, "learning_rate": 9.110064837387831e-06, "logits/chosen": -2.8118066787719727, "logits/rejected": -3.1787805557250977, "logps/chosen": -235.33404541015625, "logps/rejected": -308.96923828125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.2199156284332275, "rewards/margins": 4.839644432067871, "rewards/rejected": -6.0595598220825195, "step": 6868 }, { "epoch": 1.07, "learning_rate": 9.109331396856683e-06, "logits/chosen": -2.6350655555725098, "logits/rejected": -2.664316177368164, "logps/chosen": -181.1417694091797, "logps/rejected": -224.9887237548828, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": 1.138190507888794, "rewards/margins": 7.213679790496826, "rewards/rejected": -6.075489521026611, "step": 6869 }, { "epoch": 1.07, "learning_rate": 9.108597956325535e-06, "logits/chosen": -1.1676461696624756, "logits/rejected": -2.966219186782837, "logps/chosen": -235.31890869140625, "logps/rejected": -442.28515625, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -1.159497857093811, "rewards/margins": 4.593077182769775, "rewards/rejected": -5.752574920654297, "step": 6870 }, { "epoch": 1.07, "learning_rate": 9.107864515794387e-06, "logits/chosen": -1.7861518859863281, "logits/rejected": -2.9196934700012207, "logps/chosen": -125.14434814453125, "logps/rejected": -293.515869140625, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -1.866321086883545, "rewards/margins": 5.567926406860352, "rewards/rejected": -7.434247016906738, "step": 6871 }, { "epoch": 1.07, "learning_rate": 9.10713107526324e-06, "logits/chosen": -2.176470994949341, "logits/rejected": -2.459322929382324, "logps/chosen": -63.782691955566406, "logps/rejected": -318.93707275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.3276897072792053, "rewards/margins": 9.846237182617188, "rewards/rejected": -9.518548011779785, "step": 6872 }, { "epoch": 1.07, "learning_rate": 9.106397634732093e-06, "logits/chosen": -1.757617712020874, "logits/rejected": -2.492670774459839, "logps/chosen": -197.28627014160156, "logps/rejected": -285.98455810546875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.15143433213233948, "rewards/margins": 6.963004112243652, "rewards/rejected": -7.114438533782959, "step": 6873 }, { "epoch": 1.07, "learning_rate": 9.105664194200944e-06, "logits/chosen": -2.7399721145629883, "logits/rejected": -2.9226579666137695, "logps/chosen": -19.03265380859375, "logps/rejected": -121.66529846191406, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -0.21441015601158142, "rewards/margins": 4.439222812652588, "rewards/rejected": -4.653633117675781, "step": 6874 }, { "epoch": 1.07, "learning_rate": 9.104930753669796e-06, "logits/chosen": -1.1404547691345215, "logits/rejected": -3.057262420654297, "logps/chosen": -148.02413940429688, "logps/rejected": -561.3974609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.6727581024169922, "rewards/margins": 7.463968276977539, "rewards/rejected": -8.136726379394531, "step": 6875 }, { "epoch": 1.07, "learning_rate": 9.104197313138648e-06, "logits/chosen": -2.625947952270508, "logits/rejected": -2.9395267963409424, "logps/chosen": -289.400390625, "logps/rejected": -446.69183349609375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.6161079406738281, "rewards/margins": 6.817546844482422, "rewards/rejected": -8.43365478515625, "step": 6876 }, { "epoch": 1.07, "learning_rate": 9.1034638726075e-06, "logits/chosen": -2.075634717941284, "logits/rejected": -2.771902084350586, "logps/chosen": -116.48014831542969, "logps/rejected": -400.5340881347656, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8255943059921265, "rewards/margins": 8.238733291625977, "rewards/rejected": -9.06432819366455, "step": 6877 }, { "epoch": 1.07, "learning_rate": 9.102730432076352e-06, "logits/chosen": -3.0109238624572754, "logits/rejected": -2.6285972595214844, "logps/chosen": -260.068359375, "logps/rejected": -211.2759246826172, "loss": 1.0366, "rewards/accuracies": 0.5, "rewards/chosen": -1.6015793085098267, "rewards/margins": 2.9712436199188232, "rewards/rejected": -4.5728230476379395, "step": 6878 }, { "epoch": 1.07, "learning_rate": 9.101996991545204e-06, "logits/chosen": -2.6960840225219727, "logits/rejected": -2.5252702236175537, "logps/chosen": -182.15475463867188, "logps/rejected": -393.5255126953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 1.55527925491333, "rewards/margins": 8.329300880432129, "rewards/rejected": -6.774022102355957, "step": 6879 }, { "epoch": 1.07, "learning_rate": 9.101263551014056e-06, "logits/chosen": -2.5081865787506104, "logits/rejected": -2.8776259422302246, "logps/chosen": -108.45053100585938, "logps/rejected": -256.8243103027344, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -0.6051612496376038, "rewards/margins": 5.252554893493652, "rewards/rejected": -5.8577165603637695, "step": 6880 }, { "epoch": 1.07, "learning_rate": 9.10053011048291e-06, "logits/chosen": -2.918792963027954, "logits/rejected": -3.0318753719329834, "logps/chosen": -683.6582641601562, "logps/rejected": -605.4476928710938, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.0432369709014893, "rewards/margins": 6.4584245681762695, "rewards/rejected": -8.501662254333496, "step": 6881 }, { "epoch": 1.07, "learning_rate": 9.099796669951761e-06, "logits/chosen": -3.1010048389434814, "logits/rejected": -2.7266290187835693, "logps/chosen": -309.097412109375, "logps/rejected": -209.0625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.8571838140487671, "rewards/margins": 6.181509017944336, "rewards/rejected": -5.324324607849121, "step": 6882 }, { "epoch": 1.07, "learning_rate": 9.099063229420613e-06, "logits/chosen": -1.4872502088546753, "logits/rejected": -1.9566532373428345, "logps/chosen": -170.97494506835938, "logps/rejected": -270.5567321777344, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6929229497909546, "rewards/margins": 6.0407185554504395, "rewards/rejected": -6.733641624450684, "step": 6883 }, { "epoch": 1.07, "learning_rate": 9.098329788889465e-06, "logits/chosen": -1.5703641176223755, "logits/rejected": -2.775068998336792, "logps/chosen": -73.37513732910156, "logps/rejected": -398.0779724121094, "loss": 0.2454, "rewards/accuracies": 1.0, "rewards/chosen": -1.5145623683929443, "rewards/margins": 3.9108986854553223, "rewards/rejected": -5.4254608154296875, "step": 6884 }, { "epoch": 1.07, "learning_rate": 9.097596348358317e-06, "logits/chosen": -2.5616519451141357, "logits/rejected": -2.7416985034942627, "logps/chosen": -135.10238647460938, "logps/rejected": -221.58474731445312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 0.9286033511161804, "rewards/margins": 6.9977827072143555, "rewards/rejected": -6.069179058074951, "step": 6885 }, { "epoch": 1.07, "learning_rate": 9.096862907827169e-06, "logits/chosen": -2.7239749431610107, "logits/rejected": -2.1619999408721924, "logps/chosen": -327.79888916015625, "logps/rejected": -345.81427001953125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.549755573272705, "rewards/margins": 6.916094779968262, "rewards/rejected": -8.465849876403809, "step": 6886 }, { "epoch": 1.07, "learning_rate": 9.09612946729602e-06, "logits/chosen": -2.0902504920959473, "logits/rejected": -2.658656597137451, "logps/chosen": -48.78337478637695, "logps/rejected": -300.290283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.14137020707130432, "rewards/margins": 9.741632461547852, "rewards/rejected": -9.883002281188965, "step": 6887 }, { "epoch": 1.07, "learning_rate": 9.095396026764872e-06, "logits/chosen": -2.8895578384399414, "logits/rejected": -3.0071728229522705, "logps/chosen": -729.97900390625, "logps/rejected": -708.5554809570312, "loss": 2.2344, "rewards/accuracies": 0.5, "rewards/chosen": -4.826986789703369, "rewards/margins": 1.8095693588256836, "rewards/rejected": -6.6365556716918945, "step": 6888 }, { "epoch": 1.07, "learning_rate": 9.094662586233726e-06, "logits/chosen": -2.8564882278442383, "logits/rejected": -2.1331913471221924, "logps/chosen": -231.6102294921875, "logps/rejected": -214.09005737304688, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.867871880531311, "rewards/margins": 7.315472602844238, "rewards/rejected": -6.447600364685059, "step": 6889 }, { "epoch": 1.07, "learning_rate": 9.093929145702578e-06, "logits/chosen": -2.937926769256592, "logits/rejected": -2.315671682357788, "logps/chosen": -182.7085723876953, "logps/rejected": -23.191734313964844, "loss": 1.5659, "rewards/accuracies": 0.0, "rewards/chosen": -2.2080063819885254, "rewards/margins": -1.3312528133392334, "rewards/rejected": -0.8767535090446472, "step": 6890 }, { "epoch": 1.07, "learning_rate": 9.09319570517143e-06, "logits/chosen": -2.846067428588867, "logits/rejected": -2.7736010551452637, "logps/chosen": -438.5437927246094, "logps/rejected": -402.6051940917969, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0912079811096191, "rewards/margins": 8.250462532043457, "rewards/rejected": -9.341670989990234, "step": 6891 }, { "epoch": 1.07, "learning_rate": 9.092462264640283e-06, "logits/chosen": -1.8415634632110596, "logits/rejected": -2.615502119064331, "logps/chosen": -133.88568115234375, "logps/rejected": -202.7582550048828, "loss": 1.017, "rewards/accuracies": 0.5, "rewards/chosen": -1.5559059381484985, "rewards/margins": 3.5408878326416016, "rewards/rejected": -5.0967936515808105, "step": 6892 }, { "epoch": 1.07, "learning_rate": 9.091728824109135e-06, "logits/chosen": -2.4795758724212646, "logits/rejected": -2.910923957824707, "logps/chosen": -127.57955932617188, "logps/rejected": -418.6440124511719, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.505257487297058, "rewards/margins": 4.823530673980713, "rewards/rejected": -6.328787803649902, "step": 6893 }, { "epoch": 1.07, "learning_rate": 9.090995383577987e-06, "logits/chosen": -3.1154797077178955, "logits/rejected": -2.8215491771698, "logps/chosen": -555.1910400390625, "logps/rejected": -130.8372802734375, "loss": 0.7865, "rewards/accuracies": 0.5, "rewards/chosen": -2.0481667518615723, "rewards/margins": 2.8430047035217285, "rewards/rejected": -4.891171455383301, "step": 6894 }, { "epoch": 1.07, "learning_rate": 9.090261943046839e-06, "logits/chosen": -2.959167003631592, "logits/rejected": -2.939509630203247, "logps/chosen": -200.80126953125, "logps/rejected": -264.05181884765625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.42410582304000854, "rewards/margins": 6.309141635894775, "rewards/rejected": -6.733247756958008, "step": 6895 }, { "epoch": 1.07, "learning_rate": 9.08952850251569e-06, "logits/chosen": -2.5235588550567627, "logits/rejected": -2.608032464981079, "logps/chosen": -134.13198852539062, "logps/rejected": -106.76481628417969, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": -1.0076245069503784, "rewards/margins": 3.5653889179229736, "rewards/rejected": -4.5730133056640625, "step": 6896 }, { "epoch": 1.07, "learning_rate": 9.088795061984543e-06, "logits/chosen": -1.9573861360549927, "logits/rejected": -2.993873119354248, "logps/chosen": -111.25138092041016, "logps/rejected": -369.576904296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.10877667367458344, "rewards/margins": 8.978670120239258, "rewards/rejected": -8.869893074035645, "step": 6897 }, { "epoch": 1.07, "learning_rate": 9.088061621453396e-06, "logits/chosen": -3.044947385787964, "logits/rejected": -2.503624677658081, "logps/chosen": -321.8118896484375, "logps/rejected": -259.154296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.5085212588310242, "rewards/margins": 8.103829383850098, "rewards/rejected": -7.595308303833008, "step": 6898 }, { "epoch": 1.07, "learning_rate": 9.087328180922248e-06, "logits/chosen": -2.1921823024749756, "logits/rejected": -2.6508054733276367, "logps/chosen": -257.853759765625, "logps/rejected": -366.9888916015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.3933807611465454, "rewards/margins": 8.634599685668945, "rewards/rejected": -9.02798080444336, "step": 6899 }, { "epoch": 1.07, "learning_rate": 9.0865947403911e-06, "logits/chosen": -2.8383970260620117, "logits/rejected": -2.978153705596924, "logps/chosen": -118.84119415283203, "logps/rejected": -243.7791748046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9290136694908142, "rewards/margins": 6.841197967529297, "rewards/rejected": -7.770211696624756, "step": 6900 }, { "epoch": 1.07, "learning_rate": 9.085861299859952e-06, "logits/chosen": -2.9963150024414062, "logits/rejected": -1.5601838827133179, "logps/chosen": -357.48602294921875, "logps/rejected": -186.86660766601562, "loss": 0.5357, "rewards/accuracies": 0.5, "rewards/chosen": -0.7830840349197388, "rewards/margins": 3.579106092453003, "rewards/rejected": -4.362190246582031, "step": 6901 }, { "epoch": 1.07, "learning_rate": 9.085127859328804e-06, "logits/chosen": -2.32808256149292, "logits/rejected": -3.212892770767212, "logps/chosen": -80.5614242553711, "logps/rejected": -340.85693359375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.6548741459846497, "rewards/margins": 5.7158966064453125, "rewards/rejected": -6.370770454406738, "step": 6902 }, { "epoch": 1.07, "learning_rate": 9.084394418797656e-06, "logits/chosen": -2.0127251148223877, "logits/rejected": -2.885171413421631, "logps/chosen": -170.447998046875, "logps/rejected": -265.63592529296875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.18812207877635956, "rewards/margins": 5.79702091217041, "rewards/rejected": -5.985142707824707, "step": 6903 }, { "epoch": 1.07, "learning_rate": 9.083660978266508e-06, "logits/chosen": -2.4062469005584717, "logits/rejected": -2.430772304534912, "logps/chosen": -295.96453857421875, "logps/rejected": -324.1666564941406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.834896445274353, "rewards/margins": 6.925722122192383, "rewards/rejected": -7.760618686676025, "step": 6904 }, { "epoch": 1.07, "learning_rate": 9.08292753773536e-06, "logits/chosen": -3.1462783813476562, "logits/rejected": -2.752915382385254, "logps/chosen": -416.7674560546875, "logps/rejected": -185.42587280273438, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.8858734369277954, "rewards/margins": 5.930564880371094, "rewards/rejected": -6.816437721252441, "step": 6905 }, { "epoch": 1.07, "learning_rate": 9.082194097204211e-06, "logits/chosen": -2.7856948375701904, "logits/rejected": -2.097287893295288, "logps/chosen": -154.73609924316406, "logps/rejected": -227.6971435546875, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": -1.9109171628952026, "rewards/margins": 3.8264060020446777, "rewards/rejected": -5.737322807312012, "step": 6906 }, { "epoch": 1.07, "learning_rate": 9.081460656673065e-06, "logits/chosen": -1.3509111404418945, "logits/rejected": -2.525421619415283, "logps/chosen": -102.69001007080078, "logps/rejected": -404.92938232421875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5083635449409485, "rewards/margins": 6.529027938842773, "rewards/rejected": -7.037391185760498, "step": 6907 }, { "epoch": 1.07, "learning_rate": 9.080727216141917e-06, "logits/chosen": -1.7477504014968872, "logits/rejected": -3.0096683502197266, "logps/chosen": -144.93600463867188, "logps/rejected": -515.552734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.39306944608688354, "rewards/margins": 7.533660888671875, "rewards/rejected": -7.926730155944824, "step": 6908 }, { "epoch": 1.07, "learning_rate": 9.079993775610769e-06, "logits/chosen": -2.8622233867645264, "logits/rejected": -2.896192789077759, "logps/chosen": -43.930660247802734, "logps/rejected": -194.3453826904297, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.4253837764263153, "rewards/margins": 5.893641471862793, "rewards/rejected": -6.31902551651001, "step": 6909 }, { "epoch": 1.07, "learning_rate": 9.07926033507962e-06, "logits/chosen": -2.686713218688965, "logits/rejected": -2.140929937362671, "logps/chosen": -144.59739685058594, "logps/rejected": -220.57760620117188, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.2907097041606903, "rewards/margins": 5.08254337310791, "rewards/rejected": -5.373252868652344, "step": 6910 }, { "epoch": 1.07, "learning_rate": 9.078526894548472e-06, "logits/chosen": -2.351079225540161, "logits/rejected": -3.0820415019989014, "logps/chosen": -99.26715850830078, "logps/rejected": -304.6902160644531, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -0.4519628584384918, "rewards/margins": 5.39434289932251, "rewards/rejected": -5.846305847167969, "step": 6911 }, { "epoch": 1.07, "learning_rate": 9.077793454017324e-06, "logits/chosen": -2.558969020843506, "logits/rejected": -2.969719171524048, "logps/chosen": -291.9910583496094, "logps/rejected": -328.62896728515625, "loss": 0.3151, "rewards/accuracies": 1.0, "rewards/chosen": 0.2540718913078308, "rewards/margins": 2.7467832565307617, "rewards/rejected": -2.492711305618286, "step": 6912 }, { "epoch": 1.08, "learning_rate": 9.077060013486176e-06, "logits/chosen": -2.4043819904327393, "logits/rejected": -3.0487630367279053, "logps/chosen": -49.620147705078125, "logps/rejected": -312.10894775390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.28441399335861206, "rewards/margins": 8.775918006896973, "rewards/rejected": -9.060331344604492, "step": 6913 }, { "epoch": 1.08, "learning_rate": 9.076326572955028e-06, "logits/chosen": -1.3926706314086914, "logits/rejected": -2.635983943939209, "logps/chosen": -101.30015563964844, "logps/rejected": -311.06976318359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.7184417843818665, "rewards/margins": 9.384125709533691, "rewards/rejected": -10.102567672729492, "step": 6914 }, { "epoch": 1.08, "learning_rate": 9.07559313242388e-06, "logits/chosen": -2.337862253189087, "logits/rejected": -2.9220287799835205, "logps/chosen": -225.41131591796875, "logps/rejected": -312.97406005859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.4093544483184814, "rewards/margins": 7.490288734436035, "rewards/rejected": -8.899642944335938, "step": 6915 }, { "epoch": 1.08, "learning_rate": 9.074859691892733e-06, "logits/chosen": -2.6754627227783203, "logits/rejected": -2.8404595851898193, "logps/chosen": -120.29267883300781, "logps/rejected": -246.4956817626953, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.10179652273654938, "rewards/margins": 5.132571220397949, "rewards/rejected": -5.234367370605469, "step": 6916 }, { "epoch": 1.08, "learning_rate": 9.074126251361585e-06, "logits/chosen": -2.594836473464966, "logits/rejected": -2.877662181854248, "logps/chosen": -35.53511047363281, "logps/rejected": -141.16717529296875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": 0.3365122973918915, "rewards/margins": 5.16741943359375, "rewards/rejected": -4.830907344818115, "step": 6917 }, { "epoch": 1.08, "learning_rate": 9.073392810830437e-06, "logits/chosen": -2.786109447479248, "logits/rejected": -2.882483720779419, "logps/chosen": -47.6548957824707, "logps/rejected": -159.60952758789062, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.25251102447509766, "rewards/margins": 5.215058326721191, "rewards/rejected": -5.467569351196289, "step": 6918 }, { "epoch": 1.08, "learning_rate": 9.072659370299289e-06, "logits/chosen": -2.7139980792999268, "logits/rejected": -2.2604448795318604, "logps/chosen": -518.4037475585938, "logps/rejected": -392.05303955078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2883266508579254, "rewards/margins": 7.626957893371582, "rewards/rejected": -7.338631629943848, "step": 6919 }, { "epoch": 1.08, "learning_rate": 9.071925929768141e-06, "logits/chosen": -1.9930977821350098, "logits/rejected": -2.929231882095337, "logps/chosen": -58.913761138916016, "logps/rejected": -403.76513671875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1652565002441406, "rewards/margins": 5.8409953117370605, "rewards/rejected": -7.006251811981201, "step": 6920 }, { "epoch": 1.08, "learning_rate": 9.071192489236993e-06, "logits/chosen": -2.9522705078125, "logits/rejected": -2.8821470737457275, "logps/chosen": -77.95903015136719, "logps/rejected": -133.0897674560547, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 0.9627411365509033, "rewards/margins": 4.7099761962890625, "rewards/rejected": -3.7472352981567383, "step": 6921 }, { "epoch": 1.08, "learning_rate": 9.070459048705845e-06, "logits/chosen": -2.2421693801879883, "logits/rejected": -2.8775360584259033, "logps/chosen": -132.67266845703125, "logps/rejected": -206.39923095703125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -0.382773220539093, "rewards/margins": 5.398951053619385, "rewards/rejected": -5.781723976135254, "step": 6922 }, { "epoch": 1.08, "learning_rate": 9.069725608174697e-06, "logits/chosen": -2.6673402786254883, "logits/rejected": -1.7703783512115479, "logps/chosen": -345.82904052734375, "logps/rejected": -210.56698608398438, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 0.6188262701034546, "rewards/margins": 7.33357048034668, "rewards/rejected": -6.714744567871094, "step": 6923 }, { "epoch": 1.08, "learning_rate": 9.06899216764355e-06, "logits/chosen": -2.342420816421509, "logits/rejected": -3.010193109512329, "logps/chosen": -164.57394409179688, "logps/rejected": -164.02566528320312, "loss": 0.7802, "rewards/accuracies": 0.5, "rewards/chosen": -1.1020294427871704, "rewards/margins": 3.684725761413574, "rewards/rejected": -4.786755084991455, "step": 6924 }, { "epoch": 1.08, "learning_rate": 9.068258727112402e-06, "logits/chosen": -0.6844058036804199, "logits/rejected": -1.9973831176757812, "logps/chosen": -145.75897216796875, "logps/rejected": -437.6609802246094, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.6785569190979004, "rewards/margins": 7.661075115203857, "rewards/rejected": -9.339632034301758, "step": 6925 }, { "epoch": 1.08, "learning_rate": 9.067525286581256e-06, "logits/chosen": -1.2983685731887817, "logits/rejected": -2.7620904445648193, "logps/chosen": -97.56031036376953, "logps/rejected": -360.8651123046875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.5231841802597046, "rewards/margins": 7.134584426879883, "rewards/rejected": -7.657768249511719, "step": 6926 }, { "epoch": 1.08, "learning_rate": 9.066791846050108e-06, "logits/chosen": -2.5370378494262695, "logits/rejected": -2.95933198928833, "logps/chosen": -65.65846252441406, "logps/rejected": -251.36102294921875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -0.13823232054710388, "rewards/margins": 6.314121246337891, "rewards/rejected": -6.4523539543151855, "step": 6927 }, { "epoch": 1.08, "learning_rate": 9.06605840551896e-06, "logits/chosen": -3.116685390472412, "logits/rejected": -2.65382719039917, "logps/chosen": -161.05007934570312, "logps/rejected": -213.2138214111328, "loss": 0.7206, "rewards/accuracies": 0.5, "rewards/chosen": -1.1168861389160156, "rewards/margins": 4.081284999847412, "rewards/rejected": -5.198171138763428, "step": 6928 }, { "epoch": 1.08, "learning_rate": 9.065324964987811e-06, "logits/chosen": -2.2407820224761963, "logits/rejected": -1.9636600017547607, "logps/chosen": -642.6233520507812, "logps/rejected": -343.50408935546875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 1.7117164134979248, "rewards/margins": 7.277467250823975, "rewards/rejected": -5.565751075744629, "step": 6929 }, { "epoch": 1.08, "learning_rate": 9.064591524456663e-06, "logits/chosen": -1.743156909942627, "logits/rejected": -2.760113000869751, "logps/chosen": -315.4840393066406, "logps/rejected": -494.078857421875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": 0.18956983089447021, "rewards/margins": 5.744574546813965, "rewards/rejected": -5.555005073547363, "step": 6930 }, { "epoch": 1.08, "learning_rate": 9.063858083925515e-06, "logits/chosen": -2.0284175872802734, "logits/rejected": -2.947906732559204, "logps/chosen": -63.57548522949219, "logps/rejected": -289.8232421875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.3376400470733643, "rewards/margins": 5.376012802124023, "rewards/rejected": -6.713653087615967, "step": 6931 }, { "epoch": 1.08, "learning_rate": 9.063124643394367e-06, "logits/chosen": -2.20123028755188, "logits/rejected": -2.8676464557647705, "logps/chosen": -73.49336242675781, "logps/rejected": -258.7166442871094, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.0935399979352951, "rewards/margins": 6.376045227050781, "rewards/rejected": -6.282505035400391, "step": 6932 }, { "epoch": 1.08, "learning_rate": 9.062391202863219e-06, "logits/chosen": -2.90043306350708, "logits/rejected": -2.61533522605896, "logps/chosen": -323.8870544433594, "logps/rejected": -312.38140869140625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.6706619262695312, "rewards/margins": 5.49480676651001, "rewards/rejected": -6.165468692779541, "step": 6933 }, { "epoch": 1.08, "learning_rate": 9.061657762332072e-06, "logits/chosen": -2.194713592529297, "logits/rejected": -3.169151544570923, "logps/chosen": -113.17787170410156, "logps/rejected": -338.7438659667969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.00284634530544281, "rewards/margins": 8.280817031860352, "rewards/rejected": -8.283662796020508, "step": 6934 }, { "epoch": 1.08, "learning_rate": 9.060924321800924e-06, "logits/chosen": -2.4873158931732178, "logits/rejected": -3.0277042388916016, "logps/chosen": -117.72008514404297, "logps/rejected": -252.8756866455078, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.0185017585754395, "rewards/margins": 5.590752601623535, "rewards/rejected": -6.609253883361816, "step": 6935 }, { "epoch": 1.08, "learning_rate": 9.060190881269776e-06, "logits/chosen": -2.738367795944214, "logits/rejected": -2.7331438064575195, "logps/chosen": -80.82565307617188, "logps/rejected": -176.8494873046875, "loss": 0.4402, "rewards/accuracies": 0.5, "rewards/chosen": -2.2514631748199463, "rewards/margins": 3.0182604789733887, "rewards/rejected": -5.269723892211914, "step": 6936 }, { "epoch": 1.08, "learning_rate": 9.059457440738628e-06, "logits/chosen": -2.2538821697235107, "logits/rejected": -2.8819262981414795, "logps/chosen": -163.29640197753906, "logps/rejected": -343.87469482421875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.2862407863140106, "rewards/margins": 7.964789390563965, "rewards/rejected": -8.251029968261719, "step": 6937 }, { "epoch": 1.08, "learning_rate": 9.05872400020748e-06, "logits/chosen": -3.1726837158203125, "logits/rejected": -2.535586357116699, "logps/chosen": -536.60302734375, "logps/rejected": -407.1549072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.350175380706787, "rewards/margins": 8.879631042480469, "rewards/rejected": -12.229806900024414, "step": 6938 }, { "epoch": 1.08, "learning_rate": 9.057990559676332e-06, "logits/chosen": -3.0568690299987793, "logits/rejected": -2.74159836769104, "logps/chosen": -677.2566528320312, "logps/rejected": -397.6089782714844, "loss": 0.4356, "rewards/accuracies": 0.5, "rewards/chosen": -1.5764985084533691, "rewards/margins": 4.580345153808594, "rewards/rejected": -6.156843185424805, "step": 6939 }, { "epoch": 1.08, "learning_rate": 9.057257119145184e-06, "logits/chosen": -2.711028814315796, "logits/rejected": -2.99576473236084, "logps/chosen": -357.3045349121094, "logps/rejected": -438.8948974609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.0500030517578125, "rewards/margins": 8.982122421264648, "rewards/rejected": -8.932119369506836, "step": 6940 }, { "epoch": 1.08, "learning_rate": 9.056523678614036e-06, "logits/chosen": -3.0336151123046875, "logits/rejected": -2.854954481124878, "logps/chosen": -160.63211059570312, "logps/rejected": -72.83798217773438, "loss": 3.4198, "rewards/accuracies": 0.0, "rewards/chosen": -5.21064567565918, "rewards/margins": -3.385854721069336, "rewards/rejected": -1.8247907161712646, "step": 6941 }, { "epoch": 1.08, "learning_rate": 9.055790238082887e-06, "logits/chosen": -1.6835293769836426, "logits/rejected": -2.8770694732666016, "logps/chosen": -92.99732971191406, "logps/rejected": -457.5343933105469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.03233146667480469, "rewards/margins": 10.901742935180664, "rewards/rejected": -10.934074401855469, "step": 6942 }, { "epoch": 1.08, "learning_rate": 9.055056797551741e-06, "logits/chosen": -1.7102253437042236, "logits/rejected": -3.0805282592773438, "logps/chosen": -82.49699401855469, "logps/rejected": -275.73577880859375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.13581161201000214, "rewards/margins": 6.298274993896484, "rewards/rejected": -6.162463188171387, "step": 6943 }, { "epoch": 1.08, "learning_rate": 9.054323357020593e-06, "logits/chosen": -2.762113332748413, "logits/rejected": -3.048649787902832, "logps/chosen": -127.15765380859375, "logps/rejected": -128.88015747070312, "loss": 1.1593, "rewards/accuracies": 0.5, "rewards/chosen": -1.5939277410507202, "rewards/margins": 1.6970059871673584, "rewards/rejected": -3.290933609008789, "step": 6944 }, { "epoch": 1.08, "learning_rate": 9.053589916489445e-06, "logits/chosen": -2.8527612686157227, "logits/rejected": -2.206462860107422, "logps/chosen": -432.8018493652344, "logps/rejected": -427.13720703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.7110871076583862, "rewards/margins": 6.620674133300781, "rewards/rejected": -8.331761360168457, "step": 6945 }, { "epoch": 1.08, "learning_rate": 9.052856475958297e-06, "logits/chosen": -2.7469968795776367, "logits/rejected": -2.1493911743164062, "logps/chosen": -437.24969482421875, "logps/rejected": -537.5320434570312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6890282034873962, "rewards/margins": 9.553953170776367, "rewards/rejected": -10.24298095703125, "step": 6946 }, { "epoch": 1.08, "learning_rate": 9.052123035427148e-06, "logits/chosen": -2.7275400161743164, "logits/rejected": -2.7982289791107178, "logps/chosen": -157.45184326171875, "logps/rejected": -162.19268798828125, "loss": 0.8084, "rewards/accuracies": 0.5, "rewards/chosen": -2.106928825378418, "rewards/margins": 1.8795753717422485, "rewards/rejected": -3.986504077911377, "step": 6947 }, { "epoch": 1.08, "learning_rate": 9.051389594896e-06, "logits/chosen": -1.5725936889648438, "logits/rejected": -3.105919361114502, "logps/chosen": -167.32508850097656, "logps/rejected": -280.9765319824219, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7759455442428589, "rewards/margins": 9.628793716430664, "rewards/rejected": -10.404739379882812, "step": 6948 }, { "epoch": 1.08, "learning_rate": 9.050656154364852e-06, "logits/chosen": -3.221343517303467, "logits/rejected": -2.8167741298675537, "logps/chosen": -206.34458923339844, "logps/rejected": -342.223876953125, "loss": 0.527, "rewards/accuracies": 0.5, "rewards/chosen": -1.2150696516036987, "rewards/margins": 3.41481876373291, "rewards/rejected": -4.629888534545898, "step": 6949 }, { "epoch": 1.08, "learning_rate": 9.049922713833704e-06, "logits/chosen": -1.1910572052001953, "logits/rejected": -2.588380813598633, "logps/chosen": -100.23869323730469, "logps/rejected": -401.7717590332031, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1295686960220337, "rewards/margins": 8.078970909118652, "rewards/rejected": -9.208539962768555, "step": 6950 }, { "epoch": 1.08, "learning_rate": 9.049189273302556e-06, "logits/chosen": -2.5312957763671875, "logits/rejected": -2.845059871673584, "logps/chosen": -66.28987121582031, "logps/rejected": -203.39291381835938, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.6151047945022583, "rewards/margins": 6.107118606567383, "rewards/rejected": -6.722223281860352, "step": 6951 }, { "epoch": 1.08, "learning_rate": 9.04845583277141e-06, "logits/chosen": -2.053633689880371, "logits/rejected": -2.5398075580596924, "logps/chosen": -182.45472717285156, "logps/rejected": -427.5203552246094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.2110874652862549, "rewards/margins": 8.529735565185547, "rewards/rejected": -9.740823745727539, "step": 6952 }, { "epoch": 1.08, "learning_rate": 9.047722392240261e-06, "logits/chosen": -2.00183367729187, "logits/rejected": -2.6493871212005615, "logps/chosen": -207.02471923828125, "logps/rejected": -362.36993408203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.5861167907714844, "rewards/margins": 11.668313026428223, "rewards/rejected": -14.254429817199707, "step": 6953 }, { "epoch": 1.08, "learning_rate": 9.046988951709113e-06, "logits/chosen": -2.8561084270477295, "logits/rejected": -3.1059980392456055, "logps/chosen": -286.1241149902344, "logps/rejected": -431.0118713378906, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.3028827905654907, "rewards/margins": 6.340572834014893, "rewards/rejected": -7.643455505371094, "step": 6954 }, { "epoch": 1.08, "learning_rate": 9.046255511177965e-06, "logits/chosen": -2.673691511154175, "logits/rejected": -1.8914480209350586, "logps/chosen": -148.0700225830078, "logps/rejected": -109.25967407226562, "loss": 1.557, "rewards/accuracies": 0.0, "rewards/chosen": -4.379006385803223, "rewards/margins": -1.1885865926742554, "rewards/rejected": -3.190419912338257, "step": 6955 }, { "epoch": 1.08, "learning_rate": 9.045522070646817e-06, "logits/chosen": -2.0820391178131104, "logits/rejected": -1.3406771421432495, "logps/chosen": -402.22918701171875, "logps/rejected": -453.9621887207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6243919134140015, "rewards/margins": 11.025337219238281, "rewards/rejected": -11.649728775024414, "step": 6956 }, { "epoch": 1.08, "learning_rate": 9.044788630115669e-06, "logits/chosen": -2.7169837951660156, "logits/rejected": -1.8913660049438477, "logps/chosen": -329.3284606933594, "logps/rejected": -161.99453735351562, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.3224624693393707, "rewards/margins": 6.106557846069336, "rewards/rejected": -6.429019927978516, "step": 6957 }, { "epoch": 1.08, "learning_rate": 9.044055189584523e-06, "logits/chosen": -1.4881703853607178, "logits/rejected": -2.278280258178711, "logps/chosen": -210.86839294433594, "logps/rejected": -419.522705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.06617279350757599, "rewards/margins": 10.786611557006836, "rewards/rejected": -10.852785110473633, "step": 6958 }, { "epoch": 1.08, "learning_rate": 9.043321749053374e-06, "logits/chosen": -2.5722060203552246, "logits/rejected": -2.8784537315368652, "logps/chosen": -80.1620101928711, "logps/rejected": -139.8499755859375, "loss": 0.4998, "rewards/accuracies": 0.5, "rewards/chosen": -1.814695119857788, "rewards/margins": 2.394026041030884, "rewards/rejected": -4.208721160888672, "step": 6959 }, { "epoch": 1.08, "learning_rate": 9.042588308522226e-06, "logits/chosen": -2.996293306350708, "logits/rejected": -2.9405858516693115, "logps/chosen": -501.1642761230469, "logps/rejected": -302.2852478027344, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.0990593433380127, "rewards/margins": 6.686037540435791, "rewards/rejected": -8.785097122192383, "step": 6960 }, { "epoch": 1.08, "learning_rate": 9.04185486799108e-06, "logits/chosen": -1.8247711658477783, "logits/rejected": -2.920734405517578, "logps/chosen": -201.27589416503906, "logps/rejected": -306.4960632324219, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.4183595180511475, "rewards/margins": 4.78769588470459, "rewards/rejected": -6.206055641174316, "step": 6961 }, { "epoch": 1.08, "learning_rate": 9.041121427459932e-06, "logits/chosen": -2.851141929626465, "logits/rejected": -2.537217855453491, "logps/chosen": -215.0720672607422, "logps/rejected": -497.02777099609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9723854064941406, "rewards/margins": 9.776153564453125, "rewards/rejected": -10.748538970947266, "step": 6962 }, { "epoch": 1.08, "learning_rate": 9.040387986928784e-06, "logits/chosen": -2.5516955852508545, "logits/rejected": -1.9785369634628296, "logps/chosen": -176.07406616210938, "logps/rejected": -173.85401916503906, "loss": 0.195, "rewards/accuracies": 1.0, "rewards/chosen": -1.708993911743164, "rewards/margins": 3.6466448307037354, "rewards/rejected": -5.35563850402832, "step": 6963 }, { "epoch": 1.08, "learning_rate": 9.039654546397635e-06, "logits/chosen": -2.976738214492798, "logits/rejected": -2.9255571365356445, "logps/chosen": -95.03756713867188, "logps/rejected": -214.96376037597656, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.2525642514228821, "rewards/margins": 5.70522403717041, "rewards/rejected": -5.957788467407227, "step": 6964 }, { "epoch": 1.08, "learning_rate": 9.038921105866487e-06, "logits/chosen": -2.456753969192505, "logits/rejected": -2.869079113006592, "logps/chosen": -149.9268035888672, "logps/rejected": -262.0162353515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7984417080879211, "rewards/margins": 6.880970001220703, "rewards/rejected": -7.6794114112854, "step": 6965 }, { "epoch": 1.08, "learning_rate": 9.03818766533534e-06, "logits/chosen": -2.2879600524902344, "logits/rejected": -2.6575183868408203, "logps/chosen": -78.44482421875, "logps/rejected": -260.2481689453125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.680004894733429, "rewards/margins": 5.508481025695801, "rewards/rejected": -6.188486099243164, "step": 6966 }, { "epoch": 1.08, "learning_rate": 9.037454224804191e-06, "logits/chosen": -2.6343331336975098, "logits/rejected": -2.7788281440734863, "logps/chosen": -159.3725128173828, "logps/rejected": -309.90362548828125, "loss": 0.2852, "rewards/accuracies": 1.0, "rewards/chosen": -2.685098886489868, "rewards/margins": 5.89697790145874, "rewards/rejected": -8.582077026367188, "step": 6967 }, { "epoch": 1.08, "learning_rate": 9.036720784273043e-06, "logits/chosen": -1.6067323684692383, "logits/rejected": -2.8312981128692627, "logps/chosen": -128.69107055664062, "logps/rejected": -408.07977294921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.257908344268799, "rewards/margins": 7.587170124053955, "rewards/rejected": -10.845078468322754, "step": 6968 }, { "epoch": 1.08, "learning_rate": 9.035987343741895e-06, "logits/chosen": -2.7128262519836426, "logits/rejected": -1.7726227045059204, "logps/chosen": -404.4471435546875, "logps/rejected": -231.3048858642578, "loss": 1.0448, "rewards/accuracies": 0.5, "rewards/chosen": -1.7762482166290283, "rewards/margins": 2.684852123260498, "rewards/rejected": -4.4611005783081055, "step": 6969 }, { "epoch": 1.08, "learning_rate": 9.035253903210748e-06, "logits/chosen": -2.244525671005249, "logits/rejected": -2.7954049110412598, "logps/chosen": -245.33123779296875, "logps/rejected": -275.7355651855469, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 1.9766632318496704, "rewards/margins": 6.656609535217285, "rewards/rejected": -4.679945945739746, "step": 6970 }, { "epoch": 1.08, "learning_rate": 9.0345204626796e-06, "logits/chosen": -2.634275436401367, "logits/rejected": -3.218916654586792, "logps/chosen": -185.27584838867188, "logps/rejected": -406.2900695800781, "loss": 2.4085, "rewards/accuracies": 0.5, "rewards/chosen": -5.820595741271973, "rewards/margins": -1.4622188806533813, "rewards/rejected": -4.358376502990723, "step": 6971 }, { "epoch": 1.08, "learning_rate": 9.033787022148452e-06, "logits/chosen": -2.8539819717407227, "logits/rejected": -3.1618030071258545, "logps/chosen": -120.4889907836914, "logps/rejected": -241.8592529296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.20266462862491608, "rewards/margins": 7.19583797454834, "rewards/rejected": -7.398502349853516, "step": 6972 }, { "epoch": 1.08, "learning_rate": 9.033053581617304e-06, "logits/chosen": -2.4956464767456055, "logits/rejected": -2.727510929107666, "logps/chosen": -70.12136840820312, "logps/rejected": -226.68601989746094, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7479695081710815, "rewards/margins": 8.665513038635254, "rewards/rejected": -9.413482666015625, "step": 6973 }, { "epoch": 1.08, "learning_rate": 9.032320141086156e-06, "logits/chosen": -1.8726835250854492, "logits/rejected": -2.8140695095062256, "logps/chosen": -62.13753890991211, "logps/rejected": -341.3173828125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.7311285138130188, "rewards/margins": 7.504292011260986, "rewards/rejected": -8.235420227050781, "step": 6974 }, { "epoch": 1.08, "learning_rate": 9.031586700555008e-06, "logits/chosen": -2.790466547012329, "logits/rejected": -2.291097402572632, "logps/chosen": -223.4718475341797, "logps/rejected": -223.886962890625, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -1.4126297235488892, "rewards/margins": 4.850694179534912, "rewards/rejected": -6.263323783874512, "step": 6975 }, { "epoch": 1.08, "learning_rate": 9.03085326002386e-06, "logits/chosen": -2.6552212238311768, "logits/rejected": -2.6250271797180176, "logps/chosen": -114.0235595703125, "logps/rejected": -219.23663330078125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.6329841613769531, "rewards/margins": 8.57394790649414, "rewards/rejected": -9.206932067871094, "step": 6976 }, { "epoch": 1.09, "learning_rate": 9.030119819492712e-06, "logits/chosen": -2.896728754043579, "logits/rejected": -2.1723861694335938, "logps/chosen": -756.9196166992188, "logps/rejected": -399.86053466796875, "loss": 0.7293, "rewards/accuracies": 0.5, "rewards/chosen": -1.1881415843963623, "rewards/margins": 2.0227904319763184, "rewards/rejected": -3.2109320163726807, "step": 6977 }, { "epoch": 1.09, "learning_rate": 9.029386378961563e-06, "logits/chosen": -2.645256996154785, "logits/rejected": -2.306586742401123, "logps/chosen": -887.5355224609375, "logps/rejected": -527.5556030273438, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.851922631263733, "rewards/margins": 7.222890377044678, "rewards/rejected": -9.074812889099121, "step": 6978 }, { "epoch": 1.09, "learning_rate": 9.028652938430417e-06, "logits/chosen": -1.6674832105636597, "logits/rejected": -2.851613759994507, "logps/chosen": -83.69216918945312, "logps/rejected": -421.4804382324219, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9799608588218689, "rewards/margins": 7.688688278198242, "rewards/rejected": -8.668649673461914, "step": 6979 }, { "epoch": 1.09, "learning_rate": 9.027919497899269e-06, "logits/chosen": -2.8463828563690186, "logits/rejected": -2.122042655944824, "logps/chosen": -195.23081970214844, "logps/rejected": -170.31491088867188, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -1.347399115562439, "rewards/margins": 5.373239517211914, "rewards/rejected": -6.720639228820801, "step": 6980 }, { "epoch": 1.09, "learning_rate": 9.02718605736812e-06, "logits/chosen": -2.1038403511047363, "logits/rejected": -2.8062422275543213, "logps/chosen": -103.00079345703125, "logps/rejected": -330.8820495605469, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.054013073444366455, "rewards/margins": 6.417989730834961, "rewards/rejected": -6.36397647857666, "step": 6981 }, { "epoch": 1.09, "learning_rate": 9.026452616836973e-06, "logits/chosen": -2.0779008865356445, "logits/rejected": -2.545692205429077, "logps/chosen": -95.3472671508789, "logps/rejected": -219.09820556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.0851234495639801, "rewards/margins": 8.898195266723633, "rewards/rejected": -8.983318328857422, "step": 6982 }, { "epoch": 1.09, "learning_rate": 9.025719176305825e-06, "logits/chosen": -2.4031126499176025, "logits/rejected": -2.733466625213623, "logps/chosen": -134.1404266357422, "logps/rejected": -143.8609161376953, "loss": 0.4982, "rewards/accuracies": 0.5, "rewards/chosen": -1.2208080291748047, "rewards/margins": 2.868818759918213, "rewards/rejected": -4.089626789093018, "step": 6983 }, { "epoch": 1.09, "learning_rate": 9.024985735774676e-06, "logits/chosen": -2.8510615825653076, "logits/rejected": -2.2067854404449463, "logps/chosen": -510.140625, "logps/rejected": -348.84698486328125, "loss": 1.9732, "rewards/accuracies": 0.5, "rewards/chosen": -3.2938950061798096, "rewards/margins": 0.4734649658203125, "rewards/rejected": -3.767359972000122, "step": 6984 }, { "epoch": 1.09, "learning_rate": 9.024252295243528e-06, "logits/chosen": -1.4441139698028564, "logits/rejected": -2.6844751834869385, "logps/chosen": -171.3840789794922, "logps/rejected": -496.6251525878906, "loss": 0.4899, "rewards/accuracies": 0.5, "rewards/chosen": -2.0918822288513184, "rewards/margins": 4.6341400146484375, "rewards/rejected": -6.726022243499756, "step": 6985 }, { "epoch": 1.09, "learning_rate": 9.02351885471238e-06, "logits/chosen": -2.5743820667266846, "logits/rejected": -2.960751533508301, "logps/chosen": -39.333885192871094, "logps/rejected": -226.5680694580078, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -1.9484833478927612, "rewards/margins": 5.366179943084717, "rewards/rejected": -7.314662933349609, "step": 6986 }, { "epoch": 1.09, "learning_rate": 9.022785414181234e-06, "logits/chosen": -1.9955216646194458, "logits/rejected": -3.125714063644409, "logps/chosen": -150.49835205078125, "logps/rejected": -395.467041015625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": 0.05998992919921875, "rewards/margins": 7.969790458679199, "rewards/rejected": -7.909801006317139, "step": 6987 }, { "epoch": 1.09, "learning_rate": 9.022051973650086e-06, "logits/chosen": -2.8935341835021973, "logits/rejected": -1.8064160346984863, "logps/chosen": -272.6524963378906, "logps/rejected": -93.96975708007812, "loss": 0.9405, "rewards/accuracies": 0.5, "rewards/chosen": -2.3129642009735107, "rewards/margins": 0.4099370837211609, "rewards/rejected": -2.7229013442993164, "step": 6988 }, { "epoch": 1.09, "learning_rate": 9.021318533118938e-06, "logits/chosen": -1.8178447484970093, "logits/rejected": -2.872746467590332, "logps/chosen": -149.02513122558594, "logps/rejected": -464.8487548828125, "loss": 0.3778, "rewards/accuracies": 0.5, "rewards/chosen": -4.729276657104492, "rewards/margins": 4.494873523712158, "rewards/rejected": -9.224149703979492, "step": 6989 }, { "epoch": 1.09, "learning_rate": 9.02058509258779e-06, "logits/chosen": -0.9893035292625427, "logits/rejected": -2.5131027698516846, "logps/chosen": -177.7819061279297, "logps/rejected": -522.4814453125, "loss": 0.3127, "rewards/accuracies": 1.0, "rewards/chosen": -3.7515904903411865, "rewards/margins": 3.9428906440734863, "rewards/rejected": -7.694480895996094, "step": 6990 }, { "epoch": 1.09, "learning_rate": 9.019851652056641e-06, "logits/chosen": -2.8405020236968994, "logits/rejected": -2.971341848373413, "logps/chosen": -47.16924285888672, "logps/rejected": -247.67059326171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.19160959124565125, "rewards/margins": 9.136414527893066, "rewards/rejected": -9.328023910522461, "step": 6991 }, { "epoch": 1.09, "learning_rate": 9.019118211525495e-06, "logits/chosen": -1.2887800931930542, "logits/rejected": -2.8820736408233643, "logps/chosen": -53.957054138183594, "logps/rejected": -509.4462890625, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -0.39810818433761597, "rewards/margins": 5.403526306152344, "rewards/rejected": -5.801634311676025, "step": 6992 }, { "epoch": 1.09, "learning_rate": 9.018384770994347e-06, "logits/chosen": -2.9793872833251953, "logits/rejected": -1.2858573198318481, "logps/chosen": -439.15008544921875, "logps/rejected": -164.44989013671875, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -1.1937706470489502, "rewards/margins": 3.8709897994995117, "rewards/rejected": -5.064760208129883, "step": 6993 }, { "epoch": 1.09, "learning_rate": 9.017651330463199e-06, "logits/chosen": -2.8303945064544678, "logits/rejected": -2.814858913421631, "logps/chosen": -211.1143798828125, "logps/rejected": -317.94891357421875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -0.5340919494628906, "rewards/margins": 4.447054386138916, "rewards/rejected": -4.981146335601807, "step": 6994 }, { "epoch": 1.09, "learning_rate": 9.01691788993205e-06, "logits/chosen": -2.9350790977478027, "logits/rejected": -2.1556661128997803, "logps/chosen": -317.125, "logps/rejected": -322.2757873535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6936401128768921, "rewards/margins": 10.175926208496094, "rewards/rejected": -10.869565963745117, "step": 6995 }, { "epoch": 1.09, "learning_rate": 9.016184449400904e-06, "logits/chosen": -2.74790620803833, "logits/rejected": -2.095245599746704, "logps/chosen": -101.34548950195312, "logps/rejected": -117.10855102539062, "loss": 0.3756, "rewards/accuracies": 0.5, "rewards/chosen": -2.6264898777008057, "rewards/margins": 4.1411261558532715, "rewards/rejected": -6.767616271972656, "step": 6996 }, { "epoch": 1.09, "learning_rate": 9.015451008869756e-06, "logits/chosen": -1.7288398742675781, "logits/rejected": -3.1866562366485596, "logps/chosen": -56.09693908691406, "logps/rejected": -302.16790771484375, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": -3.386744260787964, "rewards/margins": 4.443474292755127, "rewards/rejected": -7.83021879196167, "step": 6997 }, { "epoch": 1.09, "learning_rate": 9.014717568338608e-06, "logits/chosen": -2.5064284801483154, "logits/rejected": -2.7876899242401123, "logps/chosen": -120.3270263671875, "logps/rejected": -287.6736755371094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.7265075445175171, "rewards/margins": 7.936272621154785, "rewards/rejected": -8.66278076171875, "step": 6998 }, { "epoch": 1.09, "learning_rate": 9.01398412780746e-06, "logits/chosen": -2.3347764015197754, "logits/rejected": -2.8017117977142334, "logps/chosen": -679.815185546875, "logps/rejected": -594.5482788085938, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 1.2126067876815796, "rewards/margins": 7.5734686851501465, "rewards/rejected": -6.360861778259277, "step": 6999 }, { "epoch": 1.09, "learning_rate": 9.013250687276312e-06, "logits/chosen": -1.6453649997711182, "logits/rejected": -2.6948816776275635, "logps/chosen": -139.11045837402344, "logps/rejected": -411.7235107421875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.014384455978870392, "rewards/margins": 7.283961296081543, "rewards/rejected": -7.269576072692871, "step": 7000 }, { "epoch": 1.09, "learning_rate": 9.012517246745163e-06, "logits/chosen": -2.132261276245117, "logits/rejected": -2.8455095291137695, "logps/chosen": -143.63699340820312, "logps/rejected": -314.210693359375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.6259335875511169, "rewards/margins": 5.847311973571777, "rewards/rejected": -6.473245620727539, "step": 7001 }, { "epoch": 1.09, "learning_rate": 9.011783806214015e-06, "logits/chosen": -2.4428420066833496, "logits/rejected": -2.9511821269989014, "logps/chosen": -613.8521118164062, "logps/rejected": -602.9129638671875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.1689559817314148, "rewards/margins": 6.000993728637695, "rewards/rejected": -6.169949531555176, "step": 7002 }, { "epoch": 1.09, "learning_rate": 9.011050365682867e-06, "logits/chosen": -2.771803617477417, "logits/rejected": -3.0722107887268066, "logps/chosen": -285.6171569824219, "logps/rejected": -679.03662109375, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": -1.4089828729629517, "rewards/margins": 6.6866865158081055, "rewards/rejected": -8.095669746398926, "step": 7003 }, { "epoch": 1.09, "learning_rate": 9.010316925151719e-06, "logits/chosen": -2.972674608230591, "logits/rejected": -2.5730783939361572, "logps/chosen": -186.26873779296875, "logps/rejected": -184.1620330810547, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": 0.4066352844238281, "rewards/margins": 4.658812999725342, "rewards/rejected": -4.252177715301514, "step": 7004 }, { "epoch": 1.09, "learning_rate": 9.009583484620573e-06, "logits/chosen": -1.7470316886901855, "logits/rejected": -2.7697393894195557, "logps/chosen": -80.74383544921875, "logps/rejected": -284.1094665527344, "loss": 0.6026, "rewards/accuracies": 0.5, "rewards/chosen": -1.8032115697860718, "rewards/margins": 4.300851821899414, "rewards/rejected": -6.104063987731934, "step": 7005 }, { "epoch": 1.09, "learning_rate": 9.008850044089425e-06, "logits/chosen": -2.2037513256073, "logits/rejected": -2.8849501609802246, "logps/chosen": -249.78652954101562, "logps/rejected": -366.708251953125, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": 0.0870361328125, "rewards/margins": 4.893854141235352, "rewards/rejected": -4.806818008422852, "step": 7006 }, { "epoch": 1.09, "learning_rate": 9.008116603558276e-06, "logits/chosen": -1.4010785818099976, "logits/rejected": -2.6478536128997803, "logps/chosen": -95.39480590820312, "logps/rejected": -438.82598876953125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1020110845565796, "rewards/margins": 8.857511520385742, "rewards/rejected": -9.959522247314453, "step": 7007 }, { "epoch": 1.09, "learning_rate": 9.007383163027128e-06, "logits/chosen": -2.2722842693328857, "logits/rejected": -2.6377029418945312, "logps/chosen": -268.0577392578125, "logps/rejected": -258.93878173828125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.0482025146484375, "rewards/margins": 5.114019870758057, "rewards/rejected": -6.162222385406494, "step": 7008 }, { "epoch": 1.09, "learning_rate": 9.00664972249598e-06, "logits/chosen": -2.747530937194824, "logits/rejected": -1.3520700931549072, "logps/chosen": -524.446044921875, "logps/rejected": -236.76583862304688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.35903501510620117, "rewards/margins": 7.72920560836792, "rewards/rejected": -8.088240623474121, "step": 7009 }, { "epoch": 1.09, "learning_rate": 9.005916281964832e-06, "logits/chosen": -2.197087526321411, "logits/rejected": -2.9552032947540283, "logps/chosen": -54.190757751464844, "logps/rejected": -321.5259704589844, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.09796541929244995, "rewards/margins": 8.183900833129883, "rewards/rejected": -8.281866073608398, "step": 7010 }, { "epoch": 1.09, "learning_rate": 9.005182841433684e-06, "logits/chosen": -2.83422589302063, "logits/rejected": -2.6462743282318115, "logps/chosen": -378.6915283203125, "logps/rejected": -637.367431640625, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -1.6727485656738281, "rewards/margins": 7.17723274230957, "rewards/rejected": -8.849981307983398, "step": 7011 }, { "epoch": 1.09, "learning_rate": 9.004449400902536e-06, "logits/chosen": -2.814358711242676, "logits/rejected": -1.8455865383148193, "logps/chosen": -343.43731689453125, "logps/rejected": -333.7552795410156, "loss": 0.1079, "rewards/accuracies": 1.0, "rewards/chosen": -1.0816597938537598, "rewards/margins": 4.760458946228027, "rewards/rejected": -5.842118740081787, "step": 7012 }, { "epoch": 1.09, "learning_rate": 9.003715960371388e-06, "logits/chosen": -1.9302128553390503, "logits/rejected": -2.8819262981414795, "logps/chosen": -285.24005126953125, "logps/rejected": -570.5953369140625, "loss": 0.2826, "rewards/accuracies": 1.0, "rewards/chosen": -2.352879762649536, "rewards/margins": 4.382534503936768, "rewards/rejected": -6.735414505004883, "step": 7013 }, { "epoch": 1.09, "learning_rate": 9.002982519840241e-06, "logits/chosen": -2.4491426944732666, "logits/rejected": -2.6928746700286865, "logps/chosen": -79.07246398925781, "logps/rejected": -235.85000610351562, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.4520809650421143, "rewards/margins": 6.246938705444336, "rewards/rejected": -7.699019908905029, "step": 7014 }, { "epoch": 1.09, "learning_rate": 9.002249079309093e-06, "logits/chosen": -3.162897825241089, "logits/rejected": -2.422194242477417, "logps/chosen": -912.5662231445312, "logps/rejected": -617.9885864257812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.6049561500549316, "rewards/margins": 7.0951128005981445, "rewards/rejected": -9.700069427490234, "step": 7015 }, { "epoch": 1.09, "learning_rate": 9.001515638777945e-06, "logits/chosen": -2.381866216659546, "logits/rejected": -2.7993149757385254, "logps/chosen": -135.1197052001953, "logps/rejected": -380.9410705566406, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.7345304489135742, "rewards/margins": 6.467486381530762, "rewards/rejected": -8.202016830444336, "step": 7016 }, { "epoch": 1.09, "learning_rate": 9.000782198246797e-06, "logits/chosen": -2.0368523597717285, "logits/rejected": -2.61529278755188, "logps/chosen": -140.93331909179688, "logps/rejected": -322.36541748046875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8768795132637024, "rewards/margins": 6.954804420471191, "rewards/rejected": -7.83168363571167, "step": 7017 }, { "epoch": 1.09, "learning_rate": 9.000048757715649e-06, "logits/chosen": -2.565732479095459, "logits/rejected": -2.419957160949707, "logps/chosen": -208.73789978027344, "logps/rejected": -314.61273193359375, "loss": 0.3098, "rewards/accuracies": 1.0, "rewards/chosen": -1.345772624015808, "rewards/margins": 2.831057071685791, "rewards/rejected": -4.176829814910889, "step": 7018 }, { "epoch": 1.09, "learning_rate": 8.9993153171845e-06, "logits/chosen": -2.491100549697876, "logits/rejected": -2.84086012840271, "logps/chosen": -102.58980560302734, "logps/rejected": -189.54148864746094, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.1852822303771973, "rewards/margins": 6.837125301361084, "rewards/rejected": -9.022407531738281, "step": 7019 }, { "epoch": 1.09, "learning_rate": 8.998581876653353e-06, "logits/chosen": -2.825035572052002, "logits/rejected": -3.0077476501464844, "logps/chosen": -189.55308532714844, "logps/rejected": -236.9351043701172, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.18304213881492615, "rewards/margins": 5.341527938842773, "rewards/rejected": -5.524570465087891, "step": 7020 }, { "epoch": 1.09, "learning_rate": 8.997848436122204e-06, "logits/chosen": -2.2436907291412354, "logits/rejected": -2.6991171836853027, "logps/chosen": -109.03153991699219, "logps/rejected": -231.90696716308594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2455990314483643, "rewards/margins": 6.718360424041748, "rewards/rejected": -7.963959217071533, "step": 7021 }, { "epoch": 1.09, "learning_rate": 8.997114995591056e-06, "logits/chosen": -1.5921076536178589, "logits/rejected": -2.8369269371032715, "logps/chosen": -66.40559387207031, "logps/rejected": -221.40367126464844, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.688797116279602, "rewards/margins": 5.499711036682129, "rewards/rejected": -7.1885085105896, "step": 7022 }, { "epoch": 1.09, "learning_rate": 8.99638155505991e-06, "logits/chosen": -2.777529001235962, "logits/rejected": -2.951279640197754, "logps/chosen": -96.15988159179688, "logps/rejected": -289.83251953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.973692774772644, "rewards/margins": 7.640597343444824, "rewards/rejected": -8.614290237426758, "step": 7023 }, { "epoch": 1.09, "learning_rate": 8.995648114528762e-06, "logits/chosen": -2.6235709190368652, "logits/rejected": -2.3394603729248047, "logps/chosen": -178.84878540039062, "logps/rejected": -171.84262084960938, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -0.9589927792549133, "rewards/margins": 4.795352458953857, "rewards/rejected": -5.754345417022705, "step": 7024 }, { "epoch": 1.09, "learning_rate": 8.994914673997614e-06, "logits/chosen": -2.650186061859131, "logits/rejected": -3.0453598499298096, "logps/chosen": -418.7621154785156, "logps/rejected": -408.0126953125, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -0.7735278010368347, "rewards/margins": 4.2244367599487305, "rewards/rejected": -4.997964859008789, "step": 7025 }, { "epoch": 1.09, "learning_rate": 8.994181233466467e-06, "logits/chosen": -2.7553443908691406, "logits/rejected": -1.884074091911316, "logps/chosen": -586.1957397460938, "logps/rejected": -322.01959228515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.9508514404296875, "rewards/margins": 8.834270477294922, "rewards/rejected": -7.883418083190918, "step": 7026 }, { "epoch": 1.09, "learning_rate": 8.993447792935319e-06, "logits/chosen": -2.651517152786255, "logits/rejected": -2.8136022090911865, "logps/chosen": -52.288246154785156, "logps/rejected": -173.37225341796875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": 0.41523438692092896, "rewards/margins": 5.4740190505981445, "rewards/rejected": -5.058784484863281, "step": 7027 }, { "epoch": 1.09, "learning_rate": 8.992714352404171e-06, "logits/chosen": -2.3118438720703125, "logits/rejected": -3.0233383178710938, "logps/chosen": -171.37918090820312, "logps/rejected": -404.58782958984375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.3510475158691406, "rewards/margins": 5.949370384216309, "rewards/rejected": -6.300417900085449, "step": 7028 }, { "epoch": 1.09, "learning_rate": 8.991980911873023e-06, "logits/chosen": -2.697394371032715, "logits/rejected": -1.5729694366455078, "logps/chosen": -374.56707763671875, "logps/rejected": -312.3361511230469, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.9324020147323608, "rewards/margins": 8.226082801818848, "rewards/rejected": -9.158485412597656, "step": 7029 }, { "epoch": 1.09, "learning_rate": 8.991247471341875e-06, "logits/chosen": -2.4486684799194336, "logits/rejected": -2.9193084239959717, "logps/chosen": -60.02552795410156, "logps/rejected": -299.2407531738281, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.12468968331813812, "rewards/margins": 5.937155723571777, "rewards/rejected": -6.061845779418945, "step": 7030 }, { "epoch": 1.09, "learning_rate": 8.990514030810727e-06, "logits/chosen": -2.8812122344970703, "logits/rejected": -2.4744110107421875, "logps/chosen": -302.9875793457031, "logps/rejected": -373.72021484375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.041815996170044, "rewards/margins": 5.890938758850098, "rewards/rejected": -6.9327545166015625, "step": 7031 }, { "epoch": 1.09, "learning_rate": 8.98978059027958e-06, "logits/chosen": -1.2358206510543823, "logits/rejected": -2.2119014263153076, "logps/chosen": -169.58648681640625, "logps/rejected": -512.3722534179688, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.4934890270233154, "rewards/margins": 6.5587968826293945, "rewards/rejected": -8.052286148071289, "step": 7032 }, { "epoch": 1.09, "learning_rate": 8.989047149748432e-06, "logits/chosen": -3.1126046180725098, "logits/rejected": -2.497921943664551, "logps/chosen": -183.7894287109375, "logps/rejected": -218.67889404296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.7153640985488892, "rewards/margins": 7.8521728515625, "rewards/rejected": -7.136809349060059, "step": 7033 }, { "epoch": 1.09, "learning_rate": 8.988313709217284e-06, "logits/chosen": -1.427182674407959, "logits/rejected": -2.6199636459350586, "logps/chosen": -58.89209747314453, "logps/rejected": -240.95498657226562, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.126263976097107, "rewards/margins": 4.643465995788574, "rewards/rejected": -5.769730091094971, "step": 7034 }, { "epoch": 1.09, "learning_rate": 8.987580268686136e-06, "logits/chosen": -2.1976752281188965, "logits/rejected": -1.6533540487289429, "logps/chosen": -304.9195251464844, "logps/rejected": -136.4023895263672, "loss": 1.4827, "rewards/accuracies": 0.5, "rewards/chosen": -2.6669650077819824, "rewards/margins": 1.7368009090423584, "rewards/rejected": -4.403765678405762, "step": 7035 }, { "epoch": 1.09, "learning_rate": 8.986846828154988e-06, "logits/chosen": -1.686109185218811, "logits/rejected": -2.929680585861206, "logps/chosen": -120.49365234375, "logps/rejected": -318.3656005859375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.194438174366951, "rewards/margins": 6.436839580535889, "rewards/rejected": -6.631278038024902, "step": 7036 }, { "epoch": 1.09, "learning_rate": 8.98611338762384e-06, "logits/chosen": -2.8805932998657227, "logits/rejected": -1.8379254341125488, "logps/chosen": -180.98385620117188, "logps/rejected": -131.1446990966797, "loss": 0.1248, "rewards/accuracies": 1.0, "rewards/chosen": -1.9096887111663818, "rewards/margins": 2.2860257625579834, "rewards/rejected": -4.195714473724365, "step": 7037 }, { "epoch": 1.09, "learning_rate": 8.985379947092691e-06, "logits/chosen": -2.719351053237915, "logits/rejected": -2.760148048400879, "logps/chosen": -227.73004150390625, "logps/rejected": -285.3309326171875, "loss": 0.3106, "rewards/accuracies": 1.0, "rewards/chosen": -2.5669336318969727, "rewards/margins": 3.745198965072632, "rewards/rejected": -6.312132835388184, "step": 7038 }, { "epoch": 1.09, "learning_rate": 8.984646506561543e-06, "logits/chosen": -2.005166530609131, "logits/rejected": -3.003298044204712, "logps/chosen": -70.34746551513672, "logps/rejected": -437.883544921875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.47892093658447266, "rewards/margins": 7.019462585449219, "rewards/rejected": -7.498383522033691, "step": 7039 }, { "epoch": 1.09, "learning_rate": 8.983913066030395e-06, "logits/chosen": -3.005190134048462, "logits/rejected": -1.9282170534133911, "logps/chosen": -225.86212158203125, "logps/rejected": -165.46270751953125, "loss": 0.2236, "rewards/accuracies": 1.0, "rewards/chosen": -2.682445764541626, "rewards/margins": 3.3394854068756104, "rewards/rejected": -6.021931171417236, "step": 7040 }, { "epoch": 1.1, "learning_rate": 8.983179625499249e-06, "logits/chosen": -2.812440872192383, "logits/rejected": -3.0196990966796875, "logps/chosen": -461.7779846191406, "logps/rejected": -512.5230712890625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.2432169914245605, "rewards/margins": 4.836633205413818, "rewards/rejected": -7.079850196838379, "step": 7041 }, { "epoch": 1.1, "learning_rate": 8.9824461849681e-06, "logits/chosen": -3.164649248123169, "logits/rejected": -2.604412078857422, "logps/chosen": -295.26922607421875, "logps/rejected": -329.64263916015625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -1.1372671127319336, "rewards/margins": 5.702033996582031, "rewards/rejected": -6.839301109313965, "step": 7042 }, { "epoch": 1.1, "learning_rate": 8.981712744436953e-06, "logits/chosen": -2.875645160675049, "logits/rejected": -2.806422472000122, "logps/chosen": -42.837791442871094, "logps/rejected": -164.2614288330078, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.5600993037223816, "rewards/margins": 7.808671951293945, "rewards/rejected": -8.36877155303955, "step": 7043 }, { "epoch": 1.1, "learning_rate": 8.980979303905804e-06, "logits/chosen": -3.044804573059082, "logits/rejected": -2.4485254287719727, "logps/chosen": -337.4830322265625, "logps/rejected": -213.43540954589844, "loss": 0.1548, "rewards/accuracies": 1.0, "rewards/chosen": -1.5129508972167969, "rewards/margins": 2.5708227157592773, "rewards/rejected": -4.083773612976074, "step": 7044 }, { "epoch": 1.1, "learning_rate": 8.980245863374656e-06, "logits/chosen": -2.6468453407287598, "logits/rejected": -2.9650468826293945, "logps/chosen": -90.25020599365234, "logps/rejected": -192.46856689453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.220982164144516, "rewards/margins": 8.191636085510254, "rewards/rejected": -8.412618637084961, "step": 7045 }, { "epoch": 1.1, "learning_rate": 8.979512422843508e-06, "logits/chosen": -2.604233980178833, "logits/rejected": -2.803922414779663, "logps/chosen": -78.99064636230469, "logps/rejected": -188.07806396484375, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -0.573996365070343, "rewards/margins": 4.217808723449707, "rewards/rejected": -4.791805267333984, "step": 7046 }, { "epoch": 1.1, "learning_rate": 8.97877898231236e-06, "logits/chosen": -2.6043543815612793, "logits/rejected": -1.627733588218689, "logps/chosen": -225.48341369628906, "logps/rejected": -160.4076690673828, "loss": 1.8076, "rewards/accuracies": 0.5, "rewards/chosen": -3.262962579727173, "rewards/margins": 1.4325026273727417, "rewards/rejected": -4.695465087890625, "step": 7047 }, { "epoch": 1.1, "learning_rate": 8.978045541781212e-06, "logits/chosen": -2.627584934234619, "logits/rejected": -2.407031297683716, "logps/chosen": -261.92840576171875, "logps/rejected": -201.0898895263672, "loss": 1.009, "rewards/accuracies": 0.5, "rewards/chosen": -4.8369669914245605, "rewards/margins": 1.823946237564087, "rewards/rejected": -6.660913467407227, "step": 7048 }, { "epoch": 1.1, "learning_rate": 8.977312101250064e-06, "logits/chosen": -2.8846473693847656, "logits/rejected": -2.841815948486328, "logps/chosen": -521.4200439453125, "logps/rejected": -482.80322265625, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -0.11250381171703339, "rewards/margins": 4.539124011993408, "rewards/rejected": -4.651628017425537, "step": 7049 }, { "epoch": 1.1, "learning_rate": 8.976578660718917e-06, "logits/chosen": -2.453709840774536, "logits/rejected": -3.028984785079956, "logps/chosen": -75.74876403808594, "logps/rejected": -273.26470947265625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -1.3496137857437134, "rewards/margins": 4.9592132568359375, "rewards/rejected": -6.3088274002075195, "step": 7050 }, { "epoch": 1.1, "learning_rate": 8.97584522018777e-06, "logits/chosen": -3.184095621109009, "logits/rejected": -2.9676389694213867, "logps/chosen": -569.4081420898438, "logps/rejected": -560.4314575195312, "loss": 0.7674, "rewards/accuracies": 0.5, "rewards/chosen": -1.781923770904541, "rewards/margins": 3.8074841499328613, "rewards/rejected": -5.589407920837402, "step": 7051 }, { "epoch": 1.1, "learning_rate": 8.975111779656621e-06, "logits/chosen": -1.7384674549102783, "logits/rejected": -2.877950429916382, "logps/chosen": -65.96534729003906, "logps/rejected": -360.3699035644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3080105781555176, "rewards/margins": 9.66738510131836, "rewards/rejected": -10.975395202636719, "step": 7052 }, { "epoch": 1.1, "learning_rate": 8.974378339125473e-06, "logits/chosen": -2.6315085887908936, "logits/rejected": -2.904872179031372, "logps/chosen": -152.9604034423828, "logps/rejected": -234.40118408203125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.5605999231338501, "rewards/margins": 6.430560111999512, "rewards/rejected": -5.869959831237793, "step": 7053 }, { "epoch": 1.1, "learning_rate": 8.973644898594325e-06, "logits/chosen": -1.8545643091201782, "logits/rejected": -2.552194595336914, "logps/chosen": -104.52313232421875, "logps/rejected": -275.2953796386719, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -1.2938731908798218, "rewards/margins": 5.255161762237549, "rewards/rejected": -6.54903507232666, "step": 7054 }, { "epoch": 1.1, "learning_rate": 8.972911458063177e-06, "logits/chosen": -1.5328524112701416, "logits/rejected": -3.0273070335388184, "logps/chosen": -75.33607482910156, "logps/rejected": -386.66314697265625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.6283370852470398, "rewards/margins": 6.918272972106934, "rewards/rejected": -7.546610355377197, "step": 7055 }, { "epoch": 1.1, "learning_rate": 8.972178017532029e-06, "logits/chosen": -2.4681267738342285, "logits/rejected": -3.0965230464935303, "logps/chosen": -72.81158447265625, "logps/rejected": -188.57369995117188, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -2.9817440509796143, "rewards/margins": 4.068752288818359, "rewards/rejected": -7.050496578216553, "step": 7056 }, { "epoch": 1.1, "learning_rate": 8.97144457700088e-06, "logits/chosen": -2.8103110790252686, "logits/rejected": -2.6034092903137207, "logps/chosen": -226.65257263183594, "logps/rejected": -326.52008056640625, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": -0.5605026483535767, "rewards/margins": 5.5219621658325195, "rewards/rejected": -6.082464694976807, "step": 7057 }, { "epoch": 1.1, "learning_rate": 8.970711136469734e-06, "logits/chosen": -1.9944982528686523, "logits/rejected": -2.663773775100708, "logps/chosen": -155.71853637695312, "logps/rejected": -198.5839080810547, "loss": 0.2825, "rewards/accuracies": 1.0, "rewards/chosen": -1.8519761562347412, "rewards/margins": 3.6688640117645264, "rewards/rejected": -5.520840167999268, "step": 7058 }, { "epoch": 1.1, "learning_rate": 8.969977695938586e-06, "logits/chosen": -2.776272773742676, "logits/rejected": -1.5884560346603394, "logps/chosen": -184.2539825439453, "logps/rejected": -117.74237060546875, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -1.7230498790740967, "rewards/margins": 4.033156394958496, "rewards/rejected": -5.756206035614014, "step": 7059 }, { "epoch": 1.1, "learning_rate": 8.96924425540744e-06, "logits/chosen": -2.654667854309082, "logits/rejected": -1.7451646327972412, "logps/chosen": -408.4231872558594, "logps/rejected": -388.1580810546875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": 0.045850157737731934, "rewards/margins": 7.748732566833496, "rewards/rejected": -7.702882289886475, "step": 7060 }, { "epoch": 1.1, "learning_rate": 8.968510814876291e-06, "logits/chosen": -2.912569284439087, "logits/rejected": -2.6403048038482666, "logps/chosen": -238.7106475830078, "logps/rejected": -283.60552978515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.625144600868225, "rewards/margins": 8.401901245117188, "rewards/rejected": -10.027046203613281, "step": 7061 }, { "epoch": 1.1, "learning_rate": 8.967777374345143e-06, "logits/chosen": -3.200242757797241, "logits/rejected": -2.8360419273376465, "logps/chosen": -453.8909912109375, "logps/rejected": -372.7347412109375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.036459386348724365, "rewards/margins": 5.923735618591309, "rewards/rejected": -5.887275695800781, "step": 7062 }, { "epoch": 1.1, "learning_rate": 8.967043933813995e-06, "logits/chosen": -2.4776573181152344, "logits/rejected": -2.9446346759796143, "logps/chosen": -109.2448501586914, "logps/rejected": -356.861328125, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -1.3399497270584106, "rewards/margins": 7.269375801086426, "rewards/rejected": -8.609325408935547, "step": 7063 }, { "epoch": 1.1, "learning_rate": 8.966310493282847e-06, "logits/chosen": -1.5230662822723389, "logits/rejected": -2.9094252586364746, "logps/chosen": -37.701663970947266, "logps/rejected": -302.6900634765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6805257797241211, "rewards/margins": 8.515204429626465, "rewards/rejected": -9.195730209350586, "step": 7064 }, { "epoch": 1.1, "learning_rate": 8.965577052751699e-06, "logits/chosen": -3.068906545639038, "logits/rejected": -2.6605119705200195, "logps/chosen": -119.26863098144531, "logps/rejected": -134.64501953125, "loss": 0.3672, "rewards/accuracies": 0.5, "rewards/chosen": -2.9300966262817383, "rewards/margins": 2.977175235748291, "rewards/rejected": -5.907271862030029, "step": 7065 }, { "epoch": 1.1, "learning_rate": 8.96484361222055e-06, "logits/chosen": -1.7509104013442993, "logits/rejected": -2.8764359951019287, "logps/chosen": -65.42903900146484, "logps/rejected": -260.9859619140625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.6967973709106445, "rewards/margins": 5.373896598815918, "rewards/rejected": -8.070693969726562, "step": 7066 }, { "epoch": 1.1, "learning_rate": 8.964110171689403e-06, "logits/chosen": -2.2572011947631836, "logits/rejected": -2.653470039367676, "logps/chosen": -99.57011413574219, "logps/rejected": -366.84649658203125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.399282842874527, "rewards/margins": 9.545129776000977, "rewards/rejected": -9.944412231445312, "step": 7067 }, { "epoch": 1.1, "learning_rate": 8.963376731158256e-06, "logits/chosen": -0.9066870808601379, "logits/rejected": -2.618870496749878, "logps/chosen": -99.88911437988281, "logps/rejected": -352.9459533691406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.2405014038085938, "rewards/margins": 7.389772891998291, "rewards/rejected": -9.630273818969727, "step": 7068 }, { "epoch": 1.1, "learning_rate": 8.962643290627108e-06, "logits/chosen": -2.9962780475616455, "logits/rejected": -3.0811915397644043, "logps/chosen": -97.04777526855469, "logps/rejected": -157.09173583984375, "loss": 0.2045, "rewards/accuracies": 1.0, "rewards/chosen": -2.610924005508423, "rewards/margins": 3.6383297443389893, "rewards/rejected": -6.249253749847412, "step": 7069 }, { "epoch": 1.1, "learning_rate": 8.96190985009596e-06, "logits/chosen": -2.8147025108337402, "logits/rejected": -2.725188970565796, "logps/chosen": -169.48785400390625, "logps/rejected": -122.1100082397461, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -2.2429933547973633, "rewards/margins": 4.436306953430176, "rewards/rejected": -6.679300308227539, "step": 7070 }, { "epoch": 1.1, "learning_rate": 8.961176409564812e-06, "logits/chosen": -2.6462416648864746, "logits/rejected": -3.1487038135528564, "logps/chosen": -411.88763427734375, "logps/rejected": -487.58837890625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8207931518554688, "rewards/margins": 6.5443220138549805, "rewards/rejected": -7.365115165710449, "step": 7071 }, { "epoch": 1.1, "learning_rate": 8.960442969033664e-06, "logits/chosen": -2.04927134513855, "logits/rejected": -2.7971761226654053, "logps/chosen": -134.481201171875, "logps/rejected": -501.44329833984375, "loss": 0.1741, "rewards/accuracies": 1.0, "rewards/chosen": -1.9722137451171875, "rewards/margins": 7.727984428405762, "rewards/rejected": -9.70019817352295, "step": 7072 }, { "epoch": 1.1, "learning_rate": 8.959709528502516e-06, "logits/chosen": -2.2214255332946777, "logits/rejected": -2.88669490814209, "logps/chosen": -505.6087646484375, "logps/rejected": -673.7425537109375, "loss": 0.1458, "rewards/accuracies": 1.0, "rewards/chosen": -2.171696901321411, "rewards/margins": 5.66639518737793, "rewards/rejected": -7.838091850280762, "step": 7073 }, { "epoch": 1.1, "learning_rate": 8.958976087971368e-06, "logits/chosen": -2.2913978099823, "logits/rejected": -2.5325233936309814, "logps/chosen": -191.97348022460938, "logps/rejected": -130.53048706054688, "loss": 2.4449, "rewards/accuracies": 0.5, "rewards/chosen": -5.170908451080322, "rewards/margins": -1.2464959621429443, "rewards/rejected": -3.924412488937378, "step": 7074 }, { "epoch": 1.1, "learning_rate": 8.95824264744022e-06, "logits/chosen": -2.5615127086639404, "logits/rejected": -2.735930919647217, "logps/chosen": -140.91799926757812, "logps/rejected": -205.22811889648438, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.22282066941261292, "rewards/margins": 5.672097682952881, "rewards/rejected": -5.894918441772461, "step": 7075 }, { "epoch": 1.1, "learning_rate": 8.957509206909073e-06, "logits/chosen": -2.0312395095825195, "logits/rejected": -2.897761583328247, "logps/chosen": -101.10692596435547, "logps/rejected": -273.53466796875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -0.2444307506084442, "rewards/margins": 6.691720962524414, "rewards/rejected": -6.93615198135376, "step": 7076 }, { "epoch": 1.1, "learning_rate": 8.956775766377925e-06, "logits/chosen": -3.0450711250305176, "logits/rejected": -2.721184253692627, "logps/chosen": -184.0689697265625, "logps/rejected": -188.464599609375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": 0.267526239156723, "rewards/margins": 5.025974273681641, "rewards/rejected": -4.758447647094727, "step": 7077 }, { "epoch": 1.1, "learning_rate": 8.956042325846777e-06, "logits/chosen": -2.353285789489746, "logits/rejected": -2.8069307804107666, "logps/chosen": -286.29998779296875, "logps/rejected": -364.79547119140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.3835388123989105, "rewards/margins": 8.482393264770508, "rewards/rejected": -8.86593246459961, "step": 7078 }, { "epoch": 1.1, "learning_rate": 8.955308885315629e-06, "logits/chosen": -2.808793067932129, "logits/rejected": -2.871793508529663, "logps/chosen": -64.4199447631836, "logps/rejected": -146.71461486816406, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -2.638991117477417, "rewards/margins": 4.486337184906006, "rewards/rejected": -7.125328063964844, "step": 7079 }, { "epoch": 1.1, "learning_rate": 8.95457544478448e-06, "logits/chosen": -1.9372026920318604, "logits/rejected": -2.958465576171875, "logps/chosen": -307.41339111328125, "logps/rejected": -286.4036865234375, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -1.429606556892395, "rewards/margins": 3.419767379760742, "rewards/rejected": -4.849373817443848, "step": 7080 }, { "epoch": 1.1, "learning_rate": 8.953842004253332e-06, "logits/chosen": -2.094771146774292, "logits/rejected": -2.6494691371917725, "logps/chosen": -118.92694091796875, "logps/rejected": -192.4081573486328, "loss": 0.1075, "rewards/accuracies": 1.0, "rewards/chosen": -3.2665855884552, "rewards/margins": 3.224424362182617, "rewards/rejected": -6.491009712219238, "step": 7081 }, { "epoch": 1.1, "learning_rate": 8.953108563722184e-06, "logits/chosen": -2.468050479888916, "logits/rejected": -2.6797678470611572, "logps/chosen": -405.7950439453125, "logps/rejected": -499.7143249511719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3424148559570312, "rewards/margins": 7.499205589294434, "rewards/rejected": -8.841620445251465, "step": 7082 }, { "epoch": 1.1, "learning_rate": 8.952375123191036e-06, "logits/chosen": -2.6959667205810547, "logits/rejected": -2.8650238513946533, "logps/chosen": -46.41310119628906, "logps/rejected": -137.0614013671875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.974386692047119, "rewards/margins": 4.895322799682617, "rewards/rejected": -7.8697099685668945, "step": 7083 }, { "epoch": 1.1, "learning_rate": 8.951641682659888e-06, "logits/chosen": -2.512049674987793, "logits/rejected": -2.7642650604248047, "logps/chosen": -161.99163818359375, "logps/rejected": -369.11767578125, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -4.070248603820801, "rewards/margins": 5.8857808113098145, "rewards/rejected": -9.956029891967773, "step": 7084 }, { "epoch": 1.1, "learning_rate": 8.950908242128742e-06, "logits/chosen": -2.887113332748413, "logits/rejected": -3.0021591186523438, "logps/chosen": -377.36834716796875, "logps/rejected": -618.7728271484375, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -1.8276748657226562, "rewards/margins": 4.257390022277832, "rewards/rejected": -6.085064888000488, "step": 7085 }, { "epoch": 1.1, "learning_rate": 8.950174801597593e-06, "logits/chosen": -2.474153518676758, "logits/rejected": -3.096155881881714, "logps/chosen": -155.69943237304688, "logps/rejected": -259.2332763671875, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -0.9492881894111633, "rewards/margins": 4.469976902008057, "rewards/rejected": -5.419265270233154, "step": 7086 }, { "epoch": 1.1, "learning_rate": 8.949441361066445e-06, "logits/chosen": -2.859513998031616, "logits/rejected": -2.497659921646118, "logps/chosen": -250.39614868164062, "logps/rejected": -155.79782104492188, "loss": 0.1613, "rewards/accuracies": 1.0, "rewards/chosen": -1.2809224128723145, "rewards/margins": 1.9405978918075562, "rewards/rejected": -3.22152042388916, "step": 7087 }, { "epoch": 1.1, "learning_rate": 8.948707920535297e-06, "logits/chosen": -2.6436069011688232, "logits/rejected": -2.1362390518188477, "logps/chosen": -367.408447265625, "logps/rejected": -299.07916259765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": 0.04917678236961365, "rewards/margins": 7.066400051116943, "rewards/rejected": -7.017223358154297, "step": 7088 }, { "epoch": 1.1, "learning_rate": 8.947974480004149e-06, "logits/chosen": -2.6911027431488037, "logits/rejected": -1.424337387084961, "logps/chosen": -165.65692138671875, "logps/rejected": -187.45602416992188, "loss": 0.4191, "rewards/accuracies": 0.5, "rewards/chosen": -2.9045255184173584, "rewards/margins": 3.681570053100586, "rewards/rejected": -6.586095809936523, "step": 7089 }, { "epoch": 1.1, "learning_rate": 8.947241039473001e-06, "logits/chosen": -1.3659433126449585, "logits/rejected": -2.8857388496398926, "logps/chosen": -108.47351837158203, "logps/rejected": -397.3955078125, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -3.9730405807495117, "rewards/margins": 5.383910179138184, "rewards/rejected": -9.356950759887695, "step": 7090 }, { "epoch": 1.1, "learning_rate": 8.946507598941853e-06, "logits/chosen": -3.125983476638794, "logits/rejected": -1.4621471166610718, "logps/chosen": -340.49517822265625, "logps/rejected": -140.91082763671875, "loss": 1.5102, "rewards/accuracies": 0.5, "rewards/chosen": -2.1552460193634033, "rewards/margins": -0.24146044254302979, "rewards/rejected": -1.9137855768203735, "step": 7091 }, { "epoch": 1.1, "learning_rate": 8.945774158410706e-06, "logits/chosen": -2.374098062515259, "logits/rejected": -2.956214427947998, "logps/chosen": -58.1336669921875, "logps/rejected": -278.9041442871094, "loss": 0.3675, "rewards/accuracies": 0.5, "rewards/chosen": -3.354015588760376, "rewards/margins": 4.365135192871094, "rewards/rejected": -7.719151496887207, "step": 7092 }, { "epoch": 1.1, "learning_rate": 8.945040717879558e-06, "logits/chosen": -2.3785412311553955, "logits/rejected": -3.216682195663452, "logps/chosen": -701.724365234375, "logps/rejected": -795.5206298828125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.2333114743232727, "rewards/margins": 4.570558547973633, "rewards/rejected": -4.803869724273682, "step": 7093 }, { "epoch": 1.1, "learning_rate": 8.944307277348412e-06, "logits/chosen": -2.675642728805542, "logits/rejected": -2.540372848510742, "logps/chosen": -120.05887603759766, "logps/rejected": -252.76669311523438, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.5865516662597656, "rewards/margins": 8.569320678710938, "rewards/rejected": -11.155872344970703, "step": 7094 }, { "epoch": 1.1, "learning_rate": 8.943573836817264e-06, "logits/chosen": -2.6311724185943604, "logits/rejected": -3.12320613861084, "logps/chosen": -72.32379913330078, "logps/rejected": -302.6666259765625, "loss": 0.1654, "rewards/accuracies": 1.0, "rewards/chosen": -3.3006041049957275, "rewards/margins": 5.659695625305176, "rewards/rejected": -8.960299491882324, "step": 7095 }, { "epoch": 1.1, "learning_rate": 8.942840396286116e-06, "logits/chosen": -1.4972892999649048, "logits/rejected": -2.7430531978607178, "logps/chosen": -76.39855194091797, "logps/rejected": -238.72396850585938, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -3.5436503887176514, "rewards/margins": 6.006237983703613, "rewards/rejected": -9.549888610839844, "step": 7096 }, { "epoch": 1.1, "learning_rate": 8.942106955754967e-06, "logits/chosen": -2.4723100662231445, "logits/rejected": -1.7532087564468384, "logps/chosen": -194.05484008789062, "logps/rejected": -188.71563720703125, "loss": 0.4062, "rewards/accuracies": 0.5, "rewards/chosen": -1.6592079401016235, "rewards/margins": 6.484016418457031, "rewards/rejected": -8.143224716186523, "step": 7097 }, { "epoch": 1.1, "learning_rate": 8.94137351522382e-06, "logits/chosen": -3.2385616302490234, "logits/rejected": -2.896432876586914, "logps/chosen": -465.88818359375, "logps/rejected": -296.6559753417969, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.5047294497489929, "rewards/margins": 5.835571765899658, "rewards/rejected": -6.340301513671875, "step": 7098 }, { "epoch": 1.1, "learning_rate": 8.940640074692671e-06, "logits/chosen": -3.038881778717041, "logits/rejected": -2.8685405254364014, "logps/chosen": -161.92091369628906, "logps/rejected": -176.7945556640625, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -0.08697128295898438, "rewards/margins": 4.872158050537109, "rewards/rejected": -4.959129333496094, "step": 7099 }, { "epoch": 1.1, "learning_rate": 8.939906634161523e-06, "logits/chosen": -3.16154146194458, "logits/rejected": -3.016627788543701, "logps/chosen": -101.77847290039062, "logps/rejected": -85.24417114257812, "loss": 2.5767, "rewards/accuracies": 0.5, "rewards/chosen": -5.736972808837891, "rewards/margins": -0.05917859077453613, "rewards/rejected": -5.677794456481934, "step": 7100 }, { "epoch": 1.1, "learning_rate": 8.939173193630375e-06, "logits/chosen": -0.9321191310882568, "logits/rejected": -2.7755179405212402, "logps/chosen": -138.4127960205078, "logps/rejected": -648.413818359375, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -1.590605616569519, "rewards/margins": 3.9855709075927734, "rewards/rejected": -5.576176643371582, "step": 7101 }, { "epoch": 1.1, "learning_rate": 8.938439753099227e-06, "logits/chosen": -1.7279709577560425, "logits/rejected": -2.791898488998413, "logps/chosen": -170.0251922607422, "logps/rejected": -384.88323974609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9568272233009338, "rewards/margins": 6.540431022644043, "rewards/rejected": -7.497258186340332, "step": 7102 }, { "epoch": 1.1, "learning_rate": 8.93770631256808e-06, "logits/chosen": -3.2481093406677246, "logits/rejected": -3.1480135917663574, "logps/chosen": -309.75909423828125, "logps/rejected": -279.8875732421875, "loss": 1.2747, "rewards/accuracies": 0.5, "rewards/chosen": -3.1716530323028564, "rewards/margins": 3.102386951446533, "rewards/rejected": -6.2740397453308105, "step": 7103 }, { "epoch": 1.1, "learning_rate": 8.936972872036932e-06, "logits/chosen": -2.7033193111419678, "logits/rejected": -3.2231335639953613, "logps/chosen": -146.54397583007812, "logps/rejected": -205.597412109375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.2396607398986816, "rewards/margins": 5.515381813049316, "rewards/rejected": -7.755042552947998, "step": 7104 }, { "epoch": 1.1, "learning_rate": 8.936239431505784e-06, "logits/chosen": -3.310227870941162, "logits/rejected": -2.3037524223327637, "logps/chosen": -863.047119140625, "logps/rejected": -490.3795166015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.6365447640419006, "rewards/margins": 8.278526306152344, "rewards/rejected": -7.641982078552246, "step": 7105 }, { "epoch": 1.11, "learning_rate": 8.935505990974636e-06, "logits/chosen": -2.8422691822052, "logits/rejected": -3.1382622718811035, "logps/chosen": -66.20753479003906, "logps/rejected": -235.07827758789062, "loss": 0.121, "rewards/accuracies": 1.0, "rewards/chosen": -4.481435298919678, "rewards/margins": 3.2994778156280518, "rewards/rejected": -7.780913352966309, "step": 7106 }, { "epoch": 1.11, "learning_rate": 8.934772550443488e-06, "logits/chosen": -1.9403892755508423, "logits/rejected": -2.7457966804504395, "logps/chosen": -100.11737060546875, "logps/rejected": -361.0920104980469, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.7532863616943359, "rewards/margins": 8.922357559204102, "rewards/rejected": -9.675644874572754, "step": 7107 }, { "epoch": 1.11, "learning_rate": 8.93403910991234e-06, "logits/chosen": -2.8456532955169678, "logits/rejected": -2.9953370094299316, "logps/chosen": -365.54150390625, "logps/rejected": -313.3428955078125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.28544002771377563, "rewards/margins": 6.355776786804199, "rewards/rejected": -6.070336818695068, "step": 7108 }, { "epoch": 1.11, "learning_rate": 8.933305669381192e-06, "logits/chosen": -2.6645102500915527, "logits/rejected": -3.1840708255767822, "logps/chosen": -69.06909942626953, "logps/rejected": -243.6769256591797, "loss": 0.0889, "rewards/accuracies": 1.0, "rewards/chosen": -2.6935980319976807, "rewards/margins": 3.2191553115844727, "rewards/rejected": -5.912753105163574, "step": 7109 }, { "epoch": 1.11, "learning_rate": 8.932572228850044e-06, "logits/chosen": -3.111304998397827, "logits/rejected": -2.98540997505188, "logps/chosen": -178.88204956054688, "logps/rejected": -297.97955322265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.20931857824325562, "rewards/margins": 8.821701049804688, "rewards/rejected": -9.03101921081543, "step": 7110 }, { "epoch": 1.11, "learning_rate": 8.931838788318895e-06, "logits/chosen": -2.6488037109375, "logits/rejected": -2.3922905921936035, "logps/chosen": -400.0315246582031, "logps/rejected": -375.503173828125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.1943728923797607, "rewards/margins": 6.624847412109375, "rewards/rejected": -9.819220542907715, "step": 7111 }, { "epoch": 1.11, "learning_rate": 8.931105347787749e-06, "logits/chosen": -2.5331716537475586, "logits/rejected": -2.7413766384124756, "logps/chosen": -62.411643981933594, "logps/rejected": -303.57489013671875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.364992618560791, "rewards/margins": 7.445849418640137, "rewards/rejected": -10.810842514038086, "step": 7112 }, { "epoch": 1.11, "learning_rate": 8.930371907256601e-06, "logits/chosen": -2.5953478813171387, "logits/rejected": -2.6267333030700684, "logps/chosen": -69.35184478759766, "logps/rejected": -169.4709930419922, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4895254373550415, "rewards/margins": 6.826623916625977, "rewards/rejected": -8.31614875793457, "step": 7113 }, { "epoch": 1.11, "learning_rate": 8.929638466725453e-06, "logits/chosen": -2.4333903789520264, "logits/rejected": -3.238518476486206, "logps/chosen": -188.8573455810547, "logps/rejected": -452.550537109375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.756861925125122, "rewards/margins": 7.1554059982299805, "rewards/rejected": -8.91226863861084, "step": 7114 }, { "epoch": 1.11, "learning_rate": 8.928905026194305e-06, "logits/chosen": -1.2967538833618164, "logits/rejected": -2.739651918411255, "logps/chosen": -133.67068481445312, "logps/rejected": -277.800537109375, "loss": 0.3041, "rewards/accuracies": 1.0, "rewards/chosen": -2.520505905151367, "rewards/margins": 2.399662494659424, "rewards/rejected": -4.920168399810791, "step": 7115 }, { "epoch": 1.11, "learning_rate": 8.928171585663157e-06, "logits/chosen": -1.8627551794052124, "logits/rejected": -1.8902686834335327, "logps/chosen": -161.00225830078125, "logps/rejected": -168.62387084960938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.703709125518799, "rewards/margins": 7.596746444702148, "rewards/rejected": -10.300455093383789, "step": 7116 }, { "epoch": 1.11, "learning_rate": 8.927438145132008e-06, "logits/chosen": -2.7282633781433105, "logits/rejected": -2.6124911308288574, "logps/chosen": -157.37570190429688, "logps/rejected": -239.7841796875, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -2.152561664581299, "rewards/margins": 4.199769020080566, "rewards/rejected": -6.352331161499023, "step": 7117 }, { "epoch": 1.11, "learning_rate": 8.92670470460086e-06, "logits/chosen": -2.681765079498291, "logits/rejected": -2.8958752155303955, "logps/chosen": -94.50968170166016, "logps/rejected": -254.56314086914062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.606257677078247, "rewards/margins": 7.629727363586426, "rewards/rejected": -9.235984802246094, "step": 7118 }, { "epoch": 1.11, "learning_rate": 8.925971264069712e-06, "logits/chosen": -2.9371492862701416, "logits/rejected": -1.9202951192855835, "logps/chosen": -379.7976989746094, "logps/rejected": -160.66539001464844, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.364172339439392, "rewards/margins": 6.677231788635254, "rewards/rejected": -8.041404724121094, "step": 7119 }, { "epoch": 1.11, "learning_rate": 8.925237823538564e-06, "logits/chosen": -0.992789626121521, "logits/rejected": -2.55891752243042, "logps/chosen": -112.26628112792969, "logps/rejected": -545.9720458984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.394003987312317, "rewards/margins": 8.890022277832031, "rewards/rejected": -10.284027099609375, "step": 7120 }, { "epoch": 1.11, "learning_rate": 8.924504383007418e-06, "logits/chosen": -2.960444927215576, "logits/rejected": -3.3257954120635986, "logps/chosen": -98.6915054321289, "logps/rejected": -186.62246704101562, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -2.714348077774048, "rewards/margins": 3.5105834007263184, "rewards/rejected": -6.224931716918945, "step": 7121 }, { "epoch": 1.11, "learning_rate": 8.92377094247627e-06, "logits/chosen": -2.538613796234131, "logits/rejected": -3.183486223220825, "logps/chosen": -282.35565185546875, "logps/rejected": -486.3147888183594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1391754150390625, "rewards/margins": 8.853201866149902, "rewards/rejected": -10.992377281188965, "step": 7122 }, { "epoch": 1.11, "learning_rate": 8.923037501945121e-06, "logits/chosen": -2.4546329975128174, "logits/rejected": -1.386125087738037, "logps/chosen": -116.89797973632812, "logps/rejected": -126.5420913696289, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -5.176530838012695, "rewards/margins": 4.171117782592773, "rewards/rejected": -9.347648620605469, "step": 7123 }, { "epoch": 1.11, "learning_rate": 8.922304061413973e-06, "logits/chosen": -2.234748601913452, "logits/rejected": -3.045957088470459, "logps/chosen": -126.85588073730469, "logps/rejected": -229.61557006835938, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.107682466506958, "rewards/margins": 6.13774299621582, "rewards/rejected": -8.245426177978516, "step": 7124 }, { "epoch": 1.11, "learning_rate": 8.921570620882825e-06, "logits/chosen": -1.6039988994598389, "logits/rejected": -2.815697431564331, "logps/chosen": -116.36730194091797, "logps/rejected": -484.291259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.906236171722412, "rewards/margins": 8.976706504821777, "rewards/rejected": -11.882942199707031, "step": 7125 }, { "epoch": 1.11, "learning_rate": 8.920837180351679e-06, "logits/chosen": -2.5685787200927734, "logits/rejected": -2.8830974102020264, "logps/chosen": -100.39856719970703, "logps/rejected": -231.93881225585938, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.9281834363937378, "rewards/margins": 6.811277389526367, "rewards/rejected": -8.739460945129395, "step": 7126 }, { "epoch": 1.11, "learning_rate": 8.92010373982053e-06, "logits/chosen": -2.3419699668884277, "logits/rejected": -2.515247106552124, "logps/chosen": -239.87203979492188, "logps/rejected": -689.370849609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.504849910736084, "rewards/margins": 8.219311714172363, "rewards/rejected": -12.724161148071289, "step": 7127 }, { "epoch": 1.11, "learning_rate": 8.919370299289382e-06, "logits/chosen": -1.8921027183532715, "logits/rejected": -2.9083824157714844, "logps/chosen": -131.4375762939453, "logps/rejected": -398.31134033203125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.7583328485488892, "rewards/margins": 6.744264125823975, "rewards/rejected": -8.502596855163574, "step": 7128 }, { "epoch": 1.11, "learning_rate": 8.918636858758234e-06, "logits/chosen": -2.8673973083496094, "logits/rejected": -2.192962408065796, "logps/chosen": -360.75714111328125, "logps/rejected": -359.6069030761719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5188138484954834, "rewards/margins": 9.31378173828125, "rewards/rejected": -11.832595825195312, "step": 7129 }, { "epoch": 1.11, "learning_rate": 8.917903418227088e-06, "logits/chosen": -2.1450014114379883, "logits/rejected": -2.729581117630005, "logps/chosen": -133.6717071533203, "logps/rejected": -280.7021484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.5478380918502808, "rewards/margins": 7.7524943351745605, "rewards/rejected": -9.300332069396973, "step": 7130 }, { "epoch": 1.11, "learning_rate": 8.91716997769594e-06, "logits/chosen": -1.9161076545715332, "logits/rejected": -2.85483717918396, "logps/chosen": -135.22787475585938, "logps/rejected": -486.164794921875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.116387844085693, "rewards/margins": 9.07424259185791, "rewards/rejected": -13.190629959106445, "step": 7131 }, { "epoch": 1.11, "learning_rate": 8.916436537164792e-06, "logits/chosen": -1.2636321783065796, "logits/rejected": -2.0610601902008057, "logps/chosen": -177.01266479492188, "logps/rejected": -510.6951904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.159197211265564, "rewards/margins": 12.434975624084473, "rewards/rejected": -13.594173431396484, "step": 7132 }, { "epoch": 1.11, "learning_rate": 8.915703096633644e-06, "logits/chosen": -1.9098803997039795, "logits/rejected": -2.571012496948242, "logps/chosen": -124.76742553710938, "logps/rejected": -345.810546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.0400912761688232, "rewards/margins": 11.471108436584473, "rewards/rejected": -12.511199951171875, "step": 7133 }, { "epoch": 1.11, "learning_rate": 8.914969656102495e-06, "logits/chosen": -2.1111490726470947, "logits/rejected": -2.950517177581787, "logps/chosen": -110.6954116821289, "logps/rejected": -365.9447937011719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.8708457946777344, "rewards/margins": 7.7959489822387695, "rewards/rejected": -9.66679573059082, "step": 7134 }, { "epoch": 1.11, "learning_rate": 8.914236215571347e-06, "logits/chosen": -2.0776264667510986, "logits/rejected": -2.386667490005493, "logps/chosen": -461.99407958984375, "logps/rejected": -450.8160095214844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6271733045578003, "rewards/margins": 7.285482883453369, "rewards/rejected": -8.9126558303833, "step": 7135 }, { "epoch": 1.11, "learning_rate": 8.9135027750402e-06, "logits/chosen": -2.8332786560058594, "logits/rejected": -1.5535826683044434, "logps/chosen": -529.9884033203125, "logps/rejected": -239.73992919921875, "loss": 0.1388, "rewards/accuracies": 1.0, "rewards/chosen": -3.4985764026641846, "rewards/margins": 3.617832660675049, "rewards/rejected": -7.1164093017578125, "step": 7136 }, { "epoch": 1.11, "learning_rate": 8.912769334509051e-06, "logits/chosen": -2.7871170043945312, "logits/rejected": -2.4956090450286865, "logps/chosen": -144.3985595703125, "logps/rejected": -123.84209442138672, "loss": 0.8944, "rewards/accuracies": 0.5, "rewards/chosen": -4.681519508361816, "rewards/margins": 3.242483377456665, "rewards/rejected": -7.924002647399902, "step": 7137 }, { "epoch": 1.11, "learning_rate": 8.912035893977903e-06, "logits/chosen": -2.7837748527526855, "logits/rejected": -2.937255382537842, "logps/chosen": -417.33734130859375, "logps/rejected": -455.6966552734375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": 0.8485870361328125, "rewards/margins": 7.383899211883545, "rewards/rejected": -6.535312175750732, "step": 7138 }, { "epoch": 1.11, "learning_rate": 8.911302453446757e-06, "logits/chosen": -2.3226819038391113, "logits/rejected": -2.5770955085754395, "logps/chosen": -159.48358154296875, "logps/rejected": -283.1142578125, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -3.208878993988037, "rewards/margins": 4.42556095123291, "rewards/rejected": -7.6344404220581055, "step": 7139 }, { "epoch": 1.11, "learning_rate": 8.910569012915608e-06, "logits/chosen": -1.668766975402832, "logits/rejected": -2.131774425506592, "logps/chosen": -438.6747131347656, "logps/rejected": -485.61322021484375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6183990240097046, "rewards/margins": 11.823033332824707, "rewards/rejected": -12.44143295288086, "step": 7140 }, { "epoch": 1.11, "learning_rate": 8.90983557238446e-06, "logits/chosen": -2.790304660797119, "logits/rejected": -2.6784894466400146, "logps/chosen": -129.15225219726562, "logps/rejected": -179.2610626220703, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.892092704772949, "rewards/margins": 6.94755220413208, "rewards/rejected": -10.839645385742188, "step": 7141 }, { "epoch": 1.11, "learning_rate": 8.909102131853312e-06, "logits/chosen": -2.648540496826172, "logits/rejected": -2.8862228393554688, "logps/chosen": -262.58233642578125, "logps/rejected": -119.18995666503906, "loss": 1.6188, "rewards/accuracies": 0.5, "rewards/chosen": -5.94407844543457, "rewards/margins": -1.1065489053726196, "rewards/rejected": -4.837529182434082, "step": 7142 }, { "epoch": 1.11, "learning_rate": 8.908368691322164e-06, "logits/chosen": -1.89670991897583, "logits/rejected": -2.7385079860687256, "logps/chosen": -339.9881896972656, "logps/rejected": -984.1466064453125, "loss": 0.1534, "rewards/accuracies": 1.0, "rewards/chosen": -4.716804027557373, "rewards/margins": 4.651712894439697, "rewards/rejected": -9.36851692199707, "step": 7143 }, { "epoch": 1.11, "learning_rate": 8.907635250791016e-06, "logits/chosen": -2.7771008014678955, "logits/rejected": -2.8933730125427246, "logps/chosen": -141.6402587890625, "logps/rejected": -386.7313232421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8941844701766968, "rewards/margins": 10.74660587310791, "rewards/rejected": -11.640790939331055, "step": 7144 }, { "epoch": 1.11, "learning_rate": 8.906901810259868e-06, "logits/chosen": -2.724163055419922, "logits/rejected": -2.6874430179595947, "logps/chosen": -69.18826293945312, "logps/rejected": -130.79122924804688, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.151405096054077, "rewards/margins": 4.68535041809082, "rewards/rejected": -7.836755752563477, "step": 7145 }, { "epoch": 1.11, "learning_rate": 8.90616836972872e-06, "logits/chosen": -2.883308172225952, "logits/rejected": -2.448481559753418, "logps/chosen": -147.4419708251953, "logps/rejected": -145.67051696777344, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -3.794907569885254, "rewards/margins": 5.7351226806640625, "rewards/rejected": -9.530029296875, "step": 7146 }, { "epoch": 1.11, "learning_rate": 8.905434929197572e-06, "logits/chosen": -1.758177638053894, "logits/rejected": -2.6779379844665527, "logps/chosen": -176.96054077148438, "logps/rejected": -435.7213439941406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3843400478363037, "rewards/margins": 9.421567916870117, "rewards/rejected": -11.805908203125, "step": 7147 }, { "epoch": 1.11, "learning_rate": 8.904701488666425e-06, "logits/chosen": -2.341600179672241, "logits/rejected": -2.6522388458251953, "logps/chosen": -168.13668823242188, "logps/rejected": -192.56849670410156, "loss": 2.1987, "rewards/accuracies": 0.5, "rewards/chosen": -3.684314012527466, "rewards/margins": 2.693049430847168, "rewards/rejected": -6.377363204956055, "step": 7148 }, { "epoch": 1.11, "learning_rate": 8.903968048135277e-06, "logits/chosen": -2.8223843574523926, "logits/rejected": -2.781223773956299, "logps/chosen": -163.79444885253906, "logps/rejected": -288.17608642578125, "loss": 0.9835, "rewards/accuracies": 0.5, "rewards/chosen": -3.893409252166748, "rewards/margins": 3.4706099033355713, "rewards/rejected": -7.364019393920898, "step": 7149 }, { "epoch": 1.11, "learning_rate": 8.903234607604129e-06, "logits/chosen": -2.830418109893799, "logits/rejected": -1.6915106773376465, "logps/chosen": -695.5679321289062, "logps/rejected": -313.4984130859375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.4316720962524414, "rewards/margins": 5.954652786254883, "rewards/rejected": -8.386324882507324, "step": 7150 }, { "epoch": 1.11, "learning_rate": 8.90250116707298e-06, "logits/chosen": -2.6992478370666504, "logits/rejected": -2.896127223968506, "logps/chosen": -223.05514526367188, "logps/rejected": -277.7838439941406, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -2.3073439598083496, "rewards/margins": 4.317942142486572, "rewards/rejected": -6.625286102294922, "step": 7151 }, { "epoch": 1.11, "learning_rate": 8.901767726541833e-06, "logits/chosen": -2.424733877182007, "logits/rejected": -3.2091665267944336, "logps/chosen": -63.40986633300781, "logps/rejected": -323.53424072265625, "loss": 0.0873, "rewards/accuracies": 1.0, "rewards/chosen": -1.281406283378601, "rewards/margins": 4.5624613761901855, "rewards/rejected": -5.843867301940918, "step": 7152 }, { "epoch": 1.11, "learning_rate": 8.901034286010685e-06, "logits/chosen": -2.912057399749756, "logits/rejected": -2.173670768737793, "logps/chosen": -499.863037109375, "logps/rejected": -237.23825073242188, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -0.5929629802703857, "rewards/margins": 7.226037502288818, "rewards/rejected": -7.819000244140625, "step": 7153 }, { "epoch": 1.11, "learning_rate": 8.900300845479536e-06, "logits/chosen": -2.65639066696167, "logits/rejected": -2.9830739498138428, "logps/chosen": -247.9822235107422, "logps/rejected": -294.63287353515625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.0547198057174683, "rewards/margins": 6.522931098937988, "rewards/rejected": -7.577651023864746, "step": 7154 }, { "epoch": 1.11, "learning_rate": 8.899567404948388e-06, "logits/chosen": -1.602770447731018, "logits/rejected": -2.664130449295044, "logps/chosen": -115.53857421875, "logps/rejected": -267.5625305175781, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.242577314376831, "rewards/margins": 6.830005645751953, "rewards/rejected": -9.072582244873047, "step": 7155 }, { "epoch": 1.11, "learning_rate": 8.89883396441724e-06, "logits/chosen": -2.6070213317871094, "logits/rejected": -2.3133785724639893, "logps/chosen": -257.2043151855469, "logps/rejected": -312.88226318359375, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -0.41145825386047363, "rewards/margins": 7.473243713378906, "rewards/rejected": -7.884702205657959, "step": 7156 }, { "epoch": 1.11, "learning_rate": 8.898100523886094e-06, "logits/chosen": -2.11545991897583, "logits/rejected": -2.807746171951294, "logps/chosen": -262.6688232421875, "logps/rejected": -430.3649597167969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3782039880752563, "rewards/margins": 10.236822128295898, "rewards/rejected": -11.615026473999023, "step": 7157 }, { "epoch": 1.11, "learning_rate": 8.897367083354946e-06, "logits/chosen": -2.824979305267334, "logits/rejected": -2.7328011989593506, "logps/chosen": -165.88693237304688, "logps/rejected": -274.7392883300781, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -1.2074352502822876, "rewards/margins": 3.154860019683838, "rewards/rejected": -4.362295150756836, "step": 7158 }, { "epoch": 1.11, "learning_rate": 8.896633642823797e-06, "logits/chosen": -3.123248338699341, "logits/rejected": -2.448594808578491, "logps/chosen": -273.05364990234375, "logps/rejected": -219.0650634765625, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -0.06416779011487961, "rewards/margins": 4.975686073303223, "rewards/rejected": -5.039853572845459, "step": 7159 }, { "epoch": 1.11, "learning_rate": 8.895900202292651e-06, "logits/chosen": -1.4224536418914795, "logits/rejected": -2.9460127353668213, "logps/chosen": -166.86920166015625, "logps/rejected": -404.956787109375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -3.2360663414001465, "rewards/margins": 6.440106391906738, "rewards/rejected": -9.676173210144043, "step": 7160 }, { "epoch": 1.11, "learning_rate": 8.895166761761503e-06, "logits/chosen": -1.369330883026123, "logits/rejected": -2.722493886947632, "logps/chosen": -139.21505737304688, "logps/rejected": -442.23291015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.3768753111362457, "rewards/margins": 8.542913436889648, "rewards/rejected": -8.919788360595703, "step": 7161 }, { "epoch": 1.11, "learning_rate": 8.894433321230355e-06, "logits/chosen": -2.0223493576049805, "logits/rejected": -2.8065834045410156, "logps/chosen": -208.7076416015625, "logps/rejected": -317.9990539550781, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.0892837047576904, "rewards/margins": 5.523861885070801, "rewards/rejected": -7.61314582824707, "step": 7162 }, { "epoch": 1.11, "learning_rate": 8.893699880699207e-06, "logits/chosen": -2.14408540725708, "logits/rejected": -3.1241021156311035, "logps/chosen": -175.08621215820312, "logps/rejected": -346.1075439453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.7033814191818237, "rewards/margins": 7.491513252258301, "rewards/rejected": -9.194894790649414, "step": 7163 }, { "epoch": 1.11, "learning_rate": 8.892966440168059e-06, "logits/chosen": -2.7895264625549316, "logits/rejected": -2.8575353622436523, "logps/chosen": -347.3941345214844, "logps/rejected": -324.218505859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.0655927658081055, "rewards/margins": 6.778697967529297, "rewards/rejected": -9.844290733337402, "step": 7164 }, { "epoch": 1.11, "learning_rate": 8.89223299963691e-06, "logits/chosen": -2.871596574783325, "logits/rejected": -1.813377857208252, "logps/chosen": -384.4271545410156, "logps/rejected": -266.17779541015625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.2324775457382202, "rewards/margins": 5.945864677429199, "rewards/rejected": -7.178342342376709, "step": 7165 }, { "epoch": 1.11, "learning_rate": 8.891499559105764e-06, "logits/chosen": -2.679466962814331, "logits/rejected": -3.042156457901001, "logps/chosen": -186.27713012695312, "logps/rejected": -412.46270751953125, "loss": 0.361, "rewards/accuracies": 0.5, "rewards/chosen": -1.552069902420044, "rewards/margins": 5.203346252441406, "rewards/rejected": -6.755415916442871, "step": 7166 }, { "epoch": 1.11, "learning_rate": 8.890766118574616e-06, "logits/chosen": -2.7275125980377197, "logits/rejected": -1.5928521156311035, "logps/chosen": -180.83465576171875, "logps/rejected": -118.37606811523438, "loss": 0.6942, "rewards/accuracies": 0.5, "rewards/chosen": -3.644364595413208, "rewards/margins": 3.560004234313965, "rewards/rejected": -7.204368591308594, "step": 7167 }, { "epoch": 1.11, "learning_rate": 8.890032678043468e-06, "logits/chosen": -3.321451425552368, "logits/rejected": -3.2009990215301514, "logps/chosen": -580.7169189453125, "logps/rejected": -601.6441650390625, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -3.792964220046997, "rewards/margins": 3.4687390327453613, "rewards/rejected": -7.2617034912109375, "step": 7168 }, { "epoch": 1.11, "learning_rate": 8.88929923751232e-06, "logits/chosen": -1.52181875705719, "logits/rejected": -2.8912086486816406, "logps/chosen": -251.1881103515625, "logps/rejected": -539.44775390625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6778579950332642, "rewards/margins": 6.637216091156006, "rewards/rejected": -7.3150739669799805, "step": 7169 }, { "epoch": 1.12, "learning_rate": 8.888565796981172e-06, "logits/chosen": -2.7985918521881104, "logits/rejected": -2.7793006896972656, "logps/chosen": -233.04257202148438, "logps/rejected": -280.7433166503906, "loss": 1.0881, "rewards/accuracies": 0.5, "rewards/chosen": -2.1203110218048096, "rewards/margins": 3.6228041648864746, "rewards/rejected": -5.743115425109863, "step": 7170 }, { "epoch": 1.12, "learning_rate": 8.887832356450023e-06, "logits/chosen": -1.4131399393081665, "logits/rejected": -2.4109795093536377, "logps/chosen": -145.6339569091797, "logps/rejected": -266.78369140625, "loss": 0.3982, "rewards/accuracies": 0.5, "rewards/chosen": -3.831724166870117, "rewards/margins": 3.3706324100494385, "rewards/rejected": -7.202356338500977, "step": 7171 }, { "epoch": 1.12, "learning_rate": 8.887098915918875e-06, "logits/chosen": -2.7703030109405518, "logits/rejected": -3.251657724380493, "logps/chosen": -416.71673583984375, "logps/rejected": -405.708251953125, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.937119245529175, "rewards/margins": 5.163200378417969, "rewards/rejected": -8.100319862365723, "step": 7172 }, { "epoch": 1.12, "learning_rate": 8.886365475387727e-06, "logits/chosen": -1.8349798917770386, "logits/rejected": -2.848165988922119, "logps/chosen": -62.15251159667969, "logps/rejected": -263.2938232421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3374435901641846, "rewards/margins": 7.0552449226379395, "rewards/rejected": -8.392688751220703, "step": 7173 }, { "epoch": 1.12, "learning_rate": 8.88563203485658e-06, "logits/chosen": -2.7469985485076904, "logits/rejected": -2.640986919403076, "logps/chosen": -193.89096069335938, "logps/rejected": -167.01522827148438, "loss": 2.0193, "rewards/accuracies": 0.5, "rewards/chosen": -4.657326698303223, "rewards/margins": 0.6854891777038574, "rewards/rejected": -5.342815399169922, "step": 7174 }, { "epoch": 1.12, "learning_rate": 8.884898594325433e-06, "logits/chosen": -1.6360865831375122, "logits/rejected": -2.7979331016540527, "logps/chosen": -239.78509521484375, "logps/rejected": -627.803955078125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.429422616958618, "rewards/margins": 8.797815322875977, "rewards/rejected": -11.227237701416016, "step": 7175 }, { "epoch": 1.12, "learning_rate": 8.884165153794285e-06, "logits/chosen": -1.0965791940689087, "logits/rejected": -2.7782509326934814, "logps/chosen": -98.38638305664062, "logps/rejected": -396.66046142578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.4767231047153473, "rewards/margins": 9.548907279968262, "rewards/rejected": -10.025629997253418, "step": 7176 }, { "epoch": 1.12, "learning_rate": 8.883431713263136e-06, "logits/chosen": -2.5212316513061523, "logits/rejected": -2.8050925731658936, "logps/chosen": -256.7884521484375, "logps/rejected": -312.493408203125, "loss": 0.3667, "rewards/accuracies": 0.5, "rewards/chosen": -3.1137237548828125, "rewards/margins": 4.604394912719727, "rewards/rejected": -7.718118667602539, "step": 7177 }, { "epoch": 1.12, "learning_rate": 8.882698272731988e-06, "logits/chosen": -2.894761562347412, "logits/rejected": -1.5120232105255127, "logps/chosen": -413.2167663574219, "logps/rejected": -163.8197021484375, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": 0.824389636516571, "rewards/margins": 6.330263137817383, "rewards/rejected": -5.505873680114746, "step": 7178 }, { "epoch": 1.12, "learning_rate": 8.88196483220084e-06, "logits/chosen": -1.5444577932357788, "logits/rejected": -2.329878568649292, "logps/chosen": -205.07736206054688, "logps/rejected": -630.469482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.22505521774292, "rewards/margins": 10.205072402954102, "rewards/rejected": -13.430127143859863, "step": 7179 }, { "epoch": 1.12, "learning_rate": 8.881231391669692e-06, "logits/chosen": -2.659660816192627, "logits/rejected": -2.720471143722534, "logps/chosen": -190.1558837890625, "logps/rejected": -245.70513916015625, "loss": 1.3993, "rewards/accuracies": 0.5, "rewards/chosen": -4.182222366333008, "rewards/margins": 4.704103946685791, "rewards/rejected": -8.88632583618164, "step": 7180 }, { "epoch": 1.12, "learning_rate": 8.880497951138544e-06, "logits/chosen": -1.4412630796432495, "logits/rejected": -0.7982213497161865, "logps/chosen": -200.16246032714844, "logps/rejected": -191.9627685546875, "loss": 1.3362, "rewards/accuracies": 0.5, "rewards/chosen": -3.5820655822753906, "rewards/margins": 0.19165265560150146, "rewards/rejected": -3.7737183570861816, "step": 7181 }, { "epoch": 1.12, "learning_rate": 8.879764510607396e-06, "logits/chosen": -1.6406159400939941, "logits/rejected": -2.9913113117218018, "logps/chosen": -170.1329345703125, "logps/rejected": -515.576416015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.4683771133422852, "rewards/margins": 7.948688507080078, "rewards/rejected": -9.417065620422363, "step": 7182 }, { "epoch": 1.12, "learning_rate": 8.87903107007625e-06, "logits/chosen": -3.0975518226623535, "logits/rejected": -2.108830213546753, "logps/chosen": -765.3394775390625, "logps/rejected": -453.90283203125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.2012939453125, "rewards/margins": 5.8100080490112305, "rewards/rejected": -7.0113019943237305, "step": 7183 }, { "epoch": 1.12, "learning_rate": 8.878297629545101e-06, "logits/chosen": -3.1497952938079834, "logits/rejected": -3.099529504776001, "logps/chosen": -318.1694641113281, "logps/rejected": -293.230224609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.134369373321533, "rewards/margins": 7.11735725402832, "rewards/rejected": -10.251727104187012, "step": 7184 }, { "epoch": 1.12, "learning_rate": 8.877564189013953e-06, "logits/chosen": -2.8670806884765625, "logits/rejected": -2.935994863510132, "logps/chosen": -361.50732421875, "logps/rejected": -346.97393798828125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.8153327703475952, "rewards/margins": 6.515450477600098, "rewards/rejected": -7.330782890319824, "step": 7185 }, { "epoch": 1.12, "learning_rate": 8.876830748482805e-06, "logits/chosen": -2.604532480239868, "logits/rejected": -2.903430700302124, "logps/chosen": -271.7267761230469, "logps/rejected": -276.56005859375, "loss": 0.1079, "rewards/accuracies": 1.0, "rewards/chosen": -2.8099117279052734, "rewards/margins": 3.2855610847473145, "rewards/rejected": -6.095472812652588, "step": 7186 }, { "epoch": 1.12, "learning_rate": 8.876097307951657e-06, "logits/chosen": -1.8151037693023682, "logits/rejected": -2.8483831882476807, "logps/chosen": -46.8935432434082, "logps/rejected": -256.5631103515625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -3.0448966026306152, "rewards/margins": 5.730764865875244, "rewards/rejected": -8.77566146850586, "step": 7187 }, { "epoch": 1.12, "learning_rate": 8.875363867420509e-06, "logits/chosen": -2.643284320831299, "logits/rejected": -2.904642105102539, "logps/chosen": -191.00103759765625, "logps/rejected": -279.0562744140625, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -1.572035312652588, "rewards/margins": 5.339116096496582, "rewards/rejected": -6.91115140914917, "step": 7188 }, { "epoch": 1.12, "learning_rate": 8.87463042688936e-06, "logits/chosen": -2.8837733268737793, "logits/rejected": -2.312993049621582, "logps/chosen": -728.9270629882812, "logps/rejected": -542.4507446289062, "loss": 0.2056, "rewards/accuracies": 1.0, "rewards/chosen": -5.160523414611816, "rewards/margins": 1.9683359861373901, "rewards/rejected": -7.128859043121338, "step": 7189 }, { "epoch": 1.12, "learning_rate": 8.873896986358213e-06, "logits/chosen": -3.040060520172119, "logits/rejected": -1.8841930627822876, "logps/chosen": -599.8956298828125, "logps/rejected": -488.9989013671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4454331398010254, "rewards/margins": 9.047405242919922, "rewards/rejected": -10.492837905883789, "step": 7190 }, { "epoch": 1.12, "learning_rate": 8.873163545827064e-06, "logits/chosen": -2.5256619453430176, "logits/rejected": -2.8920865058898926, "logps/chosen": -220.03575134277344, "logps/rejected": -251.48629760742188, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": -1.3949713706970215, "rewards/margins": 3.9460320472717285, "rewards/rejected": -5.34100341796875, "step": 7191 }, { "epoch": 1.12, "learning_rate": 8.872430105295918e-06, "logits/chosen": -2.700376510620117, "logits/rejected": -3.010502576828003, "logps/chosen": -216.61456298828125, "logps/rejected": -307.853271484375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.8370544910430908, "rewards/margins": 5.807056427001953, "rewards/rejected": -7.644110679626465, "step": 7192 }, { "epoch": 1.12, "learning_rate": 8.87169666476477e-06, "logits/chosen": -2.7776875495910645, "logits/rejected": -2.6927549839019775, "logps/chosen": -134.2356414794922, "logps/rejected": -244.6186981201172, "loss": 0.0791, "rewards/accuracies": 1.0, "rewards/chosen": -2.2290329933166504, "rewards/margins": 4.406637191772461, "rewards/rejected": -6.635669708251953, "step": 7193 }, { "epoch": 1.12, "learning_rate": 8.870963224233623e-06, "logits/chosen": -2.7557883262634277, "logits/rejected": -2.2187185287475586, "logps/chosen": -199.17994689941406, "logps/rejected": -235.27999877929688, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -2.1940879821777344, "rewards/margins": 6.266490459442139, "rewards/rejected": -8.460578918457031, "step": 7194 }, { "epoch": 1.12, "learning_rate": 8.870229783702475e-06, "logits/chosen": -2.856438398361206, "logits/rejected": -2.9313483238220215, "logps/chosen": -479.1023864746094, "logps/rejected": -433.9573974609375, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.630789577960968, "rewards/margins": 4.777583122253418, "rewards/rejected": -5.40837287902832, "step": 7195 }, { "epoch": 1.12, "learning_rate": 8.869496343171327e-06, "logits/chosen": -2.74273943901062, "logits/rejected": -2.9481749534606934, "logps/chosen": -104.03948211669922, "logps/rejected": -281.74395751953125, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": -1.80131196975708, "rewards/margins": 4.006685256958008, "rewards/rejected": -5.807997226715088, "step": 7196 }, { "epoch": 1.12, "learning_rate": 8.868762902640179e-06, "logits/chosen": -2.961158037185669, "logits/rejected": -1.4918713569641113, "logps/chosen": -578.18505859375, "logps/rejected": -206.01161193847656, "loss": 0.3934, "rewards/accuracies": 0.5, "rewards/chosen": -1.52940833568573, "rewards/margins": 3.986123561859131, "rewards/rejected": -5.515531539916992, "step": 7197 }, { "epoch": 1.12, "learning_rate": 8.868029462109031e-06, "logits/chosen": -2.6395530700683594, "logits/rejected": -2.932842969894409, "logps/chosen": -101.00416564941406, "logps/rejected": -226.98768615722656, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.7584856152534485, "rewards/margins": 6.555387496948242, "rewards/rejected": -7.313873291015625, "step": 7198 }, { "epoch": 1.12, "learning_rate": 8.867296021577883e-06, "logits/chosen": -1.6616419553756714, "logits/rejected": -2.6419880390167236, "logps/chosen": -96.72126770019531, "logps/rejected": -317.5397033691406, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -4.234569549560547, "rewards/margins": 5.712143898010254, "rewards/rejected": -9.9467134475708, "step": 7199 }, { "epoch": 1.12, "learning_rate": 8.866562581046735e-06, "logits/chosen": -2.9699010848999023, "logits/rejected": -2.354546070098877, "logps/chosen": -566.2300415039062, "logps/rejected": -374.1668395996094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 1.7747405767440796, "rewards/margins": 9.751285552978516, "rewards/rejected": -7.976544380187988, "step": 7200 }, { "epoch": 1.12, "learning_rate": 8.865829140515588e-06, "logits/chosen": -1.6644043922424316, "logits/rejected": -2.716250419616699, "logps/chosen": -170.38217163085938, "logps/rejected": -340.66912841796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.140772819519043, "rewards/margins": 7.929964542388916, "rewards/rejected": -10.0707368850708, "step": 7201 }, { "epoch": 1.12, "learning_rate": 8.86509569998444e-06, "logits/chosen": -1.735289216041565, "logits/rejected": -2.735132932662964, "logps/chosen": -124.07926177978516, "logps/rejected": -318.635986328125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.1293792724609375, "rewards/margins": 7.2488203048706055, "rewards/rejected": -10.378199577331543, "step": 7202 }, { "epoch": 1.12, "learning_rate": 8.864362259453292e-06, "logits/chosen": -1.5103284120559692, "logits/rejected": -2.4969000816345215, "logps/chosen": -216.08590698242188, "logps/rejected": -414.62628173828125, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -3.135650634765625, "rewards/margins": 7.080362319946289, "rewards/rejected": -10.216012954711914, "step": 7203 }, { "epoch": 1.12, "learning_rate": 8.863628818922144e-06, "logits/chosen": -2.6735432147979736, "logits/rejected": -2.675673723220825, "logps/chosen": -258.9763488769531, "logps/rejected": -148.30953979492188, "loss": 1.7652, "rewards/accuracies": 0.5, "rewards/chosen": -4.95143985748291, "rewards/margins": 1.1009840965270996, "rewards/rejected": -6.05242395401001, "step": 7204 }, { "epoch": 1.12, "learning_rate": 8.862895378390996e-06, "logits/chosen": -2.3968865871429443, "logits/rejected": -2.9809365272521973, "logps/chosen": -145.16531372070312, "logps/rejected": -328.2594299316406, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -2.7830862998962402, "rewards/margins": 3.9523844718933105, "rewards/rejected": -6.735470771789551, "step": 7205 }, { "epoch": 1.12, "learning_rate": 8.862161937859848e-06, "logits/chosen": -2.6543283462524414, "logits/rejected": -2.060169219970703, "logps/chosen": -282.81494140625, "logps/rejected": -251.0725555419922, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.0502806901931763, "rewards/margins": 6.293442726135254, "rewards/rejected": -7.343723297119141, "step": 7206 }, { "epoch": 1.12, "learning_rate": 8.8614284973287e-06, "logits/chosen": -1.5318236351013184, "logits/rejected": -2.7790162563323975, "logps/chosen": -301.4554443359375, "logps/rejected": -393.9457092285156, "loss": 2.4397, "rewards/accuracies": 0.5, "rewards/chosen": -5.3254265785217285, "rewards/margins": 1.841928243637085, "rewards/rejected": -7.167354583740234, "step": 7207 }, { "epoch": 1.12, "learning_rate": 8.860695056797551e-06, "logits/chosen": -2.2270114421844482, "logits/rejected": -2.9804165363311768, "logps/chosen": -189.34986877441406, "logps/rejected": -339.51239013671875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.404679536819458, "rewards/margins": 4.851841926574707, "rewards/rejected": -6.256521701812744, "step": 7208 }, { "epoch": 1.12, "learning_rate": 8.859961616266403e-06, "logits/chosen": -2.0155060291290283, "logits/rejected": -2.882880449295044, "logps/chosen": -100.47442626953125, "logps/rejected": -286.7630615234375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.8895390033721924, "rewards/margins": 6.386517524719238, "rewards/rejected": -8.276056289672852, "step": 7209 }, { "epoch": 1.12, "learning_rate": 8.859228175735257e-06, "logits/chosen": -2.1591429710388184, "logits/rejected": -2.5010578632354736, "logps/chosen": -274.39471435546875, "logps/rejected": -486.24273681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6187835931777954, "rewards/margins": 12.027645111083984, "rewards/rejected": -13.646429061889648, "step": 7210 }, { "epoch": 1.12, "learning_rate": 8.858494735204109e-06, "logits/chosen": -2.6653952598571777, "logits/rejected": -2.2087504863739014, "logps/chosen": -167.32968139648438, "logps/rejected": -245.2377471923828, "loss": 0.8247, "rewards/accuracies": 0.5, "rewards/chosen": -3.9829132556915283, "rewards/margins": 2.8806586265563965, "rewards/rejected": -6.863572120666504, "step": 7211 }, { "epoch": 1.12, "learning_rate": 8.85776129467296e-06, "logits/chosen": -2.8814144134521484, "logits/rejected": -2.178685426712036, "logps/chosen": -123.18464660644531, "logps/rejected": -114.3504638671875, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -2.300424098968506, "rewards/margins": 4.250296115875244, "rewards/rejected": -6.55072021484375, "step": 7212 }, { "epoch": 1.12, "learning_rate": 8.857027854141812e-06, "logits/chosen": -2.286583185195923, "logits/rejected": -2.942256212234497, "logps/chosen": -223.6654052734375, "logps/rejected": -271.47125244140625, "loss": 1.1503, "rewards/accuracies": 0.5, "rewards/chosen": -3.988910675048828, "rewards/margins": 2.452239513397217, "rewards/rejected": -6.441150188446045, "step": 7213 }, { "epoch": 1.12, "learning_rate": 8.856294413610664e-06, "logits/chosen": -3.0185251235961914, "logits/rejected": -2.9539833068847656, "logps/chosen": -406.556396484375, "logps/rejected": -313.57147216796875, "loss": 1.2163, "rewards/accuracies": 0.5, "rewards/chosen": -4.1354804039001465, "rewards/margins": 2.178239583969116, "rewards/rejected": -6.313719749450684, "step": 7214 }, { "epoch": 1.12, "learning_rate": 8.855560973079516e-06, "logits/chosen": -2.7848455905914307, "logits/rejected": -2.3863511085510254, "logps/chosen": -186.7379913330078, "logps/rejected": -173.1120147705078, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.2031234502792358, "rewards/margins": 5.037664890289307, "rewards/rejected": -6.240788459777832, "step": 7215 }, { "epoch": 1.12, "learning_rate": 8.854827532548368e-06, "logits/chosen": -1.5840479135513306, "logits/rejected": -3.2470433712005615, "logps/chosen": -200.35287475585938, "logps/rejected": -442.48931884765625, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.636446475982666, "rewards/margins": 5.28438663482666, "rewards/rejected": -7.920832633972168, "step": 7216 }, { "epoch": 1.12, "learning_rate": 8.85409409201722e-06, "logits/chosen": -2.580299139022827, "logits/rejected": -2.819293260574341, "logps/chosen": -117.38136291503906, "logps/rejected": -281.09320068359375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.911879301071167, "rewards/margins": 6.979972839355469, "rewards/rejected": -9.891851425170898, "step": 7217 }, { "epoch": 1.12, "learning_rate": 8.853360651486072e-06, "logits/chosen": -1.8604788780212402, "logits/rejected": -2.889995813369751, "logps/chosen": -271.1607360839844, "logps/rejected": -313.87518310546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1726967096328735, "rewards/margins": 8.455967903137207, "rewards/rejected": -9.628664016723633, "step": 7218 }, { "epoch": 1.12, "learning_rate": 8.852627210954925e-06, "logits/chosen": -1.8143709897994995, "logits/rejected": -2.9683780670166016, "logps/chosen": -136.64666748046875, "logps/rejected": -283.3750915527344, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.0149364471435547, "rewards/margins": 5.291077613830566, "rewards/rejected": -8.306015014648438, "step": 7219 }, { "epoch": 1.12, "learning_rate": 8.851893770423777e-06, "logits/chosen": -2.480194330215454, "logits/rejected": -2.7123959064483643, "logps/chosen": -322.3176574707031, "logps/rejected": -327.0045471191406, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -3.0888185501098633, "rewards/margins": 4.942503929138184, "rewards/rejected": -8.031322479248047, "step": 7220 }, { "epoch": 1.12, "learning_rate": 8.85116032989263e-06, "logits/chosen": -2.393327474594116, "logits/rejected": -2.8891663551330566, "logps/chosen": -141.81813049316406, "logps/rejected": -412.1241760253906, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.39544713497161865, "rewards/margins": 8.573047637939453, "rewards/rejected": -8.96849536895752, "step": 7221 }, { "epoch": 1.12, "learning_rate": 8.850426889361481e-06, "logits/chosen": -2.66996431350708, "logits/rejected": -2.829057455062866, "logps/chosen": -237.03819274902344, "logps/rejected": -383.568115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3894142210483551, "rewards/margins": 9.48965072631836, "rewards/rejected": -9.879064559936523, "step": 7222 }, { "epoch": 1.12, "learning_rate": 8.849693448830333e-06, "logits/chosen": -2.8527748584747314, "logits/rejected": -1.316757082939148, "logps/chosen": -270.08514404296875, "logps/rejected": -191.15480041503906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.23427891731262207, "rewards/margins": 9.40081787109375, "rewards/rejected": -9.635096549987793, "step": 7223 }, { "epoch": 1.12, "learning_rate": 8.848960008299185e-06, "logits/chosen": -2.340204954147339, "logits/rejected": -2.8138558864593506, "logps/chosen": -385.6788024902344, "logps/rejected": -418.3855285644531, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.6520938873291016, "rewards/margins": 6.892640590667725, "rewards/rejected": -10.544734954833984, "step": 7224 }, { "epoch": 1.12, "learning_rate": 8.848226567768037e-06, "logits/chosen": -1.5197422504425049, "logits/rejected": -2.8505444526672363, "logps/chosen": -102.70221710205078, "logps/rejected": -352.23760986328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2563358545303345, "rewards/margins": 8.294391632080078, "rewards/rejected": -9.550726890563965, "step": 7225 }, { "epoch": 1.12, "learning_rate": 8.847493127236889e-06, "logits/chosen": -2.3651959896087646, "logits/rejected": -3.1206166744232178, "logps/chosen": -83.18408203125, "logps/rejected": -174.93495178222656, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -5.85987663269043, "rewards/margins": 3.1041877269744873, "rewards/rejected": -8.96406364440918, "step": 7226 }, { "epoch": 1.12, "learning_rate": 8.846759686705742e-06, "logits/chosen": -2.6594274044036865, "logits/rejected": -2.537768840789795, "logps/chosen": -110.48658752441406, "logps/rejected": -291.9808349609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5151954889297485, "rewards/margins": 7.220036029815674, "rewards/rejected": -8.735231399536133, "step": 7227 }, { "epoch": 1.12, "learning_rate": 8.846026246174594e-06, "logits/chosen": -3.1413767337799072, "logits/rejected": -3.088810920715332, "logps/chosen": -564.0677490234375, "logps/rejected": -500.74847412109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7150177359580994, "rewards/margins": 8.045578002929688, "rewards/rejected": -8.76059627532959, "step": 7228 }, { "epoch": 1.12, "learning_rate": 8.845292805643448e-06, "logits/chosen": -2.6839356422424316, "logits/rejected": -2.53188157081604, "logps/chosen": -578.6283569335938, "logps/rejected": -480.46051025390625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.9459773302078247, "rewards/margins": 6.322005748748779, "rewards/rejected": -8.267983436584473, "step": 7229 }, { "epoch": 1.12, "learning_rate": 8.8445593651123e-06, "logits/chosen": -2.915809154510498, "logits/rejected": -2.7015764713287354, "logps/chosen": -390.6713562011719, "logps/rejected": -416.06207275390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0054807662963867, "rewards/margins": 7.07197380065918, "rewards/rejected": -8.077454566955566, "step": 7230 }, { "epoch": 1.12, "learning_rate": 8.843825924581151e-06, "logits/chosen": -3.131959915161133, "logits/rejected": -3.2925493717193604, "logps/chosen": -124.56836700439453, "logps/rejected": -224.029296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.5089974403381348, "rewards/margins": 8.396631240844727, "rewards/rejected": -9.905628204345703, "step": 7231 }, { "epoch": 1.12, "learning_rate": 8.843092484050003e-06, "logits/chosen": -2.4239003658294678, "logits/rejected": -2.8109397888183594, "logps/chosen": -247.83322143554688, "logps/rejected": -345.6146240234375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.2992660701274872, "rewards/margins": 6.14272403717041, "rewards/rejected": -6.441989898681641, "step": 7232 }, { "epoch": 1.12, "learning_rate": 8.842359043518855e-06, "logits/chosen": -2.7948989868164062, "logits/rejected": -2.012995481491089, "logps/chosen": -126.34498596191406, "logps/rejected": -154.2582244873047, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.5871959924697876, "rewards/margins": 7.126275062561035, "rewards/rejected": -7.713470935821533, "step": 7233 }, { "epoch": 1.13, "learning_rate": 8.841625602987707e-06, "logits/chosen": -3.0625622272491455, "logits/rejected": -2.2783846855163574, "logps/chosen": -748.7405395507812, "logps/rejected": -550.3240966796875, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -3.2524871826171875, "rewards/margins": 5.656802177429199, "rewards/rejected": -8.909290313720703, "step": 7234 }, { "epoch": 1.13, "learning_rate": 8.840892162456559e-06, "logits/chosen": -1.9959032535552979, "logits/rejected": -2.747467517852783, "logps/chosen": -119.83619689941406, "logps/rejected": -530.188720703125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.605955123901367, "rewards/margins": 7.479175090789795, "rewards/rejected": -11.08513069152832, "step": 7235 }, { "epoch": 1.13, "learning_rate": 8.84015872192541e-06, "logits/chosen": -1.8862992525100708, "logits/rejected": -2.1731553077697754, "logps/chosen": -308.20013427734375, "logps/rejected": -459.5413818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.09040222316980362, "rewards/margins": 10.42239761352539, "rewards/rejected": -10.331995010375977, "step": 7236 }, { "epoch": 1.13, "learning_rate": 8.839425281394264e-06, "logits/chosen": -0.7932060956954956, "logits/rejected": -2.7271933555603027, "logps/chosen": -82.822021484375, "logps/rejected": -428.6912841796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.3443650007247925, "rewards/margins": 6.8182830810546875, "rewards/rejected": -8.16264820098877, "step": 7237 }, { "epoch": 1.13, "learning_rate": 8.838691840863116e-06, "logits/chosen": -2.244821310043335, "logits/rejected": -2.9012513160705566, "logps/chosen": -69.86418151855469, "logps/rejected": -327.0582275390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.1631596088409424, "rewards/margins": 8.135744094848633, "rewards/rejected": -10.298903465270996, "step": 7238 }, { "epoch": 1.13, "learning_rate": 8.837958400331968e-06, "logits/chosen": -2.086604356765747, "logits/rejected": -2.7454442977905273, "logps/chosen": -276.410888671875, "logps/rejected": -236.3770751953125, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -0.9449989795684814, "rewards/margins": 3.240110158920288, "rewards/rejected": -4.1851091384887695, "step": 7239 }, { "epoch": 1.13, "learning_rate": 8.83722495980082e-06, "logits/chosen": -2.71266770362854, "logits/rejected": -1.8838002681732178, "logps/chosen": -261.2324523925781, "logps/rejected": -146.82720947265625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -0.91402667760849, "rewards/margins": 5.657088279724121, "rewards/rejected": -6.571114540100098, "step": 7240 }, { "epoch": 1.13, "learning_rate": 8.836491519269672e-06, "logits/chosen": -1.2931759357452393, "logits/rejected": -2.9926459789276123, "logps/chosen": -98.55501556396484, "logps/rejected": -412.85650634765625, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -5.339866638183594, "rewards/margins": 5.34956693649292, "rewards/rejected": -10.689433097839355, "step": 7241 }, { "epoch": 1.13, "learning_rate": 8.835758078738524e-06, "logits/chosen": -2.201186180114746, "logits/rejected": -2.9319849014282227, "logps/chosen": -477.9329528808594, "logps/rejected": -468.60186767578125, "loss": 1.8502, "rewards/accuracies": 0.5, "rewards/chosen": -6.364086151123047, "rewards/margins": 2.2510688304901123, "rewards/rejected": -8.615155220031738, "step": 7242 }, { "epoch": 1.13, "learning_rate": 8.835024638207376e-06, "logits/chosen": -2.6685802936553955, "logits/rejected": -2.3341281414031982, "logps/chosen": -277.67523193359375, "logps/rejected": -233.26275634765625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.7358750104904175, "rewards/margins": 6.611347198486328, "rewards/rejected": -8.347222328186035, "step": 7243 }, { "epoch": 1.13, "learning_rate": 8.834291197676227e-06, "logits/chosen": -2.7099366188049316, "logits/rejected": -3.2309248447418213, "logps/chosen": -43.56231689453125, "logps/rejected": -330.656005859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.2982425689697266, "rewards/margins": 7.510808944702148, "rewards/rejected": -9.809051513671875, "step": 7244 }, { "epoch": 1.13, "learning_rate": 8.83355775714508e-06, "logits/chosen": -1.7693781852722168, "logits/rejected": -2.5902609825134277, "logps/chosen": -102.78656005859375, "logps/rejected": -205.44259643554688, "loss": 0.653, "rewards/accuracies": 0.5, "rewards/chosen": -5.415313243865967, "rewards/margins": 3.482760429382324, "rewards/rejected": -8.898073196411133, "step": 7245 }, { "epoch": 1.13, "learning_rate": 8.832824316613933e-06, "logits/chosen": -2.583873987197876, "logits/rejected": -2.9323537349700928, "logps/chosen": -114.21412658691406, "logps/rejected": -301.5471496582031, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.5881553888320923, "rewards/margins": 5.337123870849609, "rewards/rejected": -6.925279140472412, "step": 7246 }, { "epoch": 1.13, "learning_rate": 8.832090876082785e-06, "logits/chosen": -3.1432197093963623, "logits/rejected": -3.0460214614868164, "logps/chosen": -112.46923828125, "logps/rejected": -217.1986846923828, "loss": 0.1185, "rewards/accuracies": 1.0, "rewards/chosen": -3.510004997253418, "rewards/margins": 5.136669158935547, "rewards/rejected": -8.646674156188965, "step": 7247 }, { "epoch": 1.13, "learning_rate": 8.831357435551637e-06, "logits/chosen": -2.7092297077178955, "logits/rejected": -2.917437791824341, "logps/chosen": -145.87632751464844, "logps/rejected": -377.1397705078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.557134747505188, "rewards/margins": 8.042314529418945, "rewards/rejected": -9.599449157714844, "step": 7248 }, { "epoch": 1.13, "learning_rate": 8.830623995020489e-06, "logits/chosen": -2.308131694793701, "logits/rejected": -3.0041775703430176, "logps/chosen": -236.99139404296875, "logps/rejected": -492.4320983886719, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.1920385360717773, "rewards/margins": 8.1612548828125, "rewards/rejected": -11.353292465209961, "step": 7249 }, { "epoch": 1.13, "learning_rate": 8.82989055448934e-06, "logits/chosen": -2.5993740558624268, "logits/rejected": -3.108626127243042, "logps/chosen": -263.62103271484375, "logps/rejected": -591.6785278320312, "loss": 0.1965, "rewards/accuracies": 1.0, "rewards/chosen": -3.864132881164551, "rewards/margins": 4.589787483215332, "rewards/rejected": -8.453920364379883, "step": 7250 }, { "epoch": 1.13, "learning_rate": 8.829157113958192e-06, "logits/chosen": -2.2002806663513184, "logits/rejected": -3.123818874359131, "logps/chosen": -88.35523986816406, "logps/rejected": -432.7936706542969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1631520986557007, "rewards/margins": 9.28703498840332, "rewards/rejected": -10.450187683105469, "step": 7251 }, { "epoch": 1.13, "learning_rate": 8.828423673427044e-06, "logits/chosen": -2.7443907260894775, "logits/rejected": -2.236018657684326, "logps/chosen": -313.4227294921875, "logps/rejected": -314.92181396484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.080043315887451, "rewards/margins": 7.717001914978027, "rewards/rejected": -9.79704475402832, "step": 7252 }, { "epoch": 1.13, "learning_rate": 8.827690232895896e-06, "logits/chosen": -1.8330435752868652, "logits/rejected": -2.5967564582824707, "logps/chosen": -231.1838836669922, "logps/rejected": -515.0027465820312, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.064307689666748, "rewards/margins": 6.461615562438965, "rewards/rejected": -10.525923728942871, "step": 7253 }, { "epoch": 1.13, "learning_rate": 8.826956792364748e-06, "logits/chosen": -2.5782902240753174, "logits/rejected": -2.860905408859253, "logps/chosen": -392.5592346191406, "logps/rejected": -474.8089599609375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -1.922823429107666, "rewards/margins": 4.43492317199707, "rewards/rejected": -6.3577470779418945, "step": 7254 }, { "epoch": 1.13, "learning_rate": 8.826223351833602e-06, "logits/chosen": -1.7784959077835083, "logits/rejected": -2.443864345550537, "logps/chosen": -199.7017822265625, "logps/rejected": -512.7015380859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.333316445350647, "rewards/margins": 8.562646865844727, "rewards/rejected": -9.895962715148926, "step": 7255 }, { "epoch": 1.13, "learning_rate": 8.825489911302453e-06, "logits/chosen": -1.3079618215560913, "logits/rejected": -1.7716103792190552, "logps/chosen": -38.11227035522461, "logps/rejected": -200.608154296875, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.198087215423584, "rewards/margins": 4.5814032554626465, "rewards/rejected": -6.7794904708862305, "step": 7256 }, { "epoch": 1.13, "learning_rate": 8.824756470771305e-06, "logits/chosen": -2.7673845291137695, "logits/rejected": -2.9573962688446045, "logps/chosen": -77.33818817138672, "logps/rejected": -431.26983642578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6157968044281006, "rewards/margins": 8.011744499206543, "rewards/rejected": -9.627540588378906, "step": 7257 }, { "epoch": 1.13, "learning_rate": 8.824023030240157e-06, "logits/chosen": -2.874812126159668, "logits/rejected": -2.6682286262512207, "logps/chosen": -217.15887451171875, "logps/rejected": -231.9720458984375, "loss": 1.1264, "rewards/accuracies": 0.5, "rewards/chosen": -3.7377336025238037, "rewards/margins": 5.305734634399414, "rewards/rejected": -9.043468475341797, "step": 7258 }, { "epoch": 1.13, "learning_rate": 8.823289589709009e-06, "logits/chosen": -2.4511804580688477, "logits/rejected": -2.965383529663086, "logps/chosen": -106.76657104492188, "logps/rejected": -220.61172485351562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2761008739471436, "rewards/margins": 9.381397247314453, "rewards/rejected": -10.65749740600586, "step": 7259 }, { "epoch": 1.13, "learning_rate": 8.822556149177861e-06, "logits/chosen": -3.013727903366089, "logits/rejected": -3.0713090896606445, "logps/chosen": -228.41598510742188, "logps/rejected": -216.66567993164062, "loss": 0.0685, "rewards/accuracies": 1.0, "rewards/chosen": -2.627753496170044, "rewards/margins": 4.0035200119018555, "rewards/rejected": -6.6312737464904785, "step": 7260 }, { "epoch": 1.13, "learning_rate": 8.821822708646714e-06, "logits/chosen": -1.0700151920318604, "logits/rejected": -2.4921398162841797, "logps/chosen": -94.78083038330078, "logps/rejected": -418.4040832519531, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.9639320373535156, "rewards/margins": 5.939375877380371, "rewards/rejected": -8.903307914733887, "step": 7261 }, { "epoch": 1.13, "learning_rate": 8.821089268115566e-06, "logits/chosen": -1.1855573654174805, "logits/rejected": -2.642737627029419, "logps/chosen": -65.48097229003906, "logps/rejected": -317.3533020019531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.248257875442505, "rewards/margins": 8.636852264404297, "rewards/rejected": -10.885109901428223, "step": 7262 }, { "epoch": 1.13, "learning_rate": 8.820355827584418e-06, "logits/chosen": -2.6409637928009033, "logits/rejected": -2.720072031021118, "logps/chosen": -242.44496154785156, "logps/rejected": -282.38128662109375, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -3.349318027496338, "rewards/margins": 3.8442952632904053, "rewards/rejected": -7.193613529205322, "step": 7263 }, { "epoch": 1.13, "learning_rate": 8.819622387053272e-06, "logits/chosen": -2.6756083965301514, "logits/rejected": -2.8452377319335938, "logps/chosen": -52.45899200439453, "logps/rejected": -236.72332763671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3158469200134277, "rewards/margins": 9.765705108642578, "rewards/rejected": -12.081552505493164, "step": 7264 }, { "epoch": 1.13, "learning_rate": 8.818888946522124e-06, "logits/chosen": -1.5650242567062378, "logits/rejected": -2.800872325897217, "logps/chosen": -104.79833984375, "logps/rejected": -542.65185546875, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -2.038910388946533, "rewards/margins": 4.3151350021362305, "rewards/rejected": -6.3540449142456055, "step": 7265 }, { "epoch": 1.13, "learning_rate": 8.818155505990976e-06, "logits/chosen": -0.5874137878417969, "logits/rejected": -2.359701156616211, "logps/chosen": -92.84204864501953, "logps/rejected": -326.3718566894531, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -2.757585048675537, "rewards/margins": 6.611541271209717, "rewards/rejected": -9.369126319885254, "step": 7266 }, { "epoch": 1.13, "learning_rate": 8.817422065459827e-06, "logits/chosen": -2.6286323070526123, "logits/rejected": -2.4422576427459717, "logps/chosen": -372.60107421875, "logps/rejected": -445.8399658203125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.8166656494140625, "rewards/margins": 6.148555278778076, "rewards/rejected": -8.965221405029297, "step": 7267 }, { "epoch": 1.13, "learning_rate": 8.81668862492868e-06, "logits/chosen": -2.1132748126983643, "logits/rejected": -2.9172170162200928, "logps/chosen": -51.60010528564453, "logps/rejected": -217.0496826171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.1928818225860596, "rewards/margins": 7.796813011169434, "rewards/rejected": -9.989694595336914, "step": 7268 }, { "epoch": 1.13, "learning_rate": 8.815955184397531e-06, "logits/chosen": -1.9709339141845703, "logits/rejected": -3.1042935848236084, "logps/chosen": -68.70870971679688, "logps/rejected": -486.3744812011719, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.6808500289916992, "rewards/margins": 7.173863410949707, "rewards/rejected": -8.854713439941406, "step": 7269 }, { "epoch": 1.13, "learning_rate": 8.815221743866383e-06, "logits/chosen": -1.5069024562835693, "logits/rejected": -1.952240228652954, "logps/chosen": -125.38111877441406, "logps/rejected": -154.36236572265625, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -4.254474639892578, "rewards/margins": 4.250283718109131, "rewards/rejected": -8.504758834838867, "step": 7270 }, { "epoch": 1.13, "learning_rate": 8.814488303335235e-06, "logits/chosen": -2.905212640762329, "logits/rejected": -2.952998161315918, "logps/chosen": -297.984375, "logps/rejected": -237.13816833496094, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2651687860488892, "rewards/margins": 7.001007080078125, "rewards/rejected": -8.266176223754883, "step": 7271 }, { "epoch": 1.13, "learning_rate": 8.813754862804089e-06, "logits/chosen": -0.8242502212524414, "logits/rejected": -1.8796130418777466, "logps/chosen": -248.40469360351562, "logps/rejected": -759.26513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.537255048751831, "rewards/margins": 14.00547981262207, "rewards/rejected": -16.542734146118164, "step": 7272 }, { "epoch": 1.13, "learning_rate": 8.81302142227294e-06, "logits/chosen": -2.671105146408081, "logits/rejected": -2.5541224479675293, "logps/chosen": -227.5560302734375, "logps/rejected": -329.0211181640625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8855636715888977, "rewards/margins": 7.900952339172363, "rewards/rejected": -8.786516189575195, "step": 7273 }, { "epoch": 1.13, "learning_rate": 8.812287981741792e-06, "logits/chosen": -1.2873830795288086, "logits/rejected": -2.8138234615325928, "logps/chosen": -134.83367919921875, "logps/rejected": -415.40521240234375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.949127674102783, "rewards/margins": 7.639983654022217, "rewards/rejected": -10.589111328125, "step": 7274 }, { "epoch": 1.13, "learning_rate": 8.811554541210644e-06, "logits/chosen": -2.296306610107422, "logits/rejected": -2.1708786487579346, "logps/chosen": -70.24925231933594, "logps/rejected": -143.48291015625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -2.2184226512908936, "rewards/margins": 6.301087856292725, "rewards/rejected": -8.519510269165039, "step": 7275 }, { "epoch": 1.13, "learning_rate": 8.810821100679496e-06, "logits/chosen": -2.0628652572631836, "logits/rejected": -3.1566269397735596, "logps/chosen": -81.40251159667969, "logps/rejected": -311.706298828125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -4.037545204162598, "rewards/margins": 5.5249786376953125, "rewards/rejected": -9.56252384185791, "step": 7276 }, { "epoch": 1.13, "learning_rate": 8.810087660148348e-06, "logits/chosen": -2.6057093143463135, "logits/rejected": -2.5984227657318115, "logps/chosen": -164.85626220703125, "logps/rejected": -204.16722106933594, "loss": 0.1479, "rewards/accuracies": 1.0, "rewards/chosen": -2.8031537532806396, "rewards/margins": 5.461121559143066, "rewards/rejected": -8.264275550842285, "step": 7277 }, { "epoch": 1.13, "learning_rate": 8.8093542196172e-06, "logits/chosen": -2.1399946212768555, "logits/rejected": -2.7371716499328613, "logps/chosen": -221.03562927246094, "logps/rejected": -311.9113464355469, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0609893798828125, "rewards/margins": 7.584193706512451, "rewards/rejected": -8.645183563232422, "step": 7278 }, { "epoch": 1.13, "learning_rate": 8.808620779086052e-06, "logits/chosen": -1.7484999895095825, "logits/rejected": -2.4638938903808594, "logps/chosen": -318.3291320800781, "logps/rejected": -488.18902587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6278824210166931, "rewards/margins": 12.874985694885254, "rewards/rejected": -13.50286865234375, "step": 7279 }, { "epoch": 1.13, "learning_rate": 8.807887338554904e-06, "logits/chosen": -0.758956789970398, "logits/rejected": -2.686296224594116, "logps/chosen": -101.17949676513672, "logps/rejected": -543.5926513671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9120588302612305, "rewards/margins": 8.768004417419434, "rewards/rejected": -10.680063247680664, "step": 7280 }, { "epoch": 1.13, "learning_rate": 8.807153898023757e-06, "logits/chosen": -2.681121826171875, "logits/rejected": -3.0267295837402344, "logps/chosen": -272.71978759765625, "logps/rejected": -399.20941162109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.329280376434326, "rewards/margins": 8.331110954284668, "rewards/rejected": -11.660390853881836, "step": 7281 }, { "epoch": 1.13, "learning_rate": 8.806420457492609e-06, "logits/chosen": -1.4464539289474487, "logits/rejected": -2.657576322555542, "logps/chosen": -80.70509338378906, "logps/rejected": -229.88275146484375, "loss": 0.3529, "rewards/accuracies": 0.5, "rewards/chosen": -3.1078014373779297, "rewards/margins": 2.8791627883911133, "rewards/rejected": -5.986964225769043, "step": 7282 }, { "epoch": 1.13, "learning_rate": 8.805687016961461e-06, "logits/chosen": -2.6422083377838135, "logits/rejected": -2.845374822616577, "logps/chosen": -188.80828857421875, "logps/rejected": -373.092041015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.152306318283081, "rewards/margins": 8.323862075805664, "rewards/rejected": -10.47616958618164, "step": 7283 }, { "epoch": 1.13, "learning_rate": 8.804953576430313e-06, "logits/chosen": -1.9563449621200562, "logits/rejected": -2.7547731399536133, "logps/chosen": -61.86097717285156, "logps/rejected": -330.01654052734375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.258251190185547, "rewards/margins": 6.865941047668457, "rewards/rejected": -9.124191284179688, "step": 7284 }, { "epoch": 1.13, "learning_rate": 8.804220135899165e-06, "logits/chosen": -1.6269557476043701, "logits/rejected": -2.7701916694641113, "logps/chosen": -107.871337890625, "logps/rejected": -387.1041259765625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.06211519241333, "rewards/margins": 6.482004165649414, "rewards/rejected": -10.544118881225586, "step": 7285 }, { "epoch": 1.13, "learning_rate": 8.803486695368017e-06, "logits/chosen": -2.7075183391571045, "logits/rejected": -2.5458757877349854, "logps/chosen": -100.83770751953125, "logps/rejected": -199.39569091796875, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -3.23677921295166, "rewards/margins": 4.385536193847656, "rewards/rejected": -7.622315406799316, "step": 7286 }, { "epoch": 1.13, "learning_rate": 8.802753254836868e-06, "logits/chosen": -2.2521159648895264, "logits/rejected": -2.767322301864624, "logps/chosen": -89.77535247802734, "logps/rejected": -209.3269500732422, "loss": 0.1195, "rewards/accuracies": 1.0, "rewards/chosen": -4.4054741859436035, "rewards/margins": 5.093269348144531, "rewards/rejected": -9.498743057250977, "step": 7287 }, { "epoch": 1.13, "learning_rate": 8.80201981430572e-06, "logits/chosen": -1.386772871017456, "logits/rejected": -2.593036413192749, "logps/chosen": -86.33338928222656, "logps/rejected": -333.0743408203125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.7869739532470703, "rewards/margins": 9.879316329956055, "rewards/rejected": -12.666290283203125, "step": 7288 }, { "epoch": 1.13, "learning_rate": 8.801286373774572e-06, "logits/chosen": -1.8078041076660156, "logits/rejected": -2.8007259368896484, "logps/chosen": -481.2019348144531, "logps/rejected": -632.6875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.4846296310424805, "rewards/margins": 6.942527770996094, "rewards/rejected": -11.42715835571289, "step": 7289 }, { "epoch": 1.13, "learning_rate": 8.800552933243426e-06, "logits/chosen": -2.751948356628418, "logits/rejected": -2.7797799110412598, "logps/chosen": -176.82508850097656, "logps/rejected": -316.3662414550781, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4112880229949951, "rewards/margins": 7.5578460693359375, "rewards/rejected": -8.969134330749512, "step": 7290 }, { "epoch": 1.13, "learning_rate": 8.799819492712278e-06, "logits/chosen": -2.5411698818206787, "logits/rejected": -3.2983410358428955, "logps/chosen": -188.72650146484375, "logps/rejected": -470.13140869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.20911940932273865, "rewards/margins": 10.207427978515625, "rewards/rejected": -10.416546821594238, "step": 7291 }, { "epoch": 1.13, "learning_rate": 8.79908605218113e-06, "logits/chosen": -1.8974847793579102, "logits/rejected": -2.8459360599517822, "logps/chosen": -172.40560913085938, "logps/rejected": -562.2439575195312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4877452850341797, "rewards/margins": 10.093403816223145, "rewards/rejected": -12.58115005493164, "step": 7292 }, { "epoch": 1.13, "learning_rate": 8.798352611649981e-06, "logits/chosen": -2.6109390258789062, "logits/rejected": -2.75897479057312, "logps/chosen": -96.61663055419922, "logps/rejected": -291.01873779296875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.219921827316284, "rewards/margins": 9.856371879577637, "rewards/rejected": -12.0762939453125, "step": 7293 }, { "epoch": 1.13, "learning_rate": 8.797619171118833e-06, "logits/chosen": -2.314258098602295, "logits/rejected": -2.6051993370056152, "logps/chosen": -90.4185791015625, "logps/rejected": -327.0986633300781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.6484146118164062, "rewards/margins": 9.132291793823242, "rewards/rejected": -11.780706405639648, "step": 7294 }, { "epoch": 1.13, "learning_rate": 8.796885730587687e-06, "logits/chosen": -2.534555435180664, "logits/rejected": -2.909471035003662, "logps/chosen": -485.556396484375, "logps/rejected": -664.6171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6694545745849609, "rewards/margins": 12.38446044921875, "rewards/rejected": -13.053915023803711, "step": 7295 }, { "epoch": 1.13, "learning_rate": 8.796152290056539e-06, "logits/chosen": -2.95052170753479, "logits/rejected": -3.123572587966919, "logps/chosen": -170.68161010742188, "logps/rejected": -264.65740966796875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -4.553641319274902, "rewards/margins": 6.164907455444336, "rewards/rejected": -10.718548774719238, "step": 7296 }, { "epoch": 1.13, "learning_rate": 8.79541884952539e-06, "logits/chosen": -1.4732296466827393, "logits/rejected": -2.7046608924865723, "logps/chosen": -128.83453369140625, "logps/rejected": -391.8854675292969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.107105255126953, "rewards/margins": 7.507315635681152, "rewards/rejected": -9.614420890808105, "step": 7297 }, { "epoch": 1.13, "learning_rate": 8.794685408994242e-06, "logits/chosen": -3.1355600357055664, "logits/rejected": -2.8530399799346924, "logps/chosen": -181.37689208984375, "logps/rejected": -194.25999450683594, "loss": 0.9559, "rewards/accuracies": 0.5, "rewards/chosen": -3.835946798324585, "rewards/margins": 1.492222547531128, "rewards/rejected": -5.328169345855713, "step": 7298 }, { "epoch": 1.14, "learning_rate": 8.793951968463096e-06, "logits/chosen": -2.0656464099884033, "logits/rejected": -2.5956389904022217, "logps/chosen": -119.65250396728516, "logps/rejected": -297.486083984375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.1214274168014526, "rewards/margins": 8.466306686401367, "rewards/rejected": -9.58773422241211, "step": 7299 }, { "epoch": 1.14, "learning_rate": 8.793218527931948e-06, "logits/chosen": -1.5088857412338257, "logits/rejected": -2.641117811203003, "logps/chosen": -233.38818359375, "logps/rejected": -521.1572875976562, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.5211883783340454, "rewards/margins": 9.428160667419434, "rewards/rejected": -10.949349403381348, "step": 7300 }, { "epoch": 1.14, "learning_rate": 8.7924850874008e-06, "logits/chosen": -2.7893640995025635, "logits/rejected": -2.9553780555725098, "logps/chosen": -144.8887481689453, "logps/rejected": -215.08209228515625, "loss": 2.2864, "rewards/accuracies": 0.5, "rewards/chosen": -4.938717365264893, "rewards/margins": 2.045365571975708, "rewards/rejected": -6.98408317565918, "step": 7301 }, { "epoch": 1.14, "learning_rate": 8.791751646869652e-06, "logits/chosen": -3.119286298751831, "logits/rejected": -2.881051778793335, "logps/chosen": -393.367919921875, "logps/rejected": -331.646240234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.9458791017532349, "rewards/margins": 8.253788948059082, "rewards/rejected": -9.199667930603027, "step": 7302 }, { "epoch": 1.14, "learning_rate": 8.791018206338504e-06, "logits/chosen": -3.0631954669952393, "logits/rejected": -3.0267093181610107, "logps/chosen": -124.76911926269531, "logps/rejected": -237.371337890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.383776068687439, "rewards/margins": 8.945516586303711, "rewards/rejected": -10.329292297363281, "step": 7303 }, { "epoch": 1.14, "learning_rate": 8.790284765807355e-06, "logits/chosen": -2.9539525508880615, "logits/rejected": -2.820812463760376, "logps/chosen": -551.9056396484375, "logps/rejected": -456.3514404296875, "loss": 0.2297, "rewards/accuracies": 1.0, "rewards/chosen": -3.75596022605896, "rewards/margins": 5.6937971115112305, "rewards/rejected": -9.44975757598877, "step": 7304 }, { "epoch": 1.14, "learning_rate": 8.789551325276207e-06, "logits/chosen": -2.887535810470581, "logits/rejected": -2.9019088745117188, "logps/chosen": -72.51171112060547, "logps/rejected": -171.34213256835938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5953691601753235, "rewards/margins": 8.84950065612793, "rewards/rejected": -9.444869995117188, "step": 7305 }, { "epoch": 1.14, "learning_rate": 8.78881788474506e-06, "logits/chosen": -2.4634695053100586, "logits/rejected": -2.86145281791687, "logps/chosen": -75.80587768554688, "logps/rejected": -209.29051208496094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.4555163383483887, "rewards/margins": 7.872772216796875, "rewards/rejected": -11.328289031982422, "step": 7306 }, { "epoch": 1.14, "learning_rate": 8.788084444213911e-06, "logits/chosen": -2.6650471687316895, "logits/rejected": -1.7530323266983032, "logps/chosen": -287.5214538574219, "logps/rejected": -258.3512878417969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.320042848587036, "rewards/margins": 9.129415512084961, "rewards/rejected": -11.449457168579102, "step": 7307 }, { "epoch": 1.14, "learning_rate": 8.787351003682765e-06, "logits/chosen": -2.1421470642089844, "logits/rejected": -2.9937193393707275, "logps/chosen": -128.2861328125, "logps/rejected": -418.6492614746094, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.5329338312149048, "rewards/margins": 8.83439826965332, "rewards/rejected": -10.367332458496094, "step": 7308 }, { "epoch": 1.14, "learning_rate": 8.786617563151617e-06, "logits/chosen": -1.958594560623169, "logits/rejected": -2.659878969192505, "logps/chosen": -113.00811767578125, "logps/rejected": -226.66671752929688, "loss": 1.8899, "rewards/accuracies": 0.5, "rewards/chosen": -4.32806396484375, "rewards/margins": 3.2545690536499023, "rewards/rejected": -7.582633018493652, "step": 7309 }, { "epoch": 1.14, "learning_rate": 8.785884122620468e-06, "logits/chosen": -2.5486159324645996, "logits/rejected": -2.557987928390503, "logps/chosen": -183.356201171875, "logps/rejected": -258.61883544921875, "loss": 0.5945, "rewards/accuracies": 0.5, "rewards/chosen": -5.3439435958862305, "rewards/margins": 1.9170329570770264, "rewards/rejected": -7.260976791381836, "step": 7310 }, { "epoch": 1.14, "learning_rate": 8.78515068208932e-06, "logits/chosen": -1.7459768056869507, "logits/rejected": -2.919779062271118, "logps/chosen": -133.37261962890625, "logps/rejected": -311.0574951171875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.6727113723754883, "rewards/margins": 7.911750793457031, "rewards/rejected": -11.58446216583252, "step": 7311 }, { "epoch": 1.14, "learning_rate": 8.784417241558172e-06, "logits/chosen": -2.51686692237854, "logits/rejected": -2.771378993988037, "logps/chosen": -49.73649597167969, "logps/rejected": -194.8330078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.269896984100342, "rewards/margins": 8.763509750366211, "rewards/rejected": -12.033407211303711, "step": 7312 }, { "epoch": 1.14, "learning_rate": 8.783683801027024e-06, "logits/chosen": -2.618868112564087, "logits/rejected": -2.818920850753784, "logps/chosen": -112.18414306640625, "logps/rejected": -257.776611328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.886730670928955, "rewards/margins": 8.573393821716309, "rewards/rejected": -10.460124969482422, "step": 7313 }, { "epoch": 1.14, "learning_rate": 8.782950360495876e-06, "logits/chosen": -2.0518994331359863, "logits/rejected": -2.569132089614868, "logps/chosen": -199.62625122070312, "logps/rejected": -407.497802734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7956551313400269, "rewards/margins": 9.820558547973633, "rewards/rejected": -11.616214752197266, "step": 7314 }, { "epoch": 1.14, "learning_rate": 8.782216919964728e-06, "logits/chosen": -2.0062999725341797, "logits/rejected": -2.821284770965576, "logps/chosen": -40.463321685791016, "logps/rejected": -307.43780517578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5643534660339355, "rewards/margins": 7.457655429840088, "rewards/rejected": -10.022008895874023, "step": 7315 }, { "epoch": 1.14, "learning_rate": 8.78148347943358e-06, "logits/chosen": -1.3276712894439697, "logits/rejected": -2.63802433013916, "logps/chosen": -53.608795166015625, "logps/rejected": -313.5786437988281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.733708381652832, "rewards/margins": 9.841375350952148, "rewards/rejected": -11.57508373260498, "step": 7316 }, { "epoch": 1.14, "learning_rate": 8.780750038902433e-06, "logits/chosen": -2.9273557662963867, "logits/rejected": -3.0994560718536377, "logps/chosen": -347.7522277832031, "logps/rejected": -509.6451416015625, "loss": 0.9743, "rewards/accuracies": 0.5, "rewards/chosen": -5.683316230773926, "rewards/margins": 1.9535598754882812, "rewards/rejected": -7.636876106262207, "step": 7317 }, { "epoch": 1.14, "learning_rate": 8.780016598371285e-06, "logits/chosen": -2.647019624710083, "logits/rejected": -2.884197950363159, "logps/chosen": -133.046142578125, "logps/rejected": -288.30767822265625, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -1.3838984966278076, "rewards/margins": 4.84795618057251, "rewards/rejected": -6.2318549156188965, "step": 7318 }, { "epoch": 1.14, "learning_rate": 8.779283157840137e-06, "logits/chosen": -2.951505661010742, "logits/rejected": -2.7024340629577637, "logps/chosen": -275.19171142578125, "logps/rejected": -198.81683349609375, "loss": 2.8101, "rewards/accuracies": 0.5, "rewards/chosen": -5.215493679046631, "rewards/margins": -0.08022952079772949, "rewards/rejected": -5.1352643966674805, "step": 7319 }, { "epoch": 1.14, "learning_rate": 8.778549717308989e-06, "logits/chosen": -2.013476848602295, "logits/rejected": -2.8706235885620117, "logps/chosen": -106.24050903320312, "logps/rejected": -328.9893798828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.0339431762695312, "rewards/margins": 8.394407272338867, "rewards/rejected": -10.428350448608398, "step": 7320 }, { "epoch": 1.14, "learning_rate": 8.77781627677784e-06, "logits/chosen": -2.5236167907714844, "logits/rejected": -2.816293478012085, "logps/chosen": -42.369224548339844, "logps/rejected": -173.05145263671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.9455392360687256, "rewards/margins": 7.224661827087402, "rewards/rejected": -9.17020034790039, "step": 7321 }, { "epoch": 1.14, "learning_rate": 8.777082836246693e-06, "logits/chosen": -2.9472410678863525, "logits/rejected": -2.940805673599243, "logps/chosen": -64.32101440429688, "logps/rejected": -154.3541259765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8150510787963867, "rewards/margins": 7.077178955078125, "rewards/rejected": -7.892230033874512, "step": 7322 }, { "epoch": 1.14, "learning_rate": 8.776349395715545e-06, "logits/chosen": -2.7493531703948975, "logits/rejected": -2.8007209300994873, "logps/chosen": -106.9411392211914, "logps/rejected": -178.32064819335938, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -2.219409704208374, "rewards/margins": 5.625167369842529, "rewards/rejected": -7.844576835632324, "step": 7323 }, { "epoch": 1.14, "learning_rate": 8.775615955184396e-06, "logits/chosen": -2.6804747581481934, "logits/rejected": -2.903940439224243, "logps/chosen": -146.701171875, "logps/rejected": -199.79225158691406, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.888486862182617, "rewards/margins": 6.353590965270996, "rewards/rejected": -9.242077827453613, "step": 7324 }, { "epoch": 1.14, "learning_rate": 8.774882514653248e-06, "logits/chosen": -2.957324266433716, "logits/rejected": -3.1162936687469482, "logps/chosen": -219.97225952148438, "logps/rejected": -258.51007080078125, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -2.341595411300659, "rewards/margins": 4.944444179534912, "rewards/rejected": -7.286039352416992, "step": 7325 }, { "epoch": 1.14, "learning_rate": 8.774149074122102e-06, "logits/chosen": -2.448791980743408, "logits/rejected": -2.9078738689422607, "logps/chosen": -181.45635986328125, "logps/rejected": -363.4795227050781, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.6508119106292725, "rewards/margins": 7.934401035308838, "rewards/rejected": -9.585212707519531, "step": 7326 }, { "epoch": 1.14, "learning_rate": 8.773415633590954e-06, "logits/chosen": -1.7728670835494995, "logits/rejected": -2.73067045211792, "logps/chosen": -469.1461486816406, "logps/rejected": -537.123291015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.4147727489471436, "rewards/margins": 5.615487575531006, "rewards/rejected": -9.03026008605957, "step": 7327 }, { "epoch": 1.14, "learning_rate": 8.772682193059806e-06, "logits/chosen": -2.519706964492798, "logits/rejected": -3.0538434982299805, "logps/chosen": -491.94061279296875, "logps/rejected": -698.491943359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.7518494129180908, "rewards/margins": 7.4555511474609375, "rewards/rejected": -9.207401275634766, "step": 7328 }, { "epoch": 1.14, "learning_rate": 8.77194875252866e-06, "logits/chosen": -2.728825807571411, "logits/rejected": -1.7358520030975342, "logps/chosen": -265.69622802734375, "logps/rejected": -198.95155334472656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.5350654721260071, "rewards/margins": 9.335712432861328, "rewards/rejected": -8.80064582824707, "step": 7329 }, { "epoch": 1.14, "learning_rate": 8.771215311997511e-06, "logits/chosen": -2.539795398712158, "logits/rejected": -2.8571619987487793, "logps/chosen": -169.03829956054688, "logps/rejected": -258.02508544921875, "loss": 0.1385, "rewards/accuracies": 1.0, "rewards/chosen": -0.7663910388946533, "rewards/margins": 5.47751522064209, "rewards/rejected": -6.243906497955322, "step": 7330 }, { "epoch": 1.14, "learning_rate": 8.770481871466363e-06, "logits/chosen": -1.910044550895691, "logits/rejected": -2.933396339416504, "logps/chosen": -211.64913940429688, "logps/rejected": -518.8370361328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.8195066452026367, "rewards/margins": 7.622842788696289, "rewards/rejected": -10.442349433898926, "step": 7331 }, { "epoch": 1.14, "learning_rate": 8.769748430935215e-06, "logits/chosen": -2.580538272857666, "logits/rejected": -2.8503808975219727, "logps/chosen": -192.57940673828125, "logps/rejected": -203.15121459960938, "loss": 1.0456, "rewards/accuracies": 0.5, "rewards/chosen": -3.6760852336883545, "rewards/margins": 2.650899887084961, "rewards/rejected": -6.3269853591918945, "step": 7332 }, { "epoch": 1.14, "learning_rate": 8.769014990404067e-06, "logits/chosen": -2.7912421226501465, "logits/rejected": -1.6125683784484863, "logps/chosen": -243.38076782226562, "logps/rejected": -128.91055297851562, "loss": 0.2324, "rewards/accuracies": 1.0, "rewards/chosen": -1.3476107120513916, "rewards/margins": 4.532370090484619, "rewards/rejected": -5.87998104095459, "step": 7333 }, { "epoch": 1.14, "learning_rate": 8.768281549872919e-06, "logits/chosen": -2.7492780685424805, "logits/rejected": -2.5033135414123535, "logps/chosen": -357.89862060546875, "logps/rejected": -397.14959716796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.0365856885910034, "rewards/margins": 9.2415771484375, "rewards/rejected": -10.27816390991211, "step": 7334 }, { "epoch": 1.14, "learning_rate": 8.767548109341772e-06, "logits/chosen": -2.8574845790863037, "logits/rejected": -2.486877918243408, "logps/chosen": -640.5316162109375, "logps/rejected": -406.5065612792969, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.7755829095840454, "rewards/margins": 5.2391157150268555, "rewards/rejected": -7.0146989822387695, "step": 7335 }, { "epoch": 1.14, "learning_rate": 8.766814668810624e-06, "logits/chosen": -2.6277687549591064, "logits/rejected": -3.101200580596924, "logps/chosen": -323.1801452636719, "logps/rejected": -609.2058715820312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8100321292877197, "rewards/margins": 8.235151290893555, "rewards/rejected": -10.045183181762695, "step": 7336 }, { "epoch": 1.14, "learning_rate": 8.766081228279476e-06, "logits/chosen": -2.4731554985046387, "logits/rejected": -3.11090087890625, "logps/chosen": -47.034820556640625, "logps/rejected": -256.01141357421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9137451648712158, "rewards/margins": 8.010778427124023, "rewards/rejected": -8.92452335357666, "step": 7337 }, { "epoch": 1.14, "learning_rate": 8.765347787748328e-06, "logits/chosen": -3.0408403873443604, "logits/rejected": -2.5518863201141357, "logps/chosen": -107.4584732055664, "logps/rejected": -124.32923889160156, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.4413650035858154, "rewards/margins": 5.934386253356934, "rewards/rejected": -7.375751495361328, "step": 7338 }, { "epoch": 1.14, "learning_rate": 8.76461434721718e-06, "logits/chosen": -1.3787299394607544, "logits/rejected": -2.1989426612854004, "logps/chosen": -324.5477294921875, "logps/rejected": -224.74493408203125, "loss": 1.0798, "rewards/accuracies": 0.5, "rewards/chosen": -3.5596954822540283, "rewards/margins": 5.316073417663574, "rewards/rejected": -8.875768661499023, "step": 7339 }, { "epoch": 1.14, "learning_rate": 8.763880906686032e-06, "logits/chosen": -3.0060508251190186, "logits/rejected": -2.607180595397949, "logps/chosen": -564.652587890625, "logps/rejected": -359.18231201171875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.371856689453125, "rewards/margins": 5.714961051940918, "rewards/rejected": -8.086817741394043, "step": 7340 }, { "epoch": 1.14, "learning_rate": 8.763147466154883e-06, "logits/chosen": -2.924740791320801, "logits/rejected": -3.123598575592041, "logps/chosen": -48.27308654785156, "logps/rejected": -172.67218017578125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.1208271980285645, "rewards/margins": 7.045736789703369, "rewards/rejected": -9.166563987731934, "step": 7341 }, { "epoch": 1.14, "learning_rate": 8.762414025623735e-06, "logits/chosen": -2.7264034748077393, "logits/rejected": -3.1243646144866943, "logps/chosen": -107.85456848144531, "logps/rejected": -154.69515991210938, "loss": 0.7466, "rewards/accuracies": 0.5, "rewards/chosen": -3.062164306640625, "rewards/margins": 3.7558579444885254, "rewards/rejected": -6.81802225112915, "step": 7342 }, { "epoch": 1.14, "learning_rate": 8.761680585092587e-06, "logits/chosen": -1.6873114109039307, "logits/rejected": -2.5411536693573, "logps/chosen": -168.716064453125, "logps/rejected": -516.7215576171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2533085346221924, "rewards/margins": 11.359517097473145, "rewards/rejected": -13.612825393676758, "step": 7343 }, { "epoch": 1.14, "learning_rate": 8.76094714456144e-06, "logits/chosen": -2.4877188205718994, "logits/rejected": -2.8656630516052246, "logps/chosen": -75.43142700195312, "logps/rejected": -266.7168884277344, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.945833444595337, "rewards/margins": 7.767683029174805, "rewards/rejected": -9.713516235351562, "step": 7344 }, { "epoch": 1.14, "learning_rate": 8.760213704030293e-06, "logits/chosen": -2.3161613941192627, "logits/rejected": -2.939932107925415, "logps/chosen": -78.1260986328125, "logps/rejected": -318.6399230957031, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8001610040664673, "rewards/margins": 7.354121685028076, "rewards/rejected": -8.154282569885254, "step": 7345 }, { "epoch": 1.14, "learning_rate": 8.759480263499144e-06, "logits/chosen": -1.8854670524597168, "logits/rejected": -2.6423897743225098, "logps/chosen": -216.2748565673828, "logps/rejected": -608.99462890625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.829918622970581, "rewards/margins": 9.308806419372559, "rewards/rejected": -11.138725280761719, "step": 7346 }, { "epoch": 1.14, "learning_rate": 8.758746822967996e-06, "logits/chosen": -1.9258891344070435, "logits/rejected": -2.614292621612549, "logps/chosen": -103.84457397460938, "logps/rejected": -137.20486450195312, "loss": 0.5158, "rewards/accuracies": 0.5, "rewards/chosen": -3.8859620094299316, "rewards/margins": 1.004171371459961, "rewards/rejected": -4.890133380889893, "step": 7347 }, { "epoch": 1.14, "learning_rate": 8.758013382436848e-06, "logits/chosen": -1.3713231086730957, "logits/rejected": -3.0177114009857178, "logps/chosen": -249.72560119628906, "logps/rejected": -561.1712036132812, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -2.1987996101379395, "rewards/margins": 6.451021671295166, "rewards/rejected": -8.649821281433105, "step": 7348 }, { "epoch": 1.14, "learning_rate": 8.7572799419057e-06, "logits/chosen": -1.6211882829666138, "logits/rejected": -2.7849905490875244, "logps/chosen": -85.89109802246094, "logps/rejected": -312.14105224609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.507829189300537, "rewards/margins": 7.576348304748535, "rewards/rejected": -11.084177017211914, "step": 7349 }, { "epoch": 1.14, "learning_rate": 8.756546501374552e-06, "logits/chosen": -2.226048231124878, "logits/rejected": -2.818948745727539, "logps/chosen": -276.5617980957031, "logps/rejected": -400.826904296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1058140993118286, "rewards/margins": 7.311977386474609, "rewards/rejected": -8.417791366577148, "step": 7350 }, { "epoch": 1.14, "learning_rate": 8.755813060843404e-06, "logits/chosen": -2.022869110107422, "logits/rejected": -2.8085503578186035, "logps/chosen": -104.30612182617188, "logps/rejected": -310.188720703125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -3.7675890922546387, "rewards/margins": 5.8281965255737305, "rewards/rejected": -9.595785140991211, "step": 7351 }, { "epoch": 1.14, "learning_rate": 8.755079620312256e-06, "logits/chosen": -2.9961609840393066, "logits/rejected": -3.1243996620178223, "logps/chosen": -184.8857421875, "logps/rejected": -221.8861541748047, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.16549646854400635, "rewards/margins": 5.4213175773620605, "rewards/rejected": -5.586813926696777, "step": 7352 }, { "epoch": 1.14, "learning_rate": 8.75434617978111e-06, "logits/chosen": -2.827740430831909, "logits/rejected": -2.0973455905914307, "logps/chosen": -247.1187286376953, "logps/rejected": -229.55963134765625, "loss": 2.389, "rewards/accuracies": 0.5, "rewards/chosen": -4.020941257476807, "rewards/margins": 1.0151736736297607, "rewards/rejected": -5.036114692687988, "step": 7353 }, { "epoch": 1.14, "learning_rate": 8.753612739249961e-06, "logits/chosen": -2.7206690311431885, "logits/rejected": -2.91017484664917, "logps/chosen": -244.94825744628906, "logps/rejected": -493.7703857421875, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -3.209977626800537, "rewards/margins": 8.147582054138184, "rewards/rejected": -11.357559204101562, "step": 7354 }, { "epoch": 1.14, "learning_rate": 8.752879298718813e-06, "logits/chosen": -2.5032618045806885, "logits/rejected": -2.801703929901123, "logps/chosen": -69.09595489501953, "logps/rejected": -259.3505554199219, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.747314453125, "rewards/margins": 6.32627010345459, "rewards/rejected": -10.07358455657959, "step": 7355 }, { "epoch": 1.14, "learning_rate": 8.752145858187665e-06, "logits/chosen": -2.988231897354126, "logits/rejected": -3.3979458808898926, "logps/chosen": -46.557777404785156, "logps/rejected": -181.77255249023438, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.9740352630615234, "rewards/margins": 5.770613670349121, "rewards/rejected": -8.744648933410645, "step": 7356 }, { "epoch": 1.14, "learning_rate": 8.751412417656517e-06, "logits/chosen": -2.5730011463165283, "logits/rejected": -2.697376012802124, "logps/chosen": -85.90823364257812, "logps/rejected": -274.5802001953125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.105149030685425, "rewards/margins": 8.843323707580566, "rewards/rejected": -10.94847297668457, "step": 7357 }, { "epoch": 1.14, "learning_rate": 8.750678977125369e-06, "logits/chosen": -2.5744669437408447, "logits/rejected": -3.1599245071411133, "logps/chosen": -113.50949096679688, "logps/rejected": -265.55157470703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.7247662544250488, "rewards/margins": 6.544796943664551, "rewards/rejected": -7.2695631980896, "step": 7358 }, { "epoch": 1.14, "learning_rate": 8.74994553659422e-06, "logits/chosen": -2.9255053997039795, "logits/rejected": -2.787487506866455, "logps/chosen": -174.07708740234375, "logps/rejected": -214.90565490722656, "loss": 0.1552, "rewards/accuracies": 1.0, "rewards/chosen": -4.074133396148682, "rewards/margins": 1.9788944721221924, "rewards/rejected": -6.053028106689453, "step": 7359 }, { "epoch": 1.14, "learning_rate": 8.749212096063072e-06, "logits/chosen": -1.567872166633606, "logits/rejected": -2.816915512084961, "logps/chosen": -77.7213363647461, "logps/rejected": -256.0707702636719, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.1921658515930176, "rewards/margins": 5.598724365234375, "rewards/rejected": -8.79089069366455, "step": 7360 }, { "epoch": 1.14, "learning_rate": 8.748478655531926e-06, "logits/chosen": -2.574788808822632, "logits/rejected": -2.822101593017578, "logps/chosen": -747.220458984375, "logps/rejected": -639.6834106445312, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -3.328002452850342, "rewards/margins": 5.286669731140137, "rewards/rejected": -8.614672660827637, "step": 7361 }, { "epoch": 1.14, "learning_rate": 8.747745215000778e-06, "logits/chosen": -1.8951011896133423, "logits/rejected": -2.7288331985473633, "logps/chosen": -472.5738525390625, "logps/rejected": -692.6765747070312, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -0.5047054290771484, "rewards/margins": 11.401263236999512, "rewards/rejected": -11.905967712402344, "step": 7362 }, { "epoch": 1.15, "learning_rate": 8.747011774469631e-06, "logits/chosen": -2.7723872661590576, "logits/rejected": -2.197957754135132, "logps/chosen": -146.51141357421875, "logps/rejected": -98.73340606689453, "loss": 0.6948, "rewards/accuracies": 0.5, "rewards/chosen": -3.1770124435424805, "rewards/margins": 2.7500858306884766, "rewards/rejected": -5.927098274230957, "step": 7363 }, { "epoch": 1.15, "learning_rate": 8.746278333938483e-06, "logits/chosen": -1.2858933210372925, "logits/rejected": -2.3543455600738525, "logps/chosen": -170.1593475341797, "logps/rejected": -421.14154052734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.6724929809570312, "rewards/margins": 8.361932754516602, "rewards/rejected": -10.034425735473633, "step": 7364 }, { "epoch": 1.15, "learning_rate": 8.745544893407335e-06, "logits/chosen": -2.6232969760894775, "logits/rejected": -2.832794666290283, "logps/chosen": -319.9772644042969, "logps/rejected": -488.5439147949219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5484676361083984, "rewards/margins": 9.576080322265625, "rewards/rejected": -12.124547958374023, "step": 7365 }, { "epoch": 1.15, "learning_rate": 8.744811452876187e-06, "logits/chosen": -2.655454397201538, "logits/rejected": -1.7078397274017334, "logps/chosen": -141.14462280273438, "logps/rejected": -208.08184814453125, "loss": 0.0523, "rewards/accuracies": 1.0, "rewards/chosen": -2.831068992614746, "rewards/margins": 6.399316787719727, "rewards/rejected": -9.230385780334473, "step": 7366 }, { "epoch": 1.15, "learning_rate": 8.744078012345039e-06, "logits/chosen": -2.8799564838409424, "logits/rejected": -2.494454860687256, "logps/chosen": -145.22935485839844, "logps/rejected": -186.95030212402344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 2.0642597675323486, "rewards/margins": 9.139421463012695, "rewards/rejected": -7.075161933898926, "step": 7367 }, { "epoch": 1.15, "learning_rate": 8.743344571813891e-06, "logits/chosen": -2.7694125175476074, "logits/rejected": -3.0232505798339844, "logps/chosen": -303.1068115234375, "logps/rejected": -502.4425048828125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.7028533220291138, "rewards/margins": 6.350353717803955, "rewards/rejected": -8.053207397460938, "step": 7368 }, { "epoch": 1.15, "learning_rate": 8.742611131282743e-06, "logits/chosen": -2.7351059913635254, "logits/rejected": -2.6088311672210693, "logps/chosen": -105.86199951171875, "logps/rejected": -147.48997497558594, "loss": 1.0794, "rewards/accuracies": 0.5, "rewards/chosen": -3.8823533058166504, "rewards/margins": 2.814216136932373, "rewards/rejected": -6.696569442749023, "step": 7369 }, { "epoch": 1.15, "learning_rate": 8.741877690751596e-06, "logits/chosen": -2.157521963119507, "logits/rejected": -2.378844976425171, "logps/chosen": -249.47137451171875, "logps/rejected": -376.00177001953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7987350225448608, "rewards/margins": 10.64600944519043, "rewards/rejected": -12.444745063781738, "step": 7370 }, { "epoch": 1.15, "learning_rate": 8.741144250220448e-06, "logits/chosen": -2.5501296520233154, "logits/rejected": -1.3641927242279053, "logps/chosen": -181.56277465820312, "logps/rejected": -157.51223754882812, "loss": 0.1905, "rewards/accuracies": 1.0, "rewards/chosen": -2.0160441398620605, "rewards/margins": 4.496237277984619, "rewards/rejected": -6.51228141784668, "step": 7371 }, { "epoch": 1.15, "learning_rate": 8.7404108096893e-06, "logits/chosen": -1.9952212572097778, "logits/rejected": -2.8314731121063232, "logps/chosen": -158.83529663085938, "logps/rejected": -367.9770202636719, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.7223739624023438, "rewards/margins": 7.170607566833496, "rewards/rejected": -9.892982482910156, "step": 7372 }, { "epoch": 1.15, "learning_rate": 8.739677369158152e-06, "logits/chosen": -2.5229854583740234, "logits/rejected": -2.9799649715423584, "logps/chosen": -320.8476867675781, "logps/rejected": -309.2754821777344, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.9023315906524658, "rewards/margins": 5.659759521484375, "rewards/rejected": -7.56209135055542, "step": 7373 }, { "epoch": 1.15, "learning_rate": 8.738943928627004e-06, "logits/chosen": -1.1874780654907227, "logits/rejected": -2.9729487895965576, "logps/chosen": -137.33470153808594, "logps/rejected": -476.2314453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.030434489250183, "rewards/margins": 7.898355484008789, "rewards/rejected": -8.928790092468262, "step": 7374 }, { "epoch": 1.15, "learning_rate": 8.738210488095856e-06, "logits/chosen": -2.508455514907837, "logits/rejected": -2.6744532585144043, "logps/chosen": -121.15983581542969, "logps/rejected": -195.047607421875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6709415316581726, "rewards/margins": 6.654541969299316, "rewards/rejected": -7.325483798980713, "step": 7375 }, { "epoch": 1.15, "learning_rate": 8.737477047564708e-06, "logits/chosen": -2.065572500228882, "logits/rejected": -2.947702646255493, "logps/chosen": -53.595272064208984, "logps/rejected": -271.08636474609375, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.156304121017456, "rewards/margins": 5.589260101318359, "rewards/rejected": -6.7455644607543945, "step": 7376 }, { "epoch": 1.15, "learning_rate": 8.73674360703356e-06, "logits/chosen": -2.4204487800598145, "logits/rejected": -2.9807612895965576, "logps/chosen": -166.40748596191406, "logps/rejected": -325.4371337890625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.7491912841796875, "rewards/margins": 7.496706962585449, "rewards/rejected": -9.245898246765137, "step": 7377 }, { "epoch": 1.15, "learning_rate": 8.736010166502411e-06, "logits/chosen": -1.4005709886550903, "logits/rejected": -2.4124245643615723, "logps/chosen": -140.01828002929688, "logps/rejected": -374.0806579589844, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -2.4781765937805176, "rewards/margins": 6.98167610168457, "rewards/rejected": -9.459853172302246, "step": 7378 }, { "epoch": 1.15, "learning_rate": 8.735276725971265e-06, "logits/chosen": -2.6435277462005615, "logits/rejected": -0.9678269624710083, "logps/chosen": -301.40716552734375, "logps/rejected": -82.04065704345703, "loss": 0.4894, "rewards/accuracies": 0.5, "rewards/chosen": -3.233914613723755, "rewards/margins": 2.243243932723999, "rewards/rejected": -5.477158546447754, "step": 7379 }, { "epoch": 1.15, "learning_rate": 8.734543285440117e-06, "logits/chosen": -2.536452054977417, "logits/rejected": -1.768925666809082, "logps/chosen": -441.9458312988281, "logps/rejected": -420.5212097167969, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -3.179586410522461, "rewards/margins": 4.982300758361816, "rewards/rejected": -8.161887168884277, "step": 7380 }, { "epoch": 1.15, "learning_rate": 8.733809844908969e-06, "logits/chosen": -2.3161699771881104, "logits/rejected": -2.9637372493743896, "logps/chosen": -99.70695495605469, "logps/rejected": -248.78900146484375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -4.368055820465088, "rewards/margins": 6.36592960357666, "rewards/rejected": -10.733985900878906, "step": 7381 }, { "epoch": 1.15, "learning_rate": 8.73307640437782e-06, "logits/chosen": -2.7428789138793945, "logits/rejected": -2.550471305847168, "logps/chosen": -538.468994140625, "logps/rejected": -558.7337646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.21529310941696167, "rewards/margins": 9.425108909606934, "rewards/rejected": -9.640401840209961, "step": 7382 }, { "epoch": 1.15, "learning_rate": 8.732342963846672e-06, "logits/chosen": -2.190781831741333, "logits/rejected": -2.590027332305908, "logps/chosen": -295.25787353515625, "logps/rejected": -514.5731811523438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3783555030822754, "rewards/margins": 11.351122856140137, "rewards/rejected": -13.72947883605957, "step": 7383 }, { "epoch": 1.15, "learning_rate": 8.731609523315524e-06, "logits/chosen": -2.722403049468994, "logits/rejected": -2.4745256900787354, "logps/chosen": -213.94918823242188, "logps/rejected": -228.20541381835938, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.12019681930542, "rewards/margins": 6.328060626983643, "rewards/rejected": -8.448257446289062, "step": 7384 }, { "epoch": 1.15, "learning_rate": 8.730876082784376e-06, "logits/chosen": -1.9393430948257446, "logits/rejected": -2.8659982681274414, "logps/chosen": -98.751220703125, "logps/rejected": -286.26123046875, "loss": 0.6143, "rewards/accuracies": 0.5, "rewards/chosen": -4.357638359069824, "rewards/margins": 2.1322693824768066, "rewards/rejected": -6.489908218383789, "step": 7385 }, { "epoch": 1.15, "learning_rate": 8.730142642253228e-06, "logits/chosen": -2.5453004837036133, "logits/rejected": -1.9430594444274902, "logps/chosen": -248.16659545898438, "logps/rejected": -210.8181610107422, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.405182361602783, "rewards/margins": 5.547759532928467, "rewards/rejected": -7.95294189453125, "step": 7386 }, { "epoch": 1.15, "learning_rate": 8.72940920172208e-06, "logits/chosen": -1.7611855268478394, "logits/rejected": -2.6852877140045166, "logps/chosen": -85.03693389892578, "logps/rejected": -265.29461669921875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.206827163696289, "rewards/margins": 7.81799840927124, "rewards/rejected": -10.024825096130371, "step": 7387 }, { "epoch": 1.15, "learning_rate": 8.728675761190934e-06, "logits/chosen": -2.784271001815796, "logits/rejected": -2.8750007152557373, "logps/chosen": -282.17559814453125, "logps/rejected": -354.83544921875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.081228733062744, "rewards/margins": 5.582894325256348, "rewards/rejected": -7.664122581481934, "step": 7388 }, { "epoch": 1.15, "learning_rate": 8.727942320659785e-06, "logits/chosen": -1.9221012592315674, "logits/rejected": -2.768874168395996, "logps/chosen": -156.9818115234375, "logps/rejected": -534.8229370117188, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.8847508430480957, "rewards/margins": 5.7494916915893555, "rewards/rejected": -8.63424301147461, "step": 7389 }, { "epoch": 1.15, "learning_rate": 8.727208880128637e-06, "logits/chosen": -2.31575345993042, "logits/rejected": -2.3395979404449463, "logps/chosen": -200.84768676757812, "logps/rejected": -220.6245574951172, "loss": 3.4652, "rewards/accuracies": 0.5, "rewards/chosen": -5.223753929138184, "rewards/margins": 0.28961658477783203, "rewards/rejected": -5.513370513916016, "step": 7390 }, { "epoch": 1.15, "learning_rate": 8.72647543959749e-06, "logits/chosen": -1.7360904216766357, "logits/rejected": -2.6166117191314697, "logps/chosen": -84.1751708984375, "logps/rejected": -403.94305419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.410295248031616, "rewards/margins": 11.813545227050781, "rewards/rejected": -14.223840713500977, "step": 7391 }, { "epoch": 1.15, "learning_rate": 8.725741999066341e-06, "logits/chosen": -3.316044330596924, "logits/rejected": -3.400918483734131, "logps/chosen": -170.03323364257812, "logps/rejected": -325.5499267578125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.121182918548584, "rewards/margins": 7.567103385925293, "rewards/rejected": -9.688286781311035, "step": 7392 }, { "epoch": 1.15, "learning_rate": 8.725008558535193e-06, "logits/chosen": -2.253718376159668, "logits/rejected": -2.712651491165161, "logps/chosen": -136.11058044433594, "logps/rejected": -328.7622985839844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 0.08415374159812927, "rewards/margins": 7.576520919799805, "rewards/rejected": -7.492366790771484, "step": 7393 }, { "epoch": 1.15, "learning_rate": 8.724275118004045e-06, "logits/chosen": -2.605588436126709, "logits/rejected": -2.7957048416137695, "logps/chosen": -477.60015869140625, "logps/rejected": -385.0838623046875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -2.109236240386963, "rewards/margins": 5.181305885314941, "rewards/rejected": -7.290542125701904, "step": 7394 }, { "epoch": 1.15, "learning_rate": 8.723541677472898e-06, "logits/chosen": -1.5059576034545898, "logits/rejected": -2.738240957260132, "logps/chosen": -64.16732025146484, "logps/rejected": -361.66845703125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -2.509007215499878, "rewards/margins": 8.809572219848633, "rewards/rejected": -11.31857967376709, "step": 7395 }, { "epoch": 1.15, "learning_rate": 8.72280823694175e-06, "logits/chosen": -2.1183383464813232, "logits/rejected": -2.9836442470550537, "logps/chosen": -74.1441650390625, "logps/rejected": -268.1819763183594, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -3.1287293434143066, "rewards/margins": 3.758474349975586, "rewards/rejected": -6.887203216552734, "step": 7396 }, { "epoch": 1.15, "learning_rate": 8.722074796410604e-06, "logits/chosen": -2.727795124053955, "logits/rejected": -2.354233980178833, "logps/chosen": -149.96006774902344, "logps/rejected": -194.28128051757812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.4452984035015106, "rewards/margins": 8.456289291381836, "rewards/rejected": -8.901588439941406, "step": 7397 }, { "epoch": 1.15, "learning_rate": 8.721341355879456e-06, "logits/chosen": -3.074371814727783, "logits/rejected": -3.1076879501342773, "logps/chosen": -366.7640686035156, "logps/rejected": -443.8581848144531, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3460853099822998, "rewards/margins": 8.972301483154297, "rewards/rejected": -10.318387031555176, "step": 7398 }, { "epoch": 1.15, "learning_rate": 8.720607915348308e-06, "logits/chosen": -2.1105258464813232, "logits/rejected": -2.6681101322174072, "logps/chosen": -216.8173370361328, "logps/rejected": -250.19363403320312, "loss": 1.8077, "rewards/accuracies": 0.5, "rewards/chosen": -3.1087753772735596, "rewards/margins": 1.5648940801620483, "rewards/rejected": -4.673669338226318, "step": 7399 }, { "epoch": 1.15, "learning_rate": 8.71987447481716e-06, "logits/chosen": -2.5488457679748535, "logits/rejected": -2.614161968231201, "logps/chosen": -340.9432067871094, "logps/rejected": -661.0675659179688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9942047595977783, "rewards/margins": 14.27263069152832, "rewards/rejected": -16.266834259033203, "step": 7400 }, { "epoch": 1.15, "learning_rate": 8.719141034286011e-06, "logits/chosen": -0.6047711372375488, "logits/rejected": -1.966179609298706, "logps/chosen": -81.93462371826172, "logps/rejected": -509.9261779785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3406139612197876, "rewards/margins": 11.152341842651367, "rewards/rejected": -12.492955207824707, "step": 7401 }, { "epoch": 1.15, "learning_rate": 8.718407593754863e-06, "logits/chosen": -1.4431072473526, "logits/rejected": -2.8784594535827637, "logps/chosen": -118.29461669921875, "logps/rejected": -276.8114318847656, "loss": 0.6918, "rewards/accuracies": 0.5, "rewards/chosen": -3.829369306564331, "rewards/margins": 1.934474229812622, "rewards/rejected": -5.763843536376953, "step": 7402 }, { "epoch": 1.15, "learning_rate": 8.717674153223715e-06, "logits/chosen": -2.661233425140381, "logits/rejected": -2.9369232654571533, "logps/chosen": -114.19009399414062, "logps/rejected": -275.03009033203125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.11076182126998901, "rewards/margins": 9.699589729309082, "rewards/rejected": -9.810351371765137, "step": 7403 }, { "epoch": 1.15, "learning_rate": 8.716940712692567e-06, "logits/chosen": -2.2972073554992676, "logits/rejected": -2.80568528175354, "logps/chosen": -176.1441192626953, "logps/rejected": -360.106689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.072535753250122, "rewards/margins": 10.568729400634766, "rewards/rejected": -11.641263961791992, "step": 7404 }, { "epoch": 1.15, "learning_rate": 8.716207272161419e-06, "logits/chosen": -2.530745267868042, "logits/rejected": -1.4437917470932007, "logps/chosen": -267.91326904296875, "logps/rejected": -259.94384765625, "loss": 1.0453, "rewards/accuracies": 0.5, "rewards/chosen": -2.542374849319458, "rewards/margins": 3.255910873413086, "rewards/rejected": -5.798285484313965, "step": 7405 }, { "epoch": 1.15, "learning_rate": 8.715473831630272e-06, "logits/chosen": -3.1198251247406006, "logits/rejected": -3.132018566131592, "logps/chosen": -337.8858337402344, "logps/rejected": -229.25047302246094, "loss": 1.2645, "rewards/accuracies": 0.5, "rewards/chosen": -3.5842607021331787, "rewards/margins": 1.1491234302520752, "rewards/rejected": -4.733384132385254, "step": 7406 }, { "epoch": 1.15, "learning_rate": 8.714740391099124e-06, "logits/chosen": -3.1347599029541016, "logits/rejected": -2.3745102882385254, "logps/chosen": -316.0599365234375, "logps/rejected": -209.09080505371094, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -0.08378452062606812, "rewards/margins": 5.840574264526367, "rewards/rejected": -5.92435884475708, "step": 7407 }, { "epoch": 1.15, "learning_rate": 8.714006950567976e-06, "logits/chosen": -2.9679043292999268, "logits/rejected": -2.087817907333374, "logps/chosen": -244.7165985107422, "logps/rejected": -184.36048889160156, "loss": 0.1056, "rewards/accuracies": 1.0, "rewards/chosen": -2.352843999862671, "rewards/margins": 3.538969039916992, "rewards/rejected": -5.891813278198242, "step": 7408 }, { "epoch": 1.15, "learning_rate": 8.713273510036828e-06, "logits/chosen": -1.4610533714294434, "logits/rejected": -2.721043586730957, "logps/chosen": -157.27940368652344, "logps/rejected": -367.7143249511719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2548789978027344, "rewards/margins": 9.503227233886719, "rewards/rejected": -9.248348236083984, "step": 7409 }, { "epoch": 1.15, "learning_rate": 8.71254006950568e-06, "logits/chosen": -0.888437032699585, "logits/rejected": -2.660214424133301, "logps/chosen": -120.06063842773438, "logps/rejected": -611.8125610351562, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9657160043716431, "rewards/margins": 7.518133640289307, "rewards/rejected": -8.48384952545166, "step": 7410 }, { "epoch": 1.15, "learning_rate": 8.711806628974532e-06, "logits/chosen": -2.655256748199463, "logits/rejected": -2.5564017295837402, "logps/chosen": -469.2908630371094, "logps/rejected": -477.860595703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0348236560821533, "rewards/margins": 7.899074077606201, "rewards/rejected": -8.933897972106934, "step": 7411 }, { "epoch": 1.15, "learning_rate": 8.711073188443384e-06, "logits/chosen": -2.706981897354126, "logits/rejected": -2.430358648300171, "logps/chosen": -188.24685668945312, "logps/rejected": -187.52696228027344, "loss": 1.9814, "rewards/accuracies": 0.5, "rewards/chosen": -5.194981098175049, "rewards/margins": -0.7136585712432861, "rewards/rejected": -4.481322288513184, "step": 7412 }, { "epoch": 1.15, "learning_rate": 8.710339747912236e-06, "logits/chosen": -1.6702778339385986, "logits/rejected": -2.7609152793884277, "logps/chosen": -248.0080108642578, "logps/rejected": -367.03521728515625, "loss": 1.1073, "rewards/accuracies": 0.5, "rewards/chosen": -3.30332350730896, "rewards/margins": 4.525819778442383, "rewards/rejected": -7.829143047332764, "step": 7413 }, { "epoch": 1.15, "learning_rate": 8.709606307381087e-06, "logits/chosen": -1.834248423576355, "logits/rejected": -2.831350326538086, "logps/chosen": -114.94347381591797, "logps/rejected": -235.99000549316406, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -3.015063762664795, "rewards/margins": 5.898962020874023, "rewards/rejected": -8.914026260375977, "step": 7414 }, { "epoch": 1.15, "learning_rate": 8.708872866849941e-06, "logits/chosen": -1.9136097431182861, "logits/rejected": -2.768085241317749, "logps/chosen": -128.4534149169922, "logps/rejected": -350.56732177734375, "loss": 0.0733, "rewards/accuracies": 1.0, "rewards/chosen": -2.5897889137268066, "rewards/margins": 6.2410101890563965, "rewards/rejected": -8.830799102783203, "step": 7415 }, { "epoch": 1.15, "learning_rate": 8.708139426318793e-06, "logits/chosen": -2.807194948196411, "logits/rejected": -3.1803488731384277, "logps/chosen": -131.06817626953125, "logps/rejected": -217.64100646972656, "loss": 1.4678, "rewards/accuracies": 0.5, "rewards/chosen": -4.095146179199219, "rewards/margins": 1.5411845445632935, "rewards/rejected": -5.636330604553223, "step": 7416 }, { "epoch": 1.15, "learning_rate": 8.707405985787645e-06, "logits/chosen": -1.5162302255630493, "logits/rejected": -2.6788666248321533, "logps/chosen": -411.7626037597656, "logps/rejected": -575.1094970703125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.5662455558776855, "rewards/margins": 6.359112739562988, "rewards/rejected": -8.925358772277832, "step": 7417 }, { "epoch": 1.15, "learning_rate": 8.706672545256497e-06, "logits/chosen": -2.8351454734802246, "logits/rejected": -3.001307487487793, "logps/chosen": -134.9815673828125, "logps/rejected": -162.22340393066406, "loss": 2.5135, "rewards/accuracies": 0.5, "rewards/chosen": -5.325197219848633, "rewards/margins": 1.582700252532959, "rewards/rejected": -6.907897472381592, "step": 7418 }, { "epoch": 1.15, "learning_rate": 8.705939104725349e-06, "logits/chosen": -2.8033008575439453, "logits/rejected": -2.660170078277588, "logps/chosen": -150.3414306640625, "logps/rejected": -217.91201782226562, "loss": 0.8627, "rewards/accuracies": 0.5, "rewards/chosen": -4.650698661804199, "rewards/margins": 4.882127285003662, "rewards/rejected": -9.53282642364502, "step": 7419 }, { "epoch": 1.15, "learning_rate": 8.7052056641942e-06, "logits/chosen": -2.218945264816284, "logits/rejected": -2.893383741378784, "logps/chosen": -164.5584716796875, "logps/rejected": -309.8401794433594, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.1969635486602783, "rewards/margins": 5.892124176025391, "rewards/rejected": -7.08908748626709, "step": 7420 }, { "epoch": 1.15, "learning_rate": 8.704472223663052e-06, "logits/chosen": -2.202343463897705, "logits/rejected": -2.8430869579315186, "logps/chosen": -208.03091430664062, "logps/rejected": -368.8197021484375, "loss": 0.1894, "rewards/accuracies": 1.0, "rewards/chosen": -2.8856163024902344, "rewards/margins": 6.278619289398193, "rewards/rejected": -9.164236068725586, "step": 7421 }, { "epoch": 1.15, "learning_rate": 8.703738783131904e-06, "logits/chosen": -1.6613675355911255, "logits/rejected": -2.8312408924102783, "logps/chosen": -208.22080993652344, "logps/rejected": -532.6865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.003180742263794, "rewards/margins": 11.877887725830078, "rewards/rejected": -12.88106918334961, "step": 7422 }, { "epoch": 1.15, "learning_rate": 8.703005342600756e-06, "logits/chosen": -2.736654758453369, "logits/rejected": -2.9120805263519287, "logps/chosen": -251.0224609375, "logps/rejected": -320.7654724121094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.7368179559707642, "rewards/margins": 6.818853855133057, "rewards/rejected": -7.555671691894531, "step": 7423 }, { "epoch": 1.15, "learning_rate": 8.70227190206961e-06, "logits/chosen": -2.9880409240722656, "logits/rejected": -2.928238868713379, "logps/chosen": -202.32257080078125, "logps/rejected": -193.38763427734375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -0.15078240633010864, "rewards/margins": 5.394407749176025, "rewards/rejected": -5.54518985748291, "step": 7424 }, { "epoch": 1.15, "learning_rate": 8.701538461538461e-06, "logits/chosen": -2.580526351928711, "logits/rejected": -2.8902223110198975, "logps/chosen": -49.55596160888672, "logps/rejected": -242.05267333984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.6549997329711914, "rewards/margins": 7.5193586349487305, "rewards/rejected": -10.174358367919922, "step": 7425 }, { "epoch": 1.15, "learning_rate": 8.700805021007313e-06, "logits/chosen": -2.7655460834503174, "logits/rejected": -2.748811721801758, "logps/chosen": -103.59326934814453, "logps/rejected": -271.3581848144531, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.7437114715576172, "rewards/margins": 7.296234130859375, "rewards/rejected": -9.039945602416992, "step": 7426 }, { "epoch": 1.16, "learning_rate": 8.700071580476165e-06, "logits/chosen": -2.851553440093994, "logits/rejected": -2.676274061203003, "logps/chosen": -135.37168884277344, "logps/rejected": -180.907470703125, "loss": 0.2616, "rewards/accuracies": 1.0, "rewards/chosen": -1.5941516160964966, "rewards/margins": 3.86332106590271, "rewards/rejected": -5.457472801208496, "step": 7427 }, { "epoch": 1.16, "learning_rate": 8.699338139945017e-06, "logits/chosen": -1.064193844795227, "logits/rejected": -2.4318270683288574, "logps/chosen": -79.68656921386719, "logps/rejected": -571.65625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1351916790008545, "rewards/margins": 10.71055793762207, "rewards/rejected": -11.84575080871582, "step": 7428 }, { "epoch": 1.16, "learning_rate": 8.69860469941387e-06, "logits/chosen": -3.1904473304748535, "logits/rejected": -3.2135581970214844, "logps/chosen": -232.91937255859375, "logps/rejected": -269.07159423828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9255080223083496, "rewards/margins": 7.124250888824463, "rewards/rejected": -10.049758911132812, "step": 7429 }, { "epoch": 1.16, "learning_rate": 8.697871258882723e-06, "logits/chosen": -1.1590557098388672, "logits/rejected": -2.668405055999756, "logps/chosen": -212.36911010742188, "logps/rejected": -301.0683898925781, "loss": 0.2555, "rewards/accuracies": 1.0, "rewards/chosen": -2.902827024459839, "rewards/margins": 2.662055015563965, "rewards/rejected": -5.564882278442383, "step": 7430 }, { "epoch": 1.16, "learning_rate": 8.697137818351574e-06, "logits/chosen": -1.1207753419876099, "logits/rejected": -2.6675968170166016, "logps/chosen": -74.21624755859375, "logps/rejected": -295.446533203125, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": -4.050962448120117, "rewards/margins": 3.3099794387817383, "rewards/rejected": -7.3609418869018555, "step": 7431 }, { "epoch": 1.16, "learning_rate": 8.696404377820426e-06, "logits/chosen": -2.934814929962158, "logits/rejected": -1.2404015064239502, "logps/chosen": -306.5127258300781, "logps/rejected": -97.36654663085938, "loss": 1.4455, "rewards/accuracies": 0.5, "rewards/chosen": -5.443713188171387, "rewards/margins": -0.950392484664917, "rewards/rejected": -4.493320941925049, "step": 7432 }, { "epoch": 1.16, "learning_rate": 8.69567093728928e-06, "logits/chosen": -2.195402145385742, "logits/rejected": -3.2162654399871826, "logps/chosen": -98.8290023803711, "logps/rejected": -343.0086669921875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.4812015295028687, "rewards/margins": 6.795246124267578, "rewards/rejected": -8.276447296142578, "step": 7433 }, { "epoch": 1.16, "learning_rate": 8.694937496758132e-06, "logits/chosen": -2.471451997756958, "logits/rejected": -2.7933268547058105, "logps/chosen": -119.21347045898438, "logps/rejected": -238.4364471435547, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.640891671180725, "rewards/margins": 6.249555587768555, "rewards/rejected": -7.890447616577148, "step": 7434 }, { "epoch": 1.16, "learning_rate": 8.694204056226984e-06, "logits/chosen": -2.9967641830444336, "logits/rejected": -2.315781593322754, "logps/chosen": -211.74148559570312, "logps/rejected": -194.7174072265625, "loss": 0.3373, "rewards/accuracies": 1.0, "rewards/chosen": -3.1850616931915283, "rewards/margins": 4.583064079284668, "rewards/rejected": -7.768125534057617, "step": 7435 }, { "epoch": 1.16, "learning_rate": 8.693470615695836e-06, "logits/chosen": -2.322561502456665, "logits/rejected": -2.5269343852996826, "logps/chosen": -110.92863464355469, "logps/rejected": -163.727294921875, "loss": 0.122, "rewards/accuracies": 1.0, "rewards/chosen": -2.5717835426330566, "rewards/margins": 5.304898262023926, "rewards/rejected": -7.876681804656982, "step": 7436 }, { "epoch": 1.16, "learning_rate": 8.692737175164687e-06, "logits/chosen": -2.4708611965179443, "logits/rejected": -3.0195600986480713, "logps/chosen": -397.1784362792969, "logps/rejected": -534.20947265625, "loss": 0.1821, "rewards/accuracies": 1.0, "rewards/chosen": -2.451862335205078, "rewards/margins": 3.2592873573303223, "rewards/rejected": -5.7111496925354, "step": 7437 }, { "epoch": 1.16, "learning_rate": 8.69200373463354e-06, "logits/chosen": -2.916529417037964, "logits/rejected": -2.495706558227539, "logps/chosen": -157.33538818359375, "logps/rejected": -189.69126892089844, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -1.4765989780426025, "rewards/margins": 4.927999973297119, "rewards/rejected": -6.404599189758301, "step": 7438 }, { "epoch": 1.16, "learning_rate": 8.691270294102391e-06, "logits/chosen": -2.539834976196289, "logits/rejected": -2.994340658187866, "logps/chosen": -85.24740600585938, "logps/rejected": -339.20379638671875, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -3.4101080894470215, "rewards/margins": 7.305201530456543, "rewards/rejected": -10.715309143066406, "step": 7439 }, { "epoch": 1.16, "learning_rate": 8.690536853571243e-06, "logits/chosen": -2.9221959114074707, "logits/rejected": -3.0588436126708984, "logps/chosen": -188.48866271972656, "logps/rejected": -228.0140380859375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.0997703075408936, "rewards/margins": 6.0229034423828125, "rewards/rejected": -8.122673988342285, "step": 7440 }, { "epoch": 1.16, "learning_rate": 8.689803413040095e-06, "logits/chosen": -3.1234724521636963, "logits/rejected": -3.215013265609741, "logps/chosen": -160.63540649414062, "logps/rejected": -208.2030487060547, "loss": 0.5169, "rewards/accuracies": 0.5, "rewards/chosen": -4.011636257171631, "rewards/margins": 4.610821723937988, "rewards/rejected": -8.622458457946777, "step": 7441 }, { "epoch": 1.16, "learning_rate": 8.689069972508949e-06, "logits/chosen": -3.0004959106445312, "logits/rejected": -2.860074281692505, "logps/chosen": -189.8314208984375, "logps/rejected": -238.31910705566406, "loss": 0.6311, "rewards/accuracies": 0.5, "rewards/chosen": -0.9420936703681946, "rewards/margins": 4.981266498565674, "rewards/rejected": -5.923360347747803, "step": 7442 }, { "epoch": 1.16, "learning_rate": 8.6883365319778e-06, "logits/chosen": -2.374075412750244, "logits/rejected": -2.9023098945617676, "logps/chosen": -218.86911010742188, "logps/rejected": -254.40557861328125, "loss": 0.345, "rewards/accuracies": 1.0, "rewards/chosen": -2.75390362739563, "rewards/margins": 5.152332305908203, "rewards/rejected": -7.90623664855957, "step": 7443 }, { "epoch": 1.16, "learning_rate": 8.687603091446652e-06, "logits/chosen": -1.219088077545166, "logits/rejected": -2.637857437133789, "logps/chosen": -80.28077697753906, "logps/rejected": -228.90045166015625, "loss": 0.2102, "rewards/accuracies": 1.0, "rewards/chosen": -2.809142589569092, "rewards/margins": 3.53076171875, "rewards/rejected": -6.339904308319092, "step": 7444 }, { "epoch": 1.16, "learning_rate": 8.686869650915504e-06, "logits/chosen": -2.523580312728882, "logits/rejected": -1.996422290802002, "logps/chosen": -127.6702651977539, "logps/rejected": -119.08971405029297, "loss": 0.4633, "rewards/accuracies": 0.5, "rewards/chosen": -3.8109641075134277, "rewards/margins": 0.7522785663604736, "rewards/rejected": -4.5632429122924805, "step": 7445 }, { "epoch": 1.16, "learning_rate": 8.686136210384356e-06, "logits/chosen": -1.6637976169586182, "logits/rejected": -2.5267672538757324, "logps/chosen": -78.1260986328125, "logps/rejected": -228.18714904785156, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -2.3571457862854004, "rewards/margins": 4.259469032287598, "rewards/rejected": -6.61661434173584, "step": 7446 }, { "epoch": 1.16, "learning_rate": 8.685402769853208e-06, "logits/chosen": -1.882828950881958, "logits/rejected": -3.2150375843048096, "logps/chosen": -83.05160522460938, "logps/rejected": -380.928955078125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.956725597381592, "rewards/margins": 6.0522003173828125, "rewards/rejected": -10.008926391601562, "step": 7447 }, { "epoch": 1.16, "learning_rate": 8.68466932932206e-06, "logits/chosen": -2.638796329498291, "logits/rejected": -2.694547414779663, "logps/chosen": -189.3105010986328, "logps/rejected": -208.64718627929688, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -3.097132921218872, "rewards/margins": 5.0648674964904785, "rewards/rejected": -8.16200065612793, "step": 7448 }, { "epoch": 1.16, "learning_rate": 8.683935888790912e-06, "logits/chosen": -3.071514844894409, "logits/rejected": -2.6488261222839355, "logps/chosen": -618.5443725585938, "logps/rejected": -562.2664794921875, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": -1.5148712396621704, "rewards/margins": 8.70600700378418, "rewards/rejected": -10.220878601074219, "step": 7449 }, { "epoch": 1.16, "learning_rate": 8.683202448259764e-06, "logits/chosen": -2.088676691055298, "logits/rejected": -3.195913791656494, "logps/chosen": -75.8046875, "logps/rejected": -526.052490234375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -1.7407028675079346, "rewards/margins": 6.9730329513549805, "rewards/rejected": -8.713735580444336, "step": 7450 }, { "epoch": 1.16, "learning_rate": 8.682469007728617e-06, "logits/chosen": -2.763651132583618, "logits/rejected": -3.1779322624206543, "logps/chosen": -270.54913330078125, "logps/rejected": -453.376708984375, "loss": 0.2653, "rewards/accuracies": 1.0, "rewards/chosen": -2.8568663597106934, "rewards/margins": 3.7233188152313232, "rewards/rejected": -6.580185413360596, "step": 7451 }, { "epoch": 1.16, "learning_rate": 8.681735567197469e-06, "logits/chosen": -2.5662331581115723, "logits/rejected": -2.657677173614502, "logps/chosen": -78.68391418457031, "logps/rejected": -148.6790771484375, "loss": 0.1559, "rewards/accuracies": 1.0, "rewards/chosen": -1.778510332107544, "rewards/margins": 3.9730429649353027, "rewards/rejected": -5.751553535461426, "step": 7452 }, { "epoch": 1.16, "learning_rate": 8.681002126666321e-06, "logits/chosen": -2.7858986854553223, "logits/rejected": -2.591491460800171, "logps/chosen": -178.97181701660156, "logps/rejected": -336.71453857421875, "loss": 0.0909, "rewards/accuracies": 1.0, "rewards/chosen": -2.2781214714050293, "rewards/margins": 5.046097755432129, "rewards/rejected": -7.32421875, "step": 7453 }, { "epoch": 1.16, "learning_rate": 8.680268686135173e-06, "logits/chosen": -2.9600510597229004, "logits/rejected": -3.2079803943634033, "logps/chosen": -253.6875762939453, "logps/rejected": -469.93994140625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": 0.8088516592979431, "rewards/margins": 7.561552047729492, "rewards/rejected": -6.7527008056640625, "step": 7454 }, { "epoch": 1.16, "learning_rate": 8.679535245604025e-06, "logits/chosen": -2.5661303997039795, "logits/rejected": -2.468496322631836, "logps/chosen": -372.4660949707031, "logps/rejected": -491.26666259765625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.8871915340423584, "rewards/margins": 9.191727638244629, "rewards/rejected": -11.078919410705566, "step": 7455 }, { "epoch": 1.16, "learning_rate": 8.678801805072876e-06, "logits/chosen": -2.2131717205047607, "logits/rejected": -3.0096099376678467, "logps/chosen": -118.05424499511719, "logps/rejected": -266.5826721191406, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -3.1803977489471436, "rewards/margins": 5.220926284790039, "rewards/rejected": -8.401323318481445, "step": 7456 }, { "epoch": 1.16, "learning_rate": 8.678068364541728e-06, "logits/chosen": -3.096001148223877, "logits/rejected": -2.8018057346343994, "logps/chosen": -521.9146118164062, "logps/rejected": -493.52862548828125, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -1.868174433708191, "rewards/margins": 4.677768230438232, "rewards/rejected": -6.545942783355713, "step": 7457 }, { "epoch": 1.16, "learning_rate": 8.67733492401058e-06, "logits/chosen": -1.5896207094192505, "logits/rejected": -2.990283250808716, "logps/chosen": -58.75139617919922, "logps/rejected": -307.92572021484375, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -1.630098819732666, "rewards/margins": 6.270381927490234, "rewards/rejected": -7.900480270385742, "step": 7458 }, { "epoch": 1.16, "learning_rate": 8.676601483479434e-06, "logits/chosen": -2.769596576690674, "logits/rejected": -3.2757747173309326, "logps/chosen": -261.109130859375, "logps/rejected": -416.830322265625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 1.1742162704467773, "rewards/margins": 7.471592903137207, "rewards/rejected": -6.29737663269043, "step": 7459 }, { "epoch": 1.16, "learning_rate": 8.675868042948286e-06, "logits/chosen": -1.2577641010284424, "logits/rejected": -2.596738576889038, "logps/chosen": -113.14202880859375, "logps/rejected": -244.23892211914062, "loss": 0.5768, "rewards/accuracies": 0.5, "rewards/chosen": -2.8669803142547607, "rewards/margins": 5.310585975646973, "rewards/rejected": -8.177566528320312, "step": 7460 }, { "epoch": 1.16, "learning_rate": 8.675134602417138e-06, "logits/chosen": -1.9023797512054443, "logits/rejected": -3.0123085975646973, "logps/chosen": -135.10189819335938, "logps/rejected": -333.3436279296875, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.9896926879882812, "rewards/margins": 4.494035243988037, "rewards/rejected": -5.483727931976318, "step": 7461 }, { "epoch": 1.16, "learning_rate": 8.67440116188599e-06, "logits/chosen": -1.9595869779586792, "logits/rejected": -2.8338980674743652, "logps/chosen": -145.27743530273438, "logps/rejected": -381.05792236328125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.6548988819122314, "rewards/margins": 5.5491814613342285, "rewards/rejected": -7.204080581665039, "step": 7462 }, { "epoch": 1.16, "learning_rate": 8.673667721354843e-06, "logits/chosen": -2.7516775131225586, "logits/rejected": -2.9309136867523193, "logps/chosen": -87.14305114746094, "logps/rejected": -318.10540771484375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.715747833251953, "rewards/margins": 6.20739221572876, "rewards/rejected": -8.923139572143555, "step": 7463 }, { "epoch": 1.16, "learning_rate": 8.672934280823695e-06, "logits/chosen": -0.9060928225517273, "logits/rejected": -2.8033640384674072, "logps/chosen": -78.23091125488281, "logps/rejected": -339.4137268066406, "loss": 2.8, "rewards/accuracies": 0.5, "rewards/chosen": -5.867157936096191, "rewards/margins": 2.0354127883911133, "rewards/rejected": -7.902571201324463, "step": 7464 }, { "epoch": 1.16, "learning_rate": 8.672200840292547e-06, "logits/chosen": -2.605800151824951, "logits/rejected": -2.0434730052948, "logps/chosen": -216.92684936523438, "logps/rejected": -224.39651489257812, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/chosen": -4.171024799346924, "rewards/margins": 3.354811906814575, "rewards/rejected": -7.525836944580078, "step": 7465 }, { "epoch": 1.16, "learning_rate": 8.671467399761399e-06, "logits/chosen": -2.884443759918213, "logits/rejected": -3.058077335357666, "logps/chosen": -67.27715301513672, "logps/rejected": -136.55926513671875, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -2.626987934112549, "rewards/margins": 3.749774217605591, "rewards/rejected": -6.376762390136719, "step": 7466 }, { "epoch": 1.16, "learning_rate": 8.67073395923025e-06, "logits/chosen": -1.721030354499817, "logits/rejected": -2.70817232131958, "logps/chosen": -113.55976867675781, "logps/rejected": -430.96771240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1043713092803955, "rewards/margins": 10.36661434173584, "rewards/rejected": -11.470985412597656, "step": 7467 }, { "epoch": 1.16, "learning_rate": 8.670000518699104e-06, "logits/chosen": -1.3938547372817993, "logits/rejected": -1.7943270206451416, "logps/chosen": -446.4091796875, "logps/rejected": -482.88800048828125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4991874694824219, "rewards/margins": 6.700163841247559, "rewards/rejected": -8.19935131072998, "step": 7468 }, { "epoch": 1.16, "learning_rate": 8.669267078167956e-06, "logits/chosen": -2.578291177749634, "logits/rejected": -3.1421351432800293, "logps/chosen": -62.465816497802734, "logps/rejected": -168.0831298828125, "loss": 0.2279, "rewards/accuracies": 1.0, "rewards/chosen": -3.598480701446533, "rewards/margins": 2.9012980461120605, "rewards/rejected": -6.499778747558594, "step": 7469 }, { "epoch": 1.16, "learning_rate": 8.668533637636808e-06, "logits/chosen": -1.21675705909729, "logits/rejected": -2.7382938861846924, "logps/chosen": -119.67778015136719, "logps/rejected": -459.2042236328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.1292812824249268, "rewards/margins": 9.24824047088623, "rewards/rejected": -12.377521514892578, "step": 7470 }, { "epoch": 1.16, "learning_rate": 8.66780019710566e-06, "logits/chosen": -2.699988842010498, "logits/rejected": -1.387162685394287, "logps/chosen": -180.80148315429688, "logps/rejected": -94.95359802246094, "loss": 0.4905, "rewards/accuracies": 0.5, "rewards/chosen": -2.7759342193603516, "rewards/margins": 1.1687871217727661, "rewards/rejected": -3.944721221923828, "step": 7471 }, { "epoch": 1.16, "learning_rate": 8.667066756574512e-06, "logits/chosen": -1.5899765491485596, "logits/rejected": -2.87082576751709, "logps/chosen": -152.99560546875, "logps/rejected": -365.0020446777344, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.823995590209961, "rewards/margins": 7.7599077224731445, "rewards/rejected": -9.583903312683105, "step": 7472 }, { "epoch": 1.16, "learning_rate": 8.666333316043364e-06, "logits/chosen": -3.1631577014923096, "logits/rejected": -1.9238941669464111, "logps/chosen": -554.1216430664062, "logps/rejected": -206.52078247070312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 2.9034852981567383, "rewards/margins": 7.386266231536865, "rewards/rejected": -4.482781410217285, "step": 7473 }, { "epoch": 1.16, "learning_rate": 8.665599875512215e-06, "logits/chosen": -2.780153751373291, "logits/rejected": -2.0276312828063965, "logps/chosen": -193.4412384033203, "logps/rejected": -181.45144653320312, "loss": 0.7875, "rewards/accuracies": 0.5, "rewards/chosen": -3.9689126014709473, "rewards/margins": 1.9632885456085205, "rewards/rejected": -5.932201385498047, "step": 7474 }, { "epoch": 1.16, "learning_rate": 8.664866434981067e-06, "logits/chosen": -2.8410634994506836, "logits/rejected": -2.687816619873047, "logps/chosen": -239.94749450683594, "logps/rejected": -163.46380615234375, "loss": 0.2251, "rewards/accuracies": 1.0, "rewards/chosen": -1.7677751779556274, "rewards/margins": 4.2880706787109375, "rewards/rejected": -6.055846214294434, "step": 7475 }, { "epoch": 1.16, "learning_rate": 8.664132994449919e-06, "logits/chosen": -2.4307761192321777, "logits/rejected": -2.773580312728882, "logps/chosen": -119.73432922363281, "logps/rejected": -180.0065460205078, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": -2.9028985500335693, "rewards/margins": 4.208726406097412, "rewards/rejected": -7.111624717712402, "step": 7476 }, { "epoch": 1.16, "learning_rate": 8.663399553918773e-06, "logits/chosen": -1.3878648281097412, "logits/rejected": -2.7897891998291016, "logps/chosen": -110.1238784790039, "logps/rejected": -346.9508361816406, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.501084804534912, "rewards/margins": 9.283824920654297, "rewards/rejected": -11.784910202026367, "step": 7477 }, { "epoch": 1.16, "learning_rate": 8.662666113387625e-06, "logits/chosen": -1.8155438899993896, "logits/rejected": -2.6327764987945557, "logps/chosen": -136.4243621826172, "logps/rejected": -215.32403564453125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.013658046722412, "rewards/margins": 6.423706531524658, "rewards/rejected": -8.43736457824707, "step": 7478 }, { "epoch": 1.16, "learning_rate": 8.661932672856476e-06, "logits/chosen": -2.0189826488494873, "logits/rejected": -3.0314581394195557, "logps/chosen": -186.7752685546875, "logps/rejected": -453.8515930175781, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -1.4937336444854736, "rewards/margins": 5.493995189666748, "rewards/rejected": -6.987729072570801, "step": 7479 }, { "epoch": 1.16, "learning_rate": 8.661199232325328e-06, "logits/chosen": -2.5272059440612793, "logits/rejected": -2.9675588607788086, "logps/chosen": -139.42236328125, "logps/rejected": -336.90106201171875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.679877519607544, "rewards/margins": 5.165760040283203, "rewards/rejected": -6.845637321472168, "step": 7480 }, { "epoch": 1.16, "learning_rate": 8.66046579179418e-06, "logits/chosen": -2.651779890060425, "logits/rejected": -1.5209455490112305, "logps/chosen": -543.4019775390625, "logps/rejected": -265.8411865234375, "loss": 0.1122, "rewards/accuracies": 1.0, "rewards/chosen": -4.348667144775391, "rewards/margins": 3.4643638134002686, "rewards/rejected": -7.813031196594238, "step": 7481 }, { "epoch": 1.16, "learning_rate": 8.659732351263032e-06, "logits/chosen": -2.0220344066619873, "logits/rejected": -2.360959529876709, "logps/chosen": -107.88275909423828, "logps/rejected": -350.1439208984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.0972065925598145, "rewards/margins": 9.994647026062012, "rewards/rejected": -13.091854095458984, "step": 7482 }, { "epoch": 1.16, "learning_rate": 8.658998910731884e-06, "logits/chosen": -1.6042828559875488, "logits/rejected": -2.3653271198272705, "logps/chosen": -143.15252685546875, "logps/rejected": -376.26263427734375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.0501837730407715, "rewards/margins": 8.194770812988281, "rewards/rejected": -10.244955062866211, "step": 7483 }, { "epoch": 1.16, "learning_rate": 8.658265470200736e-06, "logits/chosen": -2.6500494480133057, "logits/rejected": -2.0764074325561523, "logps/chosen": -410.35723876953125, "logps/rejected": -331.6510009765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.746063232421875, "rewards/margins": 8.879249572753906, "rewards/rejected": -9.625312805175781, "step": 7484 }, { "epoch": 1.16, "learning_rate": 8.657532029669588e-06, "logits/chosen": -1.83066987991333, "logits/rejected": -3.057175397872925, "logps/chosen": -239.28567504882812, "logps/rejected": -388.40679931640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.3593063354492188, "rewards/margins": 8.727106094360352, "rewards/rejected": -11.08641242980957, "step": 7485 }, { "epoch": 1.16, "learning_rate": 8.656798589138441e-06, "logits/chosen": -1.024509310722351, "logits/rejected": -2.7225539684295654, "logps/chosen": -57.047035217285156, "logps/rejected": -370.73394775390625, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -2.7573373317718506, "rewards/margins": 5.784201622009277, "rewards/rejected": -8.541539192199707, "step": 7486 }, { "epoch": 1.16, "learning_rate": 8.656065148607293e-06, "logits/chosen": -2.207866907119751, "logits/rejected": -2.6346523761749268, "logps/chosen": -65.06822204589844, "logps/rejected": -180.94558715820312, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -3.0365424156188965, "rewards/margins": 5.41640567779541, "rewards/rejected": -8.452948570251465, "step": 7487 }, { "epoch": 1.16, "learning_rate": 8.655331708076145e-06, "logits/chosen": -2.732398271560669, "logits/rejected": -2.2658815383911133, "logps/chosen": -110.6763687133789, "logps/rejected": -237.85374450683594, "loss": 1.0432, "rewards/accuracies": 0.5, "rewards/chosen": -4.599596977233887, "rewards/margins": 5.174449920654297, "rewards/rejected": -9.774046897888184, "step": 7488 }, { "epoch": 1.16, "learning_rate": 8.654598267544997e-06, "logits/chosen": -2.5888559818267822, "logits/rejected": -2.8560702800750732, "logps/chosen": -401.8720397949219, "logps/rejected": -356.085205078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.8146088123321533, "rewards/margins": 7.4505085945129395, "rewards/rejected": -10.265117645263672, "step": 7489 }, { "epoch": 1.16, "learning_rate": 8.653864827013849e-06, "logits/chosen": -2.8055830001831055, "logits/rejected": -2.6030194759368896, "logps/chosen": -550.00244140625, "logps/rejected": -416.7998352050781, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.564122200012207, "rewards/margins": 5.600709438323975, "rewards/rejected": -9.164831161499023, "step": 7490 }, { "epoch": 1.17, "learning_rate": 8.6531313864827e-06, "logits/chosen": -2.686213731765747, "logits/rejected": -3.369974136352539, "logps/chosen": -169.6374053955078, "logps/rejected": -304.1102294921875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.219318389892578, "rewards/margins": 6.420763969421387, "rewards/rejected": -8.640083312988281, "step": 7491 }, { "epoch": 1.17, "learning_rate": 8.652397945951553e-06, "logits/chosen": -2.9111850261688232, "logits/rejected": -2.7433080673217773, "logps/chosen": -358.6812438964844, "logps/rejected": -510.3851318359375, "loss": 0.0801, "rewards/accuracies": 1.0, "rewards/chosen": -5.081045150756836, "rewards/margins": 5.071508407592773, "rewards/rejected": -10.152552604675293, "step": 7492 }, { "epoch": 1.17, "learning_rate": 8.651664505420404e-06, "logits/chosen": -2.5485241413116455, "logits/rejected": -2.7909533977508545, "logps/chosen": -611.6693115234375, "logps/rejected": -612.6369018554688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.6552765369415283, "rewards/margins": 8.481393814086914, "rewards/rejected": -10.136670112609863, "step": 7493 }, { "epoch": 1.17, "learning_rate": 8.650931064889256e-06, "logits/chosen": -2.3988518714904785, "logits/rejected": -2.77764630317688, "logps/chosen": -88.59869384765625, "logps/rejected": -331.35491943359375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.2530434131622314, "rewards/margins": 7.602337837219238, "rewards/rejected": -8.85538101196289, "step": 7494 }, { "epoch": 1.17, "learning_rate": 8.65019762435811e-06, "logits/chosen": -1.399557113647461, "logits/rejected": -2.5268309116363525, "logps/chosen": -164.53529357910156, "logps/rejected": -259.81695556640625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.6349828243255615, "rewards/margins": 5.367254257202148, "rewards/rejected": -7.002237319946289, "step": 7495 }, { "epoch": 1.17, "learning_rate": 8.649464183826962e-06, "logits/chosen": -2.8076834678649902, "logits/rejected": -2.656343460083008, "logps/chosen": -631.991943359375, "logps/rejected": -579.923828125, "loss": 1.3312, "rewards/accuracies": 0.5, "rewards/chosen": -4.72923469543457, "rewards/margins": 5.624046325683594, "rewards/rejected": -10.353281021118164, "step": 7496 }, { "epoch": 1.17, "learning_rate": 8.648730743295815e-06, "logits/chosen": -2.525606393814087, "logits/rejected": -2.867664098739624, "logps/chosen": -89.30072021484375, "logps/rejected": -297.40380859375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.687746047973633, "rewards/margins": 8.960872650146484, "rewards/rejected": -11.648618698120117, "step": 7497 }, { "epoch": 1.17, "learning_rate": 8.647997302764667e-06, "logits/chosen": -2.0851500034332275, "logits/rejected": -2.5821850299835205, "logps/chosen": -126.90022277832031, "logps/rejected": -222.9409942626953, "loss": 0.118, "rewards/accuracies": 1.0, "rewards/chosen": -3.482431173324585, "rewards/margins": 3.138380289077759, "rewards/rejected": -6.620811462402344, "step": 7498 }, { "epoch": 1.17, "learning_rate": 8.647263862233519e-06, "logits/chosen": -2.0332024097442627, "logits/rejected": -2.748988389968872, "logps/chosen": -358.96185302734375, "logps/rejected": -557.86962890625, "loss": 0.16, "rewards/accuracies": 1.0, "rewards/chosen": -3.102407932281494, "rewards/margins": 5.112216472625732, "rewards/rejected": -8.214624404907227, "step": 7499 }, { "epoch": 1.17, "learning_rate": 8.646530421702371e-06, "logits/chosen": -1.7791610956192017, "logits/rejected": -2.6778435707092285, "logps/chosen": -72.87381744384766, "logps/rejected": -235.84103393554688, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -3.892225503921509, "rewards/margins": 6.972733497619629, "rewards/rejected": -10.864959716796875, "step": 7500 }, { "epoch": 1.17, "learning_rate": 8.645796981171223e-06, "logits/chosen": -2.4231555461883545, "logits/rejected": -2.5813686847686768, "logps/chosen": -137.89002990722656, "logps/rejected": -452.7827453613281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8310417532920837, "rewards/margins": 11.310266494750977, "rewards/rejected": -12.141307830810547, "step": 7501 }, { "epoch": 1.17, "learning_rate": 8.645063540640075e-06, "logits/chosen": -2.7532236576080322, "logits/rejected": -2.931736469268799, "logps/chosen": -699.8880615234375, "logps/rejected": -687.9430541992188, "loss": 0.4274, "rewards/accuracies": 0.5, "rewards/chosen": -3.2205324172973633, "rewards/margins": 3.730163812637329, "rewards/rejected": -6.950695991516113, "step": 7502 }, { "epoch": 1.17, "learning_rate": 8.644330100108927e-06, "logits/chosen": -2.519458055496216, "logits/rejected": -2.909708023071289, "logps/chosen": -102.94097900390625, "logps/rejected": -135.54539489746094, "loss": 1.4054, "rewards/accuracies": 0.5, "rewards/chosen": -3.562624931335449, "rewards/margins": 2.5887041091918945, "rewards/rejected": -6.151329040527344, "step": 7503 }, { "epoch": 1.17, "learning_rate": 8.64359665957778e-06, "logits/chosen": -2.6453890800476074, "logits/rejected": -1.3914439678192139, "logps/chosen": -699.2865600585938, "logps/rejected": -307.09320068359375, "loss": 1.8989, "rewards/accuracies": 0.5, "rewards/chosen": -4.4825439453125, "rewards/margins": 0.5897564888000488, "rewards/rejected": -5.072300434112549, "step": 7504 }, { "epoch": 1.17, "learning_rate": 8.642863219046632e-06, "logits/chosen": -2.976727247238159, "logits/rejected": -2.5582692623138428, "logps/chosen": -147.21038818359375, "logps/rejected": -59.627342224121094, "loss": 0.983, "rewards/accuracies": 0.5, "rewards/chosen": -4.480549335479736, "rewards/margins": 0.044645071029663086, "rewards/rejected": -4.52519416809082, "step": 7505 }, { "epoch": 1.17, "learning_rate": 8.642129778515484e-06, "logits/chosen": -2.9066879749298096, "logits/rejected": -2.5097577571868896, "logps/chosen": -150.54513549804688, "logps/rejected": -109.41568756103516, "loss": 0.3243, "rewards/accuracies": 1.0, "rewards/chosen": -2.2238569259643555, "rewards/margins": 2.05129337310791, "rewards/rejected": -4.275150299072266, "step": 7506 }, { "epoch": 1.17, "learning_rate": 8.641396337984336e-06, "logits/chosen": -3.031174898147583, "logits/rejected": -3.1718502044677734, "logps/chosen": -238.87408447265625, "logps/rejected": -159.09268188476562, "loss": 0.205, "rewards/accuracies": 1.0, "rewards/chosen": -2.5347650051116943, "rewards/margins": 1.8479081392288208, "rewards/rejected": -4.382673263549805, "step": 7507 }, { "epoch": 1.17, "learning_rate": 8.640662897453188e-06, "logits/chosen": -2.449927568435669, "logits/rejected": -2.8333351612091064, "logps/chosen": -251.86009216308594, "logps/rejected": -268.8671875, "loss": 0.5684, "rewards/accuracies": 0.5, "rewards/chosen": -5.151185512542725, "rewards/margins": 0.3000659942626953, "rewards/rejected": -5.45125150680542, "step": 7508 }, { "epoch": 1.17, "learning_rate": 8.63992945692204e-06, "logits/chosen": -2.8931963443756104, "logits/rejected": -2.899732828140259, "logps/chosen": -181.76914978027344, "logps/rejected": -174.31430053710938, "loss": 1.329, "rewards/accuracies": 0.5, "rewards/chosen": -3.797457218170166, "rewards/margins": 1.5450822114944458, "rewards/rejected": -5.342539310455322, "step": 7509 }, { "epoch": 1.17, "learning_rate": 8.639196016390891e-06, "logits/chosen": -2.6887612342834473, "logits/rejected": -2.9246106147766113, "logps/chosen": -327.98016357421875, "logps/rejected": -420.6038513183594, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.19096791744232178, "rewards/margins": 7.017526626586914, "rewards/rejected": -7.208494186401367, "step": 7510 }, { "epoch": 1.17, "learning_rate": 8.638462575859743e-06, "logits/chosen": -1.571738839149475, "logits/rejected": -2.532109022140503, "logps/chosen": -635.7401733398438, "logps/rejected": -678.5174560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.079007625579834, "rewards/margins": 9.352840423583984, "rewards/rejected": -11.431848526000977, "step": 7511 }, { "epoch": 1.17, "learning_rate": 8.637729135328595e-06, "logits/chosen": -0.8733375072479248, "logits/rejected": -2.605417251586914, "logps/chosen": -156.14822387695312, "logps/rejected": -590.6548461914062, "loss": 1.825, "rewards/accuracies": 0.5, "rewards/chosen": -4.294943332672119, "rewards/margins": 0.8149067163467407, "rewards/rejected": -5.10984992980957, "step": 7512 }, { "epoch": 1.17, "learning_rate": 8.636995694797449e-06, "logits/chosen": -2.7276360988616943, "logits/rejected": -2.5910675525665283, "logps/chosen": -228.41400146484375, "logps/rejected": -340.9739685058594, "loss": 0.0849, "rewards/accuracies": 1.0, "rewards/chosen": -2.5209364891052246, "rewards/margins": 5.437117576599121, "rewards/rejected": -7.958054542541504, "step": 7513 }, { "epoch": 1.17, "learning_rate": 8.6362622542663e-06, "logits/chosen": -2.5120949745178223, "logits/rejected": -2.711853504180908, "logps/chosen": -296.9212951660156, "logps/rejected": -384.93878173828125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.0466445684432983, "rewards/margins": 5.989128112792969, "rewards/rejected": -7.035773277282715, "step": 7514 }, { "epoch": 1.17, "learning_rate": 8.635528813735153e-06, "logits/chosen": -2.7323343753814697, "logits/rejected": -2.927332878112793, "logps/chosen": -33.296600341796875, "logps/rejected": -225.2035675048828, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.7299143075942993, "rewards/margins": 5.959680557250977, "rewards/rejected": -6.6895952224731445, "step": 7515 }, { "epoch": 1.17, "learning_rate": 8.634795373204004e-06, "logits/chosen": -2.926913261413574, "logits/rejected": -2.2926864624023438, "logps/chosen": -333.1351013183594, "logps/rejected": -466.2401428222656, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -2.642042636871338, "rewards/margins": 7.745495796203613, "rewards/rejected": -10.387537956237793, "step": 7516 }, { "epoch": 1.17, "learning_rate": 8.634061932672856e-06, "logits/chosen": -0.7281890511512756, "logits/rejected": -2.7650747299194336, "logps/chosen": -247.62313842773438, "logps/rejected": -347.7582092285156, "loss": 0.2202, "rewards/accuracies": 1.0, "rewards/chosen": -2.9948172569274902, "rewards/margins": 5.036317825317383, "rewards/rejected": -8.031135559082031, "step": 7517 }, { "epoch": 1.17, "learning_rate": 8.633328492141708e-06, "logits/chosen": -1.840435266494751, "logits/rejected": -2.966010808944702, "logps/chosen": -154.40753173828125, "logps/rejected": -428.1425476074219, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.418891191482544, "rewards/margins": 5.490647315979004, "rewards/rejected": -6.909538269042969, "step": 7518 }, { "epoch": 1.17, "learning_rate": 8.63259505161056e-06, "logits/chosen": -2.7883071899414062, "logits/rejected": -2.976374626159668, "logps/chosen": -105.02609252929688, "logps/rejected": -199.31222534179688, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -0.8441962599754333, "rewards/margins": 5.689642429351807, "rewards/rejected": -6.533838748931885, "step": 7519 }, { "epoch": 1.17, "learning_rate": 8.631861611079412e-06, "logits/chosen": -2.7103147506713867, "logits/rejected": -2.1575798988342285, "logps/chosen": -152.2513427734375, "logps/rejected": -198.91928100585938, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -0.8062336444854736, "rewards/margins": 6.133823394775391, "rewards/rejected": -6.940057277679443, "step": 7520 }, { "epoch": 1.17, "learning_rate": 8.631128170548264e-06, "logits/chosen": -2.9871597290039062, "logits/rejected": -2.634585380554199, "logps/chosen": -90.0423583984375, "logps/rejected": -120.38086700439453, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.5733630657196045, "rewards/margins": 5.9319539070129395, "rewards/rejected": -7.505316734313965, "step": 7521 }, { "epoch": 1.17, "learning_rate": 8.630394730017117e-06, "logits/chosen": -2.9757583141326904, "logits/rejected": -3.007310152053833, "logps/chosen": -596.7860717773438, "logps/rejected": -383.29473876953125, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -2.0644333362579346, "rewards/margins": 5.054762363433838, "rewards/rejected": -7.119195938110352, "step": 7522 }, { "epoch": 1.17, "learning_rate": 8.62966128948597e-06, "logits/chosen": -2.8320858478546143, "logits/rejected": -2.3894524574279785, "logps/chosen": -432.79827880859375, "logps/rejected": -496.0246276855469, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5215297937393188, "rewards/margins": 4.845677375793457, "rewards/rejected": -6.3672075271606445, "step": 7523 }, { "epoch": 1.17, "learning_rate": 8.628927848954821e-06, "logits/chosen": -2.312469720840454, "logits/rejected": -3.0116286277770996, "logps/chosen": -126.34248352050781, "logps/rejected": -285.7987060546875, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -2.3104915618896484, "rewards/margins": 4.342895984649658, "rewards/rejected": -6.653387546539307, "step": 7524 }, { "epoch": 1.17, "learning_rate": 8.628194408423673e-06, "logits/chosen": -2.8007633686065674, "logits/rejected": -1.2186906337738037, "logps/chosen": -728.3823852539062, "logps/rejected": -253.75997924804688, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 1.51361083984375, "rewards/margins": 7.607098579406738, "rewards/rejected": -6.093487739562988, "step": 7525 }, { "epoch": 1.17, "learning_rate": 8.627460967892525e-06, "logits/chosen": -2.3628528118133545, "logits/rejected": -2.933886766433716, "logps/chosen": -60.900150299072266, "logps/rejected": -210.11093139648438, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.6471655368804932, "rewards/margins": 6.9548540115356445, "rewards/rejected": -7.602019309997559, "step": 7526 }, { "epoch": 1.17, "learning_rate": 8.626727527361377e-06, "logits/chosen": -2.112933397293091, "logits/rejected": -2.959592342376709, "logps/chosen": -123.97093963623047, "logps/rejected": -351.76922607421875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7712204456329346, "rewards/margins": 6.068351745605469, "rewards/rejected": -7.839571952819824, "step": 7527 }, { "epoch": 1.17, "learning_rate": 8.625994086830229e-06, "logits/chosen": -1.5627626180648804, "logits/rejected": -3.178187847137451, "logps/chosen": -163.1404266357422, "logps/rejected": -593.8265380859375, "loss": 0.1428, "rewards/accuracies": 1.0, "rewards/chosen": -4.12940788269043, "rewards/margins": 3.302055835723877, "rewards/rejected": -7.431463718414307, "step": 7528 }, { "epoch": 1.17, "learning_rate": 8.625260646299082e-06, "logits/chosen": -2.858428955078125, "logits/rejected": -1.8419063091278076, "logps/chosen": -459.8904724121094, "logps/rejected": -372.50494384765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.256049394607544, "rewards/margins": 7.387045860290527, "rewards/rejected": -8.643095016479492, "step": 7529 }, { "epoch": 1.17, "learning_rate": 8.624527205767934e-06, "logits/chosen": -2.1282715797424316, "logits/rejected": -2.750182628631592, "logps/chosen": -83.90956115722656, "logps/rejected": -241.9691162109375, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -3.884542942047119, "rewards/margins": 5.671753883361816, "rewards/rejected": -9.556296348571777, "step": 7530 }, { "epoch": 1.17, "learning_rate": 8.623793765236788e-06, "logits/chosen": -2.8875958919525146, "logits/rejected": -1.9496201276779175, "logps/chosen": -228.1711883544922, "logps/rejected": -51.36504364013672, "loss": 0.485, "rewards/accuracies": 0.5, "rewards/chosen": -2.201266050338745, "rewards/margins": 0.7786461710929871, "rewards/rejected": -2.979912281036377, "step": 7531 }, { "epoch": 1.17, "learning_rate": 8.62306032470564e-06, "logits/chosen": -2.7609775066375732, "logits/rejected": -3.174448013305664, "logps/chosen": -135.6011199951172, "logps/rejected": -225.1666259765625, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.4831489324569702, "rewards/margins": 4.538968086242676, "rewards/rejected": -6.022116661071777, "step": 7532 }, { "epoch": 1.17, "learning_rate": 8.622326884174491e-06, "logits/chosen": -2.9886300563812256, "logits/rejected": -2.151758909225464, "logps/chosen": -659.371826171875, "logps/rejected": -413.2725524902344, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.5722213387489319, "rewards/margins": 5.961175918579102, "rewards/rejected": -6.5333967208862305, "step": 7533 }, { "epoch": 1.17, "learning_rate": 8.621593443643343e-06, "logits/chosen": -0.49615052342414856, "logits/rejected": -2.6460914611816406, "logps/chosen": -92.62437438964844, "logps/rejected": -576.9149169921875, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -2.034299850463867, "rewards/margins": 6.179880142211914, "rewards/rejected": -8.214179992675781, "step": 7534 }, { "epoch": 1.17, "learning_rate": 8.620860003112195e-06, "logits/chosen": -2.6785590648651123, "logits/rejected": -2.6063520908355713, "logps/chosen": -272.53289794921875, "logps/rejected": -262.40386962890625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.3269367218017578, "rewards/margins": 6.3748087882995605, "rewards/rejected": -7.701745510101318, "step": 7535 }, { "epoch": 1.17, "learning_rate": 8.620126562581047e-06, "logits/chosen": -2.966536283493042, "logits/rejected": -2.3473281860351562, "logps/chosen": -499.45062255859375, "logps/rejected": -328.4322204589844, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.4383363723754883, "rewards/margins": 6.397483825683594, "rewards/rejected": -8.835820198059082, "step": 7536 }, { "epoch": 1.17, "learning_rate": 8.619393122049899e-06, "logits/chosen": -2.255171775817871, "logits/rejected": -2.564035415649414, "logps/chosen": -173.6099853515625, "logps/rejected": -145.19371032714844, "loss": 0.3152, "rewards/accuracies": 1.0, "rewards/chosen": -2.8989744186401367, "rewards/margins": 2.946115493774414, "rewards/rejected": -5.845089912414551, "step": 7537 }, { "epoch": 1.17, "learning_rate": 8.618659681518751e-06, "logits/chosen": -2.905228853225708, "logits/rejected": -2.908780336380005, "logps/chosen": -395.054931640625, "logps/rejected": -386.3187255859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.9520301818847656, "rewards/margins": 6.121358394622803, "rewards/rejected": -8.073389053344727, "step": 7538 }, { "epoch": 1.17, "learning_rate": 8.617926240987603e-06, "logits/chosen": -2.2880585193634033, "logits/rejected": -2.9141385555267334, "logps/chosen": -94.46577453613281, "logps/rejected": -277.79351806640625, "loss": 0.2574, "rewards/accuracies": 1.0, "rewards/chosen": -2.522702693939209, "rewards/margins": 3.3499064445495605, "rewards/rejected": -5.8726091384887695, "step": 7539 }, { "epoch": 1.17, "learning_rate": 8.617192800456456e-06, "logits/chosen": -1.9508543014526367, "logits/rejected": -2.942122459411621, "logps/chosen": -39.81016540527344, "logps/rejected": -254.1614990234375, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -2.353998899459839, "rewards/margins": 5.73618221282959, "rewards/rejected": -8.090181350708008, "step": 7540 }, { "epoch": 1.17, "learning_rate": 8.616459359925308e-06, "logits/chosen": -2.7478809356689453, "logits/rejected": -3.088913679122925, "logps/chosen": -116.89093017578125, "logps/rejected": -310.6678466796875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.5101215243339539, "rewards/margins": 6.890842437744141, "rewards/rejected": -7.400964260101318, "step": 7541 }, { "epoch": 1.17, "learning_rate": 8.61572591939416e-06, "logits/chosen": -2.202831506729126, "logits/rejected": -3.2657182216644287, "logps/chosen": -632.5277709960938, "logps/rejected": -715.64599609375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.0319595336914062, "rewards/margins": 5.805665493011475, "rewards/rejected": -6.837625026702881, "step": 7542 }, { "epoch": 1.17, "learning_rate": 8.614992478863012e-06, "logits/chosen": -2.0480682849884033, "logits/rejected": -2.4939801692962646, "logps/chosen": -232.9595184326172, "logps/rejected": -250.74078369140625, "loss": 0.5734, "rewards/accuracies": 0.5, "rewards/chosen": -2.85028076171875, "rewards/margins": 5.296091556549072, "rewards/rejected": -8.146371841430664, "step": 7543 }, { "epoch": 1.17, "learning_rate": 8.614259038331864e-06, "logits/chosen": -1.413582444190979, "logits/rejected": -2.776155948638916, "logps/chosen": -138.24024963378906, "logps/rejected": -752.0585327148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5404984951019287, "rewards/margins": 12.421709060668945, "rewards/rejected": -15.962207794189453, "step": 7544 }, { "epoch": 1.17, "learning_rate": 8.613525597800716e-06, "logits/chosen": -1.7199664115905762, "logits/rejected": -2.431360960006714, "logps/chosen": -104.45711517333984, "logps/rejected": -241.87606811523438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7899349927902222, "rewards/margins": 9.003305435180664, "rewards/rejected": -10.79323959350586, "step": 7545 }, { "epoch": 1.17, "learning_rate": 8.612792157269568e-06, "logits/chosen": -2.1400935649871826, "logits/rejected": -2.9337148666381836, "logps/chosen": -213.29513549804688, "logps/rejected": -378.9254150390625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.4644325971603394, "rewards/margins": 5.913580417633057, "rewards/rejected": -7.378012657165527, "step": 7546 }, { "epoch": 1.17, "learning_rate": 8.61205871673842e-06, "logits/chosen": -1.9879095554351807, "logits/rejected": -2.780362129211426, "logps/chosen": -86.82865142822266, "logps/rejected": -377.8258056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5696826577186584, "rewards/margins": 10.386466979980469, "rewards/rejected": -10.95615005493164, "step": 7547 }, { "epoch": 1.17, "learning_rate": 8.611325276207273e-06, "logits/chosen": -1.1577109098434448, "logits/rejected": -1.3001645803451538, "logps/chosen": -365.68359375, "logps/rejected": -451.76470947265625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.319354295730591, "rewards/margins": 7.8246235847473145, "rewards/rejected": -10.143978118896484, "step": 7548 }, { "epoch": 1.17, "learning_rate": 8.610591835676125e-06, "logits/chosen": -2.663022518157959, "logits/rejected": -3.2241573333740234, "logps/chosen": -217.3463134765625, "logps/rejected": -437.4126892089844, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -0.905964732170105, "rewards/margins": 6.352534294128418, "rewards/rejected": -7.2584991455078125, "step": 7549 }, { "epoch": 1.17, "learning_rate": 8.609858395144977e-06, "logits/chosen": -1.7482960224151611, "logits/rejected": -2.770986318588257, "logps/chosen": -115.08877563476562, "logps/rejected": -407.64715576171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.4968814849853516, "rewards/margins": 9.617572784423828, "rewards/rejected": -11.11445426940918, "step": 7550 }, { "epoch": 1.17, "learning_rate": 8.609124954613829e-06, "logits/chosen": -2.8077330589294434, "logits/rejected": -2.570341110229492, "logps/chosen": -159.93850708007812, "logps/rejected": -107.62579345703125, "loss": 1.49, "rewards/accuracies": 0.5, "rewards/chosen": -4.191562652587891, "rewards/margins": 1.7923861742019653, "rewards/rejected": -5.983948707580566, "step": 7551 }, { "epoch": 1.17, "learning_rate": 8.60839151408268e-06, "logits/chosen": -2.3457064628601074, "logits/rejected": -2.6692020893096924, "logps/chosen": -627.7861328125, "logps/rejected": -591.8175048828125, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -3.194270372390747, "rewards/margins": 4.028175354003906, "rewards/rejected": -7.222445964813232, "step": 7552 }, { "epoch": 1.17, "learning_rate": 8.607658073551532e-06, "logits/chosen": -2.0831456184387207, "logits/rejected": -2.765836000442505, "logps/chosen": -283.5479736328125, "logps/rejected": -415.5460205078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.4511849880218506, "rewards/margins": 7.228169918060303, "rewards/rejected": -9.679354667663574, "step": 7553 }, { "epoch": 1.17, "learning_rate": 8.606924633020384e-06, "logits/chosen": -2.0046935081481934, "logits/rejected": -3.1230978965759277, "logps/chosen": -132.1864471435547, "logps/rejected": -318.053955078125, "loss": 1.2458, "rewards/accuracies": 0.5, "rewards/chosen": -3.9246647357940674, "rewards/margins": 2.787797451019287, "rewards/rejected": -6.712461948394775, "step": 7554 }, { "epoch": 1.17, "learning_rate": 8.606191192489236e-06, "logits/chosen": -2.7670276165008545, "logits/rejected": -2.8887462615966797, "logps/chosen": -187.18972778320312, "logps/rejected": -244.66236877441406, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -2.763986349105835, "rewards/margins": 5.463059425354004, "rewards/rejected": -8.227046012878418, "step": 7555 }, { "epoch": 1.18, "learning_rate": 8.605457751958088e-06, "logits/chosen": -3.3718132972717285, "logits/rejected": -3.3395612239837646, "logps/chosen": -96.0383529663086, "logps/rejected": -119.71854400634766, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": -2.3841958045959473, "rewards/margins": 5.655803203582764, "rewards/rejected": -8.039999008178711, "step": 7556 }, { "epoch": 1.18, "learning_rate": 8.604724311426942e-06, "logits/chosen": -3.0610127449035645, "logits/rejected": -1.9199786186218262, "logps/chosen": -242.51158142089844, "logps/rejected": -183.70208740234375, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.465767741203308, "rewards/margins": 4.863171100616455, "rewards/rejected": -6.3289384841918945, "step": 7557 }, { "epoch": 1.18, "learning_rate": 8.603990870895793e-06, "logits/chosen": -2.1891024112701416, "logits/rejected": -1.5845310688018799, "logps/chosen": -1816.4749755859375, "logps/rejected": -288.6999816894531, "loss": 0.5624, "rewards/accuracies": 1.0, "rewards/chosen": -8.193341255187988, "rewards/margins": 0.2901277542114258, "rewards/rejected": -8.483469009399414, "step": 7558 }, { "epoch": 1.18, "learning_rate": 8.603257430364645e-06, "logits/chosen": -2.9932122230529785, "logits/rejected": -3.174530029296875, "logps/chosen": -92.69132995605469, "logps/rejected": -177.13641357421875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.5916919708251953, "rewards/margins": 6.285614490509033, "rewards/rejected": -7.877306938171387, "step": 7559 }, { "epoch": 1.18, "learning_rate": 8.602523989833497e-06, "logits/chosen": -2.930079936981201, "logits/rejected": -2.9737231731414795, "logps/chosen": -214.4749755859375, "logps/rejected": -172.85760498046875, "loss": 0.2325, "rewards/accuracies": 1.0, "rewards/chosen": -1.8082329034805298, "rewards/margins": 3.6934823989868164, "rewards/rejected": -5.501715183258057, "step": 7560 }, { "epoch": 1.18, "learning_rate": 8.601790549302349e-06, "logits/chosen": -2.0321450233459473, "logits/rejected": -2.7065491676330566, "logps/chosen": -158.4659423828125, "logps/rejected": -288.14715576171875, "loss": 0.2991, "rewards/accuracies": 1.0, "rewards/chosen": -3.8865270614624023, "rewards/margins": 2.1690378189086914, "rewards/rejected": -6.055564880371094, "step": 7561 }, { "epoch": 1.18, "learning_rate": 8.601057108771201e-06, "logits/chosen": -2.5630245208740234, "logits/rejected": -2.8136606216430664, "logps/chosen": -87.88138580322266, "logps/rejected": -190.96160888671875, "loss": 0.1268, "rewards/accuracies": 1.0, "rewards/chosen": -2.3136539459228516, "rewards/margins": 3.38120174407959, "rewards/rejected": -5.694855690002441, "step": 7562 }, { "epoch": 1.18, "learning_rate": 8.600323668240055e-06, "logits/chosen": -1.7912133932113647, "logits/rejected": -2.9104020595550537, "logps/chosen": -96.18718719482422, "logps/rejected": -322.44903564453125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.4738214015960693, "rewards/margins": 7.4799089431762695, "rewards/rejected": -9.953730583190918, "step": 7563 }, { "epoch": 1.18, "learning_rate": 8.599590227708906e-06, "logits/chosen": -2.194260597229004, "logits/rejected": -2.963507890701294, "logps/chosen": -197.16490173339844, "logps/rejected": -388.21337890625, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -2.8009679317474365, "rewards/margins": 7.149347305297852, "rewards/rejected": -9.950315475463867, "step": 7564 }, { "epoch": 1.18, "learning_rate": 8.598856787177758e-06, "logits/chosen": -1.9709769487380981, "logits/rejected": -2.73874568939209, "logps/chosen": -214.00502014160156, "logps/rejected": -317.5448303222656, "loss": 0.5643, "rewards/accuracies": 0.5, "rewards/chosen": -3.6230359077453613, "rewards/margins": 4.857497692108154, "rewards/rejected": -8.480533599853516, "step": 7565 }, { "epoch": 1.18, "learning_rate": 8.598123346646612e-06, "logits/chosen": -1.371230125427246, "logits/rejected": -3.033661127090454, "logps/chosen": -85.27043151855469, "logps/rejected": -406.607666015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3518834114074707, "rewards/margins": 8.355628967285156, "rewards/rejected": -9.707511901855469, "step": 7566 }, { "epoch": 1.18, "learning_rate": 8.597389906115464e-06, "logits/chosen": -2.721759557723999, "logits/rejected": -1.9590469598770142, "logps/chosen": -230.56488037109375, "logps/rejected": -156.08517456054688, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -1.8870147466659546, "rewards/margins": 4.163027286529541, "rewards/rejected": -6.050042152404785, "step": 7567 }, { "epoch": 1.18, "learning_rate": 8.596656465584316e-06, "logits/chosen": -2.1715199947357178, "logits/rejected": -2.7153773307800293, "logps/chosen": -314.2328186035156, "logps/rejected": -445.8603210449219, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -1.879724144935608, "rewards/margins": 5.32148551940918, "rewards/rejected": -7.20120906829834, "step": 7568 }, { "epoch": 1.18, "learning_rate": 8.595923025053168e-06, "logits/chosen": -2.8000199794769287, "logits/rejected": -1.9400012493133545, "logps/chosen": -255.15658569335938, "logps/rejected": -223.90676879882812, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": -1.1717182397842407, "rewards/margins": 3.5504462718963623, "rewards/rejected": -4.722164630889893, "step": 7569 }, { "epoch": 1.18, "learning_rate": 8.59518958452202e-06, "logits/chosen": -2.175898551940918, "logits/rejected": -2.986720085144043, "logps/chosen": -112.97464752197266, "logps/rejected": -248.46864318847656, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.4358103275299072, "rewards/margins": 4.458337306976318, "rewards/rejected": -6.894147872924805, "step": 7570 }, { "epoch": 1.18, "learning_rate": 8.594456143990871e-06, "logits/chosen": -2.552860736846924, "logits/rejected": -1.7761040925979614, "logps/chosen": -650.92138671875, "logps/rejected": -459.83074951171875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.9002318382263184, "rewards/margins": 7.420586109161377, "rewards/rejected": -10.320817947387695, "step": 7571 }, { "epoch": 1.18, "learning_rate": 8.593722703459723e-06, "logits/chosen": -2.547123432159424, "logits/rejected": -2.7200844287872314, "logps/chosen": -259.41815185546875, "logps/rejected": -447.3291015625, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -2.6217896938323975, "rewards/margins": 7.157005310058594, "rewards/rejected": -9.77879524230957, "step": 7572 }, { "epoch": 1.18, "learning_rate": 8.592989262928575e-06, "logits/chosen": -2.2213101387023926, "logits/rejected": -2.9992411136627197, "logps/chosen": -59.81561279296875, "logps/rejected": -235.48583984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.5726112127304077, "rewards/margins": 7.834680080413818, "rewards/rejected": -9.407291412353516, "step": 7573 }, { "epoch": 1.18, "learning_rate": 8.592255822397427e-06, "logits/chosen": -2.9778099060058594, "logits/rejected": -2.6166186332702637, "logps/chosen": -131.0400848388672, "logps/rejected": -120.12591552734375, "loss": 0.873, "rewards/accuracies": 0.5, "rewards/chosen": -3.655137062072754, "rewards/margins": 2.5975916385650635, "rewards/rejected": -6.252728462219238, "step": 7574 }, { "epoch": 1.18, "learning_rate": 8.59152238186628e-06, "logits/chosen": -2.5917413234710693, "logits/rejected": -1.7423326969146729, "logps/chosen": -445.2724609375, "logps/rejected": -326.30810546875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.243338108062744, "rewards/margins": 6.439109802246094, "rewards/rejected": -9.68244743347168, "step": 7575 }, { "epoch": 1.18, "learning_rate": 8.590788941335132e-06, "logits/chosen": -2.6601333618164062, "logits/rejected": -2.9815876483917236, "logps/chosen": -246.63671875, "logps/rejected": -273.71661376953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.48351746797561646, "rewards/margins": 8.424684524536133, "rewards/rejected": -7.94116735458374, "step": 7576 }, { "epoch": 1.18, "learning_rate": 8.590055500803984e-06, "logits/chosen": -3.158623456954956, "logits/rejected": -3.253329277038574, "logps/chosen": -76.91778564453125, "logps/rejected": -166.71693420410156, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.9096955060958862, "rewards/margins": 5.812614917755127, "rewards/rejected": -6.722310543060303, "step": 7577 }, { "epoch": 1.18, "learning_rate": 8.589322060272836e-06, "logits/chosen": -2.322704315185547, "logits/rejected": -3.1685893535614014, "logps/chosen": -98.27061462402344, "logps/rejected": -230.47377014160156, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -3.23826003074646, "rewards/margins": 3.8184797763824463, "rewards/rejected": -7.056739807128906, "step": 7578 }, { "epoch": 1.18, "learning_rate": 8.588588619741688e-06, "logits/chosen": -2.7675535678863525, "logits/rejected": -1.60581636428833, "logps/chosen": -356.9595947265625, "logps/rejected": -373.43939208984375, "loss": 0.0772, "rewards/accuracies": 1.0, "rewards/chosen": -2.809804916381836, "rewards/margins": 2.694767475128174, "rewards/rejected": -5.50457239151001, "step": 7579 }, { "epoch": 1.18, "learning_rate": 8.58785517921054e-06, "logits/chosen": -2.9945685863494873, "logits/rejected": -2.7819976806640625, "logps/chosen": -264.52459716796875, "logps/rejected": -228.32212829589844, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.9723068475723267, "rewards/margins": 7.026706218719482, "rewards/rejected": -8.99901294708252, "step": 7580 }, { "epoch": 1.18, "learning_rate": 8.587121738679392e-06, "logits/chosen": -2.2672085762023926, "logits/rejected": -2.900573968887329, "logps/chosen": -210.51736450195312, "logps/rejected": -250.12539672851562, "loss": 0.2059, "rewards/accuracies": 1.0, "rewards/chosen": -1.5273735523223877, "rewards/margins": 2.9176340103149414, "rewards/rejected": -4.44500732421875, "step": 7581 }, { "epoch": 1.18, "learning_rate": 8.586388298148244e-06, "logits/chosen": -2.348353862762451, "logits/rejected": -2.8230443000793457, "logps/chosen": -534.6575927734375, "logps/rejected": -330.1829833984375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.218763828277588, "rewards/margins": 6.804473876953125, "rewards/rejected": -9.023237228393555, "step": 7582 }, { "epoch": 1.18, "learning_rate": 8.585654857617096e-06, "logits/chosen": -2.6032633781433105, "logits/rejected": -2.7748191356658936, "logps/chosen": -134.64141845703125, "logps/rejected": -141.9465789794922, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.732968807220459, "rewards/margins": 5.174194812774658, "rewards/rejected": -7.907163619995117, "step": 7583 }, { "epoch": 1.18, "learning_rate": 8.584921417085949e-06, "logits/chosen": -2.4217119216918945, "logits/rejected": -2.9561192989349365, "logps/chosen": -367.4826965332031, "logps/rejected": -460.0260314941406, "loss": 1.6616, "rewards/accuracies": 0.5, "rewards/chosen": -6.078349590301514, "rewards/margins": 4.544246673583984, "rewards/rejected": -10.622596740722656, "step": 7584 }, { "epoch": 1.18, "learning_rate": 8.584187976554801e-06, "logits/chosen": -2.45855712890625, "logits/rejected": -2.8742308616638184, "logps/chosen": -69.4664535522461, "logps/rejected": -155.61029052734375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -1.271254301071167, "rewards/margins": 6.415372371673584, "rewards/rejected": -7.686626434326172, "step": 7585 }, { "epoch": 1.18, "learning_rate": 8.583454536023653e-06, "logits/chosen": -2.6268510818481445, "logits/rejected": -3.0023655891418457, "logps/chosen": -1232.554931640625, "logps/rejected": -1036.266845703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.144738793373108, "rewards/margins": 8.676446914672852, "rewards/rejected": -9.821186065673828, "step": 7586 }, { "epoch": 1.18, "learning_rate": 8.582721095492505e-06, "logits/chosen": -2.0255661010742188, "logits/rejected": -2.909987211227417, "logps/chosen": -57.64227294921875, "logps/rejected": -222.62588500976562, "loss": 1.3523, "rewards/accuracies": 0.5, "rewards/chosen": -4.241765975952148, "rewards/margins": 1.5019108057022095, "rewards/rejected": -5.743676662445068, "step": 7587 }, { "epoch": 1.18, "learning_rate": 8.581987654961357e-06, "logits/chosen": -2.4225668907165527, "logits/rejected": -2.826951742172241, "logps/chosen": -207.2635955810547, "logps/rejected": -484.7777099609375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -0.4520217776298523, "rewards/margins": 7.681275844573975, "rewards/rejected": -8.13329792022705, "step": 7588 }, { "epoch": 1.18, "learning_rate": 8.581254214430208e-06, "logits/chosen": -2.7710769176483154, "logits/rejected": -2.694882869720459, "logps/chosen": -85.0248031616211, "logps/rejected": -177.39059448242188, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": -0.39942896366119385, "rewards/margins": 5.752244472503662, "rewards/rejected": -6.151673316955566, "step": 7589 }, { "epoch": 1.18, "learning_rate": 8.58052077389906e-06, "logits/chosen": -2.353196144104004, "logits/rejected": -2.9728469848632812, "logps/chosen": -210.18417358398438, "logps/rejected": -474.290771484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.5991463661193848, "rewards/margins": 9.736715316772461, "rewards/rejected": -11.335861206054688, "step": 7590 }, { "epoch": 1.18, "learning_rate": 8.579787333367912e-06, "logits/chosen": -2.8359134197235107, "logits/rejected": -2.97670578956604, "logps/chosen": -147.57955932617188, "logps/rejected": -266.3623046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.8002500534057617, "rewards/margins": 6.661197185516357, "rewards/rejected": -9.461446762084961, "step": 7591 }, { "epoch": 1.18, "learning_rate": 8.579053892836764e-06, "logits/chosen": -2.158256769180298, "logits/rejected": -2.894706964492798, "logps/chosen": -188.65875244140625, "logps/rejected": -493.3301696777344, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.5182159543037415, "rewards/margins": 9.366263389587402, "rewards/rejected": -9.884479522705078, "step": 7592 }, { "epoch": 1.18, "learning_rate": 8.578320452305618e-06, "logits/chosen": -2.434725761413574, "logits/rejected": -2.749764919281006, "logps/chosen": -177.2421875, "logps/rejected": -322.8156433105469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8868278861045837, "rewards/margins": 7.987236976623535, "rewards/rejected": -8.874064445495605, "step": 7593 }, { "epoch": 1.18, "learning_rate": 8.57758701177447e-06, "logits/chosen": -2.3710367679595947, "logits/rejected": -2.6902356147766113, "logps/chosen": -58.45475387573242, "logps/rejected": -220.08456420898438, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.9829463958740234, "rewards/margins": 5.528736591339111, "rewards/rejected": -7.511683464050293, "step": 7594 }, { "epoch": 1.18, "learning_rate": 8.576853571243321e-06, "logits/chosen": -2.337634563446045, "logits/rejected": -2.700657606124878, "logps/chosen": -192.4451904296875, "logps/rejected": -143.92562866210938, "loss": 2.7131, "rewards/accuracies": 0.5, "rewards/chosen": -4.640815734863281, "rewards/margins": 0.6582052707672119, "rewards/rejected": -5.299021244049072, "step": 7595 }, { "epoch": 1.18, "learning_rate": 8.576120130712173e-06, "logits/chosen": -2.8186047077178955, "logits/rejected": -3.0231246948242188, "logps/chosen": -52.42259979248047, "logps/rejected": -215.75576782226562, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -1.3405519723892212, "rewards/margins": 5.162311553955078, "rewards/rejected": -6.50286340713501, "step": 7596 }, { "epoch": 1.18, "learning_rate": 8.575386690181025e-06, "logits/chosen": -2.5035617351531982, "logits/rejected": -2.783869743347168, "logps/chosen": -637.955810546875, "logps/rejected": -658.374267578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.9280495643615723, "rewards/margins": 7.020164489746094, "rewards/rejected": -9.948214530944824, "step": 7597 }, { "epoch": 1.18, "learning_rate": 8.574653249649879e-06, "logits/chosen": -1.6854480504989624, "logits/rejected": -2.396819829940796, "logps/chosen": -111.63934326171875, "logps/rejected": -213.32818603515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.6710010766983032, "rewards/margins": 6.829311847686768, "rewards/rejected": -8.500312805175781, "step": 7598 }, { "epoch": 1.18, "learning_rate": 8.57391980911873e-06, "logits/chosen": -2.6827826499938965, "logits/rejected": -3.1427531242370605, "logps/chosen": -756.5052490234375, "logps/rejected": -727.3291625976562, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.6860694885253906, "rewards/margins": 6.361495018005371, "rewards/rejected": -8.047564506530762, "step": 7599 }, { "epoch": 1.18, "learning_rate": 8.573186368587583e-06, "logits/chosen": -1.745689868927002, "logits/rejected": -2.9068689346313477, "logps/chosen": -347.1991882324219, "logps/rejected": -619.3018798828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.636171817779541, "rewards/margins": 12.480609893798828, "rewards/rejected": -15.116781234741211, "step": 7600 }, { "epoch": 1.18, "learning_rate": 8.572452928056434e-06, "logits/chosen": -2.0355212688446045, "logits/rejected": -2.6639323234558105, "logps/chosen": -153.6667022705078, "logps/rejected": -216.04458618164062, "loss": 1.447, "rewards/accuracies": 0.5, "rewards/chosen": -6.0770673751831055, "rewards/margins": 0.3600494861602783, "rewards/rejected": -6.437116622924805, "step": 7601 }, { "epoch": 1.18, "learning_rate": 8.571719487525288e-06, "logits/chosen": -2.8104543685913086, "logits/rejected": -2.755409002304077, "logps/chosen": -109.50613403320312, "logps/rejected": -121.64946746826172, "loss": 0.8333, "rewards/accuracies": 0.5, "rewards/chosen": -3.3062548637390137, "rewards/margins": 1.188244104385376, "rewards/rejected": -4.494499206542969, "step": 7602 }, { "epoch": 1.18, "learning_rate": 8.57098604699414e-06, "logits/chosen": -2.8010048866271973, "logits/rejected": -2.0003912448883057, "logps/chosen": -285.94677734375, "logps/rejected": -356.81439208984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.360240936279297, "rewards/margins": 8.04880428314209, "rewards/rejected": -10.409046173095703, "step": 7603 }, { "epoch": 1.18, "learning_rate": 8.570252606462992e-06, "logits/chosen": -2.2350914478302, "logits/rejected": -2.8943374156951904, "logps/chosen": -382.52325439453125, "logps/rejected": -657.4314575195312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0082626342773438, "rewards/margins": 10.333897590637207, "rewards/rejected": -11.34216022491455, "step": 7604 }, { "epoch": 1.18, "learning_rate": 8.569519165931844e-06, "logits/chosen": -1.837511420249939, "logits/rejected": -2.4793295860290527, "logps/chosen": -154.0321807861328, "logps/rejected": -366.4530029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0400673151016235, "rewards/margins": 9.154513359069824, "rewards/rejected": -10.194581031799316, "step": 7605 }, { "epoch": 1.18, "learning_rate": 8.568785725400696e-06, "logits/chosen": -2.576266050338745, "logits/rejected": -2.185471296310425, "logps/chosen": -164.7880859375, "logps/rejected": -238.8385467529297, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.862532138824463, "rewards/margins": 6.093735218048096, "rewards/rejected": -8.956267356872559, "step": 7606 }, { "epoch": 1.18, "learning_rate": 8.568052284869547e-06, "logits/chosen": -2.5060954093933105, "logits/rejected": -2.787558078765869, "logps/chosen": -260.5362243652344, "logps/rejected": -324.734375, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -2.6453399658203125, "rewards/margins": 4.464909553527832, "rewards/rejected": -7.1102495193481445, "step": 7607 }, { "epoch": 1.18, "learning_rate": 8.5673188443384e-06, "logits/chosen": -1.528188705444336, "logits/rejected": -2.8659558296203613, "logps/chosen": -107.0956802368164, "logps/rejected": -561.1958618164062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6237080097198486, "rewards/margins": 8.704960823059082, "rewards/rejected": -10.328668594360352, "step": 7608 }, { "epoch": 1.18, "learning_rate": 8.566585403807251e-06, "logits/chosen": -2.8388307094573975, "logits/rejected": -2.7821037769317627, "logps/chosen": -147.81341552734375, "logps/rejected": -299.231689453125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.8767342567443848, "rewards/margins": 6.7460856437683105, "rewards/rejected": -8.622819900512695, "step": 7609 }, { "epoch": 1.18, "learning_rate": 8.565851963276103e-06, "logits/chosen": -2.9835705757141113, "logits/rejected": -2.9903676509857178, "logps/chosen": -759.1624145507812, "logps/rejected": -840.9808349609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.5511231422424316, "rewards/margins": 7.981790065765381, "rewards/rejected": -10.532913208007812, "step": 7610 }, { "epoch": 1.18, "learning_rate": 8.565118522744957e-06, "logits/chosen": -3.061795473098755, "logits/rejected": -3.2091450691223145, "logps/chosen": -401.0621337890625, "logps/rejected": -367.27471923828125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.698485851287842, "rewards/margins": 4.706996440887451, "rewards/rejected": -7.405482292175293, "step": 7611 }, { "epoch": 1.18, "learning_rate": 8.564385082213808e-06, "logits/chosen": -2.6670913696289062, "logits/rejected": -2.6265757083892822, "logps/chosen": -266.5169677734375, "logps/rejected": -412.28875732421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.3490493297576904, "rewards/margins": 8.205374717712402, "rewards/rejected": -10.554424285888672, "step": 7612 }, { "epoch": 1.18, "learning_rate": 8.56365164168266e-06, "logits/chosen": -2.5177738666534424, "logits/rejected": -2.7632012367248535, "logps/chosen": -208.5386962890625, "logps/rejected": -232.00999450683594, "loss": 1.1003, "rewards/accuracies": 0.5, "rewards/chosen": -4.664988994598389, "rewards/margins": 2.4436709880828857, "rewards/rejected": -7.1086602210998535, "step": 7613 }, { "epoch": 1.18, "learning_rate": 8.562918201151512e-06, "logits/chosen": -3.119950532913208, "logits/rejected": -2.1983416080474854, "logps/chosen": -300.5038146972656, "logps/rejected": -156.90872192382812, "loss": 4.3706, "rewards/accuracies": 0.0, "rewards/chosen": -6.782183647155762, "rewards/margins": -4.35346794128418, "rewards/rejected": -2.428715944290161, "step": 7614 }, { "epoch": 1.18, "learning_rate": 8.562184760620364e-06, "logits/chosen": -1.8177536725997925, "logits/rejected": -2.9065589904785156, "logps/chosen": -109.75552368164062, "logps/rejected": -354.62652587890625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.0468506813049316, "rewards/margins": 5.782667636871338, "rewards/rejected": -6.8295183181762695, "step": 7615 }, { "epoch": 1.18, "learning_rate": 8.561451320089216e-06, "logits/chosen": -1.1071486473083496, "logits/rejected": -2.5070085525512695, "logps/chosen": -68.13984680175781, "logps/rejected": -444.45611572265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.8745243549346924, "rewards/margins": 8.84506607055664, "rewards/rejected": -10.71959114074707, "step": 7616 }, { "epoch": 1.18, "learning_rate": 8.560717879558068e-06, "logits/chosen": -2.296182870864868, "logits/rejected": -2.086879253387451, "logps/chosen": -439.4566345214844, "logps/rejected": -555.6492309570312, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -4.844890594482422, "rewards/margins": 5.457151412963867, "rewards/rejected": -10.302042007446289, "step": 7617 }, { "epoch": 1.18, "learning_rate": 8.55998443902692e-06, "logits/chosen": -1.9532771110534668, "logits/rejected": -2.7620556354522705, "logps/chosen": -94.98347473144531, "logps/rejected": -187.4268798828125, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": -4.167196750640869, "rewards/margins": 2.82743501663208, "rewards/rejected": -6.994631767272949, "step": 7618 }, { "epoch": 1.18, "learning_rate": 8.559250998495772e-06, "logits/chosen": -2.3983702659606934, "logits/rejected": -2.6147449016571045, "logps/chosen": -360.7121276855469, "logps/rejected": -512.7554931640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5832877159118652, "rewards/margins": 8.275745391845703, "rewards/rejected": -9.859033584594727, "step": 7619 }, { "epoch": 1.19, "learning_rate": 8.558517557964625e-06, "logits/chosen": -2.080601930618286, "logits/rejected": -2.7234513759613037, "logps/chosen": -264.98663330078125, "logps/rejected": -663.4544067382812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.8754830360412598, "rewards/margins": 8.404559135437012, "rewards/rejected": -11.280041694641113, "step": 7620 }, { "epoch": 1.19, "learning_rate": 8.557784117433477e-06, "logits/chosen": -1.7431280612945557, "logits/rejected": -2.7405669689178467, "logps/chosen": -117.66990661621094, "logps/rejected": -365.64117431640625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": 0.012572482228279114, "rewards/margins": 9.647024154663086, "rewards/rejected": -9.634451866149902, "step": 7621 }, { "epoch": 1.19, "learning_rate": 8.557050676902329e-06, "logits/chosen": -2.062251329421997, "logits/rejected": -2.764650821685791, "logps/chosen": -168.88302612304688, "logps/rejected": -378.3310546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.304751992225647, "rewards/margins": 9.85040283203125, "rewards/rejected": -11.155155181884766, "step": 7622 }, { "epoch": 1.19, "learning_rate": 8.55631723637118e-06, "logits/chosen": -2.6859941482543945, "logits/rejected": -2.8230297565460205, "logps/chosen": -219.3634033203125, "logps/rejected": -314.47021484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.6489853858947754, "rewards/margins": 6.711033821105957, "rewards/rejected": -9.36001968383789, "step": 7623 }, { "epoch": 1.19, "learning_rate": 8.555583795840033e-06, "logits/chosen": -0.954543948173523, "logits/rejected": -2.1682262420654297, "logps/chosen": -94.70652770996094, "logps/rejected": -192.17092895507812, "loss": 0.559, "rewards/accuracies": 0.5, "rewards/chosen": -4.1703948974609375, "rewards/margins": 4.303842544555664, "rewards/rejected": -8.474237442016602, "step": 7624 }, { "epoch": 1.19, "learning_rate": 8.554850355308885e-06, "logits/chosen": -1.692488670349121, "logits/rejected": -2.5457632541656494, "logps/chosen": -167.18360900878906, "logps/rejected": -360.4779968261719, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0601539611816406, "rewards/margins": 7.586856842041016, "rewards/rejected": -8.647010803222656, "step": 7625 }, { "epoch": 1.19, "learning_rate": 8.554116914777736e-06, "logits/chosen": -2.258908987045288, "logits/rejected": -2.709674835205078, "logps/chosen": -88.67023468017578, "logps/rejected": -232.7001953125, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.009697675704956, "rewards/margins": 7.246594429016113, "rewards/rejected": -9.256292343139648, "step": 7626 }, { "epoch": 1.19, "learning_rate": 8.553383474246588e-06, "logits/chosen": -2.516908645629883, "logits/rejected": -3.0531980991363525, "logps/chosen": -58.24746322631836, "logps/rejected": -373.85107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.83238685131073, "rewards/margins": 9.515410423278809, "rewards/rejected": -10.347797393798828, "step": 7627 }, { "epoch": 1.19, "learning_rate": 8.55265003371544e-06, "logits/chosen": -2.7584986686706543, "logits/rejected": -3.05794358253479, "logps/chosen": -97.2392578125, "logps/rejected": -221.39271545410156, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.198784351348877, "rewards/margins": 5.594545364379883, "rewards/rejected": -7.793329238891602, "step": 7628 }, { "epoch": 1.19, "learning_rate": 8.551916593184294e-06, "logits/chosen": -2.6689932346343994, "logits/rejected": -2.5019690990448, "logps/chosen": -131.11962890625, "logps/rejected": -144.4612579345703, "loss": 1.6793, "rewards/accuracies": 0.5, "rewards/chosen": -4.8311262130737305, "rewards/margins": -0.14349853992462158, "rewards/rejected": -4.687627792358398, "step": 7629 }, { "epoch": 1.19, "learning_rate": 8.551183152653146e-06, "logits/chosen": -2.95208477973938, "logits/rejected": -2.231238842010498, "logps/chosen": -424.6728210449219, "logps/rejected": -264.4477844238281, "loss": 0.0613, "rewards/accuracies": 1.0, "rewards/chosen": -3.3163697719573975, "rewards/margins": 3.5858330726623535, "rewards/rejected": -6.902202606201172, "step": 7630 }, { "epoch": 1.19, "learning_rate": 8.550449712121998e-06, "logits/chosen": -2.4761202335357666, "logits/rejected": -3.1422436237335205, "logps/chosen": -150.83692932128906, "logps/rejected": -278.14764404296875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.394108772277832, "rewards/margins": 5.6536712646484375, "rewards/rejected": -7.0477800369262695, "step": 7631 }, { "epoch": 1.19, "learning_rate": 8.549716271590851e-06, "logits/chosen": -2.7769644260406494, "logits/rejected": -2.6614491939544678, "logps/chosen": -54.90216064453125, "logps/rejected": -147.3955078125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.2572164535522461, "rewards/margins": 6.188427925109863, "rewards/rejected": -6.445644378662109, "step": 7632 }, { "epoch": 1.19, "learning_rate": 8.548982831059703e-06, "logits/chosen": -2.6658518314361572, "logits/rejected": -2.6644630432128906, "logps/chosen": -625.3118286132812, "logps/rejected": -615.8177490234375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.459857225418091, "rewards/margins": 7.340798854827881, "rewards/rejected": -9.80065631866455, "step": 7633 }, { "epoch": 1.19, "learning_rate": 8.548249390528555e-06, "logits/chosen": -2.9212594032287598, "logits/rejected": -2.101526975631714, "logps/chosen": -232.89385986328125, "logps/rejected": -104.50347137451172, "loss": 1.3869, "rewards/accuracies": 0.5, "rewards/chosen": -3.389302968978882, "rewards/margins": -0.9602406024932861, "rewards/rejected": -2.4290623664855957, "step": 7634 }, { "epoch": 1.19, "learning_rate": 8.547515949997407e-06, "logits/chosen": -2.087674617767334, "logits/rejected": -2.8341078758239746, "logps/chosen": -125.95931243896484, "logps/rejected": -336.84356689453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.624160051345825, "rewards/margins": 8.263381004333496, "rewards/rejected": -10.887540817260742, "step": 7635 }, { "epoch": 1.19, "learning_rate": 8.546782509466259e-06, "logits/chosen": -2.596576690673828, "logits/rejected": -2.620211362838745, "logps/chosen": -142.12066650390625, "logps/rejected": -179.37933349609375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.2649898529052734, "rewards/margins": 6.073015213012695, "rewards/rejected": -7.338005065917969, "step": 7636 }, { "epoch": 1.19, "learning_rate": 8.54604906893511e-06, "logits/chosen": -2.6755993366241455, "logits/rejected": -2.4779558181762695, "logps/chosen": -215.37884521484375, "logps/rejected": -305.7030944824219, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.6584551334381104, "rewards/margins": 6.266641139984131, "rewards/rejected": -7.92509651184082, "step": 7637 }, { "epoch": 1.19, "learning_rate": 8.545315628403964e-06, "logits/chosen": -2.31672739982605, "logits/rejected": -2.525803327560425, "logps/chosen": -186.989990234375, "logps/rejected": -211.3907470703125, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -2.7144854068756104, "rewards/margins": 7.425111770629883, "rewards/rejected": -10.139596939086914, "step": 7638 }, { "epoch": 1.19, "learning_rate": 8.544582187872816e-06, "logits/chosen": -2.4796254634857178, "logits/rejected": -2.681529998779297, "logps/chosen": -286.92987060546875, "logps/rejected": -364.1651611328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.6638085842132568, "rewards/margins": 9.631101608276367, "rewards/rejected": -8.967292785644531, "step": 7639 }, { "epoch": 1.19, "learning_rate": 8.543848747341668e-06, "logits/chosen": -3.116262435913086, "logits/rejected": -3.1183693408966064, "logps/chosen": -156.23342895507812, "logps/rejected": -229.09564208984375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.16506001353263855, "rewards/margins": 6.351435661315918, "rewards/rejected": -6.516495704650879, "step": 7640 }, { "epoch": 1.19, "learning_rate": 8.54311530681052e-06, "logits/chosen": -2.4290239810943604, "logits/rejected": -2.7528533935546875, "logps/chosen": -146.92601013183594, "logps/rejected": -254.27706909179688, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3063931465148926, "rewards/margins": 5.261287212371826, "rewards/rejected": -6.567680358886719, "step": 7641 }, { "epoch": 1.19, "learning_rate": 8.542381866279372e-06, "logits/chosen": -2.1235463619232178, "logits/rejected": -2.5524206161499023, "logps/chosen": -46.54679870605469, "logps/rejected": -145.54905700683594, "loss": 0.1145, "rewards/accuracies": 1.0, "rewards/chosen": -2.9687252044677734, "rewards/margins": 3.099165678024292, "rewards/rejected": -6.0678911209106445, "step": 7642 }, { "epoch": 1.19, "learning_rate": 8.541648425748223e-06, "logits/chosen": -1.8636159896850586, "logits/rejected": -2.8088276386260986, "logps/chosen": -176.97854614257812, "logps/rejected": -472.6016845703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.416055291891098, "rewards/margins": 8.777265548706055, "rewards/rejected": -9.193321228027344, "step": 7643 }, { "epoch": 1.19, "learning_rate": 8.540914985217075e-06, "logits/chosen": -2.4390807151794434, "logits/rejected": -2.308021306991577, "logps/chosen": -426.0445556640625, "logps/rejected": -509.8470153808594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.926124632358551, "rewards/margins": 12.692298889160156, "rewards/rejected": -11.76617431640625, "step": 7644 }, { "epoch": 1.19, "learning_rate": 8.540181544685927e-06, "logits/chosen": -1.3977571725845337, "logits/rejected": -2.827505588531494, "logps/chosen": -219.22750854492188, "logps/rejected": -502.5375061035156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.839001476764679, "rewards/margins": 8.687774658203125, "rewards/rejected": -9.526776313781738, "step": 7645 }, { "epoch": 1.19, "learning_rate": 8.53944810415478e-06, "logits/chosen": -2.4003334045410156, "logits/rejected": -2.7984583377838135, "logps/chosen": -489.58587646484375, "logps/rejected": -476.5754699707031, "loss": 0.8524, "rewards/accuracies": 0.5, "rewards/chosen": -4.303441047668457, "rewards/margins": 0.6701202392578125, "rewards/rejected": -4.9735612869262695, "step": 7646 }, { "epoch": 1.19, "learning_rate": 8.538714663623633e-06, "logits/chosen": -2.1152896881103516, "logits/rejected": -3.059603691101074, "logps/chosen": -203.64816284179688, "logps/rejected": -459.9478759765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.4560158252716064, "rewards/margins": 7.1704792976379395, "rewards/rejected": -9.626495361328125, "step": 7647 }, { "epoch": 1.19, "learning_rate": 8.537981223092485e-06, "logits/chosen": -2.9019105434417725, "logits/rejected": -1.7637823820114136, "logps/chosen": -411.6195373535156, "logps/rejected": -236.3330535888672, "loss": 0.0661, "rewards/accuracies": 1.0, "rewards/chosen": -0.8062652945518494, "rewards/margins": 3.512911319732666, "rewards/rejected": -4.31917667388916, "step": 7648 }, { "epoch": 1.19, "learning_rate": 8.537247782561336e-06, "logits/chosen": -2.5154616832733154, "logits/rejected": -2.5932095050811768, "logps/chosen": -422.4324035644531, "logps/rejected": -181.90554809570312, "loss": 0.0348, "rewards/accuracies": 1.0, "rewards/chosen": -0.991254448890686, "rewards/margins": 4.473695755004883, "rewards/rejected": -5.464950084686279, "step": 7649 }, { "epoch": 1.19, "learning_rate": 8.536514342030188e-06, "logits/chosen": -2.470048666000366, "logits/rejected": -2.8473620414733887, "logps/chosen": -130.49717712402344, "logps/rejected": -300.1007080078125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.364506244659424, "rewards/margins": 8.443170547485352, "rewards/rejected": -11.807676315307617, "step": 7650 }, { "epoch": 1.19, "learning_rate": 8.53578090149904e-06, "logits/chosen": -2.791407585144043, "logits/rejected": -2.822019577026367, "logps/chosen": -232.68508911132812, "logps/rejected": -271.66375732421875, "loss": 0.2841, "rewards/accuracies": 1.0, "rewards/chosen": -1.9419074058532715, "rewards/margins": 1.2414453029632568, "rewards/rejected": -3.1833527088165283, "step": 7651 }, { "epoch": 1.19, "learning_rate": 8.535047460967892e-06, "logits/chosen": -2.472628116607666, "logits/rejected": -2.219916820526123, "logps/chosen": -119.31471252441406, "logps/rejected": -216.2283172607422, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.519555926322937, "rewards/margins": 6.158669948577881, "rewards/rejected": -7.678225517272949, "step": 7652 }, { "epoch": 1.19, "learning_rate": 8.534314020436744e-06, "logits/chosen": -2.578355312347412, "logits/rejected": -3.040698289871216, "logps/chosen": -85.49066162109375, "logps/rejected": -258.51458740234375, "loss": 1.183, "rewards/accuracies": 0.5, "rewards/chosen": -2.3648738861083984, "rewards/margins": 2.7087950706481934, "rewards/rejected": -5.073668956756592, "step": 7653 }, { "epoch": 1.19, "learning_rate": 8.533580579905596e-06, "logits/chosen": -2.253920793533325, "logits/rejected": -2.8784399032592773, "logps/chosen": -524.8189697265625, "logps/rejected": -502.388427734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.003244400024414, "rewards/margins": 9.347236633300781, "rewards/rejected": -13.350481033325195, "step": 7654 }, { "epoch": 1.19, "learning_rate": 8.53284713937445e-06, "logits/chosen": -1.6407287120819092, "logits/rejected": -2.7818832397460938, "logps/chosen": -128.78338623046875, "logps/rejected": -380.0542297363281, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.6104888916015625, "rewards/margins": 8.954157829284668, "rewards/rejected": -11.564647674560547, "step": 7655 }, { "epoch": 1.19, "learning_rate": 8.532113698843301e-06, "logits/chosen": -2.5630135536193848, "logits/rejected": -2.2813541889190674, "logps/chosen": -189.31338500976562, "logps/rejected": -245.01443481445312, "loss": 0.7176, "rewards/accuracies": 0.5, "rewards/chosen": -2.7519378662109375, "rewards/margins": 2.3308770656585693, "rewards/rejected": -5.082815170288086, "step": 7656 }, { "epoch": 1.19, "learning_rate": 8.531380258312153e-06, "logits/chosen": -2.56868577003479, "logits/rejected": -2.537559747695923, "logps/chosen": -236.6878662109375, "logps/rejected": -260.5620422363281, "loss": 0.0923, "rewards/accuracies": 1.0, "rewards/chosen": -2.6714768409729004, "rewards/margins": 2.9289231300354004, "rewards/rejected": -5.600399971008301, "step": 7657 }, { "epoch": 1.19, "learning_rate": 8.530646817781005e-06, "logits/chosen": -2.688361883163452, "logits/rejected": -2.1091866493225098, "logps/chosen": -270.04864501953125, "logps/rejected": -274.9585266113281, "loss": 0.8799, "rewards/accuracies": 0.5, "rewards/chosen": -2.225149154663086, "rewards/margins": 4.1320672035217285, "rewards/rejected": -6.3572163581848145, "step": 7658 }, { "epoch": 1.19, "learning_rate": 8.529913377249857e-06, "logits/chosen": -2.648315191268921, "logits/rejected": -3.0095298290252686, "logps/chosen": -251.86497497558594, "logps/rejected": -430.2529602050781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1145126819610596, "rewards/margins": 8.226652145385742, "rewards/rejected": -9.341165542602539, "step": 7659 }, { "epoch": 1.19, "learning_rate": 8.529179936718709e-06, "logits/chosen": -2.66085147857666, "logits/rejected": -3.1135196685791016, "logps/chosen": -77.01113891601562, "logps/rejected": -303.69696044921875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.3964191675186157, "rewards/margins": 5.389542102813721, "rewards/rejected": -6.785961151123047, "step": 7660 }, { "epoch": 1.19, "learning_rate": 8.52844649618756e-06, "logits/chosen": -2.284775495529175, "logits/rejected": -2.829157590866089, "logps/chosen": -76.911376953125, "logps/rejected": -346.46514892578125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.3508161306381226, "rewards/margins": 6.515953063964844, "rewards/rejected": -7.866769790649414, "step": 7661 }, { "epoch": 1.19, "learning_rate": 8.527713055656413e-06, "logits/chosen": -2.9457993507385254, "logits/rejected": -2.2135748863220215, "logps/chosen": -211.43154907226562, "logps/rejected": -128.5959014892578, "loss": 1.5948, "rewards/accuracies": 0.5, "rewards/chosen": -2.6213431358337402, "rewards/margins": -0.19809412956237793, "rewards/rejected": -2.4232490062713623, "step": 7662 }, { "epoch": 1.19, "learning_rate": 8.526979615125264e-06, "logits/chosen": -1.7158899307250977, "logits/rejected": -2.914020538330078, "logps/chosen": -218.7793731689453, "logps/rejected": -539.7572631835938, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.1950135231018066, "rewards/margins": 7.539549350738525, "rewards/rejected": -9.734562873840332, "step": 7663 }, { "epoch": 1.19, "learning_rate": 8.526246174594118e-06, "logits/chosen": -2.6764702796936035, "logits/rejected": -1.7409093379974365, "logps/chosen": -448.7275390625, "logps/rejected": -280.0018005371094, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.3654723167419434, "rewards/margins": 6.2546820640563965, "rewards/rejected": -8.62015438079834, "step": 7664 }, { "epoch": 1.19, "learning_rate": 8.52551273406297e-06, "logits/chosen": -2.5425941944122314, "logits/rejected": -2.787052869796753, "logps/chosen": -59.304019927978516, "logps/rejected": -255.5268096923828, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.3149657249450684, "rewards/margins": 8.710311889648438, "rewards/rejected": -11.025278091430664, "step": 7665 }, { "epoch": 1.19, "learning_rate": 8.524779293531823e-06, "logits/chosen": -3.0566494464874268, "logits/rejected": -3.4111478328704834, "logps/chosen": -83.56051635742188, "logps/rejected": -150.07618713378906, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": -1.7086007595062256, "rewards/margins": 2.767127513885498, "rewards/rejected": -4.4757280349731445, "step": 7666 }, { "epoch": 1.19, "learning_rate": 8.524045853000675e-06, "logits/chosen": -1.6680930852890015, "logits/rejected": -2.7507619857788086, "logps/chosen": -317.87603759765625, "logps/rejected": -552.3746337890625, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.7125000953674316, "rewards/margins": 7.463458061218262, "rewards/rejected": -9.175958633422852, "step": 7667 }, { "epoch": 1.19, "learning_rate": 8.523312412469527e-06, "logits/chosen": -2.449955463409424, "logits/rejected": -2.836869239807129, "logps/chosen": -91.19882202148438, "logps/rejected": -252.87734985351562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.5556108951568604, "rewards/margins": 6.747528076171875, "rewards/rejected": -9.303138732910156, "step": 7668 }, { "epoch": 1.19, "learning_rate": 8.522578971938379e-06, "logits/chosen": -2.6578221321105957, "logits/rejected": -3.158954620361328, "logps/chosen": -501.35638427734375, "logps/rejected": -521.57763671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.8636573553085327, "rewards/margins": 8.214821815490723, "rewards/rejected": -10.078479766845703, "step": 7669 }, { "epoch": 1.19, "learning_rate": 8.521845531407231e-06, "logits/chosen": -0.6388453245162964, "logits/rejected": -1.6107006072998047, "logps/chosen": -207.26199340820312, "logps/rejected": -503.95562744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0351624488830566, "rewards/margins": 10.235536575317383, "rewards/rejected": -12.270698547363281, "step": 7670 }, { "epoch": 1.19, "learning_rate": 8.521112090876083e-06, "logits/chosen": -1.930133581161499, "logits/rejected": -2.79524302482605, "logps/chosen": -188.58407592773438, "logps/rejected": -439.97271728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.993951678276062, "rewards/margins": 12.118910789489746, "rewards/rejected": -14.112861633300781, "step": 7671 }, { "epoch": 1.19, "learning_rate": 8.520378650344935e-06, "logits/chosen": -2.0956568717956543, "logits/rejected": -2.965428113937378, "logps/chosen": -147.9382781982422, "logps/rejected": -386.3528137207031, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.6423168182373047, "rewards/margins": 7.129860877990723, "rewards/rejected": -7.772177696228027, "step": 7672 }, { "epoch": 1.19, "learning_rate": 8.519645209813788e-06, "logits/chosen": -2.5550503730773926, "logits/rejected": -2.775070905685425, "logps/chosen": -73.51825714111328, "logps/rejected": -292.33929443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.708045482635498, "rewards/margins": 11.378604888916016, "rewards/rejected": -12.086649894714355, "step": 7673 }, { "epoch": 1.19, "learning_rate": 8.51891176928264e-06, "logits/chosen": -3.086846113204956, "logits/rejected": -2.834420919418335, "logps/chosen": -98.96815490722656, "logps/rejected": -171.50999450683594, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -0.5803230404853821, "rewards/margins": 8.700023651123047, "rewards/rejected": -9.280345916748047, "step": 7674 }, { "epoch": 1.19, "learning_rate": 8.518178328751492e-06, "logits/chosen": -1.4384605884552002, "logits/rejected": -2.751915693283081, "logps/chosen": -57.43415069580078, "logps/rejected": -179.14112854003906, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.9714338779449463, "rewards/margins": 4.490527629852295, "rewards/rejected": -7.46196174621582, "step": 7675 }, { "epoch": 1.19, "learning_rate": 8.517444888220344e-06, "logits/chosen": -1.70453941822052, "logits/rejected": -2.421337127685547, "logps/chosen": -210.86940002441406, "logps/rejected": -360.04510498046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.3928794860839844, "rewards/margins": 6.3835015296936035, "rewards/rejected": -9.77638053894043, "step": 7676 }, { "epoch": 1.19, "learning_rate": 8.516711447689196e-06, "logits/chosen": -2.8530657291412354, "logits/rejected": -2.4770193099975586, "logps/chosen": -639.3317260742188, "logps/rejected": -449.5211181640625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.647692322731018, "rewards/margins": 7.60666036605835, "rewards/rejected": -9.254352569580078, "step": 7677 }, { "epoch": 1.19, "learning_rate": 8.515978007158048e-06, "logits/chosen": -2.8238186836242676, "logits/rejected": -2.9018197059631348, "logps/chosen": -295.20489501953125, "logps/rejected": -334.50408935546875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.3229713439941406, "rewards/margins": 6.2469916343688965, "rewards/rejected": -9.569963455200195, "step": 7678 }, { "epoch": 1.19, "learning_rate": 8.5152445666269e-06, "logits/chosen": -2.873778820037842, "logits/rejected": -3.1220107078552246, "logps/chosen": -715.8726806640625, "logps/rejected": -786.24072265625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.639206886291504, "rewards/margins": 6.806240081787109, "rewards/rejected": -9.445446968078613, "step": 7679 }, { "epoch": 1.19, "learning_rate": 8.514511126095751e-06, "logits/chosen": -1.1484544277191162, "logits/rejected": -2.2523789405822754, "logps/chosen": -243.566162109375, "logps/rejected": -381.15087890625, "loss": 0.3722, "rewards/accuracies": 0.5, "rewards/chosen": -2.2468247413635254, "rewards/margins": 5.581165313720703, "rewards/rejected": -7.8279900550842285, "step": 7680 }, { "epoch": 1.19, "learning_rate": 8.513777685564603e-06, "logits/chosen": -2.796875, "logits/rejected": -2.661222219467163, "logps/chosen": -309.8671875, "logps/rejected": -185.89846801757812, "loss": 0.9557, "rewards/accuracies": 0.5, "rewards/chosen": -1.9680976867675781, "rewards/margins": 2.9026217460632324, "rewards/rejected": -4.8707194328308105, "step": 7681 }, { "epoch": 1.19, "learning_rate": 8.513044245033457e-06, "logits/chosen": -2.3286967277526855, "logits/rejected": -2.8338963985443115, "logps/chosen": -113.93843078613281, "logps/rejected": -264.484619140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.7972373962402344, "rewards/margins": 8.10149097442627, "rewards/rejected": -9.898728370666504, "step": 7682 }, { "epoch": 1.19, "learning_rate": 8.512310804502309e-06, "logits/chosen": -1.1368587017059326, "logits/rejected": -2.7342469692230225, "logps/chosen": -300.24542236328125, "logps/rejected": -557.11181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3145065307617188, "rewards/margins": 8.854695320129395, "rewards/rejected": -11.169201850891113, "step": 7683 }, { "epoch": 1.2, "learning_rate": 8.51157736397116e-06, "logits/chosen": -1.2214150428771973, "logits/rejected": -2.8072452545166016, "logps/chosen": -133.9766082763672, "logps/rejected": -476.7789306640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.6342098712921143, "rewards/margins": 7.584338188171387, "rewards/rejected": -10.218548774719238, "step": 7684 }, { "epoch": 1.2, "learning_rate": 8.510843923440013e-06, "logits/chosen": -2.369743585586548, "logits/rejected": -2.7320218086242676, "logps/chosen": -161.81077575683594, "logps/rejected": -273.82177734375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.7376856803894043, "rewards/margins": 6.3974504470825195, "rewards/rejected": -8.135135650634766, "step": 7685 }, { "epoch": 1.2, "learning_rate": 8.510110482908864e-06, "logits/chosen": -2.8359458446502686, "logits/rejected": -2.677337884902954, "logps/chosen": -91.8134994506836, "logps/rejected": -103.97404479980469, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.2230327129364014, "rewards/margins": 4.992350101470947, "rewards/rejected": -7.2153825759887695, "step": 7686 }, { "epoch": 1.2, "learning_rate": 8.509377042377716e-06, "logits/chosen": -2.74835205078125, "logits/rejected": -2.2730610370635986, "logps/chosen": -158.0587615966797, "logps/rejected": -148.50543212890625, "loss": 1.2059, "rewards/accuracies": 0.5, "rewards/chosen": -5.491515636444092, "rewards/margins": 1.601623773574829, "rewards/rejected": -7.0931396484375, "step": 7687 }, { "epoch": 1.2, "learning_rate": 8.508643601846568e-06, "logits/chosen": -2.4575607776641846, "logits/rejected": -3.107396125793457, "logps/chosen": -304.077392578125, "logps/rejected": -585.4384765625, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -2.9264578819274902, "rewards/margins": 3.8087334632873535, "rewards/rejected": -6.735191345214844, "step": 7688 }, { "epoch": 1.2, "learning_rate": 8.50791016131542e-06, "logits/chosen": -2.529733419418335, "logits/rejected": -2.928241014480591, "logps/chosen": -305.31414794921875, "logps/rejected": -327.4261474609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8174185752868652, "rewards/margins": 7.00925350189209, "rewards/rejected": -8.826672554016113, "step": 7689 }, { "epoch": 1.2, "learning_rate": 8.507176720784272e-06, "logits/chosen": -2.914557456970215, "logits/rejected": -2.1303060054779053, "logps/chosen": -421.1372375488281, "logps/rejected": -283.11370849609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.816222667694092, "rewards/margins": 7.102406978607178, "rewards/rejected": -9.91862964630127, "step": 7690 }, { "epoch": 1.2, "learning_rate": 8.506443280253125e-06, "logits/chosen": -1.4927237033843994, "logits/rejected": -2.8272318840026855, "logps/chosen": -170.09803771972656, "logps/rejected": -366.3663330078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.985464096069336, "rewards/margins": 7.414271831512451, "rewards/rejected": -9.399736404418945, "step": 7691 }, { "epoch": 1.2, "learning_rate": 8.505709839721977e-06, "logits/chosen": -1.871815800666809, "logits/rejected": -3.0502424240112305, "logps/chosen": -104.19770812988281, "logps/rejected": -360.3385925292969, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.9346773624420166, "rewards/margins": 6.134472846984863, "rewards/rejected": -8.069150924682617, "step": 7692 }, { "epoch": 1.2, "learning_rate": 8.50497639919083e-06, "logits/chosen": -2.8642351627349854, "logits/rejected": -2.8901822566986084, "logps/chosen": -101.29263305664062, "logps/rejected": -287.7160339355469, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -2.4364471435546875, "rewards/margins": 5.288354396820068, "rewards/rejected": -7.724801063537598, "step": 7693 }, { "epoch": 1.2, "learning_rate": 8.504242958659681e-06, "logits/chosen": -2.64599609375, "logits/rejected": -1.7931679487228394, "logps/chosen": -302.5377197265625, "logps/rejected": -295.00421142578125, "loss": 0.9078, "rewards/accuracies": 0.5, "rewards/chosen": -2.377673387527466, "rewards/margins": 3.683164358139038, "rewards/rejected": -6.060837268829346, "step": 7694 }, { "epoch": 1.2, "learning_rate": 8.503509518128533e-06, "logits/chosen": -2.4570820331573486, "logits/rejected": -2.800964117050171, "logps/chosen": -227.21665954589844, "logps/rejected": -341.03216552734375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.786232829093933, "rewards/margins": 6.0554633140563965, "rewards/rejected": -7.841696262359619, "step": 7695 }, { "epoch": 1.2, "learning_rate": 8.502776077597385e-06, "logits/chosen": -2.9165115356445312, "logits/rejected": -1.9981659650802612, "logps/chosen": -872.521240234375, "logps/rejected": -396.1971130371094, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.8060462474822998, "rewards/margins": 6.126983642578125, "rewards/rejected": -7.933030128479004, "step": 7696 }, { "epoch": 1.2, "learning_rate": 8.502042637066237e-06, "logits/chosen": -2.4818274974823, "logits/rejected": -1.9012432098388672, "logps/chosen": -142.2902069091797, "logps/rejected": -257.7737731933594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8861911296844482, "rewards/margins": 11.02173137664795, "rewards/rejected": -11.907922744750977, "step": 7697 }, { "epoch": 1.2, "learning_rate": 8.50130919653509e-06, "logits/chosen": -2.711622476577759, "logits/rejected": -1.0662343502044678, "logps/chosen": -367.5388488769531, "logps/rejected": -173.25909423828125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.30368882417678833, "rewards/margins": 5.626172065734863, "rewards/rejected": -5.929861068725586, "step": 7698 }, { "epoch": 1.2, "learning_rate": 8.500575756003942e-06, "logits/chosen": -2.59822678565979, "logits/rejected": -2.930546998977661, "logps/chosen": -32.994354248046875, "logps/rejected": -163.74240112304688, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -1.5669207572937012, "rewards/margins": 5.228461742401123, "rewards/rejected": -6.795382499694824, "step": 7699 }, { "epoch": 1.2, "learning_rate": 8.499842315472796e-06, "logits/chosen": -1.4757330417633057, "logits/rejected": -2.5212042331695557, "logps/chosen": -174.6942138671875, "logps/rejected": -494.3246154785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.100937843322754, "rewards/margins": 11.258964538574219, "rewards/rejected": -14.359901428222656, "step": 7700 }, { "epoch": 1.2, "learning_rate": 8.499108874941648e-06, "logits/chosen": -1.995474100112915, "logits/rejected": -2.309856414794922, "logps/chosen": -241.4595947265625, "logps/rejected": -504.376708984375, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": -4.016854286193848, "rewards/margins": 5.081526756286621, "rewards/rejected": -9.098381042480469, "step": 7701 }, { "epoch": 1.2, "learning_rate": 8.4983754344105e-06, "logits/chosen": -2.4612386226654053, "logits/rejected": -2.8392820358276367, "logps/chosen": -31.26556396484375, "logps/rejected": -130.00039672851562, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -1.7190972566604614, "rewards/margins": 5.270715713500977, "rewards/rejected": -6.989812850952148, "step": 7702 }, { "epoch": 1.2, "learning_rate": 8.497641993879351e-06, "logits/chosen": -1.6394683122634888, "logits/rejected": -2.782435178756714, "logps/chosen": -141.16329956054688, "logps/rejected": -300.162109375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -3.6058406829833984, "rewards/margins": 5.504897117614746, "rewards/rejected": -9.110737800598145, "step": 7703 }, { "epoch": 1.2, "learning_rate": 8.496908553348203e-06, "logits/chosen": -1.059647560119629, "logits/rejected": -2.7444331645965576, "logps/chosen": -53.12071990966797, "logps/rejected": -307.09124755859375, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -2.0771331787109375, "rewards/margins": 5.878683567047119, "rewards/rejected": -7.955817222595215, "step": 7704 }, { "epoch": 1.2, "learning_rate": 8.496175112817055e-06, "logits/chosen": -2.4469056129455566, "logits/rejected": -2.9580929279327393, "logps/chosen": -109.89948272705078, "logps/rejected": -421.49993896484375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.8569930791854858, "rewards/margins": 6.644580841064453, "rewards/rejected": -8.50157356262207, "step": 7705 }, { "epoch": 1.2, "learning_rate": 8.495441672285907e-06, "logits/chosen": -2.3409855365753174, "logits/rejected": -2.7592239379882812, "logps/chosen": -162.3048095703125, "logps/rejected": -482.78997802734375, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": -3.3531103134155273, "rewards/margins": 3.8655192852020264, "rewards/rejected": -7.218629837036133, "step": 7706 }, { "epoch": 1.2, "learning_rate": 8.494708231754759e-06, "logits/chosen": -2.6957437992095947, "logits/rejected": -2.025657892227173, "logps/chosen": -784.5281982421875, "logps/rejected": -463.2851257324219, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.9936833381652832, "rewards/margins": 5.823415756225586, "rewards/rejected": -7.817099094390869, "step": 7707 }, { "epoch": 1.2, "learning_rate": 8.49397479122361e-06, "logits/chosen": -2.3706719875335693, "logits/rejected": -1.7009543180465698, "logps/chosen": -352.2326965332031, "logps/rejected": -376.6340026855469, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2871315479278564, "rewards/margins": 6.499438285827637, "rewards/rejected": -8.786569595336914, "step": 7708 }, { "epoch": 1.2, "learning_rate": 8.493241350692464e-06, "logits/chosen": -1.7774136066436768, "logits/rejected": -2.4511795043945312, "logps/chosen": -195.23785400390625, "logps/rejected": -304.9588623046875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -4.027713775634766, "rewards/margins": 5.697725296020508, "rewards/rejected": -9.725439071655273, "step": 7709 }, { "epoch": 1.2, "learning_rate": 8.492507910161316e-06, "logits/chosen": -2.6975653171539307, "logits/rejected": -1.8686473369598389, "logps/chosen": -276.6669616699219, "logps/rejected": -296.70068359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7313127517700195, "rewards/margins": 9.58708381652832, "rewards/rejected": -10.31839656829834, "step": 7710 }, { "epoch": 1.2, "learning_rate": 8.491774469630168e-06, "logits/chosen": -2.091425895690918, "logits/rejected": -2.872199535369873, "logps/chosen": -378.2197265625, "logps/rejected": -515.8560180664062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9670608043670654, "rewards/margins": 10.037408828735352, "rewards/rejected": -14.00446891784668, "step": 7711 }, { "epoch": 1.2, "learning_rate": 8.49104102909902e-06, "logits/chosen": -1.06327223777771, "logits/rejected": -2.138645887374878, "logps/chosen": -135.912109375, "logps/rejected": -384.4727783203125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.6138455867767334, "rewards/margins": 11.68834114074707, "rewards/rejected": -13.302186965942383, "step": 7712 }, { "epoch": 1.2, "learning_rate": 8.490307588567872e-06, "logits/chosen": -2.212571382522583, "logits/rejected": -2.511807680130005, "logps/chosen": -191.6572265625, "logps/rejected": -154.19720458984375, "loss": 0.3604, "rewards/accuracies": 0.5, "rewards/chosen": -1.3751496076583862, "rewards/margins": 2.6196413040161133, "rewards/rejected": -3.99479079246521, "step": 7713 }, { "epoch": 1.2, "learning_rate": 8.489574148036724e-06, "logits/chosen": -2.6606853008270264, "logits/rejected": -2.115105390548706, "logps/chosen": -184.73704528808594, "logps/rejected": -321.5967102050781, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": 1.0477747917175293, "rewards/margins": 7.766726493835449, "rewards/rejected": -6.718952178955078, "step": 7714 }, { "epoch": 1.2, "learning_rate": 8.488840707505576e-06, "logits/chosen": -1.79006826877594, "logits/rejected": -2.825570583343506, "logps/chosen": -86.34984588623047, "logps/rejected": -374.9599304199219, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.8572490215301514, "rewards/margins": 9.621307373046875, "rewards/rejected": -11.478556632995605, "step": 7715 }, { "epoch": 1.2, "learning_rate": 8.488107266974428e-06, "logits/chosen": -1.9979517459869385, "logits/rejected": -3.0215072631835938, "logps/chosen": -70.0166244506836, "logps/rejected": -213.7229461669922, "loss": 0.9258, "rewards/accuracies": 0.5, "rewards/chosen": -4.099729537963867, "rewards/margins": 3.5829579830169678, "rewards/rejected": -7.682687759399414, "step": 7716 }, { "epoch": 1.2, "learning_rate": 8.48737382644328e-06, "logits/chosen": -1.351826786994934, "logits/rejected": -2.5558032989501953, "logps/chosen": -123.13005828857422, "logps/rejected": -271.9706115722656, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.3645615577697754, "rewards/margins": 6.453042984008789, "rewards/rejected": -8.817604064941406, "step": 7717 }, { "epoch": 1.2, "learning_rate": 8.486640385912133e-06, "logits/chosen": -2.1404786109924316, "logits/rejected": -1.9180082082748413, "logps/chosen": -191.43695068359375, "logps/rejected": -285.21612548828125, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": -1.4492218494415283, "rewards/margins": 4.998266220092773, "rewards/rejected": -6.447488307952881, "step": 7718 }, { "epoch": 1.2, "learning_rate": 8.485906945380985e-06, "logits/chosen": -2.789085865020752, "logits/rejected": -2.4219133853912354, "logps/chosen": -585.9091796875, "logps/rejected": -390.3768310546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.206495761871338, "rewards/margins": 8.078896522521973, "rewards/rejected": -10.285392761230469, "step": 7719 }, { "epoch": 1.2, "learning_rate": 8.485173504849837e-06, "logits/chosen": -1.6670796871185303, "logits/rejected": -2.694988489151001, "logps/chosen": -99.01940155029297, "logps/rejected": -294.492919921875, "loss": 0.0771, "rewards/accuracies": 1.0, "rewards/chosen": -3.381181240081787, "rewards/margins": 2.5253968238830566, "rewards/rejected": -5.906578063964844, "step": 7720 }, { "epoch": 1.2, "learning_rate": 8.484440064318689e-06, "logits/chosen": -1.906715750694275, "logits/rejected": -2.471015453338623, "logps/chosen": -60.65336608886719, "logps/rejected": -320.69293212890625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.7082589268684387, "rewards/margins": 6.365602970123291, "rewards/rejected": -7.073862075805664, "step": 7721 }, { "epoch": 1.2, "learning_rate": 8.48370662378754e-06, "logits/chosen": -2.5249757766723633, "logits/rejected": -2.8770060539245605, "logps/chosen": -144.04322814941406, "logps/rejected": -256.2261962890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4041253328323364, "rewards/margins": 7.726621150970459, "rewards/rejected": -9.130746841430664, "step": 7722 }, { "epoch": 1.2, "learning_rate": 8.482973183256392e-06, "logits/chosen": -1.9011272192001343, "logits/rejected": -2.5800113677978516, "logps/chosen": -107.96208190917969, "logps/rejected": -365.2842712402344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1473007202148438, "rewards/margins": 8.423202514648438, "rewards/rejected": -10.570503234863281, "step": 7723 }, { "epoch": 1.2, "learning_rate": 8.482239742725244e-06, "logits/chosen": -2.5991358757019043, "logits/rejected": -2.75298810005188, "logps/chosen": -48.469451904296875, "logps/rejected": -133.08563232421875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -0.8482917547225952, "rewards/margins": 5.44426155090332, "rewards/rejected": -6.292552947998047, "step": 7724 }, { "epoch": 1.2, "learning_rate": 8.481506302194096e-06, "logits/chosen": -1.2463380098342896, "logits/rejected": -2.75439190864563, "logps/chosen": -117.58938598632812, "logps/rejected": -738.196044921875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -1.708536148071289, "rewards/margins": 4.519021511077881, "rewards/rejected": -6.227557182312012, "step": 7725 }, { "epoch": 1.2, "learning_rate": 8.480772861662948e-06, "logits/chosen": -1.8134676218032837, "logits/rejected": -2.4959895610809326, "logps/chosen": -161.51190185546875, "logps/rejected": -232.9578094482422, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -2.1171669960021973, "rewards/margins": 6.8225836753845215, "rewards/rejected": -8.939750671386719, "step": 7726 }, { "epoch": 1.2, "learning_rate": 8.480039421131802e-06, "logits/chosen": -2.5965354442596436, "logits/rejected": -1.6272799968719482, "logps/chosen": -355.26885986328125, "logps/rejected": -210.4107666015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.021490097045898438, "rewards/margins": 8.964420318603516, "rewards/rejected": -8.9429292678833, "step": 7727 }, { "epoch": 1.2, "learning_rate": 8.479305980600653e-06, "logits/chosen": -1.987021565437317, "logits/rejected": -2.499239444732666, "logps/chosen": -85.1205825805664, "logps/rejected": -336.8152770996094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7767808437347412, "rewards/margins": 8.93370246887207, "rewards/rejected": -10.71048355102539, "step": 7728 }, { "epoch": 1.2, "learning_rate": 8.478572540069505e-06, "logits/chosen": -2.337279796600342, "logits/rejected": -1.9567983150482178, "logps/chosen": -514.451171875, "logps/rejected": -453.59393310546875, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -4.846546173095703, "rewards/margins": 2.299302816390991, "rewards/rejected": -7.145849227905273, "step": 7729 }, { "epoch": 1.2, "learning_rate": 8.477839099538357e-06, "logits/chosen": -3.0321364402770996, "logits/rejected": -2.916400194168091, "logps/chosen": -218.0759735107422, "logps/rejected": -272.63751220703125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.16043394804000854, "rewards/margins": 8.56989860534668, "rewards/rejected": -8.730332374572754, "step": 7730 }, { "epoch": 1.2, "learning_rate": 8.477105659007209e-06, "logits/chosen": -2.5931901931762695, "logits/rejected": -2.4981563091278076, "logps/chosen": -257.03143310546875, "logps/rejected": -446.63787841796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2154533863067627, "rewards/margins": 8.873388290405273, "rewards/rejected": -10.088842391967773, "step": 7731 }, { "epoch": 1.2, "learning_rate": 8.476372218476063e-06, "logits/chosen": -2.4787445068359375, "logits/rejected": -2.7647976875305176, "logps/chosen": -117.2636489868164, "logps/rejected": -238.581298828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.681298017501831, "rewards/margins": 8.374314308166504, "rewards/rejected": -10.055612564086914, "step": 7732 }, { "epoch": 1.2, "learning_rate": 8.475638777944915e-06, "logits/chosen": -3.1421122550964355, "logits/rejected": -2.589974880218506, "logps/chosen": -595.47021484375, "logps/rejected": -339.78692626953125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.922926664352417, "rewards/margins": 6.195515155792236, "rewards/rejected": -8.118441581726074, "step": 7733 }, { "epoch": 1.2, "learning_rate": 8.474905337413766e-06, "logits/chosen": -0.7971555590629578, "logits/rejected": -1.8551533222198486, "logps/chosen": -242.9542236328125, "logps/rejected": -636.4127807617188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.1780331134796143, "rewards/margins": 13.157443046569824, "rewards/rejected": -16.33547592163086, "step": 7734 }, { "epoch": 1.2, "learning_rate": 8.47417189688262e-06, "logits/chosen": -1.9934889078140259, "logits/rejected": -1.9881477355957031, "logps/chosen": -290.7013244628906, "logps/rejected": -464.6322021484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.8190011978149414, "rewards/margins": 7.8804450035095215, "rewards/rejected": -11.699445724487305, "step": 7735 }, { "epoch": 1.2, "learning_rate": 8.473438456351472e-06, "logits/chosen": -2.093597650527954, "logits/rejected": -2.436901569366455, "logps/chosen": -151.59999084472656, "logps/rejected": -349.90667724609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.7638370990753174, "rewards/margins": 9.602577209472656, "rewards/rejected": -12.366415023803711, "step": 7736 }, { "epoch": 1.2, "learning_rate": 8.472705015820324e-06, "logits/chosen": -2.6048502922058105, "logits/rejected": -2.0523409843444824, "logps/chosen": -236.99835205078125, "logps/rejected": -307.1082763671875, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -3.2733302116394043, "rewards/margins": 7.232211589813232, "rewards/rejected": -10.505541801452637, "step": 7737 }, { "epoch": 1.2, "learning_rate": 8.471971575289176e-06, "logits/chosen": -0.6574700474739075, "logits/rejected": -1.50872802734375, "logps/chosen": -136.19631958007812, "logps/rejected": -615.3946533203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4743789732456207, "rewards/margins": 13.94172477722168, "rewards/rejected": -14.41610336303711, "step": 7738 }, { "epoch": 1.2, "learning_rate": 8.471238134758028e-06, "logits/chosen": -2.696582794189453, "logits/rejected": -2.704632043838501, "logps/chosen": -346.5927429199219, "logps/rejected": -342.5520324707031, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.6410067081451416, "rewards/margins": 6.872588157653809, "rewards/rejected": -10.513595581054688, "step": 7739 }, { "epoch": 1.2, "learning_rate": 8.47050469422688e-06, "logits/chosen": -2.5424649715423584, "logits/rejected": -2.7515199184417725, "logps/chosen": -143.47702026367188, "logps/rejected": -181.98953247070312, "loss": 1.0206, "rewards/accuracies": 0.5, "rewards/chosen": -2.2588343620300293, "rewards/margins": 4.07615852355957, "rewards/rejected": -6.3349928855896, "step": 7740 }, { "epoch": 1.2, "learning_rate": 8.469771253695731e-06, "logits/chosen": -2.311274766921997, "logits/rejected": -3.050816059112549, "logps/chosen": -411.227294921875, "logps/rejected": -642.6235961914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.510522484779358, "rewards/margins": 11.746376037597656, "rewards/rejected": -13.256898880004883, "step": 7741 }, { "epoch": 1.2, "learning_rate": 8.469037813164583e-06, "logits/chosen": -1.3446515798568726, "logits/rejected": -2.6904311180114746, "logps/chosen": -72.57301330566406, "logps/rejected": -374.1297607421875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.000739753246307373, "rewards/margins": 6.870270252227783, "rewards/rejected": -6.871009826660156, "step": 7742 }, { "epoch": 1.2, "learning_rate": 8.468304372633435e-06, "logits/chosen": -2.1566572189331055, "logits/rejected": -2.4605298042297363, "logps/chosen": -502.99432373046875, "logps/rejected": -515.3749389648438, "loss": 0.6592, "rewards/accuracies": 0.5, "rewards/chosen": -1.0649826526641846, "rewards/margins": 5.710620403289795, "rewards/rejected": -6.7756028175354, "step": 7743 }, { "epoch": 1.2, "learning_rate": 8.467570932102289e-06, "logits/chosen": -2.430588960647583, "logits/rejected": -2.8059232234954834, "logps/chosen": -136.47576904296875, "logps/rejected": -282.91583251953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.7144497632980347, "rewards/margins": 7.683956146240234, "rewards/rejected": -9.398405075073242, "step": 7744 }, { "epoch": 1.2, "learning_rate": 8.46683749157114e-06, "logits/chosen": -2.501749277114868, "logits/rejected": -2.283635377883911, "logps/chosen": -792.0525512695312, "logps/rejected": -632.7021484375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.085625648498535, "rewards/margins": 5.884880065917969, "rewards/rejected": -8.970505714416504, "step": 7745 }, { "epoch": 1.2, "learning_rate": 8.466104051039992e-06, "logits/chosen": -2.6902005672454834, "logits/rejected": -2.37634539604187, "logps/chosen": -237.3440399169922, "logps/rejected": -344.3525390625, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -1.5950762033462524, "rewards/margins": 7.704866886138916, "rewards/rejected": -9.299942970275879, "step": 7746 }, { "epoch": 1.2, "learning_rate": 8.465370610508844e-06, "logits/chosen": -2.5839171409606934, "logits/rejected": -2.038094997406006, "logps/chosen": -580.6550903320312, "logps/rejected": -396.45037841796875, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.10931396484375, "rewards/margins": 4.848795413970947, "rewards/rejected": -6.958109378814697, "step": 7747 }, { "epoch": 1.2, "learning_rate": 8.464637169977696e-06, "logits/chosen": -2.3443384170532227, "logits/rejected": -2.7348086833953857, "logps/chosen": -116.7732925415039, "logps/rejected": -308.6217041015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5471656918525696, "rewards/margins": 9.356069564819336, "rewards/rejected": -9.90323543548584, "step": 7748 }, { "epoch": 1.21, "learning_rate": 8.463903729446548e-06, "logits/chosen": -2.5626866817474365, "logits/rejected": -0.6513354182243347, "logps/chosen": -238.94178771972656, "logps/rejected": -107.58320617675781, "loss": 0.9593, "rewards/accuracies": 0.5, "rewards/chosen": -2.1257846355438232, "rewards/margins": 2.670808792114258, "rewards/rejected": -4.79659366607666, "step": 7749 }, { "epoch": 1.21, "learning_rate": 8.4631702889154e-06, "logits/chosen": -2.3997395038604736, "logits/rejected": -1.9868394136428833, "logps/chosen": -179.5557861328125, "logps/rejected": -116.29136657714844, "loss": 3.3601, "rewards/accuracies": 0.5, "rewards/chosen": -5.068123817443848, "rewards/margins": -0.28079652786254883, "rewards/rejected": -4.787327289581299, "step": 7750 }, { "epoch": 1.21, "learning_rate": 8.462436848384252e-06, "logits/chosen": -1.9188346862792969, "logits/rejected": -2.7719051837921143, "logps/chosen": -309.357177734375, "logps/rejected": -210.65916442871094, "loss": 5.0499, "rewards/accuracies": 0.5, "rewards/chosen": -6.3673095703125, "rewards/margins": -2.859936237335205, "rewards/rejected": -3.507373094558716, "step": 7751 }, { "epoch": 1.21, "learning_rate": 8.461703407853104e-06, "logits/chosen": -1.703203797340393, "logits/rejected": -2.657677412033081, "logps/chosen": -110.90068817138672, "logps/rejected": -347.6705017089844, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.254289150238037, "rewards/margins": 4.526009559631348, "rewards/rejected": -6.780298709869385, "step": 7752 }, { "epoch": 1.21, "learning_rate": 8.460969967321957e-06, "logits/chosen": -2.594912052154541, "logits/rejected": -1.5938321352005005, "logps/chosen": -907.9002075195312, "logps/rejected": -382.0076904296875, "loss": 0.0777, "rewards/accuracies": 1.0, "rewards/chosen": -1.491114854812622, "rewards/margins": 5.432405948638916, "rewards/rejected": -6.923520565032959, "step": 7753 }, { "epoch": 1.21, "learning_rate": 8.460236526790809e-06, "logits/chosen": -2.573382616043091, "logits/rejected": -2.531824827194214, "logps/chosen": -253.41856384277344, "logps/rejected": -286.40557861328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.547697067260742, "rewards/margins": 7.000758171081543, "rewards/rejected": -9.548455238342285, "step": 7754 }, { "epoch": 1.21, "learning_rate": 8.459503086259661e-06, "logits/chosen": -2.837312698364258, "logits/rejected": -2.8959882259368896, "logps/chosen": -96.3991928100586, "logps/rejected": -193.51736450195312, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.12775456905365, "rewards/margins": 7.46876335144043, "rewards/rejected": -8.596518516540527, "step": 7755 }, { "epoch": 1.21, "learning_rate": 8.458769645728513e-06, "logits/chosen": -2.388451337814331, "logits/rejected": -2.715834856033325, "logps/chosen": -90.71681213378906, "logps/rejected": -319.630615234375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9675455689430237, "rewards/margins": 10.475898742675781, "rewards/rejected": -11.443443298339844, "step": 7756 }, { "epoch": 1.21, "learning_rate": 8.458036205197365e-06, "logits/chosen": -2.493831157684326, "logits/rejected": -2.8463051319122314, "logps/chosen": -54.856712341308594, "logps/rejected": -247.3057861328125, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -2.4277689456939697, "rewards/margins": 7.0633721351623535, "rewards/rejected": -9.491141319274902, "step": 7757 }, { "epoch": 1.21, "learning_rate": 8.457302764666217e-06, "logits/chosen": -1.9986361265182495, "logits/rejected": -2.5737719535827637, "logps/chosen": -122.31198120117188, "logps/rejected": -311.4491271972656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.2974801063537598, "rewards/margins": 8.264774322509766, "rewards/rejected": -10.562253952026367, "step": 7758 }, { "epoch": 1.21, "learning_rate": 8.456569324135068e-06, "logits/chosen": -1.7247604131698608, "logits/rejected": -2.643033266067505, "logps/chosen": -126.60545349121094, "logps/rejected": -301.0413818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3170051574707031, "rewards/margins": 9.134496688842773, "rewards/rejected": -10.451501846313477, "step": 7759 }, { "epoch": 1.21, "learning_rate": 8.45583588360392e-06, "logits/chosen": -2.7390623092651367, "logits/rejected": -2.1070339679718018, "logps/chosen": -509.895263671875, "logps/rejected": -413.1224365234375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -2.9162323474884033, "rewards/margins": 6.014658451080322, "rewards/rejected": -8.930891036987305, "step": 7760 }, { "epoch": 1.21, "learning_rate": 8.455102443072772e-06, "logits/chosen": -1.0649229288101196, "logits/rejected": -2.2496068477630615, "logps/chosen": -93.22479248046875, "logps/rejected": -232.34283447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8875045776367188, "rewards/margins": 10.10630989074707, "rewards/rejected": -10.993814468383789, "step": 7761 }, { "epoch": 1.21, "learning_rate": 8.454369002541626e-06, "logits/chosen": -2.3460464477539062, "logits/rejected": -2.869182825088501, "logps/chosen": -156.86610412597656, "logps/rejected": -318.31201171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.118133306503296, "rewards/margins": 6.9080810546875, "rewards/rejected": -9.026214599609375, "step": 7762 }, { "epoch": 1.21, "learning_rate": 8.453635562010478e-06, "logits/chosen": -2.0414111614227295, "logits/rejected": -2.7381606101989746, "logps/chosen": -33.81289291381836, "logps/rejected": -235.48150634765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.9754369258880615, "rewards/margins": 7.570392608642578, "rewards/rejected": -9.545829772949219, "step": 7763 }, { "epoch": 1.21, "learning_rate": 8.45290212147933e-06, "logits/chosen": -2.4623963832855225, "logits/rejected": -2.731895685195923, "logps/chosen": -77.93257904052734, "logps/rejected": -323.1382751464844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5990253686904907, "rewards/margins": 10.212191581726074, "rewards/rejected": -11.811216354370117, "step": 7764 }, { "epoch": 1.21, "learning_rate": 8.452168680948181e-06, "logits/chosen": -2.9606897830963135, "logits/rejected": -3.1094000339508057, "logps/chosen": -257.25225830078125, "logps/rejected": -391.6142578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.966517925262451, "rewards/margins": 7.94271993637085, "rewards/rejected": -10.9092378616333, "step": 7765 }, { "epoch": 1.21, "learning_rate": 8.451435240417035e-06, "logits/chosen": -1.5774562358856201, "logits/rejected": -2.7562615871429443, "logps/chosen": -95.94587707519531, "logps/rejected": -474.0662841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1127052307128906, "rewards/margins": 10.824707984924316, "rewards/rejected": -12.937413215637207, "step": 7766 }, { "epoch": 1.21, "learning_rate": 8.450701799885887e-06, "logits/chosen": -2.7749834060668945, "logits/rejected": -2.7152063846588135, "logps/chosen": -307.35845947265625, "logps/rejected": -265.93115234375, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -2.5771446228027344, "rewards/margins": 8.111247062683105, "rewards/rejected": -10.68839168548584, "step": 7767 }, { "epoch": 1.21, "learning_rate": 8.449968359354739e-06, "logits/chosen": -2.3120477199554443, "logits/rejected": -2.636223554611206, "logps/chosen": -186.96923828125, "logps/rejected": -324.2189636230469, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3564071655273438, "rewards/margins": 7.711520195007324, "rewards/rejected": -9.067927360534668, "step": 7768 }, { "epoch": 1.21, "learning_rate": 8.44923491882359e-06, "logits/chosen": -2.939593553543091, "logits/rejected": -3.1725432872772217, "logps/chosen": -439.43231201171875, "logps/rejected": -502.251708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2394417524337769, "rewards/margins": 9.145092964172363, "rewards/rejected": -10.38453483581543, "step": 7769 }, { "epoch": 1.21, "learning_rate": 8.448501478292443e-06, "logits/chosen": -2.036647319793701, "logits/rejected": -2.6556873321533203, "logps/chosen": -125.53646850585938, "logps/rejected": -316.05914306640625, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -2.795619010925293, "rewards/margins": 9.05482292175293, "rewards/rejected": -11.850440979003906, "step": 7770 }, { "epoch": 1.21, "learning_rate": 8.447768037761296e-06, "logits/chosen": -1.249155879020691, "logits/rejected": -2.376340627670288, "logps/chosen": -53.52355194091797, "logps/rejected": -293.44219970703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.3904786109924316, "rewards/margins": 9.948081970214844, "rewards/rejected": -12.338560104370117, "step": 7771 }, { "epoch": 1.21, "learning_rate": 8.447034597230148e-06, "logits/chosen": -2.7648768424987793, "logits/rejected": -2.538360595703125, "logps/chosen": -286.8746643066406, "logps/rejected": -208.17715454101562, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.24793928861618042, "rewards/margins": 6.509402751922607, "rewards/rejected": -6.757342338562012, "step": 7772 }, { "epoch": 1.21, "learning_rate": 8.446301156699e-06, "logits/chosen": -2.764669895172119, "logits/rejected": -2.9582276344299316, "logps/chosen": -137.02505493164062, "logps/rejected": -299.32177734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9905116558074951, "rewards/margins": 8.113321304321289, "rewards/rejected": -9.103833198547363, "step": 7773 }, { "epoch": 1.21, "learning_rate": 8.445567716167852e-06, "logits/chosen": -2.990959405899048, "logits/rejected": -2.8620877265930176, "logps/chosen": -801.15771484375, "logps/rejected": -616.9503173828125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.947854518890381, "rewards/margins": 7.94336462020874, "rewards/rejected": -10.891219139099121, "step": 7774 }, { "epoch": 1.21, "learning_rate": 8.444834275636704e-06, "logits/chosen": -2.714921474456787, "logits/rejected": -2.5229382514953613, "logps/chosen": -166.1331329345703, "logps/rejected": -209.7872772216797, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.4133952856063843, "rewards/margins": 5.840664863586426, "rewards/rejected": -7.2540602684021, "step": 7775 }, { "epoch": 1.21, "learning_rate": 8.444100835105555e-06, "logits/chosen": -2.4838716983795166, "logits/rejected": -2.335047721862793, "logps/chosen": -229.3271942138672, "logps/rejected": -354.4861145019531, "loss": 2.3758, "rewards/accuracies": 0.5, "rewards/chosen": -7.34986686706543, "rewards/margins": 3.102320432662964, "rewards/rejected": -10.452186584472656, "step": 7776 }, { "epoch": 1.21, "learning_rate": 8.443367394574407e-06, "logits/chosen": -2.7499608993530273, "logits/rejected": -3.097529888153076, "logps/chosen": -321.976806640625, "logps/rejected": -484.5924072265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.009374618530273438, "rewards/margins": 10.076118469238281, "rewards/rejected": -10.085493087768555, "step": 7777 }, { "epoch": 1.21, "learning_rate": 8.44263395404326e-06, "logits/chosen": -2.6738622188568115, "logits/rejected": -2.892672061920166, "logps/chosen": -111.81716918945312, "logps/rejected": -301.968017578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8267605304718018, "rewards/margins": 9.00084400177002, "rewards/rejected": -11.827604293823242, "step": 7778 }, { "epoch": 1.21, "learning_rate": 8.441900513512111e-06, "logits/chosen": -2.6701254844665527, "logits/rejected": -1.8969969749450684, "logps/chosen": -662.8726806640625, "logps/rejected": -310.05828857421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.9554262161254883, "rewards/margins": 7.587299346923828, "rewards/rejected": -11.542725563049316, "step": 7779 }, { "epoch": 1.21, "learning_rate": 8.441167072980965e-06, "logits/chosen": -3.0628788471221924, "logits/rejected": -2.831169843673706, "logps/chosen": -95.41582489013672, "logps/rejected": -156.26986694335938, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.2743815183639526, "rewards/margins": 6.353899955749512, "rewards/rejected": -7.628281593322754, "step": 7780 }, { "epoch": 1.21, "learning_rate": 8.440433632449817e-06, "logits/chosen": -2.4266276359558105, "logits/rejected": -2.1053433418273926, "logps/chosen": -99.97370910644531, "logps/rejected": -298.5056457519531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6713014841079712, "rewards/margins": 10.020101547241211, "rewards/rejected": -11.691402435302734, "step": 7781 }, { "epoch": 1.21, "learning_rate": 8.439700191918668e-06, "logits/chosen": -2.8218138217926025, "logits/rejected": -2.7118990421295166, "logps/chosen": -270.56195068359375, "logps/rejected": -295.03179931640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3570520877838135, "rewards/margins": 7.164888381958008, "rewards/rejected": -8.521940231323242, "step": 7782 }, { "epoch": 1.21, "learning_rate": 8.43896675138752e-06, "logits/chosen": -2.9701085090637207, "logits/rejected": -2.9004151821136475, "logps/chosen": -385.279541015625, "logps/rejected": -327.942626953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4622948169708252, "rewards/margins": 7.234572410583496, "rewards/rejected": -8.696866989135742, "step": 7783 }, { "epoch": 1.21, "learning_rate": 8.438233310856372e-06, "logits/chosen": -1.9314144849777222, "logits/rejected": -2.6345252990722656, "logps/chosen": -271.65570068359375, "logps/rejected": -593.3885498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2987667322158813, "rewards/margins": 11.590758323669434, "rewards/rejected": -12.889524459838867, "step": 7784 }, { "epoch": 1.21, "learning_rate": 8.437499870325224e-06, "logits/chosen": -0.7069400548934937, "logits/rejected": -0.9474336504936218, "logps/chosen": -242.95913696289062, "logps/rejected": -414.6561279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.584333896636963, "rewards/margins": 10.01159954071045, "rewards/rejected": -12.59593391418457, "step": 7785 }, { "epoch": 1.21, "learning_rate": 8.436766429794076e-06, "logits/chosen": -2.7602250576019287, "logits/rejected": -2.920192003250122, "logps/chosen": -387.19091796875, "logps/rejected": -519.9356689453125, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.6876800060272217, "rewards/margins": 5.500851631164551, "rewards/rejected": -7.188531398773193, "step": 7786 }, { "epoch": 1.21, "learning_rate": 8.436032989262928e-06, "logits/chosen": -2.670233964920044, "logits/rejected": -2.814286708831787, "logps/chosen": -149.07382202148438, "logps/rejected": -230.6351318359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.41574323177337646, "rewards/margins": 7.2207841873168945, "rewards/rejected": -7.6365275382995605, "step": 7787 }, { "epoch": 1.21, "learning_rate": 8.43529954873178e-06, "logits/chosen": -1.5832219123840332, "logits/rejected": -2.6357040405273438, "logps/chosen": -280.9462585449219, "logps/rejected": -540.835205078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1330116987228394, "rewards/margins": 9.026054382324219, "rewards/rejected": -10.159065246582031, "step": 7788 }, { "epoch": 1.21, "learning_rate": 8.434566108200633e-06, "logits/chosen": -1.015608310699463, "logits/rejected": -2.7851297855377197, "logps/chosen": -203.59915161132812, "logps/rejected": -464.250244140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.5860008001327515, "rewards/margins": 8.08014965057373, "rewards/rejected": -9.666150093078613, "step": 7789 }, { "epoch": 1.21, "learning_rate": 8.433832667669485e-06, "logits/chosen": -2.171715021133423, "logits/rejected": -2.6133716106414795, "logps/chosen": -301.4937744140625, "logps/rejected": -485.1451416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7450119256973267, "rewards/margins": 10.382793426513672, "rewards/rejected": -12.12780475616455, "step": 7790 }, { "epoch": 1.21, "learning_rate": 8.433099227138337e-06, "logits/chosen": -2.632948637008667, "logits/rejected": -3.0135905742645264, "logps/chosen": -259.44427490234375, "logps/rejected": -289.3345031738281, "loss": 0.1817, "rewards/accuracies": 1.0, "rewards/chosen": -4.174166679382324, "rewards/margins": 2.040381669998169, "rewards/rejected": -6.214548587799072, "step": 7791 }, { "epoch": 1.21, "learning_rate": 8.432365786607189e-06, "logits/chosen": -2.0217952728271484, "logits/rejected": -2.438754081726074, "logps/chosen": -190.7793731689453, "logps/rejected": -325.2680969238281, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -1.9771411418914795, "rewards/margins": 9.384466171264648, "rewards/rejected": -11.361608505249023, "step": 7792 }, { "epoch": 1.21, "learning_rate": 8.43163234607604e-06, "logits/chosen": -2.9853553771972656, "logits/rejected": -3.119231700897217, "logps/chosen": -287.8736572265625, "logps/rejected": -289.8575439453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.18330079317092896, "rewards/margins": 8.658875465393066, "rewards/rejected": -8.84217643737793, "step": 7793 }, { "epoch": 1.21, "learning_rate": 8.430898905544893e-06, "logits/chosen": -2.450241804122925, "logits/rejected": -1.6917980909347534, "logps/chosen": -364.330810546875, "logps/rejected": -301.26171875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.982739210128784, "rewards/margins": 5.6562347412109375, "rewards/rejected": -8.6389741897583, "step": 7794 }, { "epoch": 1.21, "learning_rate": 8.430165465013745e-06, "logits/chosen": -3.0042853355407715, "logits/rejected": -2.695605993270874, "logps/chosen": -827.7512817382812, "logps/rejected": -646.3309326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1674546003341675, "rewards/margins": 9.838775634765625, "rewards/rejected": -11.006230354309082, "step": 7795 }, { "epoch": 1.21, "learning_rate": 8.429432024482596e-06, "logits/chosen": -0.7767027616500854, "logits/rejected": -1.8028337955474854, "logps/chosen": -89.66015625, "logps/rejected": -360.57220458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.27370023727417, "rewards/margins": 10.797351837158203, "rewards/rejected": -13.071052551269531, "step": 7796 }, { "epoch": 1.21, "learning_rate": 8.428698583951448e-06, "logits/chosen": -3.047340154647827, "logits/rejected": -1.8896890878677368, "logps/chosen": -515.2855834960938, "logps/rejected": -100.98816680908203, "loss": 3.6726, "rewards/accuracies": 0.5, "rewards/chosen": -6.170332431793213, "rewards/margins": -1.3780303001403809, "rewards/rejected": -4.792302131652832, "step": 7797 }, { "epoch": 1.21, "learning_rate": 8.427965143420302e-06, "logits/chosen": -2.2053277492523193, "logits/rejected": -2.5160772800445557, "logps/chosen": -65.57185363769531, "logps/rejected": -299.19403076171875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.2787561416625977, "rewards/margins": 5.653251647949219, "rewards/rejected": -6.932007789611816, "step": 7798 }, { "epoch": 1.21, "learning_rate": 8.427231702889154e-06, "logits/chosen": -1.4232442378997803, "logits/rejected": -2.5927250385284424, "logps/chosen": -195.1000213623047, "logps/rejected": -506.0389099121094, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.8154234886169434, "rewards/margins": 6.611103534698486, "rewards/rejected": -9.42652702331543, "step": 7799 }, { "epoch": 1.21, "learning_rate": 8.426498262358007e-06, "logits/chosen": -1.3590511083602905, "logits/rejected": -2.733875036239624, "logps/chosen": -78.94258117675781, "logps/rejected": -298.7342834472656, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.021704912185669, "rewards/margins": 6.178849220275879, "rewards/rejected": -8.200553894042969, "step": 7800 }, { "epoch": 1.21, "learning_rate": 8.42576482182686e-06, "logits/chosen": -2.7718560695648193, "logits/rejected": -2.2995474338531494, "logps/chosen": -419.6156921386719, "logps/rejected": -434.66259765625, "loss": 0.1938, "rewards/accuracies": 1.0, "rewards/chosen": -2.6371307373046875, "rewards/margins": 5.275566101074219, "rewards/rejected": -7.912696838378906, "step": 7801 }, { "epoch": 1.21, "learning_rate": 8.425031381295711e-06, "logits/chosen": -2.6014997959136963, "logits/rejected": -2.611785411834717, "logps/chosen": -169.74984741210938, "logps/rejected": -347.4645080566406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.3988525867462158, "rewards/margins": 8.208844184875488, "rewards/rejected": -8.607696533203125, "step": 7802 }, { "epoch": 1.21, "learning_rate": 8.424297940764563e-06, "logits/chosen": -2.6506893634796143, "logits/rejected": -2.4835193157196045, "logps/chosen": -315.4507751464844, "logps/rejected": -398.3073425292969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.558946371078491, "rewards/margins": 9.364553451538086, "rewards/rejected": -11.923500061035156, "step": 7803 }, { "epoch": 1.21, "learning_rate": 8.423564500233415e-06, "logits/chosen": -1.8846265077590942, "logits/rejected": -2.576231002807617, "logps/chosen": -115.87101745605469, "logps/rejected": -151.58062744140625, "loss": 2.3985, "rewards/accuracies": 0.5, "rewards/chosen": -4.493256568908691, "rewards/margins": -0.20188570022583008, "rewards/rejected": -4.291370391845703, "step": 7804 }, { "epoch": 1.21, "learning_rate": 8.422831059702267e-06, "logits/chosen": -2.1432785987854004, "logits/rejected": -2.8920772075653076, "logps/chosen": -443.7663269042969, "logps/rejected": -481.0966796875, "loss": 1.0045, "rewards/accuracies": 0.5, "rewards/chosen": -5.968586444854736, "rewards/margins": 3.378514289855957, "rewards/rejected": -9.347101211547852, "step": 7805 }, { "epoch": 1.21, "learning_rate": 8.422097619171119e-06, "logits/chosen": -2.7181921005249023, "logits/rejected": -2.8015904426574707, "logps/chosen": -177.89971923828125, "logps/rejected": -129.15805053710938, "loss": 0.2024, "rewards/accuracies": 1.0, "rewards/chosen": -1.8516695499420166, "rewards/margins": 4.130032062530518, "rewards/rejected": -5.981701850891113, "step": 7806 }, { "epoch": 1.21, "learning_rate": 8.421364178639972e-06, "logits/chosen": -1.5726995468139648, "logits/rejected": -2.5613491535186768, "logps/chosen": -97.65681457519531, "logps/rejected": -386.4927673339844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.912466049194336, "rewards/margins": 7.55885648727417, "rewards/rejected": -9.471323013305664, "step": 7807 }, { "epoch": 1.21, "learning_rate": 8.420630738108824e-06, "logits/chosen": -2.842552661895752, "logits/rejected": -2.396711826324463, "logps/chosen": -250.5339813232422, "logps/rejected": -280.12200927734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.40311890840530396, "rewards/margins": 9.015008926391602, "rewards/rejected": -8.61189079284668, "step": 7808 }, { "epoch": 1.21, "learning_rate": 8.419897297577676e-06, "logits/chosen": -2.8696587085723877, "logits/rejected": -1.5761804580688477, "logps/chosen": -405.005859375, "logps/rejected": -203.52841186523438, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.6234238147735596, "rewards/margins": 6.72526741027832, "rewards/rejected": -8.3486909866333, "step": 7809 }, { "epoch": 1.21, "learning_rate": 8.419163857046528e-06, "logits/chosen": -1.8620532751083374, "logits/rejected": -2.642442464828491, "logps/chosen": -175.4276123046875, "logps/rejected": -413.1122131347656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5641961097717285, "rewards/margins": 9.751337051391602, "rewards/rejected": -11.315533638000488, "step": 7810 }, { "epoch": 1.21, "learning_rate": 8.41843041651538e-06, "logits/chosen": -2.2017483711242676, "logits/rejected": -2.7357406616210938, "logps/chosen": -69.36970520019531, "logps/rejected": -204.49856567382812, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -1.9520606994628906, "rewards/margins": 6.059119701385498, "rewards/rejected": -8.011180877685547, "step": 7811 }, { "epoch": 1.21, "learning_rate": 8.417696975984232e-06, "logits/chosen": -2.3590612411499023, "logits/rejected": -2.7546651363372803, "logps/chosen": -127.68576049804688, "logps/rejected": -325.7674255371094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9046798944473267, "rewards/margins": 11.004694938659668, "rewards/rejected": -11.909375190734863, "step": 7812 }, { "epoch": 1.22, "learning_rate": 8.416963535453083e-06, "logits/chosen": -2.428877115249634, "logits/rejected": -3.1298701763153076, "logps/chosen": -52.366546630859375, "logps/rejected": -225.89315795898438, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.8691959381103516, "rewards/margins": 6.1255269050598145, "rewards/rejected": -7.994722843170166, "step": 7813 }, { "epoch": 1.22, "learning_rate": 8.416230094921935e-06, "logits/chosen": -1.4402607679367065, "logits/rejected": -2.92622447013855, "logps/chosen": -121.4106674194336, "logps/rejected": -525.0827026367188, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.149557590484619, "rewards/margins": 7.3419084548950195, "rewards/rejected": -10.491466522216797, "step": 7814 }, { "epoch": 1.22, "learning_rate": 8.415496654390787e-06, "logits/chosen": -2.8724863529205322, "logits/rejected": -2.40932297706604, "logps/chosen": -257.3746643066406, "logps/rejected": -259.618896484375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1986916065216064, "rewards/margins": 7.376410961151123, "rewards/rejected": -8.575101852416992, "step": 7815 }, { "epoch": 1.22, "learning_rate": 8.41476321385964e-06, "logits/chosen": -2.7793824672698975, "logits/rejected": -3.159137010574341, "logps/chosen": -166.8413848876953, "logps/rejected": -355.44140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.8952887058258057, "rewards/margins": 10.187005996704102, "rewards/rejected": -12.082294464111328, "step": 7816 }, { "epoch": 1.22, "learning_rate": 8.414029773328493e-06, "logits/chosen": -1.8514208793640137, "logits/rejected": -2.7235403060913086, "logps/chosen": -139.3463134765625, "logps/rejected": -340.67779541015625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.5729715824127197, "rewards/margins": 7.841760635375977, "rewards/rejected": -9.414731979370117, "step": 7817 }, { "epoch": 1.22, "learning_rate": 8.413296332797345e-06, "logits/chosen": -2.7703757286071777, "logits/rejected": -3.0557875633239746, "logps/chosen": -39.658302307128906, "logps/rejected": -190.22222900390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.5714550018310547, "rewards/margins": 7.411925315856934, "rewards/rejected": -9.983380317687988, "step": 7818 }, { "epoch": 1.22, "learning_rate": 8.412562892266196e-06, "logits/chosen": -2.0877726078033447, "logits/rejected": -2.084588050842285, "logps/chosen": -324.0611572265625, "logps/rejected": -442.9407958984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7865002155303955, "rewards/margins": 9.844819068908691, "rewards/rejected": -12.631319046020508, "step": 7819 }, { "epoch": 1.22, "learning_rate": 8.411829451735048e-06, "logits/chosen": -1.424418568611145, "logits/rejected": -2.053544282913208, "logps/chosen": -410.94989013671875, "logps/rejected": -645.19482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.263113021850586, "rewards/margins": 11.727027893066406, "rewards/rejected": -14.990140914916992, "step": 7820 }, { "epoch": 1.22, "learning_rate": 8.4110960112039e-06, "logits/chosen": -1.9399787187576294, "logits/rejected": -1.2218877077102661, "logps/chosen": -672.5173950195312, "logps/rejected": -395.0625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.6955904960632324, "rewards/margins": 9.380193710327148, "rewards/rejected": -12.075783729553223, "step": 7821 }, { "epoch": 1.22, "learning_rate": 8.410362570672752e-06, "logits/chosen": -2.6769886016845703, "logits/rejected": -3.0128438472747803, "logps/chosen": -39.085693359375, "logps/rejected": -146.16281127929688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.356339693069458, "rewards/margins": 7.058854103088379, "rewards/rejected": -9.415193557739258, "step": 7822 }, { "epoch": 1.22, "learning_rate": 8.409629130141604e-06, "logits/chosen": -3.1560556888580322, "logits/rejected": -1.9422411918640137, "logps/chosen": -360.04461669921875, "logps/rejected": -236.8359375, "loss": 0.8946, "rewards/accuracies": 0.5, "rewards/chosen": -5.016912937164307, "rewards/margins": 2.2225241661071777, "rewards/rejected": -7.239437103271484, "step": 7823 }, { "epoch": 1.22, "learning_rate": 8.408895689610456e-06, "logits/chosen": -2.770746946334839, "logits/rejected": -1.6713000535964966, "logps/chosen": -198.90383911132812, "logps/rejected": -252.92672729492188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0819438695907593, "rewards/margins": 6.947911262512207, "rewards/rejected": -8.029854774475098, "step": 7824 }, { "epoch": 1.22, "learning_rate": 8.40816224907931e-06, "logits/chosen": -0.7241029143333435, "logits/rejected": -2.925571918487549, "logps/chosen": -118.37675476074219, "logps/rejected": -800.175048828125, "loss": 0.1375, "rewards/accuracies": 1.0, "rewards/chosen": -3.724236011505127, "rewards/margins": 5.554698944091797, "rewards/rejected": -9.278935432434082, "step": 7825 }, { "epoch": 1.22, "learning_rate": 8.407428808548161e-06, "logits/chosen": -1.9462757110595703, "logits/rejected": -2.7301924228668213, "logps/chosen": -202.00570678710938, "logps/rejected": -371.08074951171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9182915687561035, "rewards/margins": 8.691353797912598, "rewards/rejected": -10.60964584350586, "step": 7826 }, { "epoch": 1.22, "learning_rate": 8.406695368017013e-06, "logits/chosen": -1.5592492818832397, "logits/rejected": -1.6853532791137695, "logps/chosen": -221.61936950683594, "logps/rejected": -265.1300048828125, "loss": 0.5041, "rewards/accuracies": 0.5, "rewards/chosen": -5.282897472381592, "rewards/margins": 3.820000648498535, "rewards/rejected": -9.102897644042969, "step": 7827 }, { "epoch": 1.22, "learning_rate": 8.405961927485865e-06, "logits/chosen": -2.018752336502075, "logits/rejected": -2.7603819370269775, "logps/chosen": -188.16888427734375, "logps/rejected": -424.51666259765625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7800217866897583, "rewards/margins": 7.1669721603393555, "rewards/rejected": -8.94699478149414, "step": 7828 }, { "epoch": 1.22, "learning_rate": 8.405228486954717e-06, "logits/chosen": -2.6801295280456543, "logits/rejected": -2.4247379302978516, "logps/chosen": -202.60986328125, "logps/rejected": -267.22991943359375, "loss": 0.1251, "rewards/accuracies": 1.0, "rewards/chosen": -3.2454352378845215, "rewards/margins": 4.700614929199219, "rewards/rejected": -7.946050643920898, "step": 7829 }, { "epoch": 1.22, "learning_rate": 8.404495046423569e-06, "logits/chosen": -2.290802001953125, "logits/rejected": -3.226128339767456, "logps/chosen": -58.5648307800293, "logps/rejected": -408.45037841796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.0493228435516357, "rewards/margins": 7.764120101928711, "rewards/rejected": -9.813443183898926, "step": 7830 }, { "epoch": 1.22, "learning_rate": 8.40376160589242e-06, "logits/chosen": -1.89754056930542, "logits/rejected": -3.0285086631774902, "logps/chosen": -239.1956024169922, "logps/rejected": -479.87542724609375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.3096160888671875, "rewards/margins": 6.972001075744629, "rewards/rejected": -10.281617164611816, "step": 7831 }, { "epoch": 1.22, "learning_rate": 8.403028165361274e-06, "logits/chosen": -1.4436320066452026, "logits/rejected": -2.684605360031128, "logps/chosen": -224.56903076171875, "logps/rejected": -407.6977233886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6873230338096619, "rewards/margins": 11.912872314453125, "rewards/rejected": -12.600194931030273, "step": 7832 }, { "epoch": 1.22, "learning_rate": 8.402294724830126e-06, "logits/chosen": -2.397528648376465, "logits/rejected": -2.776960849761963, "logps/chosen": -444.8633728027344, "logps/rejected": -421.63232421875, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": -3.5114898681640625, "rewards/margins": 5.373472213745117, "rewards/rejected": -8.88496208190918, "step": 7833 }, { "epoch": 1.22, "learning_rate": 8.40156128429898e-06, "logits/chosen": -1.5011574029922485, "logits/rejected": -2.9077038764953613, "logps/chosen": -148.14822387695312, "logps/rejected": -491.0233154296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.5245964527130127, "rewards/margins": 8.756770133972168, "rewards/rejected": -10.281366348266602, "step": 7834 }, { "epoch": 1.22, "learning_rate": 8.400827843767832e-06, "logits/chosen": -2.819214105606079, "logits/rejected": -2.8952643871307373, "logps/chosen": -79.38026428222656, "logps/rejected": -188.15032958984375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.6074819564819336, "rewards/margins": 6.931454181671143, "rewards/rejected": -9.538935661315918, "step": 7835 }, { "epoch": 1.22, "learning_rate": 8.400094403236683e-06, "logits/chosen": -3.069596290588379, "logits/rejected": -2.8685812950134277, "logps/chosen": -65.70555877685547, "logps/rejected": -159.39950561523438, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -0.8393112421035767, "rewards/margins": 8.451221466064453, "rewards/rejected": -9.290533065795898, "step": 7836 }, { "epoch": 1.22, "learning_rate": 8.399360962705535e-06, "logits/chosen": -2.6262900829315186, "logits/rejected": -0.7411617636680603, "logps/chosen": -897.2559814453125, "logps/rejected": -387.3214111328125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.4684433937072754, "rewards/margins": 6.371942520141602, "rewards/rejected": -9.840385437011719, "step": 7837 }, { "epoch": 1.22, "learning_rate": 8.398627522174387e-06, "logits/chosen": -1.3963648080825806, "logits/rejected": -2.790555953979492, "logps/chosen": -168.19302368164062, "logps/rejected": -459.3892822265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.279323101043701, "rewards/margins": 8.793649673461914, "rewards/rejected": -12.072972297668457, "step": 7838 }, { "epoch": 1.22, "learning_rate": 8.397894081643239e-06, "logits/chosen": -2.7278435230255127, "logits/rejected": -2.817629814147949, "logps/chosen": -116.38617706298828, "logps/rejected": -266.2320556640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.103907823562622, "rewards/margins": 9.29435920715332, "rewards/rejected": -11.398266792297363, "step": 7839 }, { "epoch": 1.22, "learning_rate": 8.397160641112091e-06, "logits/chosen": -2.457925796508789, "logits/rejected": -1.6015084981918335, "logps/chosen": -189.6829071044922, "logps/rejected": -275.294921875, "loss": 0.1427, "rewards/accuracies": 1.0, "rewards/chosen": -2.4066243171691895, "rewards/margins": 6.155482769012451, "rewards/rejected": -8.56210708618164, "step": 7840 }, { "epoch": 1.22, "learning_rate": 8.396427200580943e-06, "logits/chosen": -1.0525833368301392, "logits/rejected": -2.721113443374634, "logps/chosen": -308.11968994140625, "logps/rejected": -768.2341918945312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.022231101989746, "rewards/margins": 8.163233757019043, "rewards/rejected": -13.185464859008789, "step": 7841 }, { "epoch": 1.22, "learning_rate": 8.395693760049796e-06, "logits/chosen": -2.9060897827148438, "logits/rejected": -3.0257468223571777, "logps/chosen": -362.41693115234375, "logps/rejected": -321.9728698730469, "loss": 2.4337, "rewards/accuracies": 0.5, "rewards/chosen": -4.893551826477051, "rewards/margins": 2.926333427429199, "rewards/rejected": -7.81988525390625, "step": 7842 }, { "epoch": 1.22, "learning_rate": 8.394960319518648e-06, "logits/chosen": -2.5390639305114746, "logits/rejected": -3.026618242263794, "logps/chosen": -184.40383911132812, "logps/rejected": -345.297607421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.901077389717102, "rewards/margins": 9.61863899230957, "rewards/rejected": -11.519716262817383, "step": 7843 }, { "epoch": 1.22, "learning_rate": 8.3942268789875e-06, "logits/chosen": -2.4631121158599854, "logits/rejected": -2.9917240142822266, "logps/chosen": -323.6732177734375, "logps/rejected": -310.75238037109375, "loss": 0.5633, "rewards/accuracies": 0.5, "rewards/chosen": -1.4936469793319702, "rewards/margins": 6.4971537590026855, "rewards/rejected": -7.990800857543945, "step": 7844 }, { "epoch": 1.22, "learning_rate": 8.393493438456352e-06, "logits/chosen": -2.536766529083252, "logits/rejected": -3.0884196758270264, "logps/chosen": -753.6813354492188, "logps/rejected": -898.689697265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.131458282470703, "rewards/margins": 9.302705764770508, "rewards/rejected": -12.434164047241211, "step": 7845 }, { "epoch": 1.22, "learning_rate": 8.392759997925204e-06, "logits/chosen": -2.6135034561157227, "logits/rejected": -2.5841903686523438, "logps/chosen": -160.29379272460938, "logps/rejected": -286.24615478515625, "loss": 0.6025, "rewards/accuracies": 0.5, "rewards/chosen": -4.077047348022461, "rewards/margins": 3.654298782348633, "rewards/rejected": -7.731346130371094, "step": 7846 }, { "epoch": 1.22, "learning_rate": 8.392026557394056e-06, "logits/chosen": -2.351841926574707, "logits/rejected": -2.749195098876953, "logps/chosen": -75.52616119384766, "logps/rejected": -222.01416015625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.210568428039551, "rewards/margins": 4.898656368255615, "rewards/rejected": -7.109224796295166, "step": 7847 }, { "epoch": 1.22, "learning_rate": 8.391293116862908e-06, "logits/chosen": -2.1944923400878906, "logits/rejected": -3.040581703186035, "logps/chosen": -55.629844665527344, "logps/rejected": -238.11387634277344, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.4742746353149414, "rewards/margins": 5.56578254699707, "rewards/rejected": -8.040057182312012, "step": 7848 }, { "epoch": 1.22, "learning_rate": 8.39055967633176e-06, "logits/chosen": -2.283703088760376, "logits/rejected": -1.0560023784637451, "logps/chosen": -358.489501953125, "logps/rejected": -125.18345642089844, "loss": 3.4915, "rewards/accuracies": 0.5, "rewards/chosen": -6.110991954803467, "rewards/margins": 0.33495044708251953, "rewards/rejected": -6.445942401885986, "step": 7849 }, { "epoch": 1.22, "learning_rate": 8.389826235800611e-06, "logits/chosen": -1.228013277053833, "logits/rejected": -2.5753486156463623, "logps/chosen": -50.404022216796875, "logps/rejected": -275.14990234375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.185117244720459, "rewards/margins": 6.19746732711792, "rewards/rejected": -8.382584571838379, "step": 7850 }, { "epoch": 1.22, "learning_rate": 8.389092795269465e-06, "logits/chosen": -2.8927597999572754, "logits/rejected": -2.65472412109375, "logps/chosen": -945.5139770507812, "logps/rejected": -710.5550537109375, "loss": 0.5406, "rewards/accuracies": 0.5, "rewards/chosen": -4.600824356079102, "rewards/margins": 1.6677184104919434, "rewards/rejected": -6.268542289733887, "step": 7851 }, { "epoch": 1.22, "learning_rate": 8.388359354738317e-06, "logits/chosen": -2.57357120513916, "logits/rejected": -2.8065085411071777, "logps/chosen": -240.20989990234375, "logps/rejected": -435.38543701171875, "loss": 0.395, "rewards/accuracies": 0.5, "rewards/chosen": -5.774879455566406, "rewards/margins": 3.337442398071289, "rewards/rejected": -9.112321853637695, "step": 7852 }, { "epoch": 1.22, "learning_rate": 8.387625914207169e-06, "logits/chosen": -2.7228219509124756, "logits/rejected": -2.660118579864502, "logps/chosen": -277.67926025390625, "logps/rejected": -265.4753112792969, "loss": 0.4586, "rewards/accuracies": 0.5, "rewards/chosen": -4.528071403503418, "rewards/margins": 4.4445481300354, "rewards/rejected": -8.972620010375977, "step": 7853 }, { "epoch": 1.22, "learning_rate": 8.38689247367602e-06, "logits/chosen": -1.7414700984954834, "logits/rejected": -2.577043294906616, "logps/chosen": -192.12083435058594, "logps/rejected": -358.7383117675781, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.9295315742492676, "rewards/margins": 7.467278003692627, "rewards/rejected": -11.396809577941895, "step": 7854 }, { "epoch": 1.22, "learning_rate": 8.386159033144872e-06, "logits/chosen": -2.8885326385498047, "logits/rejected": -3.036888599395752, "logps/chosen": -30.76941680908203, "logps/rejected": -268.3494873046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4670946598052979, "rewards/margins": 9.514466285705566, "rewards/rejected": -10.981561660766602, "step": 7855 }, { "epoch": 1.22, "learning_rate": 8.385425592613724e-06, "logits/chosen": -2.5821115970611572, "logits/rejected": -2.819028854370117, "logps/chosen": -365.05303955078125, "logps/rejected": -487.83917236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4527008533477783, "rewards/margins": 10.451990127563477, "rewards/rejected": -11.904690742492676, "step": 7856 }, { "epoch": 1.22, "learning_rate": 8.384692152082576e-06, "logits/chosen": -2.954521656036377, "logits/rejected": -2.316490411758423, "logps/chosen": -360.028564453125, "logps/rejected": -343.345458984375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.649794101715088, "rewards/margins": 7.059084415435791, "rewards/rejected": -9.708878517150879, "step": 7857 }, { "epoch": 1.22, "learning_rate": 8.383958711551428e-06, "logits/chosen": -2.8183650970458984, "logits/rejected": -2.752257823944092, "logps/chosen": -178.91986083984375, "logps/rejected": -282.264892578125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.0078492164611816, "rewards/margins": 7.080455780029297, "rewards/rejected": -10.08830451965332, "step": 7858 }, { "epoch": 1.22, "learning_rate": 8.38322527102028e-06, "logits/chosen": -2.825039863586426, "logits/rejected": -2.518429756164551, "logps/chosen": -665.364990234375, "logps/rejected": -427.03009033203125, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -4.6486101150512695, "rewards/margins": 5.461644649505615, "rewards/rejected": -10.110254287719727, "step": 7859 }, { "epoch": 1.22, "learning_rate": 8.382491830489134e-06, "logits/chosen": -2.4080052375793457, "logits/rejected": -2.2676665782928467, "logps/chosen": -137.6797637939453, "logps/rejected": -244.0251007080078, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -3.6902427673339844, "rewards/margins": 7.122503280639648, "rewards/rejected": -10.812746047973633, "step": 7860 }, { "epoch": 1.22, "learning_rate": 8.381758389957985e-06, "logits/chosen": -2.4104232788085938, "logits/rejected": -3.074240207672119, "logps/chosen": -64.35009765625, "logps/rejected": -379.1111755371094, "loss": 0.1819, "rewards/accuracies": 1.0, "rewards/chosen": -4.840508937835693, "rewards/margins": 4.319375038146973, "rewards/rejected": -9.159883499145508, "step": 7861 }, { "epoch": 1.22, "learning_rate": 8.381024949426837e-06, "logits/chosen": -0.9852226376533508, "logits/rejected": -2.6937408447265625, "logps/chosen": -159.39183044433594, "logps/rejected": -467.9017333984375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.913848876953125, "rewards/margins": 6.000919342041016, "rewards/rejected": -6.914768218994141, "step": 7862 }, { "epoch": 1.22, "learning_rate": 8.38029150889569e-06, "logits/chosen": -2.8177618980407715, "logits/rejected": -2.2140541076660156, "logps/chosen": -545.5490112304688, "logps/rejected": -900.0712890625, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.9090607166290283, "rewards/margins": 6.842770576477051, "rewards/rejected": -8.7518310546875, "step": 7863 }, { "epoch": 1.22, "learning_rate": 8.379558068364541e-06, "logits/chosen": -2.9860448837280273, "logits/rejected": -2.633439779281616, "logps/chosen": -467.06219482421875, "logps/rejected": -583.8704223632812, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -3.729454517364502, "rewards/margins": 4.850547790527344, "rewards/rejected": -8.580001831054688, "step": 7864 }, { "epoch": 1.22, "learning_rate": 8.378824627833393e-06, "logits/chosen": -2.6501221656799316, "logits/rejected": -2.8020853996276855, "logps/chosen": -78.90263366699219, "logps/rejected": -209.71994018554688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.948282241821289, "rewards/margins": 6.921343803405762, "rewards/rejected": -10.86962604522705, "step": 7865 }, { "epoch": 1.22, "learning_rate": 8.378091187302247e-06, "logits/chosen": -2.696545124053955, "logits/rejected": -1.8870400190353394, "logps/chosen": -89.3226318359375, "logps/rejected": -190.510498046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.7059699892997742, "rewards/margins": 7.644923210144043, "rewards/rejected": -8.350893020629883, "step": 7866 }, { "epoch": 1.22, "learning_rate": 8.377357746771098e-06, "logits/chosen": -3.0859267711639404, "logits/rejected": -2.8403656482696533, "logps/chosen": -113.35698699951172, "logps/rejected": -139.21826171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.8291834592819214, "rewards/margins": 7.1623969078063965, "rewards/rejected": -7.991580009460449, "step": 7867 }, { "epoch": 1.22, "learning_rate": 8.37662430623995e-06, "logits/chosen": -2.4410438537597656, "logits/rejected": -2.683666944503784, "logps/chosen": -130.69952392578125, "logps/rejected": -322.02203369140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.871164083480835, "rewards/margins": 8.921917915344238, "rewards/rejected": -12.793082237243652, "step": 7868 }, { "epoch": 1.22, "learning_rate": 8.375890865708804e-06, "logits/chosen": -2.459636688232422, "logits/rejected": -2.753757953643799, "logps/chosen": -513.9871215820312, "logps/rejected": -603.8494262695312, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -4.226762771606445, "rewards/margins": 5.269500732421875, "rewards/rejected": -9.49626350402832, "step": 7869 }, { "epoch": 1.22, "learning_rate": 8.375157425177656e-06, "logits/chosen": -2.0463478565216064, "logits/rejected": -2.7227327823638916, "logps/chosen": -135.21995544433594, "logps/rejected": -301.82891845703125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.7527462244033813, "rewards/margins": 7.300567626953125, "rewards/rejected": -9.053313255310059, "step": 7870 }, { "epoch": 1.22, "learning_rate": 8.374423984646508e-06, "logits/chosen": -2.821242332458496, "logits/rejected": -1.6529911756515503, "logps/chosen": -624.1946411132812, "logps/rejected": -417.3813781738281, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -2.069387912750244, "rewards/margins": 5.792050838470459, "rewards/rejected": -7.861438751220703, "step": 7871 }, { "epoch": 1.22, "learning_rate": 8.37369054411536e-06, "logits/chosen": -1.399392008781433, "logits/rejected": -2.4828040599823, "logps/chosen": -159.76641845703125, "logps/rejected": -333.3092956542969, "loss": 0.6623, "rewards/accuracies": 0.5, "rewards/chosen": -2.287804365158081, "rewards/margins": 4.2322998046875, "rewards/rejected": -6.52010440826416, "step": 7872 }, { "epoch": 1.22, "learning_rate": 8.372957103584211e-06, "logits/chosen": -2.1703014373779297, "logits/rejected": -2.6087071895599365, "logps/chosen": -243.94696044921875, "logps/rejected": -412.9293518066406, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.3003387451171875, "rewards/margins": 7.744409084320068, "rewards/rejected": -10.044748306274414, "step": 7873 }, { "epoch": 1.22, "learning_rate": 8.372223663053063e-06, "logits/chosen": -2.8622677326202393, "logits/rejected": -2.8088557720184326, "logps/chosen": -103.39712524414062, "logps/rejected": -145.8754425048828, "loss": 0.3798, "rewards/accuracies": 0.5, "rewards/chosen": -3.518397569656372, "rewards/margins": 3.5965590476989746, "rewards/rejected": -7.114956855773926, "step": 7874 }, { "epoch": 1.22, "learning_rate": 8.371490222521915e-06, "logits/chosen": -0.7577214241027832, "logits/rejected": -2.812293291091919, "logps/chosen": -86.28813934326172, "logps/rejected": -508.2568359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.517699718475342, "rewards/margins": 9.078168869018555, "rewards/rejected": -11.595869064331055, "step": 7875 }, { "epoch": 1.22, "learning_rate": 8.370756781990767e-06, "logits/chosen": -1.7469905614852905, "logits/rejected": -2.7074954509735107, "logps/chosen": -101.423828125, "logps/rejected": -391.02813720703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1800495386123657, "rewards/margins": 10.150522232055664, "rewards/rejected": -11.330571174621582, "step": 7876 }, { "epoch": 1.23, "learning_rate": 8.370023341459619e-06, "logits/chosen": -2.4121744632720947, "logits/rejected": -2.8554513454437256, "logps/chosen": -40.23036575317383, "logps/rejected": -214.37722778320312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.541806936264038, "rewards/margins": 7.599480628967285, "rewards/rejected": -9.141287803649902, "step": 7877 }, { "epoch": 1.23, "learning_rate": 8.369289900928472e-06, "logits/chosen": -2.4910266399383545, "logits/rejected": -2.912637710571289, "logps/chosen": -133.31402587890625, "logps/rejected": -202.98780822753906, "loss": 1.277, "rewards/accuracies": 0.5, "rewards/chosen": -1.8635780811309814, "rewards/margins": 0.26369285583496094, "rewards/rejected": -2.1272709369659424, "step": 7878 }, { "epoch": 1.23, "learning_rate": 8.368556460397324e-06, "logits/chosen": -1.471496820449829, "logits/rejected": -2.5407629013061523, "logps/chosen": -234.2494354248047, "logps/rejected": -420.6730651855469, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4727771282196045, "rewards/margins": 7.049867153167725, "rewards/rejected": -8.52264404296875, "step": 7879 }, { "epoch": 1.23, "learning_rate": 8.367823019866176e-06, "logits/chosen": -2.911606788635254, "logits/rejected": -2.6452858448028564, "logps/chosen": -330.2217102050781, "logps/rejected": -205.70196533203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.6106628179550171, "rewards/margins": 8.15197467803955, "rewards/rejected": -8.7626371383667, "step": 7880 }, { "epoch": 1.23, "learning_rate": 8.367089579335028e-06, "logits/chosen": -2.792808771133423, "logits/rejected": -2.87446928024292, "logps/chosen": -175.03988647460938, "logps/rejected": -213.89749145507812, "loss": 2.2903, "rewards/accuracies": 0.5, "rewards/chosen": -5.071671485900879, "rewards/margins": 0.6763966083526611, "rewards/rejected": -5.748068332672119, "step": 7881 }, { "epoch": 1.23, "learning_rate": 8.36635613880388e-06, "logits/chosen": -2.6991007328033447, "logits/rejected": -2.757866382598877, "logps/chosen": -234.8194580078125, "logps/rejected": -198.7067413330078, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": -1.2024307250976562, "rewards/margins": 3.4899048805236816, "rewards/rejected": -4.692335605621338, "step": 7882 }, { "epoch": 1.23, "learning_rate": 8.365622698272732e-06, "logits/chosen": -2.46140456199646, "logits/rejected": -1.4981489181518555, "logps/chosen": -194.07591247558594, "logps/rejected": -143.1311492919922, "loss": 0.1041, "rewards/accuracies": 1.0, "rewards/chosen": -1.9840655326843262, "rewards/margins": 3.0170912742614746, "rewards/rejected": -5.001156806945801, "step": 7883 }, { "epoch": 1.23, "learning_rate": 8.364889257741584e-06, "logits/chosen": -2.8231518268585205, "logits/rejected": -2.685725688934326, "logps/chosen": -121.70245361328125, "logps/rejected": -188.7928924560547, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -2.145418882369995, "rewards/margins": 4.833796977996826, "rewards/rejected": -6.9792160987854, "step": 7884 }, { "epoch": 1.23, "learning_rate": 8.364155817210436e-06, "logits/chosen": -2.54042387008667, "logits/rejected": -3.053407907485962, "logps/chosen": -675.08203125, "logps/rejected": -616.7267456054688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.673614501953125, "rewards/margins": 7.826391220092773, "rewards/rejected": -9.500005722045898, "step": 7885 }, { "epoch": 1.23, "learning_rate": 8.363422376679288e-06, "logits/chosen": -2.618152141571045, "logits/rejected": -2.700368642807007, "logps/chosen": -35.71797180175781, "logps/rejected": -182.61703491210938, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.4568971395492554, "rewards/margins": 7.099902153015137, "rewards/rejected": -8.556798934936523, "step": 7886 }, { "epoch": 1.23, "learning_rate": 8.362688936148141e-06, "logits/chosen": -2.5146114826202393, "logits/rejected": -2.9247591495513916, "logps/chosen": -480.6675109863281, "logps/rejected": -521.8295288085938, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -3.2453575134277344, "rewards/margins": 4.142370223999023, "rewards/rejected": -7.387727737426758, "step": 7887 }, { "epoch": 1.23, "learning_rate": 8.361955495616993e-06, "logits/chosen": -3.073338270187378, "logits/rejected": -2.4187145233154297, "logps/chosen": -361.2799072265625, "logps/rejected": -345.5185241699219, "loss": 1.3187, "rewards/accuracies": 0.5, "rewards/chosen": -4.100884914398193, "rewards/margins": 4.004916667938232, "rewards/rejected": -8.105801582336426, "step": 7888 }, { "epoch": 1.23, "learning_rate": 8.361222055085845e-06, "logits/chosen": -2.7076234817504883, "logits/rejected": -2.4089231491088867, "logps/chosen": -874.6522216796875, "logps/rejected": -741.7403564453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.7424583435058594, "rewards/margins": 7.251852989196777, "rewards/rejected": -9.994312286376953, "step": 7889 }, { "epoch": 1.23, "learning_rate": 8.360488614554697e-06, "logits/chosen": -2.0210862159729004, "logits/rejected": -2.7422518730163574, "logps/chosen": -291.1981506347656, "logps/rejected": -613.9713745117188, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3302178382873535, "rewards/margins": 11.923181533813477, "rewards/rejected": -15.253398895263672, "step": 7890 }, { "epoch": 1.23, "learning_rate": 8.359755174023549e-06, "logits/chosen": -2.8989505767822266, "logits/rejected": -2.4165940284729004, "logps/chosen": -570.3187255859375, "logps/rejected": -411.9942321777344, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.8010833263397217, "rewards/margins": 6.172967910766602, "rewards/rejected": -7.974051475524902, "step": 7891 }, { "epoch": 1.23, "learning_rate": 8.3590217334924e-06, "logits/chosen": -2.8330724239349365, "logits/rejected": -2.9390456676483154, "logps/chosen": -117.75148010253906, "logps/rejected": -191.44268798828125, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -2.2264318466186523, "rewards/margins": 4.561247825622559, "rewards/rejected": -6.787679672241211, "step": 7892 }, { "epoch": 1.23, "learning_rate": 8.358288292961252e-06, "logits/chosen": -1.6810886859893799, "logits/rejected": -2.678553342819214, "logps/chosen": -501.7410583496094, "logps/rejected": -572.8724365234375, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -3.7022085189819336, "rewards/margins": 6.177199363708496, "rewards/rejected": -9.87940788269043, "step": 7893 }, { "epoch": 1.23, "learning_rate": 8.357554852430104e-06, "logits/chosen": -2.570514440536499, "logits/rejected": -2.2543630599975586, "logps/chosen": -159.14779663085938, "logps/rejected": -210.28807067871094, "loss": 0.3658, "rewards/accuracies": 0.5, "rewards/chosen": -3.38746976852417, "rewards/margins": 3.916604995727539, "rewards/rejected": -7.304074764251709, "step": 7894 }, { "epoch": 1.23, "learning_rate": 8.356821411898956e-06, "logits/chosen": -1.9602181911468506, "logits/rejected": -2.8206419944763184, "logps/chosen": -52.29831314086914, "logps/rejected": -297.41796875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -3.110384941101074, "rewards/margins": 5.860818386077881, "rewards/rejected": -8.971203804016113, "step": 7895 }, { "epoch": 1.23, "learning_rate": 8.35608797136781e-06, "logits/chosen": -2.8746981620788574, "logits/rejected": -2.825403928756714, "logps/chosen": -585.5523071289062, "logps/rejected": -555.286376953125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.821419596672058, "rewards/margins": 6.7095232009887695, "rewards/rejected": -8.530942916870117, "step": 7896 }, { "epoch": 1.23, "learning_rate": 8.355354530836662e-06, "logits/chosen": -2.0059237480163574, "logits/rejected": -2.775629758834839, "logps/chosen": -130.34808349609375, "logps/rejected": -446.1109313964844, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.0401558876037598, "rewards/margins": 6.244235992431641, "rewards/rejected": -8.284392356872559, "step": 7897 }, { "epoch": 1.23, "learning_rate": 8.354621090305513e-06, "logits/chosen": -1.4098377227783203, "logits/rejected": -2.374988317489624, "logps/chosen": -57.271568298339844, "logps/rejected": -286.91644287109375, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.614511489868164, "rewards/margins": 6.527036666870117, "rewards/rejected": -10.141548156738281, "step": 7898 }, { "epoch": 1.23, "learning_rate": 8.353887649774365e-06, "logits/chosen": -2.146453619003296, "logits/rejected": -1.4393281936645508, "logps/chosen": -290.04656982421875, "logps/rejected": -319.6939697265625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.3197038173675537, "rewards/margins": 7.575097560882568, "rewards/rejected": -9.89480209350586, "step": 7899 }, { "epoch": 1.23, "learning_rate": 8.353154209243219e-06, "logits/chosen": -1.6645054817199707, "logits/rejected": -2.7524521350860596, "logps/chosen": -76.55133056640625, "logps/rejected": -195.96665954589844, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -3.191798210144043, "rewards/margins": 4.675223350524902, "rewards/rejected": -7.8670220375061035, "step": 7900 }, { "epoch": 1.23, "learning_rate": 8.35242076871207e-06, "logits/chosen": -2.932664632797241, "logits/rejected": -2.4340670108795166, "logps/chosen": -597.1094360351562, "logps/rejected": -447.7669677734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.279541015625, "rewards/margins": 6.609846591949463, "rewards/rejected": -10.889387130737305, "step": 7901 }, { "epoch": 1.23, "learning_rate": 8.351687328180923e-06, "logits/chosen": -1.9606021642684937, "logits/rejected": -2.7080061435699463, "logps/chosen": -524.646484375, "logps/rejected": -512.4338989257812, "loss": 0.1181, "rewards/accuracies": 1.0, "rewards/chosen": -4.6022419929504395, "rewards/margins": 5.803133964538574, "rewards/rejected": -10.405375480651855, "step": 7902 }, { "epoch": 1.23, "learning_rate": 8.350953887649775e-06, "logits/chosen": -2.690269947052002, "logits/rejected": -2.883474826812744, "logps/chosen": -342.5657958984375, "logps/rejected": -486.7974853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.45351070165634155, "rewards/margins": 10.220155715942383, "rewards/rejected": -10.673666000366211, "step": 7903 }, { "epoch": 1.23, "learning_rate": 8.350220447118626e-06, "logits/chosen": -1.3908917903900146, "logits/rejected": -2.3921425342559814, "logps/chosen": -177.241455078125, "logps/rejected": -348.84820556640625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.0021979808807373, "rewards/margins": 7.883225917816162, "rewards/rejected": -9.88542366027832, "step": 7904 }, { "epoch": 1.23, "learning_rate": 8.34948700658748e-06, "logits/chosen": -2.6898226737976074, "logits/rejected": -2.6619465351104736, "logps/chosen": -301.4074401855469, "logps/rejected": -129.2920684814453, "loss": 0.2272, "rewards/accuracies": 1.0, "rewards/chosen": -2.4095776081085205, "rewards/margins": 3.4999098777770996, "rewards/rejected": -5.909487247467041, "step": 7905 }, { "epoch": 1.23, "learning_rate": 8.348753566056332e-06, "logits/chosen": -2.6914820671081543, "logits/rejected": -2.6970958709716797, "logps/chosen": -119.24636840820312, "logps/rejected": -141.967041015625, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -4.607666015625, "rewards/margins": 5.485507011413574, "rewards/rejected": -10.093172073364258, "step": 7906 }, { "epoch": 1.23, "learning_rate": 8.348020125525184e-06, "logits/chosen": -3.0696167945861816, "logits/rejected": -3.3588826656341553, "logps/chosen": -174.6728057861328, "logps/rejected": -265.7667541503906, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.765669822692871, "rewards/margins": 5.919116973876953, "rewards/rejected": -10.68478775024414, "step": 7907 }, { "epoch": 1.23, "learning_rate": 8.347286684994036e-06, "logits/chosen": -2.1882994174957275, "logits/rejected": -2.711510181427002, "logps/chosen": -371.416259765625, "logps/rejected": -486.8470458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.152373313903809, "rewards/margins": 10.550765991210938, "rewards/rejected": -14.703140258789062, "step": 7908 }, { "epoch": 1.23, "learning_rate": 8.346553244462887e-06, "logits/chosen": -2.5887973308563232, "logits/rejected": -2.8885250091552734, "logps/chosen": -95.48607635498047, "logps/rejected": -224.70111083984375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.8724000453948975, "rewards/margins": 6.091028213500977, "rewards/rejected": -7.963428497314453, "step": 7909 }, { "epoch": 1.23, "learning_rate": 8.34581980393174e-06, "logits/chosen": -2.751642942428589, "logits/rejected": -2.8895552158355713, "logps/chosen": -97.1202163696289, "logps/rejected": -284.37005615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.922086000442505, "rewards/margins": 9.401508331298828, "rewards/rejected": -12.323594093322754, "step": 7910 }, { "epoch": 1.23, "learning_rate": 8.345086363400591e-06, "logits/chosen": -2.6507022380828857, "logits/rejected": -3.0717520713806152, "logps/chosen": -142.16307067871094, "logps/rejected": -423.71990966796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.087627410888672, "rewards/margins": 8.507218360900879, "rewards/rejected": -11.59484577178955, "step": 7911 }, { "epoch": 1.23, "learning_rate": 8.344352922869443e-06, "logits/chosen": -2.827622413635254, "logits/rejected": -2.6860902309417725, "logps/chosen": -174.85765075683594, "logps/rejected": -199.18577575683594, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -0.7378303408622742, "rewards/margins": 4.194445610046387, "rewards/rejected": -4.932276248931885, "step": 7912 }, { "epoch": 1.23, "learning_rate": 8.343619482338295e-06, "logits/chosen": -3.113678216934204, "logits/rejected": -3.0367929935455322, "logps/chosen": -45.12946319580078, "logps/rejected": -134.12625122070312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.2530479431152344, "rewards/margins": 6.537236213684082, "rewards/rejected": -8.790284156799316, "step": 7913 }, { "epoch": 1.23, "learning_rate": 8.342886041807149e-06, "logits/chosen": -2.386151075363159, "logits/rejected": -2.5275650024414062, "logps/chosen": -174.9279022216797, "logps/rejected": -253.1796112060547, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.9857780933380127, "rewards/margins": 5.686056613922119, "rewards/rejected": -8.671834945678711, "step": 7914 }, { "epoch": 1.23, "learning_rate": 8.342152601276e-06, "logits/chosen": -1.579850435256958, "logits/rejected": -2.712597131729126, "logps/chosen": -195.2322998046875, "logps/rejected": -405.0275573730469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.207855224609375, "rewards/margins": 8.891552925109863, "rewards/rejected": -11.099408149719238, "step": 7915 }, { "epoch": 1.23, "learning_rate": 8.341419160744852e-06, "logits/chosen": -2.2239487171173096, "logits/rejected": -2.987915515899658, "logps/chosen": -542.4130249023438, "logps/rejected": -646.494873046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.6124634742736816, "rewards/margins": 7.00514554977417, "rewards/rejected": -9.617609024047852, "step": 7916 }, { "epoch": 1.23, "learning_rate": 8.340685720213704e-06, "logits/chosen": -1.2015129327774048, "logits/rejected": -2.7417550086975098, "logps/chosen": -177.41102600097656, "logps/rejected": -535.0252685546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6721447706222534, "rewards/margins": 9.487619400024414, "rewards/rejected": -11.159764289855957, "step": 7917 }, { "epoch": 1.23, "learning_rate": 8.339952279682556e-06, "logits/chosen": -2.793116569519043, "logits/rejected": -2.95448899269104, "logps/chosen": -134.24093627929688, "logps/rejected": -239.24659729003906, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -0.9740943908691406, "rewards/margins": 5.106636047363281, "rewards/rejected": -6.080730438232422, "step": 7918 }, { "epoch": 1.23, "learning_rate": 8.339218839151408e-06, "logits/chosen": -2.3197176456451416, "logits/rejected": -3.1056854724884033, "logps/chosen": -200.70213317871094, "logps/rejected": -355.120849609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.609898090362549, "rewards/margins": 6.474870681762695, "rewards/rejected": -9.084768295288086, "step": 7919 }, { "epoch": 1.23, "learning_rate": 8.33848539862026e-06, "logits/chosen": -1.5896061658859253, "logits/rejected": -2.7412495613098145, "logps/chosen": -392.274169921875, "logps/rejected": -628.9298095703125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.381730794906616, "rewards/margins": 6.701395034790039, "rewards/rejected": -9.083126068115234, "step": 7920 }, { "epoch": 1.23, "learning_rate": 8.337751958089112e-06, "logits/chosen": -2.6757657527923584, "logits/rejected": -2.949843645095825, "logps/chosen": -534.0643310546875, "logps/rejected": -519.3673095703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.829411268234253, "rewards/margins": 7.492265224456787, "rewards/rejected": -9.321676254272461, "step": 7921 }, { "epoch": 1.23, "learning_rate": 8.337018517557965e-06, "logits/chosen": -1.1425234079360962, "logits/rejected": -2.6713383197784424, "logps/chosen": -115.53850555419922, "logps/rejected": -525.0732421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.413458824157715, "rewards/margins": 10.07242202758789, "rewards/rejected": -13.485881805419922, "step": 7922 }, { "epoch": 1.23, "learning_rate": 8.336285077026817e-06, "logits/chosen": -2.007596731185913, "logits/rejected": -2.6220829486846924, "logps/chosen": -269.7183837890625, "logps/rejected": -554.2880249023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6177773475646973, "rewards/margins": 10.521539688110352, "rewards/rejected": -13.139317512512207, "step": 7923 }, { "epoch": 1.23, "learning_rate": 8.335551636495669e-06, "logits/chosen": -2.4506092071533203, "logits/rejected": -2.8233625888824463, "logps/chosen": -258.1285705566406, "logps/rejected": -347.484130859375, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -1.0711663961410522, "rewards/margins": 6.234886646270752, "rewards/rejected": -7.306053161621094, "step": 7924 }, { "epoch": 1.23, "learning_rate": 8.334818195964521e-06, "logits/chosen": -1.7225652933120728, "logits/rejected": -2.9496805667877197, "logps/chosen": -111.3243408203125, "logps/rejected": -288.31903076171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.3529369831085205, "rewards/margins": 6.993979454040527, "rewards/rejected": -9.346916198730469, "step": 7925 }, { "epoch": 1.23, "learning_rate": 8.334084755433373e-06, "logits/chosen": -2.604383707046509, "logits/rejected": -2.292508840560913, "logps/chosen": -483.9053955078125, "logps/rejected": -666.5506591796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7628974914550781, "rewards/margins": 12.43494987487793, "rewards/rejected": -13.197847366333008, "step": 7926 }, { "epoch": 1.23, "learning_rate": 8.333351314902225e-06, "logits/chosen": -2.405622959136963, "logits/rejected": -3.0732035636901855, "logps/chosen": -37.64623260498047, "logps/rejected": -281.4158935546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.377831220626831, "rewards/margins": 6.760692596435547, "rewards/rejected": -9.13852310180664, "step": 7927 }, { "epoch": 1.23, "learning_rate": 8.332617874371077e-06, "logits/chosen": -2.694239854812622, "logits/rejected": -2.294790744781494, "logps/chosen": -327.9451904296875, "logps/rejected": -281.59716796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.054124355316162, "rewards/margins": 6.48012113571167, "rewards/rejected": -8.534245491027832, "step": 7928 }, { "epoch": 1.23, "learning_rate": 8.331884433839928e-06, "logits/chosen": -3.0022780895233154, "logits/rejected": -2.842700958251953, "logps/chosen": -470.4849548339844, "logps/rejected": -427.15777587890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7755439281463623, "rewards/margins": 8.253031730651855, "rewards/rejected": -10.028575897216797, "step": 7929 }, { "epoch": 1.23, "learning_rate": 8.33115099330878e-06, "logits/chosen": -2.20475697517395, "logits/rejected": -2.9679434299468994, "logps/chosen": -118.95174407958984, "logps/rejected": -302.16058349609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.6468384265899658, "rewards/margins": 7.564271926879883, "rewards/rejected": -9.21111011505127, "step": 7930 }, { "epoch": 1.23, "learning_rate": 8.330417552777634e-06, "logits/chosen": -2.8240885734558105, "logits/rejected": -2.8538591861724854, "logps/chosen": -182.18394470214844, "logps/rejected": -339.17327880859375, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": -2.7349252700805664, "rewards/margins": 3.440659284591675, "rewards/rejected": -6.17558479309082, "step": 7931 }, { "epoch": 1.23, "learning_rate": 8.329684112246486e-06, "logits/chosen": -2.5803351402282715, "logits/rejected": -2.0604305267333984, "logps/chosen": -289.060546875, "logps/rejected": -257.1695861816406, "loss": 0.1016, "rewards/accuracies": 1.0, "rewards/chosen": -2.13627028465271, "rewards/margins": 6.78632926940918, "rewards/rejected": -8.922599792480469, "step": 7932 }, { "epoch": 1.23, "learning_rate": 8.328950671715338e-06, "logits/chosen": -2.5451221466064453, "logits/rejected": -2.792311191558838, "logps/chosen": -69.59101867675781, "logps/rejected": -257.7205810546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.5617170333862305, "rewards/margins": 8.318426132202148, "rewards/rejected": -9.880142211914062, "step": 7933 }, { "epoch": 1.23, "learning_rate": 8.328217231184191e-06, "logits/chosen": -1.374779224395752, "logits/rejected": -2.6899657249450684, "logps/chosen": -170.62960815429688, "logps/rejected": -436.6028137207031, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.7870514392852783, "rewards/margins": 5.674955368041992, "rewards/rejected": -9.462006568908691, "step": 7934 }, { "epoch": 1.23, "learning_rate": 8.327483790653043e-06, "logits/chosen": -2.8085451126098633, "logits/rejected": -2.6919071674346924, "logps/chosen": -235.403076171875, "logps/rejected": -383.8717041015625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.4093185663223267, "rewards/margins": 5.46508264541626, "rewards/rejected": -6.874401092529297, "step": 7935 }, { "epoch": 1.23, "learning_rate": 8.326750350121895e-06, "logits/chosen": -2.131481885910034, "logits/rejected": -2.6187548637390137, "logps/chosen": -234.07972717285156, "logps/rejected": -298.83258056640625, "loss": 0.6464, "rewards/accuracies": 0.5, "rewards/chosen": -4.9058518409729, "rewards/margins": 3.1676244735717773, "rewards/rejected": -8.073476791381836, "step": 7936 }, { "epoch": 1.23, "learning_rate": 8.326016909590747e-06, "logits/chosen": -1.1627346277236938, "logits/rejected": -2.5376780033111572, "logps/chosen": -95.14784240722656, "logps/rejected": -428.39068603515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8545544147491455, "rewards/margins": 9.22613525390625, "rewards/rejected": -12.080690383911133, "step": 7937 }, { "epoch": 1.23, "learning_rate": 8.325283469059599e-06, "logits/chosen": -2.71526837348938, "logits/rejected": -2.2859394550323486, "logps/chosen": -251.93780517578125, "logps/rejected": -283.5875244140625, "loss": 0.8776, "rewards/accuracies": 0.5, "rewards/chosen": -3.782235622406006, "rewards/margins": 2.071197986602783, "rewards/rejected": -5.853433609008789, "step": 7938 }, { "epoch": 1.23, "learning_rate": 8.32455002852845e-06, "logits/chosen": -2.20147967338562, "logits/rejected": -2.5541250705718994, "logps/chosen": -59.47481155395508, "logps/rejected": -230.93115234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.27278995513916, "rewards/margins": 7.5289154052734375, "rewards/rejected": -9.801705360412598, "step": 7939 }, { "epoch": 1.23, "learning_rate": 8.323816587997304e-06, "logits/chosen": -1.6187450885772705, "logits/rejected": -2.2267913818359375, "logps/chosen": -218.7208251953125, "logps/rejected": -324.2683410644531, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.552464485168457, "rewards/margins": 6.620577812194824, "rewards/rejected": -9.173042297363281, "step": 7940 }, { "epoch": 1.23, "learning_rate": 8.323083147466156e-06, "logits/chosen": -2.668501615524292, "logits/rejected": -2.813502073287964, "logps/chosen": -258.75579833984375, "logps/rejected": -247.85169982910156, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -3.0800130367279053, "rewards/margins": 6.6654767990112305, "rewards/rejected": -9.745490074157715, "step": 7941 }, { "epoch": 1.24, "learning_rate": 8.322349706935008e-06, "logits/chosen": -1.7366712093353271, "logits/rejected": -2.618586778640747, "logps/chosen": -160.475830078125, "logps/rejected": -332.1347961425781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3346681594848633, "rewards/margins": 9.626110076904297, "rewards/rejected": -11.960777282714844, "step": 7942 }, { "epoch": 1.24, "learning_rate": 8.32161626640386e-06, "logits/chosen": -2.8426878452301025, "logits/rejected": -2.4041097164154053, "logps/chosen": -184.95037841796875, "logps/rejected": -318.66064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9263641238212585, "rewards/margins": 10.764274597167969, "rewards/rejected": -11.690637588500977, "step": 7943 }, { "epoch": 1.24, "learning_rate": 8.320882825872712e-06, "logits/chosen": -1.9475263357162476, "logits/rejected": -2.957493782043457, "logps/chosen": -217.65879821777344, "logps/rejected": -455.08245849609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.4041554927825928, "rewards/margins": 7.119256973266602, "rewards/rejected": -9.523412704467773, "step": 7944 }, { "epoch": 1.24, "learning_rate": 8.320149385341564e-06, "logits/chosen": -2.8496177196502686, "logits/rejected": -2.4546830654144287, "logps/chosen": -130.59645080566406, "logps/rejected": -164.093994140625, "loss": 1.1862, "rewards/accuracies": 0.5, "rewards/chosen": -5.1074090003967285, "rewards/margins": 1.1798864603042603, "rewards/rejected": -6.287295341491699, "step": 7945 }, { "epoch": 1.24, "learning_rate": 8.319415944810415e-06, "logits/chosen": -2.775822401046753, "logits/rejected": -2.5468928813934326, "logps/chosen": -333.743896484375, "logps/rejected": -492.43304443359375, "loss": 0.1292, "rewards/accuracies": 1.0, "rewards/chosen": -6.691794395446777, "rewards/margins": 2.3569836616516113, "rewards/rejected": -9.04877758026123, "step": 7946 }, { "epoch": 1.24, "learning_rate": 8.318682504279267e-06, "logits/chosen": -2.4360876083374023, "logits/rejected": -2.7633495330810547, "logps/chosen": -97.0768814086914, "logps/rejected": -211.704833984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.6465034484863281, "rewards/margins": 7.430523872375488, "rewards/rejected": -8.077027320861816, "step": 7947 }, { "epoch": 1.24, "learning_rate": 8.31794906374812e-06, "logits/chosen": -2.892702341079712, "logits/rejected": -2.754946708679199, "logps/chosen": -560.6094360351562, "logps/rejected": -499.70001220703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5517954230308533, "rewards/margins": 8.674304008483887, "rewards/rejected": -9.226099014282227, "step": 7948 }, { "epoch": 1.24, "learning_rate": 8.317215623216973e-06, "logits/chosen": -1.9315310716629028, "logits/rejected": -2.8386311531066895, "logps/chosen": -82.72727966308594, "logps/rejected": -288.6302490234375, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -2.8075337409973145, "rewards/margins": 6.581097602844238, "rewards/rejected": -9.388631820678711, "step": 7949 }, { "epoch": 1.24, "learning_rate": 8.316482182685825e-06, "logits/chosen": -2.065638303756714, "logits/rejected": -2.664163827896118, "logps/chosen": -814.1578979492188, "logps/rejected": -327.2242431640625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.7973670959472656, "rewards/margins": 6.420639991760254, "rewards/rejected": -8.21800708770752, "step": 7950 }, { "epoch": 1.24, "learning_rate": 8.315748742154677e-06, "logits/chosen": -1.9339253902435303, "logits/rejected": -2.2944512367248535, "logps/chosen": -345.71868896484375, "logps/rejected": -547.9481201171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9109388589859009, "rewards/margins": 10.341789245605469, "rewards/rejected": -12.252728462219238, "step": 7951 }, { "epoch": 1.24, "learning_rate": 8.315015301623528e-06, "logits/chosen": -3.024794816970825, "logits/rejected": -2.7344472408294678, "logps/chosen": -254.4450225830078, "logps/rejected": -242.8313446044922, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.8398605585098267, "rewards/margins": 6.866607666015625, "rewards/rejected": -7.70646858215332, "step": 7952 }, { "epoch": 1.24, "learning_rate": 8.31428186109238e-06, "logits/chosen": -2.274263620376587, "logits/rejected": -2.5955724716186523, "logps/chosen": -114.00184631347656, "logps/rejected": -321.29949951171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8103549480438232, "rewards/margins": 8.95750617980957, "rewards/rejected": -12.767860412597656, "step": 7953 }, { "epoch": 1.24, "learning_rate": 8.313548420561232e-06, "logits/chosen": -1.4979571104049683, "logits/rejected": -2.7547101974487305, "logps/chosen": -138.80059814453125, "logps/rejected": -604.1075439453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.914685010910034, "rewards/margins": 9.920316696166992, "rewards/rejected": -12.835000991821289, "step": 7954 }, { "epoch": 1.24, "learning_rate": 8.312814980030084e-06, "logits/chosen": -2.046403169631958, "logits/rejected": -2.4677467346191406, "logps/chosen": -124.56007385253906, "logps/rejected": -276.5672607421875, "loss": 0.1638, "rewards/accuracies": 1.0, "rewards/chosen": -6.184208869934082, "rewards/margins": 1.8686163425445557, "rewards/rejected": -8.052824974060059, "step": 7955 }, { "epoch": 1.24, "learning_rate": 8.312081539498936e-06, "logits/chosen": -2.467308521270752, "logits/rejected": -3.0164248943328857, "logps/chosen": -297.7352294921875, "logps/rejected": -516.1507568359375, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -2.5877866744995117, "rewards/margins": 4.631501197814941, "rewards/rejected": -7.219287872314453, "step": 7956 }, { "epoch": 1.24, "learning_rate": 8.311348098967788e-06, "logits/chosen": -2.3235254287719727, "logits/rejected": -2.502685308456421, "logps/chosen": -503.1258544921875, "logps/rejected": -307.9677734375, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.8049371242523193, "rewards/margins": 7.699458122253418, "rewards/rejected": -10.504395484924316, "step": 7957 }, { "epoch": 1.24, "learning_rate": 8.310614658436641e-06, "logits/chosen": -2.503223419189453, "logits/rejected": -1.2563655376434326, "logps/chosen": -404.89483642578125, "logps/rejected": -248.9414520263672, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 0.3452727794647217, "rewards/margins": 6.835325717926025, "rewards/rejected": -6.490053176879883, "step": 7958 }, { "epoch": 1.24, "learning_rate": 8.309881217905493e-06, "logits/chosen": -2.6011571884155273, "logits/rejected": -1.6656248569488525, "logps/chosen": -318.546875, "logps/rejected": -283.7726745605469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.5931564569473267, "rewards/margins": 9.267338752746582, "rewards/rejected": -9.860494613647461, "step": 7959 }, { "epoch": 1.24, "learning_rate": 8.309147777374345e-06, "logits/chosen": -2.6986005306243896, "logits/rejected": -2.7319819927215576, "logps/chosen": -391.666015625, "logps/rejected": -853.010986328125, "loss": 0.0754, "rewards/accuracies": 1.0, "rewards/chosen": -4.271317958831787, "rewards/margins": 4.928957939147949, "rewards/rejected": -9.200276374816895, "step": 7960 }, { "epoch": 1.24, "learning_rate": 8.308414336843197e-06, "logits/chosen": -2.803755521774292, "logits/rejected": -2.0587403774261475, "logps/chosen": -252.2269287109375, "logps/rejected": -135.75271606445312, "loss": 2.2425, "rewards/accuracies": 0.5, "rewards/chosen": -5.3722991943359375, "rewards/margins": -0.7226854562759399, "rewards/rejected": -4.649613857269287, "step": 7961 }, { "epoch": 1.24, "learning_rate": 8.307680896312049e-06, "logits/chosen": -2.5475046634674072, "logits/rejected": -2.739076614379883, "logps/chosen": -67.27008056640625, "logps/rejected": -293.5884704589844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.751380443572998, "rewards/margins": 9.509416580200195, "rewards/rejected": -11.260797500610352, "step": 7962 }, { "epoch": 1.24, "learning_rate": 8.3069474557809e-06, "logits/chosen": -2.6509275436401367, "logits/rejected": -2.8257622718811035, "logps/chosen": -46.267051696777344, "logps/rejected": -191.59471130371094, "loss": 0.0434, "rewards/accuracies": 1.0, "rewards/chosen": -2.4753193855285645, "rewards/margins": 5.143041610717773, "rewards/rejected": -7.618361473083496, "step": 7963 }, { "epoch": 1.24, "learning_rate": 8.306214015249753e-06, "logits/chosen": -2.21103572845459, "logits/rejected": -2.5521202087402344, "logps/chosen": -188.54598999023438, "logps/rejected": -384.98876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7913048267364502, "rewards/margins": 10.42261791229248, "rewards/rejected": -12.213922500610352, "step": 7964 }, { "epoch": 1.24, "learning_rate": 8.305480574718605e-06, "logits/chosen": -2.646876573562622, "logits/rejected": -1.4466872215270996, "logps/chosen": -277.43499755859375, "logps/rejected": -230.82254028320312, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": -1.8516755104064941, "rewards/margins": 4.091649055480957, "rewards/rejected": -5.943324565887451, "step": 7965 }, { "epoch": 1.24, "learning_rate": 8.304747134187456e-06, "logits/chosen": -2.5192172527313232, "logits/rejected": -2.5584230422973633, "logps/chosen": -94.95413970947266, "logps/rejected": -187.13427734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.3212182521820068, "rewards/margins": 8.007991790771484, "rewards/rejected": -9.32921028137207, "step": 7966 }, { "epoch": 1.24, "learning_rate": 8.30401369365631e-06, "logits/chosen": -3.037762403488159, "logits/rejected": -2.825451374053955, "logps/chosen": -603.638671875, "logps/rejected": -512.2423095703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": 2.0611648559570312, "rewards/margins": 9.313813209533691, "rewards/rejected": -7.25264835357666, "step": 7967 }, { "epoch": 1.24, "learning_rate": 8.303280253125162e-06, "logits/chosen": -2.8549962043762207, "logits/rejected": -2.5802576541900635, "logps/chosen": -206.24221801757812, "logps/rejected": -203.8974609375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": 0.09641113877296448, "rewards/margins": 7.073575973510742, "rewards/rejected": -6.977165222167969, "step": 7968 }, { "epoch": 1.24, "learning_rate": 8.302546812594015e-06, "logits/chosen": -2.5341618061065674, "logits/rejected": -2.6889796257019043, "logps/chosen": -82.81736755371094, "logps/rejected": -309.7739562988281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.745078444480896, "rewards/margins": 11.783878326416016, "rewards/rejected": -12.52895736694336, "step": 7969 }, { "epoch": 1.24, "learning_rate": 8.301813372062867e-06, "logits/chosen": -1.3633413314819336, "logits/rejected": -2.881183385848999, "logps/chosen": -176.04965209960938, "logps/rejected": -412.3321533203125, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": -2.2683424949645996, "rewards/margins": 6.290742874145508, "rewards/rejected": -8.55908489227295, "step": 7970 }, { "epoch": 1.24, "learning_rate": 8.30107993153172e-06, "logits/chosen": -2.233703851699829, "logits/rejected": -2.8213753700256348, "logps/chosen": -101.40884399414062, "logps/rejected": -201.6597137451172, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -2.0380845069885254, "rewards/margins": 5.922171115875244, "rewards/rejected": -7.9602556228637695, "step": 7971 }, { "epoch": 1.24, "learning_rate": 8.300346491000571e-06, "logits/chosen": -2.5830793380737305, "logits/rejected": -2.614628791809082, "logps/chosen": -111.00325775146484, "logps/rejected": -229.42877197265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.160975933074951, "rewards/margins": 7.540916442871094, "rewards/rejected": -9.701892852783203, "step": 7972 }, { "epoch": 1.24, "learning_rate": 8.299613050469423e-06, "logits/chosen": -2.8499557971954346, "logits/rejected": -2.5761029720306396, "logps/chosen": -179.9370574951172, "logps/rejected": -164.769287109375, "loss": 1.4043, "rewards/accuracies": 0.5, "rewards/chosen": -5.360579967498779, "rewards/margins": 2.087296724319458, "rewards/rejected": -7.447876930236816, "step": 7973 }, { "epoch": 1.24, "learning_rate": 8.298879609938275e-06, "logits/chosen": -2.668826103210449, "logits/rejected": -2.8705925941467285, "logps/chosen": -175.1863555908203, "logps/rejected": -303.35491943359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.37837815284729, "rewards/margins": 8.400619506835938, "rewards/rejected": -9.778997421264648, "step": 7974 }, { "epoch": 1.24, "learning_rate": 8.298146169407127e-06, "logits/chosen": -3.00384783744812, "logits/rejected": -3.1195902824401855, "logps/chosen": -99.48236083984375, "logps/rejected": -230.0673065185547, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.5049424171447754, "rewards/margins": 7.538089752197266, "rewards/rejected": -9.043031692504883, "step": 7975 }, { "epoch": 1.24, "learning_rate": 8.29741272887598e-06, "logits/chosen": -2.6619715690612793, "logits/rejected": -2.642158269882202, "logps/chosen": -199.30014038085938, "logps/rejected": -221.93798828125, "loss": 0.1297, "rewards/accuracies": 1.0, "rewards/chosen": -1.313023328781128, "rewards/margins": 4.964444637298584, "rewards/rejected": -6.277467727661133, "step": 7976 }, { "epoch": 1.24, "learning_rate": 8.296679288344832e-06, "logits/chosen": -2.5899758338928223, "logits/rejected": -1.9330880641937256, "logps/chosen": -671.007080078125, "logps/rejected": -438.7087707519531, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": 0.2513994574546814, "rewards/margins": 7.433204650878906, "rewards/rejected": -7.181804656982422, "step": 7977 }, { "epoch": 1.24, "learning_rate": 8.295945847813684e-06, "logits/chosen": -2.6325440406799316, "logits/rejected": -2.9578490257263184, "logps/chosen": -1262.202880859375, "logps/rejected": -1068.4580078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.081964135169983, "rewards/margins": 9.608345985412598, "rewards/rejected": -10.69031047821045, "step": 7978 }, { "epoch": 1.24, "learning_rate": 8.295212407282536e-06, "logits/chosen": -2.594144821166992, "logits/rejected": -1.5125977993011475, "logps/chosen": -353.91314697265625, "logps/rejected": -137.644287109375, "loss": 1.2691, "rewards/accuracies": 0.5, "rewards/chosen": -4.847639560699463, "rewards/margins": 0.5366007089614868, "rewards/rejected": -5.38424015045166, "step": 7979 }, { "epoch": 1.24, "learning_rate": 8.294478966751388e-06, "logits/chosen": -2.097470283508301, "logits/rejected": -2.657644510269165, "logps/chosen": -99.35009765625, "logps/rejected": -195.8763427734375, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": -1.8960120677947998, "rewards/margins": 5.768444061279297, "rewards/rejected": -7.664456367492676, "step": 7980 }, { "epoch": 1.24, "learning_rate": 8.29374552622024e-06, "logits/chosen": -2.5109384059906006, "logits/rejected": -2.8406219482421875, "logps/chosen": -257.9638977050781, "logps/rejected": -428.91424560546875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.3217077255249023, "rewards/margins": 7.0430097579956055, "rewards/rejected": -9.364717483520508, "step": 7981 }, { "epoch": 1.24, "learning_rate": 8.293012085689092e-06, "logits/chosen": -2.9600226879119873, "logits/rejected": -2.991637945175171, "logps/chosen": -466.3969421386719, "logps/rejected": -631.0469970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6271759271621704, "rewards/margins": 14.035913467407227, "rewards/rejected": -13.408737182617188, "step": 7982 }, { "epoch": 1.24, "learning_rate": 8.292278645157943e-06, "logits/chosen": -2.834022283554077, "logits/rejected": -2.457820177078247, "logps/chosen": -306.13079833984375, "logps/rejected": -402.842041015625, "loss": 0.8211, "rewards/accuracies": 0.5, "rewards/chosen": -3.594219207763672, "rewards/margins": 2.313443660736084, "rewards/rejected": -5.907662868499756, "step": 7983 }, { "epoch": 1.24, "learning_rate": 8.291545204626795e-06, "logits/chosen": -1.0567179918289185, "logits/rejected": -2.9155187606811523, "logps/chosen": -78.72422790527344, "logps/rejected": -329.95391845703125, "loss": 0.1586, "rewards/accuracies": 1.0, "rewards/chosen": -2.400542736053467, "rewards/margins": 3.296401023864746, "rewards/rejected": -5.696943759918213, "step": 7984 }, { "epoch": 1.24, "learning_rate": 8.290811764095649e-06, "logits/chosen": -1.607803463935852, "logits/rejected": -2.8665196895599365, "logps/chosen": -50.02183151245117, "logps/rejected": -285.2978515625, "loss": 0.1447, "rewards/accuracies": 1.0, "rewards/chosen": -2.38885498046875, "rewards/margins": 4.517097473144531, "rewards/rejected": -6.905952453613281, "step": 7985 }, { "epoch": 1.24, "learning_rate": 8.2900783235645e-06, "logits/chosen": -1.8896998167037964, "logits/rejected": -2.911794662475586, "logps/chosen": -94.0591049194336, "logps/rejected": -401.63763427734375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.495702862739563, "rewards/margins": 7.234940528869629, "rewards/rejected": -8.730643272399902, "step": 7986 }, { "epoch": 1.24, "learning_rate": 8.289344883033353e-06, "logits/chosen": -2.9878594875335693, "logits/rejected": -3.163606643676758, "logps/chosen": -314.92327880859375, "logps/rejected": -426.94549560546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.612621784210205, "rewards/margins": 8.848226547241211, "rewards/rejected": -11.460847854614258, "step": 7987 }, { "epoch": 1.24, "learning_rate": 8.288611442502204e-06, "logits/chosen": -2.6654787063598633, "logits/rejected": -2.978363275527954, "logps/chosen": -89.54963684082031, "logps/rejected": -292.8446044921875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -1.034231185913086, "rewards/margins": 5.015464782714844, "rewards/rejected": -6.04969596862793, "step": 7988 }, { "epoch": 1.24, "learning_rate": 8.287878001971056e-06, "logits/chosen": -3.0572826862335205, "logits/rejected": -3.206737995147705, "logps/chosen": -100.82534790039062, "logps/rejected": -159.4092254638672, "loss": 2.3895, "rewards/accuracies": 0.5, "rewards/chosen": -4.260377883911133, "rewards/margins": -0.05582022666931152, "rewards/rejected": -4.2045578956604, "step": 7989 }, { "epoch": 1.24, "learning_rate": 8.287144561439908e-06, "logits/chosen": -1.7356042861938477, "logits/rejected": -2.8561012744903564, "logps/chosen": -80.66175079345703, "logps/rejected": -379.4241943359375, "loss": 0.1557, "rewards/accuracies": 1.0, "rewards/chosen": -2.4094717502593994, "rewards/margins": 6.416306495666504, "rewards/rejected": -8.825778007507324, "step": 7990 }, { "epoch": 1.24, "learning_rate": 8.28641112090876e-06, "logits/chosen": -3.011955499649048, "logits/rejected": -3.131699562072754, "logps/chosen": -108.71766662597656, "logps/rejected": -241.021728515625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -1.5116870403289795, "rewards/margins": 5.086784362792969, "rewards/rejected": -6.598471164703369, "step": 7991 }, { "epoch": 1.24, "learning_rate": 8.285677680377612e-06, "logits/chosen": -1.5180857181549072, "logits/rejected": -2.7380659580230713, "logps/chosen": -106.24534606933594, "logps/rejected": -567.229736328125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.8010478019714355, "rewards/margins": 7.382530689239502, "rewards/rejected": -10.183578491210938, "step": 7992 }, { "epoch": 1.24, "learning_rate": 8.284944239846464e-06, "logits/chosen": -2.670064687728882, "logits/rejected": -2.2109086513519287, "logps/chosen": -129.17913818359375, "logps/rejected": -158.95181274414062, "loss": 0.3682, "rewards/accuracies": 0.5, "rewards/chosen": -2.9395787715911865, "rewards/margins": 4.32090950012207, "rewards/rejected": -7.260488510131836, "step": 7993 }, { "epoch": 1.24, "learning_rate": 8.284210799315317e-06, "logits/chosen": -2.895608901977539, "logits/rejected": -2.9556782245635986, "logps/chosen": -204.91717529296875, "logps/rejected": -441.9424133300781, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -2.1906447410583496, "rewards/margins": 5.165431022644043, "rewards/rejected": -7.356076240539551, "step": 7994 }, { "epoch": 1.24, "learning_rate": 8.28347735878417e-06, "logits/chosen": -0.6543717980384827, "logits/rejected": -1.9858781099319458, "logps/chosen": -111.10502624511719, "logps/rejected": -361.2854919433594, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.380436897277832, "rewards/margins": 10.017762184143066, "rewards/rejected": -12.398199081420898, "step": 7995 }, { "epoch": 1.24, "learning_rate": 8.282743918253021e-06, "logits/chosen": -2.121939182281494, "logits/rejected": -2.967820882797241, "logps/chosen": -155.3788604736328, "logps/rejected": -360.914794921875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.5474653244018555, "rewards/margins": 5.423095703125, "rewards/rejected": -7.9705610275268555, "step": 7996 }, { "epoch": 1.24, "learning_rate": 8.282010477721873e-06, "logits/chosen": -2.152707099914551, "logits/rejected": -2.794106960296631, "logps/chosen": -111.70365905761719, "logps/rejected": -398.33050537109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.097381591796875, "rewards/margins": 9.423937797546387, "rewards/rejected": -10.521319389343262, "step": 7997 }, { "epoch": 1.24, "learning_rate": 8.281277037190725e-06, "logits/chosen": -2.0078508853912354, "logits/rejected": -3.2158043384552, "logps/chosen": -106.2003173828125, "logps/rejected": -384.3977355957031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.4330217838287354, "rewards/margins": 9.437881469726562, "rewards/rejected": -8.004859924316406, "step": 7998 }, { "epoch": 1.24, "learning_rate": 8.280543596659577e-06, "logits/chosen": -2.774729013442993, "logits/rejected": -2.4766180515289307, "logps/chosen": -434.044677734375, "logps/rejected": -390.23175048828125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.7433419227600098, "rewards/margins": 6.503034591674805, "rewards/rejected": -10.246376037597656, "step": 7999 }, { "epoch": 1.24, "learning_rate": 8.279810156128429e-06, "logits/chosen": -2.0120227336883545, "logits/rejected": -2.8862197399139404, "logps/chosen": -321.9801940917969, "logps/rejected": -352.45635986328125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -1.5756957530975342, "rewards/margins": 5.819075107574463, "rewards/rejected": -7.394770622253418, "step": 8000 }, { "epoch": 1.24, "learning_rate": 8.279076715597282e-06, "logits/chosen": -2.702363967895508, "logits/rejected": -2.741743326187134, "logps/chosen": -646.157958984375, "logps/rejected": -524.7835083007812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.6938499212265015, "rewards/margins": 7.9983415603637695, "rewards/rejected": -9.692192077636719, "step": 8001 }, { "epoch": 1.24, "learning_rate": 8.278343275066134e-06, "logits/chosen": -2.324047803878784, "logits/rejected": -2.7241322994232178, "logps/chosen": -273.46868896484375, "logps/rejected": -315.28985595703125, "loss": 0.0304, "rewards/accuracies": 1.0, "rewards/chosen": -1.5151793956756592, "rewards/margins": 5.803291320800781, "rewards/rejected": -7.3184709548950195, "step": 8002 }, { "epoch": 1.24, "learning_rate": 8.277609834534988e-06, "logits/chosen": -2.0968377590179443, "logits/rejected": -2.2051432132720947, "logps/chosen": -67.43766784667969, "logps/rejected": -156.94338989257812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.6616768836975098, "rewards/margins": 6.393523216247559, "rewards/rejected": -8.055200576782227, "step": 8003 }, { "epoch": 1.24, "learning_rate": 8.27687639400384e-06, "logits/chosen": -2.757589340209961, "logits/rejected": -3.077693223953247, "logps/chosen": -116.86994934082031, "logps/rejected": -320.65478515625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.697563886642456, "rewards/margins": 6.949965476989746, "rewards/rejected": -8.647529602050781, "step": 8004 }, { "epoch": 1.24, "learning_rate": 8.276142953472692e-06, "logits/chosen": -2.23384165763855, "logits/rejected": -3.1797938346862793, "logps/chosen": -99.60515594482422, "logps/rejected": -541.8977661132812, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.3118021488189697, "rewards/margins": 7.882339954376221, "rewards/rejected": -10.19414234161377, "step": 8005 }, { "epoch": 1.25, "learning_rate": 8.275409512941543e-06, "logits/chosen": -2.4952056407928467, "logits/rejected": -2.220454692840576, "logps/chosen": -172.90585327148438, "logps/rejected": -171.858642578125, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": -0.6754226684570312, "rewards/margins": 4.043946266174316, "rewards/rejected": -4.719368934631348, "step": 8006 }, { "epoch": 1.25, "learning_rate": 8.274676072410395e-06, "logits/chosen": -2.8626463413238525, "logits/rejected": -2.8894107341766357, "logps/chosen": -398.4825744628906, "logps/rejected": -401.35235595703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.5468257069587708, "rewards/margins": 9.507708549499512, "rewards/rejected": -8.960883140563965, "step": 8007 }, { "epoch": 1.25, "learning_rate": 8.273942631879247e-06, "logits/chosen": -2.6850712299346924, "logits/rejected": -2.6407082080841064, "logps/chosen": -193.6807861328125, "logps/rejected": -281.2083435058594, "loss": 0.6288, "rewards/accuracies": 0.5, "rewards/chosen": -3.4833450317382812, "rewards/margins": 3.300157308578491, "rewards/rejected": -6.783502578735352, "step": 8008 }, { "epoch": 1.25, "learning_rate": 8.273209191348099e-06, "logits/chosen": -2.076709508895874, "logits/rejected": -2.3575003147125244, "logps/chosen": -216.11080932617188, "logps/rejected": -253.89501953125, "loss": 2.6529, "rewards/accuracies": 0.5, "rewards/chosen": -5.56633996963501, "rewards/margins": 1.6029267311096191, "rewards/rejected": -7.169266700744629, "step": 8009 }, { "epoch": 1.25, "learning_rate": 8.272475750816951e-06, "logits/chosen": -2.753936767578125, "logits/rejected": -2.0252861976623535, "logps/chosen": -402.6086120605469, "logps/rejected": -334.00701904296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.5699868202209473, "rewards/margins": 7.547354221343994, "rewards/rejected": -11.117341041564941, "step": 8010 }, { "epoch": 1.25, "learning_rate": 8.271742310285803e-06, "logits/chosen": -2.463102340698242, "logits/rejected": -2.4321930408477783, "logps/chosen": -131.21995544433594, "logps/rejected": -325.663330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5006267428398132, "rewards/margins": 11.99029541015625, "rewards/rejected": -12.490922927856445, "step": 8011 }, { "epoch": 1.25, "learning_rate": 8.271008869754656e-06, "logits/chosen": -2.640989303588867, "logits/rejected": -2.042612314224243, "logps/chosen": -269.0958251953125, "logps/rejected": -232.67579650878906, "loss": 2.3554, "rewards/accuracies": 0.5, "rewards/chosen": -5.494063854217529, "rewards/margins": 0.9322056770324707, "rewards/rejected": -6.42626953125, "step": 8012 }, { "epoch": 1.25, "learning_rate": 8.270275429223508e-06, "logits/chosen": -2.770057201385498, "logits/rejected": -2.651813268661499, "logps/chosen": -321.7305908203125, "logps/rejected": -299.17230224609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8678619861602783, "rewards/margins": 9.305672645568848, "rewards/rejected": -10.173534393310547, "step": 8013 }, { "epoch": 1.25, "learning_rate": 8.26954198869236e-06, "logits/chosen": -1.0061228275299072, "logits/rejected": -1.9499200582504272, "logps/chosen": -86.84617614746094, "logps/rejected": -297.79327392578125, "loss": 0.1412, "rewards/accuracies": 1.0, "rewards/chosen": -2.313713312149048, "rewards/margins": 6.8140974044799805, "rewards/rejected": -9.127811431884766, "step": 8014 }, { "epoch": 1.25, "learning_rate": 8.268808548161212e-06, "logits/chosen": -2.3510074615478516, "logits/rejected": -2.84963059425354, "logps/chosen": -118.63616943359375, "logps/rejected": -285.22930908203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.208961844444275, "rewards/margins": 6.818010330200195, "rewards/rejected": -8.026971817016602, "step": 8015 }, { "epoch": 1.25, "learning_rate": 8.268075107630064e-06, "logits/chosen": -2.2861411571502686, "logits/rejected": -3.1182868480682373, "logps/chosen": -198.69430541992188, "logps/rejected": -493.7527160644531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.03426668047904968, "rewards/margins": 8.129415512084961, "rewards/rejected": -8.095149040222168, "step": 8016 }, { "epoch": 1.25, "learning_rate": 8.267341667098916e-06, "logits/chosen": -2.925565242767334, "logits/rejected": -2.784393548965454, "logps/chosen": -132.41775512695312, "logps/rejected": -242.06201171875, "loss": 1.1279, "rewards/accuracies": 0.5, "rewards/chosen": -3.295105457305908, "rewards/margins": 2.92527437210083, "rewards/rejected": -6.220379829406738, "step": 8017 }, { "epoch": 1.25, "learning_rate": 8.266608226567768e-06, "logits/chosen": -2.850088596343994, "logits/rejected": -3.030590295791626, "logps/chosen": -158.19784545898438, "logps/rejected": -380.1377868652344, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -0.25828781723976135, "rewards/margins": 6.733180999755859, "rewards/rejected": -6.99146842956543, "step": 8018 }, { "epoch": 1.25, "learning_rate": 8.26587478603662e-06, "logits/chosen": -2.0108437538146973, "logits/rejected": -2.2190301418304443, "logps/chosen": -160.37477111816406, "logps/rejected": -187.15830993652344, "loss": 1.9569, "rewards/accuracies": 0.5, "rewards/chosen": -4.6432905197143555, "rewards/margins": 0.7722219228744507, "rewards/rejected": -5.415512561798096, "step": 8019 }, { "epoch": 1.25, "learning_rate": 8.265141345505473e-06, "logits/chosen": -3.056133985519409, "logits/rejected": -2.223119020462036, "logps/chosen": -417.12884521484375, "logps/rejected": -216.11495971679688, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -3.181208848953247, "rewards/margins": 5.516689777374268, "rewards/rejected": -8.697898864746094, "step": 8020 }, { "epoch": 1.25, "learning_rate": 8.264407904974325e-06, "logits/chosen": -2.364288091659546, "logits/rejected": -2.8120875358581543, "logps/chosen": -105.17295837402344, "logps/rejected": -241.2274627685547, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.9844307899475098, "rewards/margins": 6.397558212280273, "rewards/rejected": -8.381988525390625, "step": 8021 }, { "epoch": 1.25, "learning_rate": 8.263674464443177e-06, "logits/chosen": -2.772919178009033, "logits/rejected": -2.63926100730896, "logps/chosen": -503.7484130859375, "logps/rejected": -417.3665771484375, "loss": 0.256, "rewards/accuracies": 1.0, "rewards/chosen": -2.1159372329711914, "rewards/margins": 3.7070250511169434, "rewards/rejected": -5.822962284088135, "step": 8022 }, { "epoch": 1.25, "learning_rate": 8.262941023912029e-06, "logits/chosen": -0.8181660175323486, "logits/rejected": -2.94143009185791, "logps/chosen": -55.81422805786133, "logps/rejected": -442.283935546875, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -2.6476705074310303, "rewards/margins": 4.443543434143066, "rewards/rejected": -7.091214179992676, "step": 8023 }, { "epoch": 1.25, "learning_rate": 8.26220758338088e-06, "logits/chosen": -2.805464744567871, "logits/rejected": -2.3302245140075684, "logps/chosen": -431.5849914550781, "logps/rejected": -398.7758483886719, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.4867990016937256, "rewards/margins": 6.08231782913208, "rewards/rejected": -8.569116592407227, "step": 8024 }, { "epoch": 1.25, "learning_rate": 8.261474142849732e-06, "logits/chosen": -3.226043224334717, "logits/rejected": -3.3666393756866455, "logps/chosen": -39.2402229309082, "logps/rejected": -127.42588806152344, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.6104570627212524, "rewards/margins": 6.048320293426514, "rewards/rejected": -6.658777236938477, "step": 8025 }, { "epoch": 1.25, "learning_rate": 8.260740702318584e-06, "logits/chosen": -2.6775906085968018, "logits/rejected": -2.2593133449554443, "logps/chosen": -446.96588134765625, "logps/rejected": -446.64544677734375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.9245784282684326, "rewards/margins": 6.831050872802734, "rewards/rejected": -8.75562858581543, "step": 8026 }, { "epoch": 1.25, "learning_rate": 8.260007261787436e-06, "logits/chosen": -2.5614659786224365, "logits/rejected": -2.7474522590637207, "logps/chosen": -228.51254272460938, "logps/rejected": -220.216064453125, "loss": 0.8891, "rewards/accuracies": 0.5, "rewards/chosen": -2.308725595474243, "rewards/margins": 2.8067264556884766, "rewards/rejected": -5.115451812744141, "step": 8027 }, { "epoch": 1.25, "learning_rate": 8.259273821256288e-06, "logits/chosen": -2.5985848903656006, "logits/rejected": -2.6861672401428223, "logps/chosen": -102.62391662597656, "logps/rejected": -235.1954345703125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.2225167751312256, "rewards/margins": 4.740786552429199, "rewards/rejected": -6.963303565979004, "step": 8028 }, { "epoch": 1.25, "learning_rate": 8.258540380725142e-06, "logits/chosen": -2.1147379875183105, "logits/rejected": -2.839466094970703, "logps/chosen": -416.828369140625, "logps/rejected": -482.73321533203125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.583975315093994, "rewards/margins": 6.711970329284668, "rewards/rejected": -9.29594612121582, "step": 8029 }, { "epoch": 1.25, "learning_rate": 8.257806940193994e-06, "logits/chosen": -2.415681838989258, "logits/rejected": -2.8481380939483643, "logps/chosen": -84.34909057617188, "logps/rejected": -115.07344055175781, "loss": 1.6579, "rewards/accuracies": 0.5, "rewards/chosen": -2.8730549812316895, "rewards/margins": 0.3554307222366333, "rewards/rejected": -3.228485584259033, "step": 8030 }, { "epoch": 1.25, "learning_rate": 8.257073499662845e-06, "logits/chosen": -2.39241886138916, "logits/rejected": -2.682814598083496, "logps/chosen": -301.80560302734375, "logps/rejected": -417.9011535644531, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2270599603652954, "rewards/margins": 8.225605964660645, "rewards/rejected": -9.452665328979492, "step": 8031 }, { "epoch": 1.25, "learning_rate": 8.256340059131697e-06, "logits/chosen": -1.439499855041504, "logits/rejected": -2.781080722808838, "logps/chosen": -67.3137435913086, "logps/rejected": -332.5065612792969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4175620079040527, "rewards/margins": 10.318925857543945, "rewards/rejected": -11.736488342285156, "step": 8032 }, { "epoch": 1.25, "learning_rate": 8.25560661860055e-06, "logits/chosen": -1.706091284751892, "logits/rejected": -2.7623300552368164, "logps/chosen": -119.33076477050781, "logps/rejected": -331.6033935546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2161842584609985, "rewards/margins": 8.332833290100098, "rewards/rejected": -9.549016952514648, "step": 8033 }, { "epoch": 1.25, "learning_rate": 8.254873178069401e-06, "logits/chosen": -1.8688832521438599, "logits/rejected": -2.7315962314605713, "logps/chosen": -406.9517517089844, "logps/rejected": -520.233642578125, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -3.321352481842041, "rewards/margins": 6.658042907714844, "rewards/rejected": -9.979394912719727, "step": 8034 }, { "epoch": 1.25, "learning_rate": 8.254139737538255e-06, "logits/chosen": -2.3658974170684814, "logits/rejected": -2.384493827819824, "logps/chosen": -401.6999206542969, "logps/rejected": -383.9565124511719, "loss": 1.2483, "rewards/accuracies": 0.5, "rewards/chosen": -5.029753684997559, "rewards/margins": 0.5795608758926392, "rewards/rejected": -5.609314918518066, "step": 8035 }, { "epoch": 1.25, "learning_rate": 8.253406297007107e-06, "logits/chosen": -3.0265727043151855, "logits/rejected": -2.6831157207489014, "logps/chosen": -354.8585205078125, "logps/rejected": -602.7032470703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.608464002609253, "rewards/margins": 6.996386528015137, "rewards/rejected": -8.604850769042969, "step": 8036 }, { "epoch": 1.25, "learning_rate": 8.252672856475958e-06, "logits/chosen": -3.0410759449005127, "logits/rejected": -2.9881675243377686, "logps/chosen": -327.5806579589844, "logps/rejected": -407.5955810546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.1697700023651123, "rewards/margins": 9.61178970336914, "rewards/rejected": -12.781559944152832, "step": 8037 }, { "epoch": 1.25, "learning_rate": 8.251939415944812e-06, "logits/chosen": -1.34458589553833, "logits/rejected": -2.8127458095550537, "logps/chosen": -105.54449462890625, "logps/rejected": -342.5766296386719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5786662697792053, "rewards/margins": 8.174344062805176, "rewards/rejected": -8.753010749816895, "step": 8038 }, { "epoch": 1.25, "learning_rate": 8.251205975413664e-06, "logits/chosen": -2.470276355743408, "logits/rejected": -2.565525531768799, "logps/chosen": -95.23841094970703, "logps/rejected": -233.0131072998047, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.6464755535125732, "rewards/margins": 5.739679336547852, "rewards/rejected": -7.386155128479004, "step": 8039 }, { "epoch": 1.25, "learning_rate": 8.250472534882516e-06, "logits/chosen": -1.8763211965560913, "logits/rejected": -2.8392062187194824, "logps/chosen": -97.06690216064453, "logps/rejected": -239.58729553222656, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -0.9577911496162415, "rewards/margins": 4.275879859924316, "rewards/rejected": -5.233671188354492, "step": 8040 }, { "epoch": 1.25, "learning_rate": 8.249739094351368e-06, "logits/chosen": -2.545880079269409, "logits/rejected": -2.2784478664398193, "logps/chosen": -357.866455078125, "logps/rejected": -412.141357421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.549345016479492, "rewards/margins": 8.67728042602539, "rewards/rejected": -13.226625442504883, "step": 8041 }, { "epoch": 1.25, "learning_rate": 8.24900565382022e-06, "logits/chosen": -2.308670997619629, "logits/rejected": -2.8678088188171387, "logps/chosen": -621.8094482421875, "logps/rejected": -705.112060546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.0271682739257812, "rewards/margins": 6.722689628601074, "rewards/rejected": -7.7498579025268555, "step": 8042 }, { "epoch": 1.25, "learning_rate": 8.248272213289071e-06, "logits/chosen": -1.719823956489563, "logits/rejected": -3.044729471206665, "logps/chosen": -100.14653778076172, "logps/rejected": -408.98760986328125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.490389108657837, "rewards/margins": 6.572770118713379, "rewards/rejected": -9.063159942626953, "step": 8043 }, { "epoch": 1.25, "learning_rate": 8.247538772757923e-06, "logits/chosen": -2.5877015590667725, "logits/rejected": -2.359388589859009, "logps/chosen": -129.38037109375, "logps/rejected": -359.6796875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.4949257373809814, "rewards/margins": 8.013818740844727, "rewards/rejected": -9.508744239807129, "step": 8044 }, { "epoch": 1.25, "learning_rate": 8.246805332226775e-06, "logits/chosen": -3.0172131061553955, "logits/rejected": -1.4209380149841309, "logps/chosen": -304.32647705078125, "logps/rejected": -85.55677795410156, "loss": 0.3686, "rewards/accuracies": 0.5, "rewards/chosen": -1.3775405883789062, "rewards/margins": 3.4892916679382324, "rewards/rejected": -4.8668317794799805, "step": 8045 }, { "epoch": 1.25, "learning_rate": 8.246071891695627e-06, "logits/chosen": -1.7878035306930542, "logits/rejected": -2.667983293533325, "logps/chosen": -94.29914855957031, "logps/rejected": -194.16677856445312, "loss": 0.2379, "rewards/accuracies": 1.0, "rewards/chosen": -3.841054916381836, "rewards/margins": 2.651597499847412, "rewards/rejected": -6.492652893066406, "step": 8046 }, { "epoch": 1.25, "learning_rate": 8.24533845116448e-06, "logits/chosen": -2.3249828815460205, "logits/rejected": -3.000901699066162, "logps/chosen": -175.411865234375, "logps/rejected": -358.77789306640625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.3470340967178345, "rewards/margins": 7.602169513702393, "rewards/rejected": -8.949203491210938, "step": 8047 }, { "epoch": 1.25, "learning_rate": 8.244605010633332e-06, "logits/chosen": -1.6272609233856201, "logits/rejected": -2.7919809818267822, "logps/chosen": -67.80772399902344, "logps/rejected": -345.91998291015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7621145248413086, "rewards/margins": 8.861082077026367, "rewards/rejected": -10.62319564819336, "step": 8048 }, { "epoch": 1.25, "learning_rate": 8.243871570102184e-06, "logits/chosen": -2.7985281944274902, "logits/rejected": -2.764556407928467, "logps/chosen": -165.23516845703125, "logps/rejected": -397.2673645019531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6064613461494446, "rewards/margins": 9.809060096740723, "rewards/rejected": -10.415521621704102, "step": 8049 }, { "epoch": 1.25, "learning_rate": 8.243138129571036e-06, "logits/chosen": -2.935112237930298, "logits/rejected": -2.2308239936828613, "logps/chosen": -468.47100830078125, "logps/rejected": -302.9796142578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.346968173980713, "rewards/margins": 6.531763553619385, "rewards/rejected": -8.878731727600098, "step": 8050 }, { "epoch": 1.25, "learning_rate": 8.242404689039888e-06, "logits/chosen": -3.0005710124969482, "logits/rejected": -2.038764238357544, "logps/chosen": -257.5211181640625, "logps/rejected": -149.4201202392578, "loss": 0.1207, "rewards/accuracies": 1.0, "rewards/chosen": -1.916822910308838, "rewards/margins": 3.240802764892578, "rewards/rejected": -5.157625675201416, "step": 8051 }, { "epoch": 1.25, "learning_rate": 8.24167124850874e-06, "logits/chosen": -1.4190305471420288, "logits/rejected": -2.4183707237243652, "logps/chosen": -144.3086700439453, "logps/rejected": -429.17352294921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.369726538658142, "rewards/margins": 9.319175720214844, "rewards/rejected": -10.688901901245117, "step": 8052 }, { "epoch": 1.25, "learning_rate": 8.240937807977592e-06, "logits/chosen": -2.7073473930358887, "logits/rejected": -2.801910161972046, "logps/chosen": -125.42732238769531, "logps/rejected": -148.45928955078125, "loss": 0.1344, "rewards/accuracies": 1.0, "rewards/chosen": -2.1706643104553223, "rewards/margins": 4.032345294952393, "rewards/rejected": -6.203009605407715, "step": 8053 }, { "epoch": 1.25, "learning_rate": 8.240204367446444e-06, "logits/chosen": -2.696324586868286, "logits/rejected": -2.456930160522461, "logps/chosen": -194.75991821289062, "logps/rejected": -270.4428405761719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.11229018867015839, "rewards/margins": 8.350117683410645, "rewards/rejected": -8.462408065795898, "step": 8054 }, { "epoch": 1.25, "learning_rate": 8.239470926915296e-06, "logits/chosen": -2.8056607246398926, "logits/rejected": -2.3701577186584473, "logps/chosen": -366.56842041015625, "logps/rejected": -299.0107421875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -0.923054575920105, "rewards/margins": 4.96817684173584, "rewards/rejected": -5.891231536865234, "step": 8055 }, { "epoch": 1.25, "learning_rate": 8.23873748638415e-06, "logits/chosen": -2.851969003677368, "logits/rejected": -2.2064335346221924, "logps/chosen": -266.80615234375, "logps/rejected": -161.07174682617188, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 1.9016053676605225, "rewards/margins": 7.006792068481445, "rewards/rejected": -5.105186462402344, "step": 8056 }, { "epoch": 1.25, "learning_rate": 8.238004045853001e-06, "logits/chosen": -1.7787690162658691, "logits/rejected": -2.2886452674865723, "logps/chosen": -399.49786376953125, "logps/rejected": -595.4288940429688, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -0.8202236294746399, "rewards/margins": 11.934611320495605, "rewards/rejected": -12.75483512878418, "step": 8057 }, { "epoch": 1.25, "learning_rate": 8.237270605321853e-06, "logits/chosen": -2.345649003982544, "logits/rejected": -2.7953553199768066, "logps/chosen": -130.73870849609375, "logps/rejected": -553.6202392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.7196850180625916, "rewards/margins": 12.533528327941895, "rewards/rejected": -11.8138427734375, "step": 8058 }, { "epoch": 1.25, "learning_rate": 8.236537164790705e-06, "logits/chosen": -2.655390977859497, "logits/rejected": -2.3598201274871826, "logps/chosen": -311.1361999511719, "logps/rejected": -332.740966796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.263617515563965, "rewards/margins": 8.102107048034668, "rewards/rejected": -12.365724563598633, "step": 8059 }, { "epoch": 1.25, "learning_rate": 8.235803724259557e-06, "logits/chosen": -2.6731414794921875, "logits/rejected": -2.7322964668273926, "logps/chosen": -136.09561157226562, "logps/rejected": -238.9728546142578, "loss": 0.6922, "rewards/accuracies": 0.5, "rewards/chosen": -2.8535408973693848, "rewards/margins": 5.4847283363342285, "rewards/rejected": -8.338269233703613, "step": 8060 }, { "epoch": 1.25, "learning_rate": 8.235070283728409e-06, "logits/chosen": -2.7462992668151855, "logits/rejected": -3.178351402282715, "logps/chosen": -281.78564453125, "logps/rejected": -393.78460693359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.6964492797851562, "rewards/margins": 8.122395515441895, "rewards/rejected": -10.81884479522705, "step": 8061 }, { "epoch": 1.25, "learning_rate": 8.23433684319726e-06, "logits/chosen": -2.923478841781616, "logits/rejected": -2.6347498893737793, "logps/chosen": -197.73886108398438, "logps/rejected": -210.00045776367188, "loss": 0.9232, "rewards/accuracies": 0.5, "rewards/chosen": -1.952351689338684, "rewards/margins": 2.8628950119018555, "rewards/rejected": -4.81524658203125, "step": 8062 }, { "epoch": 1.25, "learning_rate": 8.233603402666112e-06, "logits/chosen": -2.2629928588867188, "logits/rejected": -2.580643653869629, "logps/chosen": -66.53407287597656, "logps/rejected": -96.15121459960938, "loss": 0.1449, "rewards/accuracies": 1.0, "rewards/chosen": -1.5990270376205444, "rewards/margins": 2.1134033203125, "rewards/rejected": -3.712430477142334, "step": 8063 }, { "epoch": 1.25, "learning_rate": 8.232869962134964e-06, "logits/chosen": -2.3642361164093018, "logits/rejected": -2.8774075508117676, "logps/chosen": -194.5745086669922, "logps/rejected": -433.92333984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.332594394683838, "rewards/margins": 7.656829833984375, "rewards/rejected": -9.989423751831055, "step": 8064 }, { "epoch": 1.25, "learning_rate": 8.232136521603818e-06, "logits/chosen": -2.9195942878723145, "logits/rejected": -2.9977223873138428, "logps/chosen": -133.81703186035156, "logps/rejected": -248.08267211914062, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9181559681892395, "rewards/margins": 7.67722749710083, "rewards/rejected": -8.595383644104004, "step": 8065 }, { "epoch": 1.25, "learning_rate": 8.23140308107267e-06, "logits/chosen": -1.9082927703857422, "logits/rejected": -2.579496383666992, "logps/chosen": -99.01316833496094, "logps/rejected": -274.385498046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.608550786972046, "rewards/margins": 7.299306869506836, "rewards/rejected": -8.907857894897461, "step": 8066 }, { "epoch": 1.25, "learning_rate": 8.230669640541522e-06, "logits/chosen": -2.923241138458252, "logits/rejected": -2.7599294185638428, "logps/chosen": -215.62725830078125, "logps/rejected": -228.91981506347656, "loss": 1.5051, "rewards/accuracies": 0.5, "rewards/chosen": -3.1636269092559814, "rewards/margins": 0.6039586067199707, "rewards/rejected": -3.767585515975952, "step": 8067 }, { "epoch": 1.25, "learning_rate": 8.229936200010373e-06, "logits/chosen": -2.951444625854492, "logits/rejected": -2.784669876098633, "logps/chosen": -299.0625, "logps/rejected": -314.99151611328125, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -2.823575973510742, "rewards/margins": 5.21961784362793, "rewards/rejected": -8.043193817138672, "step": 8068 }, { "epoch": 1.25, "learning_rate": 8.229202759479227e-06, "logits/chosen": -1.944724440574646, "logits/rejected": -0.5611765384674072, "logps/chosen": -422.9396667480469, "logps/rejected": -264.2140197753906, "loss": 1.6158, "rewards/accuracies": 0.5, "rewards/chosen": -6.2562127113342285, "rewards/margins": -1.1376864910125732, "rewards/rejected": -5.118526458740234, "step": 8069 }, { "epoch": 1.26, "learning_rate": 8.228469318948079e-06, "logits/chosen": -2.7195212841033936, "logits/rejected": -2.664647102355957, "logps/chosen": -147.51023864746094, "logps/rejected": -312.890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1394447088241577, "rewards/margins": 9.06494426727295, "rewards/rejected": -10.204389572143555, "step": 8070 }, { "epoch": 1.26, "learning_rate": 8.22773587841693e-06, "logits/chosen": -2.869541645050049, "logits/rejected": -2.3899824619293213, "logps/chosen": -199.9871368408203, "logps/rejected": -256.6874084472656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3484795093536377, "rewards/margins": 9.306001663208008, "rewards/rejected": -9.654481887817383, "step": 8071 }, { "epoch": 1.26, "learning_rate": 8.227002437885783e-06, "logits/chosen": -2.8934402465820312, "logits/rejected": -2.6854512691497803, "logps/chosen": -376.0562438964844, "logps/rejected": -263.8947448730469, "loss": 0.2026, "rewards/accuracies": 1.0, "rewards/chosen": -3.478240966796875, "rewards/margins": 3.7620861530303955, "rewards/rejected": -7.240326881408691, "step": 8072 }, { "epoch": 1.26, "learning_rate": 8.226268997354634e-06, "logits/chosen": -2.3326423168182373, "logits/rejected": -2.6108343601226807, "logps/chosen": -35.18440628051758, "logps/rejected": -253.3834228515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.24247591197490692, "rewards/margins": 8.348867416381836, "rewards/rejected": -8.59134292602539, "step": 8073 }, { "epoch": 1.26, "learning_rate": 8.225535556823488e-06, "logits/chosen": -2.131045341491699, "logits/rejected": -2.924680233001709, "logps/chosen": -82.66930389404297, "logps/rejected": -432.313720703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.176218271255493, "rewards/margins": 9.366703033447266, "rewards/rejected": -11.54292106628418, "step": 8074 }, { "epoch": 1.26, "learning_rate": 8.22480211629234e-06, "logits/chosen": -1.4172978401184082, "logits/rejected": -2.6688179969787598, "logps/chosen": -219.25094604492188, "logps/rejected": -511.5892028808594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4815032482147217, "rewards/margins": 8.802801132202148, "rewards/rejected": -10.28430461883545, "step": 8075 }, { "epoch": 1.26, "learning_rate": 8.224068675761192e-06, "logits/chosen": -2.2587697505950928, "logits/rejected": -2.7035059928894043, "logps/chosen": -125.53535461425781, "logps/rejected": -217.78793334960938, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -2.9633991718292236, "rewards/margins": 4.2269182205200195, "rewards/rejected": -7.190317630767822, "step": 8076 }, { "epoch": 1.26, "learning_rate": 8.223335235230044e-06, "logits/chosen": -1.983465552330017, "logits/rejected": -3.001652479171753, "logps/chosen": -68.89408111572266, "logps/rejected": -343.563720703125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.214141368865967, "rewards/margins": 5.870087146759033, "rewards/rejected": -8.084228515625, "step": 8077 }, { "epoch": 1.26, "learning_rate": 8.222601794698896e-06, "logits/chosen": -2.5775160789489746, "logits/rejected": -2.831346035003662, "logps/chosen": -364.439697265625, "logps/rejected": -435.8092346191406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9305633306503296, "rewards/margins": 8.648305892944336, "rewards/rejected": -10.578868865966797, "step": 8078 }, { "epoch": 1.26, "learning_rate": 8.221868354167747e-06, "logits/chosen": -1.7672046422958374, "logits/rejected": -2.8456146717071533, "logps/chosen": -55.77899169921875, "logps/rejected": -301.747802734375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -1.0453500747680664, "rewards/margins": 5.467734336853027, "rewards/rejected": -6.513084411621094, "step": 8079 }, { "epoch": 1.26, "learning_rate": 8.2211349136366e-06, "logits/chosen": -2.7812318801879883, "logits/rejected": -2.6104116439819336, "logps/chosen": -186.47714233398438, "logps/rejected": -69.59300994873047, "loss": 1.2185, "rewards/accuracies": 0.5, "rewards/chosen": -3.1726198196411133, "rewards/margins": -0.5918866395950317, "rewards/rejected": -2.580733060836792, "step": 8080 }, { "epoch": 1.26, "learning_rate": 8.220401473105451e-06, "logits/chosen": -2.5920538902282715, "logits/rejected": -2.8957772254943848, "logps/chosen": -411.3340759277344, "logps/rejected": -240.77735900878906, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": 2.1627259254455566, "rewards/margins": 4.367538928985596, "rewards/rejected": -2.204813003540039, "step": 8081 }, { "epoch": 1.26, "learning_rate": 8.219668032574303e-06, "logits/chosen": -1.7425007820129395, "logits/rejected": -2.7292702198028564, "logps/chosen": -205.48391723632812, "logps/rejected": -409.44464111328125, "loss": 0.201, "rewards/accuracies": 1.0, "rewards/chosen": -2.959486484527588, "rewards/margins": 6.883070468902588, "rewards/rejected": -9.842556953430176, "step": 8082 }, { "epoch": 1.26, "learning_rate": 8.218934592043157e-06, "logits/chosen": -2.6984469890594482, "logits/rejected": -0.8810949325561523, "logps/chosen": -253.96469116210938, "logps/rejected": -98.93843078613281, "loss": 1.7389, "rewards/accuracies": 0.5, "rewards/chosen": -4.481584072113037, "rewards/margins": 0.9977787733078003, "rewards/rejected": -5.479362964630127, "step": 8083 }, { "epoch": 1.26, "learning_rate": 8.218201151512009e-06, "logits/chosen": -3.0765721797943115, "logits/rejected": -2.6899802684783936, "logps/chosen": -572.6161499023438, "logps/rejected": -351.661376953125, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -3.916194200515747, "rewards/margins": 5.25673770904541, "rewards/rejected": -9.172931671142578, "step": 8084 }, { "epoch": 1.26, "learning_rate": 8.21746771098086e-06, "logits/chosen": -3.0741899013519287, "logits/rejected": -2.912442207336426, "logps/chosen": -194.43142700195312, "logps/rejected": -125.26188659667969, "loss": 0.7911, "rewards/accuracies": 0.5, "rewards/chosen": -2.4286842346191406, "rewards/margins": 2.413511276245117, "rewards/rejected": -4.842195510864258, "step": 8085 }, { "epoch": 1.26, "learning_rate": 8.216734270449712e-06, "logits/chosen": -2.9829399585723877, "logits/rejected": -2.9575817584991455, "logps/chosen": -94.54389953613281, "logps/rejected": -178.10951232910156, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -0.4438728392124176, "rewards/margins": 5.452693939208984, "rewards/rejected": -5.896566867828369, "step": 8086 }, { "epoch": 1.26, "learning_rate": 8.216000829918564e-06, "logits/chosen": -2.662773609161377, "logits/rejected": -3.2993078231811523, "logps/chosen": -936.0872802734375, "logps/rejected": -884.6627197265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.016348361968994, "rewards/margins": 6.66656494140625, "rewards/rejected": -9.682912826538086, "step": 8087 }, { "epoch": 1.26, "learning_rate": 8.215267389387416e-06, "logits/chosen": -1.5227668285369873, "logits/rejected": -2.9761383533477783, "logps/chosen": -107.49275970458984, "logps/rejected": -308.5185852050781, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -1.6514618396759033, "rewards/margins": 5.109602451324463, "rewards/rejected": -6.761064529418945, "step": 8088 }, { "epoch": 1.26, "learning_rate": 8.214533948856268e-06, "logits/chosen": -2.6764352321624756, "logits/rejected": -2.249307870864868, "logps/chosen": -186.2500457763672, "logps/rejected": -301.2071533203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7981933951377869, "rewards/margins": 6.674145698547363, "rewards/rejected": -7.472338676452637, "step": 8089 }, { "epoch": 1.26, "learning_rate": 8.21380050832512e-06, "logits/chosen": -0.7316348552703857, "logits/rejected": -2.0761568546295166, "logps/chosen": -152.3048095703125, "logps/rejected": -537.6544189453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.6980404257774353, "rewards/margins": 9.495067596435547, "rewards/rejected": -10.193107604980469, "step": 8090 }, { "epoch": 1.26, "learning_rate": 8.213067067793972e-06, "logits/chosen": -2.7400577068328857, "logits/rejected": -2.399895668029785, "logps/chosen": -321.70574951171875, "logps/rejected": -206.95135498046875, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -1.0635048151016235, "rewards/margins": 4.1639790534973145, "rewards/rejected": -5.227483749389648, "step": 8091 }, { "epoch": 1.26, "learning_rate": 8.212333627262825e-06, "logits/chosen": -2.507758617401123, "logits/rejected": -2.9994490146636963, "logps/chosen": -158.9293975830078, "logps/rejected": -291.7111511230469, "loss": 0.1392, "rewards/accuracies": 1.0, "rewards/chosen": -3.33233642578125, "rewards/margins": 3.506974935531616, "rewards/rejected": -6.839311599731445, "step": 8092 }, { "epoch": 1.26, "learning_rate": 8.211600186731677e-06, "logits/chosen": -2.5373761653900146, "logits/rejected": -3.000276803970337, "logps/chosen": -276.57684326171875, "logps/rejected": -454.7497863769531, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.3285164833068848, "rewards/margins": 6.879917144775391, "rewards/rejected": -8.208434104919434, "step": 8093 }, { "epoch": 1.26, "learning_rate": 8.210866746200529e-06, "logits/chosen": -2.4626221656799316, "logits/rejected": -2.7440202236175537, "logps/chosen": -197.1952362060547, "logps/rejected": -181.09182739257812, "loss": 0.5855, "rewards/accuracies": 0.5, "rewards/chosen": -1.6042282581329346, "rewards/margins": 4.922233581542969, "rewards/rejected": -6.526462078094482, "step": 8094 }, { "epoch": 1.26, "learning_rate": 8.210133305669381e-06, "logits/chosen": -2.758918046951294, "logits/rejected": -2.0356462001800537, "logps/chosen": -346.1089172363281, "logps/rejected": -203.27696228027344, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -0.3843299150466919, "rewards/margins": 6.756402492523193, "rewards/rejected": -7.140732288360596, "step": 8095 }, { "epoch": 1.26, "learning_rate": 8.209399865138233e-06, "logits/chosen": -2.1463873386383057, "logits/rejected": -3.058162212371826, "logps/chosen": -65.37549591064453, "logps/rejected": -263.4403991699219, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.31439000368118286, "rewards/margins": 6.791258811950684, "rewards/rejected": -7.105648994445801, "step": 8096 }, { "epoch": 1.26, "learning_rate": 8.208666424607085e-06, "logits/chosen": -2.968588352203369, "logits/rejected": -2.8259034156799316, "logps/chosen": -412.5384521484375, "logps/rejected": -531.8872680664062, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.9612724781036377, "rewards/margins": 7.853122234344482, "rewards/rejected": -8.8143949508667, "step": 8097 }, { "epoch": 1.26, "learning_rate": 8.207932984075937e-06, "logits/chosen": -2.737165927886963, "logits/rejected": -2.982938528060913, "logps/chosen": -52.876869201660156, "logps/rejected": -181.91799926757812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.8477075099945068, "rewards/margins": 7.216614723205566, "rewards/rejected": -8.064322471618652, "step": 8098 }, { "epoch": 1.26, "learning_rate": 8.207199543544788e-06, "logits/chosen": -1.6421318054199219, "logits/rejected": -2.557640790939331, "logps/chosen": -143.3032684326172, "logps/rejected": -324.0130310058594, "loss": 0.5456, "rewards/accuracies": 0.5, "rewards/chosen": -4.2687506675720215, "rewards/margins": 4.295680999755859, "rewards/rejected": -8.564432144165039, "step": 8099 }, { "epoch": 1.26, "learning_rate": 8.20646610301364e-06, "logits/chosen": -2.7896595001220703, "logits/rejected": -2.7254374027252197, "logps/chosen": -102.70883178710938, "logps/rejected": -225.80027770996094, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.7848165035247803, "rewards/margins": 5.902846336364746, "rewards/rejected": -7.6876630783081055, "step": 8100 }, { "epoch": 1.26, "learning_rate": 8.205732662482494e-06, "logits/chosen": -2.8888533115386963, "logits/rejected": -3.031886577606201, "logps/chosen": -81.05696868896484, "logps/rejected": -160.48779296875, "loss": 1.917, "rewards/accuracies": 0.5, "rewards/chosen": -3.1630687713623047, "rewards/margins": 1.8096907138824463, "rewards/rejected": -4.972759246826172, "step": 8101 }, { "epoch": 1.26, "learning_rate": 8.204999221951346e-06, "logits/chosen": -2.1008710861206055, "logits/rejected": -2.846299648284912, "logps/chosen": -306.7409362792969, "logps/rejected": -339.7655029296875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.824066162109375, "rewards/margins": 7.05789852142334, "rewards/rejected": -9.881965637207031, "step": 8102 }, { "epoch": 1.26, "learning_rate": 8.2042657814202e-06, "logits/chosen": -2.683454990386963, "logits/rejected": -2.686589479446411, "logps/chosen": -376.8674621582031, "logps/rejected": -379.06341552734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.14683838188648224, "rewards/margins": 7.9225006103515625, "rewards/rejected": -7.775662422180176, "step": 8103 }, { "epoch": 1.26, "learning_rate": 8.203532340889051e-06, "logits/chosen": -0.9601386785507202, "logits/rejected": -2.7213125228881836, "logps/chosen": -45.431114196777344, "logps/rejected": -321.0591125488281, "loss": 0.1185, "rewards/accuracies": 1.0, "rewards/chosen": -1.6474025249481201, "rewards/margins": 7.522443771362305, "rewards/rejected": -9.169846534729004, "step": 8104 }, { "epoch": 1.26, "learning_rate": 8.202798900357903e-06, "logits/chosen": -1.494386911392212, "logits/rejected": -2.390761375427246, "logps/chosen": -85.31404113769531, "logps/rejected": -303.90570068359375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.803863525390625, "rewards/margins": 5.402865409851074, "rewards/rejected": -6.206728935241699, "step": 8105 }, { "epoch": 1.26, "learning_rate": 8.202065459826755e-06, "logits/chosen": -2.570120334625244, "logits/rejected": -2.9288132190704346, "logps/chosen": -53.84907913208008, "logps/rejected": -277.03369140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.1053742915391922, "rewards/margins": 8.34927749633789, "rewards/rejected": -8.243904113769531, "step": 8106 }, { "epoch": 1.26, "learning_rate": 8.201332019295607e-06, "logits/chosen": -2.293591022491455, "logits/rejected": -3.0919010639190674, "logps/chosen": -387.5101013183594, "logps/rejected": -551.494873046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.460364580154419, "rewards/margins": 7.435957908630371, "rewards/rejected": -9.896322250366211, "step": 8107 }, { "epoch": 1.26, "learning_rate": 8.200598578764459e-06, "logits/chosen": -1.0385390520095825, "logits/rejected": -2.78377628326416, "logps/chosen": -123.933837890625, "logps/rejected": -364.4222106933594, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.9384891986846924, "rewards/margins": 5.236199378967285, "rewards/rejected": -7.174688816070557, "step": 8108 }, { "epoch": 1.26, "learning_rate": 8.199865138233312e-06, "logits/chosen": -1.3764537572860718, "logits/rejected": -1.6624433994293213, "logps/chosen": -1001.1356811523438, "logps/rejected": -1081.7978515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8379201889038086, "rewards/margins": 10.930757522583008, "rewards/rejected": -14.768677711486816, "step": 8109 }, { "epoch": 1.26, "learning_rate": 8.199131697702164e-06, "logits/chosen": -3.224431037902832, "logits/rejected": -3.1239683628082275, "logps/chosen": -702.1453247070312, "logps/rejected": -881.44384765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.8076767921447754, "rewards/margins": 6.078306674957275, "rewards/rejected": -8.88598346710205, "step": 8110 }, { "epoch": 1.26, "learning_rate": 8.198398257171016e-06, "logits/chosen": -2.723361015319824, "logits/rejected": -2.3301920890808105, "logps/chosen": -221.599365234375, "logps/rejected": -283.3768310546875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.996354818344116, "rewards/margins": 6.975149154663086, "rewards/rejected": -9.971504211425781, "step": 8111 }, { "epoch": 1.26, "learning_rate": 8.197664816639868e-06, "logits/chosen": -2.2974677085876465, "logits/rejected": -2.4489707946777344, "logps/chosen": -574.1464233398438, "logps/rejected": -500.101806640625, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -1.795135259628296, "rewards/margins": 5.341888427734375, "rewards/rejected": -7.13702392578125, "step": 8112 }, { "epoch": 1.26, "learning_rate": 8.19693137610872e-06, "logits/chosen": -2.6918089389801025, "logits/rejected": -2.787867784500122, "logps/chosen": -58.15711212158203, "logps/rejected": -186.12831115722656, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -0.6209381222724915, "rewards/margins": 6.517054557800293, "rewards/rejected": -7.1379923820495605, "step": 8113 }, { "epoch": 1.26, "learning_rate": 8.196197935577572e-06, "logits/chosen": -2.3585031032562256, "logits/rejected": -2.794689416885376, "logps/chosen": -407.4855041503906, "logps/rejected": -524.5440063476562, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.0575428009033203, "rewards/margins": 5.737396240234375, "rewards/rejected": -7.794939041137695, "step": 8114 }, { "epoch": 1.26, "learning_rate": 8.195464495046424e-06, "logits/chosen": -2.8882131576538086, "logits/rejected": -2.725522994995117, "logps/chosen": -246.2558135986328, "logps/rejected": -303.359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.049673557281494, "rewards/margins": 6.720050811767578, "rewards/rejected": -10.769723892211914, "step": 8115 }, { "epoch": 1.26, "learning_rate": 8.194731054515275e-06, "logits/chosen": -1.8885433673858643, "logits/rejected": -2.7429637908935547, "logps/chosen": -164.47923278808594, "logps/rejected": -335.8136291503906, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6363706588745117, "rewards/margins": 8.669331550598145, "rewards/rejected": -10.305702209472656, "step": 8116 }, { "epoch": 1.26, "learning_rate": 8.193997613984127e-06, "logits/chosen": -2.3976290225982666, "logits/rejected": -2.819770336151123, "logps/chosen": -347.243408203125, "logps/rejected": -580.5517578125, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -2.4464571475982666, "rewards/margins": 7.842568874359131, "rewards/rejected": -10.289026260375977, "step": 8117 }, { "epoch": 1.26, "learning_rate": 8.193264173452981e-06, "logits/chosen": -2.6091694831848145, "logits/rejected": -2.8262665271759033, "logps/chosen": -99.65438842773438, "logps/rejected": -200.83172607421875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.5600428581237793, "rewards/margins": 5.701171398162842, "rewards/rejected": -8.261214256286621, "step": 8118 }, { "epoch": 1.26, "learning_rate": 8.192530732921833e-06, "logits/chosen": -2.3940796852111816, "logits/rejected": -2.874861478805542, "logps/chosen": -40.97273254394531, "logps/rejected": -170.77804565429688, "loss": 0.0851, "rewards/accuracies": 1.0, "rewards/chosen": -2.532621145248413, "rewards/margins": 4.531106948852539, "rewards/rejected": -7.063728332519531, "step": 8119 }, { "epoch": 1.26, "learning_rate": 8.191797292390685e-06, "logits/chosen": -1.6081706285476685, "logits/rejected": -2.7268292903900146, "logps/chosen": -114.97073364257812, "logps/rejected": -501.97906494140625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.540629267692566, "rewards/margins": 11.995905876159668, "rewards/rejected": -13.536535263061523, "step": 8120 }, { "epoch": 1.26, "learning_rate": 8.191063851859536e-06, "logits/chosen": -1.977357268333435, "logits/rejected": -2.8184878826141357, "logps/chosen": -169.27835083007812, "logps/rejected": -301.4398193359375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.707972764968872, "rewards/margins": 6.242306709289551, "rewards/rejected": -8.950279235839844, "step": 8121 }, { "epoch": 1.26, "learning_rate": 8.190330411328388e-06, "logits/chosen": -0.984492838382721, "logits/rejected": -1.3844633102416992, "logps/chosen": -260.0131530761719, "logps/rejected": -436.7558898925781, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3629403114318848, "rewards/margins": 8.725431442260742, "rewards/rejected": -10.088371276855469, "step": 8122 }, { "epoch": 1.26, "learning_rate": 8.18959697079724e-06, "logits/chosen": -0.5433230400085449, "logits/rejected": -1.2566300630569458, "logps/chosen": -163.59393310546875, "logps/rejected": -587.27685546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1651289463043213, "rewards/margins": 7.972169876098633, "rewards/rejected": -9.137298583984375, "step": 8123 }, { "epoch": 1.26, "learning_rate": 8.188863530266092e-06, "logits/chosen": -2.4204580783843994, "logits/rejected": -2.8123559951782227, "logps/chosen": -56.796321868896484, "logps/rejected": -226.2802734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.907585620880127, "rewards/margins": 7.45326566696167, "rewards/rejected": -8.360851287841797, "step": 8124 }, { "epoch": 1.26, "learning_rate": 8.188130089734944e-06, "logits/chosen": -1.7624262571334839, "logits/rejected": -2.532985210418701, "logps/chosen": -132.3909149169922, "logps/rejected": -254.46221923828125, "loss": 0.0503, "rewards/accuracies": 1.0, "rewards/chosen": -3.537865161895752, "rewards/margins": 5.09574556350708, "rewards/rejected": -8.633610725402832, "step": 8125 }, { "epoch": 1.26, "learning_rate": 8.187396649203796e-06, "logits/chosen": -1.294494867324829, "logits/rejected": -2.636432409286499, "logps/chosen": -131.84854125976562, "logps/rejected": -430.37701416015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0113868713378906, "rewards/margins": 8.569646835327148, "rewards/rejected": -10.581033706665039, "step": 8126 }, { "epoch": 1.26, "learning_rate": 8.18666320867265e-06, "logits/chosen": -2.533245325088501, "logits/rejected": -2.6169662475585938, "logps/chosen": -225.08705139160156, "logps/rejected": -264.0018005371094, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -0.8709449768066406, "rewards/margins": 5.51928186416626, "rewards/rejected": -6.390226364135742, "step": 8127 }, { "epoch": 1.26, "learning_rate": 8.185929768141501e-06, "logits/chosen": -2.3397045135498047, "logits/rejected": -3.068817138671875, "logps/chosen": -112.35147094726562, "logps/rejected": -427.34722900390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.539513111114502, "rewards/margins": 7.492480278015137, "rewards/rejected": -9.03199291229248, "step": 8128 }, { "epoch": 1.26, "learning_rate": 8.185196327610353e-06, "logits/chosen": -2.2672555446624756, "logits/rejected": -2.6027746200561523, "logps/chosen": -134.88986206054688, "logps/rejected": -193.99826049804688, "loss": 0.1442, "rewards/accuracies": 1.0, "rewards/chosen": -2.162777900695801, "rewards/margins": 5.469326496124268, "rewards/rejected": -7.63210391998291, "step": 8129 }, { "epoch": 1.26, "learning_rate": 8.184462887079205e-06, "logits/chosen": -1.0592933893203735, "logits/rejected": -2.322205066680908, "logps/chosen": -227.43804931640625, "logps/rejected": -517.751953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.2682881355285645, "rewards/margins": 8.147579193115234, "rewards/rejected": -12.415867805480957, "step": 8130 }, { "epoch": 1.26, "learning_rate": 8.183729446548057e-06, "logits/chosen": -2.7018260955810547, "logits/rejected": -1.9280779361724854, "logps/chosen": -558.111083984375, "logps/rejected": -367.8714904785156, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": -5.491560459136963, "rewards/margins": 5.593810558319092, "rewards/rejected": -11.085371017456055, "step": 8131 }, { "epoch": 1.26, "learning_rate": 8.182996006016909e-06, "logits/chosen": -2.324157476425171, "logits/rejected": -3.166402578353882, "logps/chosen": -100.0083236694336, "logps/rejected": -413.9989929199219, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.0655055046081543, "rewards/margins": 7.613306045532227, "rewards/rejected": -8.678812026977539, "step": 8132 }, { "epoch": 1.26, "learning_rate": 8.18226256548576e-06, "logits/chosen": -2.6771841049194336, "logits/rejected": -2.6944663524627686, "logps/chosen": -380.45123291015625, "logps/rejected": -330.5118103027344, "loss": 1.4762, "rewards/accuracies": 0.5, "rewards/chosen": -5.359643936157227, "rewards/margins": 4.008401870727539, "rewards/rejected": -9.368045806884766, "step": 8133 }, { "epoch": 1.27, "learning_rate": 8.181529124954613e-06, "logits/chosen": -2.634392499923706, "logits/rejected": -1.6271485090255737, "logps/chosen": -182.857177734375, "logps/rejected": -179.43443298339844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.0725162029266357, "rewards/margins": 7.643755912780762, "rewards/rejected": -10.716272354125977, "step": 8134 }, { "epoch": 1.27, "learning_rate": 8.180795684423466e-06, "logits/chosen": -2.712616443634033, "logits/rejected": -1.575547218322754, "logps/chosen": -310.56817626953125, "logps/rejected": -154.59715270996094, "loss": 0.6377, "rewards/accuracies": 0.5, "rewards/chosen": -3.6033520698547363, "rewards/margins": 3.046334981918335, "rewards/rejected": -6.649686813354492, "step": 8135 }, { "epoch": 1.27, "learning_rate": 8.180062243892318e-06, "logits/chosen": -2.2424190044403076, "logits/rejected": -1.7986207008361816, "logps/chosen": -294.8018798828125, "logps/rejected": -286.7696838378906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7497496604919434, "rewards/margins": 9.111021995544434, "rewards/rejected": -10.860772132873535, "step": 8136 }, { "epoch": 1.27, "learning_rate": 8.179328803361172e-06, "logits/chosen": -2.6019811630249023, "logits/rejected": -2.6751859188079834, "logps/chosen": -302.35491943359375, "logps/rejected": -343.4434509277344, "loss": 0.7819, "rewards/accuracies": 0.5, "rewards/chosen": -4.631082534790039, "rewards/margins": 3.561245918273926, "rewards/rejected": -8.192328453063965, "step": 8137 }, { "epoch": 1.27, "learning_rate": 8.178595362830024e-06, "logits/chosen": -3.0103650093078613, "logits/rejected": -2.498676061630249, "logps/chosen": -458.589599609375, "logps/rejected": -333.82391357421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.457577228546143, "rewards/margins": 7.067042350769043, "rewards/rejected": -11.524620056152344, "step": 8138 }, { "epoch": 1.27, "learning_rate": 8.177861922298875e-06, "logits/chosen": -1.5029253959655762, "logits/rejected": -2.411328077316284, "logps/chosen": -80.69510650634766, "logps/rejected": -256.1669921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.604292154312134, "rewards/margins": 8.680070877075195, "rewards/rejected": -12.284363746643066, "step": 8139 }, { "epoch": 1.27, "learning_rate": 8.177128481767727e-06, "logits/chosen": -1.2786966562271118, "logits/rejected": -2.6126508712768555, "logps/chosen": -97.75448608398438, "logps/rejected": -193.2772674560547, "loss": 0.9885, "rewards/accuracies": 0.5, "rewards/chosen": -2.850914478302002, "rewards/margins": 3.1518020629882812, "rewards/rejected": -6.002717018127441, "step": 8140 }, { "epoch": 1.27, "learning_rate": 8.176395041236579e-06, "logits/chosen": -0.9946788549423218, "logits/rejected": -2.1220266819000244, "logps/chosen": -189.43304443359375, "logps/rejected": -369.94677734375, "loss": 0.2721, "rewards/accuracies": 1.0, "rewards/chosen": -3.756167411804199, "rewards/margins": 5.40478515625, "rewards/rejected": -9.1609525680542, "step": 8141 }, { "epoch": 1.27, "learning_rate": 8.175661600705431e-06, "logits/chosen": -2.655395984649658, "logits/rejected": -2.3554630279541016, "logps/chosen": -371.5753173828125, "logps/rejected": -467.4071044921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.31731116771698, "rewards/margins": 8.467358589172363, "rewards/rejected": -9.784669876098633, "step": 8142 }, { "epoch": 1.27, "learning_rate": 8.174928160174283e-06, "logits/chosen": -2.505244493484497, "logits/rejected": -2.788707971572876, "logps/chosen": -91.38539123535156, "logps/rejected": -261.31866455078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5491268038749695, "rewards/margins": 8.149740219116211, "rewards/rejected": -8.698866844177246, "step": 8143 }, { "epoch": 1.27, "learning_rate": 8.174194719643135e-06, "logits/chosen": -2.1588287353515625, "logits/rejected": -2.796342134475708, "logps/chosen": -286.546630859375, "logps/rejected": -582.8387451171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.9523391723632812, "rewards/margins": 7.250946521759033, "rewards/rejected": -8.203285217285156, "step": 8144 }, { "epoch": 1.27, "learning_rate": 8.173461279111988e-06, "logits/chosen": -2.138683795928955, "logits/rejected": -2.7801554203033447, "logps/chosen": -95.85374450683594, "logps/rejected": -157.39564514160156, "loss": 1.6608, "rewards/accuracies": 0.5, "rewards/chosen": -2.676140785217285, "rewards/margins": 0.9093523025512695, "rewards/rejected": -3.5854930877685547, "step": 8145 }, { "epoch": 1.27, "learning_rate": 8.17272783858084e-06, "logits/chosen": -1.0237025022506714, "logits/rejected": -2.715362071990967, "logps/chosen": -109.89114379882812, "logps/rejected": -508.470458984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.248907089233398, "rewards/margins": 7.632482528686523, "rewards/rejected": -11.881389617919922, "step": 8146 }, { "epoch": 1.27, "learning_rate": 8.171994398049692e-06, "logits/chosen": -2.4172213077545166, "logits/rejected": -2.746486186981201, "logps/chosen": -35.46351623535156, "logps/rejected": -133.54083251953125, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -1.4096896648406982, "rewards/margins": 4.557045936584473, "rewards/rejected": -5.96673583984375, "step": 8147 }, { "epoch": 1.27, "learning_rate": 8.171260957518544e-06, "logits/chosen": -2.7203571796417236, "logits/rejected": -2.202934980392456, "logps/chosen": -150.4625701904297, "logps/rejected": -290.49420166015625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.830822229385376, "rewards/margins": 7.092789649963379, "rewards/rejected": -9.923612594604492, "step": 8148 }, { "epoch": 1.27, "learning_rate": 8.170527516987396e-06, "logits/chosen": -2.882861614227295, "logits/rejected": -2.670849561691284, "logps/chosen": -131.31259155273438, "logps/rejected": -126.50003814697266, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -1.2440346479415894, "rewards/margins": 5.107337474822998, "rewards/rejected": -6.351372241973877, "step": 8149 }, { "epoch": 1.27, "learning_rate": 8.169794076456248e-06, "logits/chosen": -2.9275858402252197, "logits/rejected": -1.8739123344421387, "logps/chosen": -355.01812744140625, "logps/rejected": -201.63356018066406, "loss": 0.5897, "rewards/accuracies": 0.5, "rewards/chosen": -4.399588108062744, "rewards/margins": 1.401631474494934, "rewards/rejected": -5.801219463348389, "step": 8150 }, { "epoch": 1.27, "learning_rate": 8.1690606359251e-06, "logits/chosen": -2.2726190090179443, "logits/rejected": -2.762800931930542, "logps/chosen": -194.91912841796875, "logps/rejected": -440.4670104980469, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.426379442214966, "rewards/margins": 6.712303638458252, "rewards/rejected": -9.138683319091797, "step": 8151 }, { "epoch": 1.27, "learning_rate": 8.168327195393952e-06, "logits/chosen": -3.040297746658325, "logits/rejected": -2.4201908111572266, "logps/chosen": -643.02978515625, "logps/rejected": -462.6984558105469, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -3.4071578979492188, "rewards/margins": 4.5870819091796875, "rewards/rejected": -7.994239807128906, "step": 8152 }, { "epoch": 1.27, "learning_rate": 8.167593754862803e-06, "logits/chosen": -2.1676106452941895, "logits/rejected": -2.3646812438964844, "logps/chosen": -158.3958740234375, "logps/rejected": -236.75648498535156, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5008987188339233, "rewards/margins": 7.831613063812256, "rewards/rejected": -9.332511901855469, "step": 8153 }, { "epoch": 1.27, "learning_rate": 8.166860314331657e-06, "logits/chosen": -2.226469039916992, "logits/rejected": -2.8363993167877197, "logps/chosen": -68.0827865600586, "logps/rejected": -174.21597290039062, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -2.821937084197998, "rewards/margins": 4.750492095947266, "rewards/rejected": -7.572429656982422, "step": 8154 }, { "epoch": 1.27, "learning_rate": 8.166126873800509e-06, "logits/chosen": -1.9739055633544922, "logits/rejected": -2.934386968612671, "logps/chosen": -156.9599609375, "logps/rejected": -296.38824462890625, "loss": 0.1996, "rewards/accuracies": 1.0, "rewards/chosen": -2.382120132446289, "rewards/margins": 3.9431662559509277, "rewards/rejected": -6.325286388397217, "step": 8155 }, { "epoch": 1.27, "learning_rate": 8.16539343326936e-06, "logits/chosen": -2.886974334716797, "logits/rejected": -2.6754543781280518, "logps/chosen": -266.50518798828125, "logps/rejected": -271.1036071777344, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -3.0056183338165283, "rewards/margins": 6.263467788696289, "rewards/rejected": -9.269085884094238, "step": 8156 }, { "epoch": 1.27, "learning_rate": 8.164659992738213e-06, "logits/chosen": -2.7217178344726562, "logits/rejected": -2.8673784732818604, "logps/chosen": -97.5330810546875, "logps/rejected": -337.1691589355469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3836815357208252, "rewards/margins": 10.169382095336914, "rewards/rejected": -11.55306339263916, "step": 8157 }, { "epoch": 1.27, "learning_rate": 8.163926552207064e-06, "logits/chosen": -1.0258913040161133, "logits/rejected": -1.6620848178863525, "logps/chosen": -61.91911315917969, "logps/rejected": -197.84152221679688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.9608820080757141, "rewards/margins": 6.601160526275635, "rewards/rejected": -7.562042236328125, "step": 8158 }, { "epoch": 1.27, "learning_rate": 8.163193111675916e-06, "logits/chosen": -2.0859549045562744, "logits/rejected": -2.9015276432037354, "logps/chosen": -221.88641357421875, "logps/rejected": -308.7806396484375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.0419189929962158, "rewards/margins": 6.251676559448242, "rewards/rejected": -7.293595790863037, "step": 8159 }, { "epoch": 1.27, "learning_rate": 8.162459671144768e-06, "logits/chosen": -2.7045631408691406, "logits/rejected": -2.2199416160583496, "logps/chosen": -169.99542236328125, "logps/rejected": -172.6969757080078, "loss": 0.2029, "rewards/accuracies": 1.0, "rewards/chosen": -2.3711087703704834, "rewards/margins": 2.9224672317504883, "rewards/rejected": -5.293576240539551, "step": 8160 }, { "epoch": 1.27, "learning_rate": 8.16172623061362e-06, "logits/chosen": -2.757967233657837, "logits/rejected": -2.3667593002319336, "logps/chosen": -559.00830078125, "logps/rejected": -512.1595458984375, "loss": 0.8735, "rewards/accuracies": 0.5, "rewards/chosen": -2.3222413063049316, "rewards/margins": 3.346613883972168, "rewards/rejected": -5.6688551902771, "step": 8161 }, { "epoch": 1.27, "learning_rate": 8.160992790082472e-06, "logits/chosen": -2.905186176300049, "logits/rejected": -3.0018088817596436, "logps/chosen": -285.7398681640625, "logps/rejected": -312.6148376464844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.0836517810821533, "rewards/margins": 7.728582382202148, "rewards/rejected": -9.812233924865723, "step": 8162 }, { "epoch": 1.27, "learning_rate": 8.160259349551326e-06, "logits/chosen": -2.0810019969940186, "logits/rejected": -2.757683753967285, "logps/chosen": -117.06149291992188, "logps/rejected": -275.68505859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.335071563720703, "rewards/margins": 7.307281017303467, "rewards/rejected": -9.642353057861328, "step": 8163 }, { "epoch": 1.27, "learning_rate": 8.159525909020177e-06, "logits/chosen": -2.1055803298950195, "logits/rejected": -2.4648287296295166, "logps/chosen": -110.54981994628906, "logps/rejected": -317.72796630859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.9103367328643799, "rewards/margins": 9.32861328125, "rewards/rejected": -10.2389497756958, "step": 8164 }, { "epoch": 1.27, "learning_rate": 8.15879246848903e-06, "logits/chosen": -2.633256673812866, "logits/rejected": -3.043823003768921, "logps/chosen": -34.517677307128906, "logps/rejected": -286.50469970703125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -0.43255293369293213, "rewards/margins": 10.673412322998047, "rewards/rejected": -11.105964660644531, "step": 8165 }, { "epoch": 1.27, "learning_rate": 8.158059027957881e-06, "logits/chosen": -2.1644678115844727, "logits/rejected": -2.7991626262664795, "logps/chosen": -107.70098876953125, "logps/rejected": -352.30572509765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.084040403366089, "rewards/margins": 8.349310874938965, "rewards/rejected": -10.433351516723633, "step": 8166 }, { "epoch": 1.27, "learning_rate": 8.157325587426733e-06, "logits/chosen": -2.199122190475464, "logits/rejected": -2.8439888954162598, "logps/chosen": -255.07064819335938, "logps/rejected": -373.6396484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.109002113342285, "rewards/margins": 7.84572696685791, "rewards/rejected": -9.954729080200195, "step": 8167 }, { "epoch": 1.27, "learning_rate": 8.156592146895585e-06, "logits/chosen": -2.294649600982666, "logits/rejected": -2.8080272674560547, "logps/chosen": -130.54617309570312, "logps/rejected": -475.85003662109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6865936517715454, "rewards/margins": 10.122642517089844, "rewards/rejected": -11.809236526489258, "step": 8168 }, { "epoch": 1.27, "learning_rate": 8.155858706364439e-06, "logits/chosen": -1.7745009660720825, "logits/rejected": -3.0053420066833496, "logps/chosen": -155.3242645263672, "logps/rejected": -395.03228759765625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.1831505298614502, "rewards/margins": 6.328639984130859, "rewards/rejected": -7.511790752410889, "step": 8169 }, { "epoch": 1.27, "learning_rate": 8.15512526583329e-06, "logits/chosen": -3.145711898803711, "logits/rejected": -3.298070192337036, "logps/chosen": -352.05712890625, "logps/rejected": -491.908935546875, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": -4.521268367767334, "rewards/margins": 2.6515634059906006, "rewards/rejected": -7.172832012176514, "step": 8170 }, { "epoch": 1.27, "learning_rate": 8.154391825302142e-06, "logits/chosen": -2.9775707721710205, "logits/rejected": -1.9709211587905884, "logps/chosen": -286.2212829589844, "logps/rejected": -129.9678955078125, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -1.0713838338851929, "rewards/margins": 6.265651702880859, "rewards/rejected": -7.337035179138184, "step": 8171 }, { "epoch": 1.27, "learning_rate": 8.153658384770996e-06, "logits/chosen": -2.7484488487243652, "logits/rejected": -3.2405848503112793, "logps/chosen": -112.77322387695312, "logps/rejected": -288.0786437988281, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.5905003547668457, "rewards/margins": 8.367749214172363, "rewards/rejected": -9.95824909210205, "step": 8172 }, { "epoch": 1.27, "learning_rate": 8.152924944239848e-06, "logits/chosen": -2.496375560760498, "logits/rejected": -3.291551351547241, "logps/chosen": -36.74903869628906, "logps/rejected": -250.58010864257812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.5527575612068176, "rewards/margins": 7.844888687133789, "rewards/rejected": -8.397645950317383, "step": 8173 }, { "epoch": 1.27, "learning_rate": 8.1521915037087e-06, "logits/chosen": -2.189293146133423, "logits/rejected": -2.772212028503418, "logps/chosen": -60.07175827026367, "logps/rejected": -322.8756103515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9484281539916992, "rewards/margins": 9.855066299438477, "rewards/rejected": -11.803494453430176, "step": 8174 }, { "epoch": 1.27, "learning_rate": 8.151458063177551e-06, "logits/chosen": -2.8840596675872803, "logits/rejected": -2.409843921661377, "logps/chosen": -120.1567611694336, "logps/rejected": -183.27838134765625, "loss": 0.8598, "rewards/accuracies": 0.5, "rewards/chosen": -2.443680763244629, "rewards/margins": 3.6675338745117188, "rewards/rejected": -6.111214637756348, "step": 8175 }, { "epoch": 1.27, "learning_rate": 8.150724622646403e-06, "logits/chosen": -2.7072830200195312, "logits/rejected": -3.152973175048828, "logps/chosen": -274.90118408203125, "logps/rejected": -317.683837890625, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -2.8266146183013916, "rewards/margins": 3.825705051422119, "rewards/rejected": -6.65231990814209, "step": 8176 }, { "epoch": 1.27, "learning_rate": 8.149991182115255e-06, "logits/chosen": -1.5505136251449585, "logits/rejected": -2.757805585861206, "logps/chosen": -83.2197265625, "logps/rejected": -348.0157470703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.8447144031524658, "rewards/margins": 8.927799224853516, "rewards/rejected": -10.772514343261719, "step": 8177 }, { "epoch": 1.27, "learning_rate": 8.149257741584107e-06, "logits/chosen": -2.778855562210083, "logits/rejected": -3.0315403938293457, "logps/chosen": -48.14207458496094, "logps/rejected": -289.9346923828125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.3479809761047363, "rewards/margins": 7.335448265075684, "rewards/rejected": -8.683429718017578, "step": 8178 }, { "epoch": 1.27, "learning_rate": 8.148524301052959e-06, "logits/chosen": -2.20255446434021, "logits/rejected": -2.6978869438171387, "logps/chosen": -104.64209747314453, "logps/rejected": -347.60614013671875, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -1.2919811010360718, "rewards/margins": 7.561525344848633, "rewards/rejected": -8.853506088256836, "step": 8179 }, { "epoch": 1.27, "learning_rate": 8.147790860521811e-06, "logits/chosen": -2.958719253540039, "logits/rejected": -2.9099748134613037, "logps/chosen": -207.48550415039062, "logps/rejected": -384.23419189453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8325562477111816, "rewards/margins": 8.49086856842041, "rewards/rejected": -11.32342529296875, "step": 8180 }, { "epoch": 1.27, "learning_rate": 8.147057419990664e-06, "logits/chosen": -2.7376034259796143, "logits/rejected": -2.73746395111084, "logps/chosen": -97.42262268066406, "logps/rejected": -186.11398315429688, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": -1.5631424188613892, "rewards/margins": 6.316062927246094, "rewards/rejected": -7.879205703735352, "step": 8181 }, { "epoch": 1.27, "learning_rate": 8.146323979459516e-06, "logits/chosen": -1.3680678606033325, "logits/rejected": -2.4526569843292236, "logps/chosen": -536.272216796875, "logps/rejected": -580.5665893554688, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.9197394847869873, "rewards/margins": 8.501102447509766, "rewards/rejected": -10.420842170715332, "step": 8182 }, { "epoch": 1.27, "learning_rate": 8.145590538928368e-06, "logits/chosen": -2.017826795578003, "logits/rejected": -2.6495440006256104, "logps/chosen": -71.5025634765625, "logps/rejected": -236.63351440429688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9688777923583984, "rewards/margins": 8.098784446716309, "rewards/rejected": -9.067662239074707, "step": 8183 }, { "epoch": 1.27, "learning_rate": 8.14485709839722e-06, "logits/chosen": -0.9674429297447205, "logits/rejected": -1.281646490097046, "logps/chosen": -75.00145721435547, "logps/rejected": -292.20758056640625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.7422044277191162, "rewards/margins": 7.045819282531738, "rewards/rejected": -7.788023471832275, "step": 8184 }, { "epoch": 1.27, "learning_rate": 8.144123657866072e-06, "logits/chosen": -2.2971184253692627, "logits/rejected": -2.9680287837982178, "logps/chosen": -160.57318115234375, "logps/rejected": -267.6524963378906, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -1.899272084236145, "rewards/margins": 6.4983811378479, "rewards/rejected": -8.397653579711914, "step": 8185 }, { "epoch": 1.27, "learning_rate": 8.143390217334924e-06, "logits/chosen": -1.330490231513977, "logits/rejected": -2.7322607040405273, "logps/chosen": -122.46875762939453, "logps/rejected": -307.3943786621094, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.6807011365890503, "rewards/margins": 6.199200630187988, "rewards/rejected": -7.87990140914917, "step": 8186 }, { "epoch": 1.27, "learning_rate": 8.142656776803776e-06, "logits/chosen": -2.8676486015319824, "logits/rejected": -2.694258213043213, "logps/chosen": -211.84397888183594, "logps/rejected": -235.17274475097656, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.9138458967208862, "rewards/margins": 6.920865058898926, "rewards/rejected": -8.834711074829102, "step": 8187 }, { "epoch": 1.27, "learning_rate": 8.141923336272628e-06, "logits/chosen": -2.6945078372955322, "logits/rejected": -2.630871057510376, "logps/chosen": -588.4148559570312, "logps/rejected": -535.678955078125, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -3.9437332153320312, "rewards/margins": 6.280332565307617, "rewards/rejected": -10.224065780639648, "step": 8188 }, { "epoch": 1.27, "learning_rate": 8.14118989574148e-06, "logits/chosen": -2.771474838256836, "logits/rejected": -2.0221779346466064, "logps/chosen": -323.5404052734375, "logps/rejected": -300.7959899902344, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.1955788135528564, "rewards/margins": 6.695217132568359, "rewards/rejected": -7.890795707702637, "step": 8189 }, { "epoch": 1.27, "learning_rate": 8.140456455210333e-06, "logits/chosen": -1.0621131658554077, "logits/rejected": -2.8581013679504395, "logps/chosen": -116.32003784179688, "logps/rejected": -559.095703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -0.6507724523544312, "rewards/margins": 7.009192943572998, "rewards/rejected": -7.659965515136719, "step": 8190 }, { "epoch": 1.27, "learning_rate": 8.139723014679185e-06, "logits/chosen": -2.05519437789917, "logits/rejected": -2.832097053527832, "logps/chosen": -139.09445190429688, "logps/rejected": -250.1781768798828, "loss": 2.8615, "rewards/accuracies": 0.5, "rewards/chosen": -4.76870059967041, "rewards/margins": 0.8958773612976074, "rewards/rejected": -5.664577960968018, "step": 8191 }, { "epoch": 1.27, "learning_rate": 8.138989574148037e-06, "logits/chosen": -3.111180543899536, "logits/rejected": -3.2092578411102295, "logps/chosen": -282.3265075683594, "logps/rejected": -473.79571533203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5319796800613403, "rewards/margins": 7.1324143409729, "rewards/rejected": -7.664394378662109, "step": 8192 }, { "epoch": 1.27, "learning_rate": 8.138256133616889e-06, "logits/chosen": -1.8034136295318604, "logits/rejected": -2.943881034851074, "logps/chosen": -185.41326904296875, "logps/rejected": -418.8979187011719, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": -2.0450966358184814, "rewards/margins": 4.701882362365723, "rewards/rejected": -6.746978759765625, "step": 8193 }, { "epoch": 1.27, "learning_rate": 8.13752269308574e-06, "logits/chosen": -2.168102741241455, "logits/rejected": -2.9386343955993652, "logps/chosen": -150.5690155029297, "logps/rejected": -376.0286865234375, "loss": 1.4235, "rewards/accuracies": 0.5, "rewards/chosen": -4.184883117675781, "rewards/margins": 2.807328462600708, "rewards/rejected": -6.99221134185791, "step": 8194 }, { "epoch": 1.27, "learning_rate": 8.136789252554592e-06, "logits/chosen": -2.9619410037994385, "logits/rejected": -2.8828799724578857, "logps/chosen": -631.212646484375, "logps/rejected": -479.9716796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.0216095447540283, "rewards/margins": 7.224545955657959, "rewards/rejected": -9.24615478515625, "step": 8195 }, { "epoch": 1.27, "learning_rate": 8.136055812023444e-06, "logits/chosen": -3.035047769546509, "logits/rejected": -1.6973308324813843, "logps/chosen": -635.3212890625, "logps/rejected": -478.7934875488281, "loss": 0.1066, "rewards/accuracies": 1.0, "rewards/chosen": -3.780007839202881, "rewards/margins": 6.063226699829102, "rewards/rejected": -9.84323501586914, "step": 8196 }, { "epoch": 1.27, "learning_rate": 8.135322371492296e-06, "logits/chosen": -2.915356397628784, "logits/rejected": -2.8506147861480713, "logps/chosen": -236.3938446044922, "logps/rejected": -204.5558319091797, "loss": 2.5059, "rewards/accuracies": 0.5, "rewards/chosen": -3.8094491958618164, "rewards/margins": 2.772205352783203, "rewards/rejected": -6.5816545486450195, "step": 8197 }, { "epoch": 1.27, "learning_rate": 8.134588930961148e-06, "logits/chosen": -2.186976194381714, "logits/rejected": -2.9311182498931885, "logps/chosen": -472.7318115234375, "logps/rejected": -656.0538330078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.7759463787078857, "rewards/margins": 9.04002571105957, "rewards/rejected": -10.815972328186035, "step": 8198 }, { "epoch": 1.28, "learning_rate": 8.133855490430002e-06, "logits/chosen": -2.720965623855591, "logits/rejected": -2.7621395587921143, "logps/chosen": -31.124225616455078, "logps/rejected": -159.43299865722656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8055614829063416, "rewards/margins": 8.498736381530762, "rewards/rejected": -9.304298400878906, "step": 8199 }, { "epoch": 1.28, "learning_rate": 8.133122049898854e-06, "logits/chosen": -2.692627429962158, "logits/rejected": -2.3286077976226807, "logps/chosen": -240.3999786376953, "logps/rejected": -332.2598876953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.479561686515808, "rewards/margins": 7.208584785461426, "rewards/rejected": -8.688146591186523, "step": 8200 }, { "epoch": 1.28, "learning_rate": 8.132388609367705e-06, "logits/chosen": -1.7889727354049683, "logits/rejected": -2.8102967739105225, "logps/chosen": -165.192138671875, "logps/rejected": -300.6756286621094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.3751139640808105, "rewards/margins": 8.223926544189453, "rewards/rejected": -10.599040985107422, "step": 8201 }, { "epoch": 1.28, "learning_rate": 8.131655168836557e-06, "logits/chosen": -2.4849917888641357, "logits/rejected": -2.92240047454834, "logps/chosen": -638.7481079101562, "logps/rejected": -669.7794799804688, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.665112316608429, "rewards/margins": 7.185083389282227, "rewards/rejected": -7.85019588470459, "step": 8202 }, { "epoch": 1.28, "learning_rate": 8.13092172830541e-06, "logits/chosen": -2.735214948654175, "logits/rejected": -2.939983367919922, "logps/chosen": -126.4489517211914, "logps/rejected": -162.4072265625, "loss": 0.3933, "rewards/accuracies": 0.5, "rewards/chosen": -3.136552333831787, "rewards/margins": 2.7107622623443604, "rewards/rejected": -5.847314834594727, "step": 8203 }, { "epoch": 1.28, "learning_rate": 8.130188287774263e-06, "logits/chosen": -1.9829720258712769, "logits/rejected": -2.5231823921203613, "logps/chosen": -182.1873779296875, "logps/rejected": -319.1282958984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1474263668060303, "rewards/margins": 9.256665229797363, "rewards/rejected": -10.404091835021973, "step": 8204 }, { "epoch": 1.28, "learning_rate": 8.129454847243115e-06, "logits/chosen": -3.13735032081604, "logits/rejected": -1.9813817739486694, "logps/chosen": -398.6641845703125, "logps/rejected": -252.08087158203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.0020843446254730225, "rewards/margins": 8.57896900177002, "rewards/rejected": -8.581052780151367, "step": 8205 }, { "epoch": 1.28, "learning_rate": 8.128721406711966e-06, "logits/chosen": -2.9108669757843018, "logits/rejected": -2.4158289432525635, "logps/chosen": -328.08447265625, "logps/rejected": -188.73648071289062, "loss": 0.4789, "rewards/accuracies": 0.5, "rewards/chosen": -2.71392822265625, "rewards/margins": 2.6472697257995605, "rewards/rejected": -5.3611979484558105, "step": 8206 }, { "epoch": 1.28, "learning_rate": 8.12798796618082e-06, "logits/chosen": -2.282758951187134, "logits/rejected": -2.922666549682617, "logps/chosen": -99.52047729492188, "logps/rejected": -297.17779541015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4057457447052002, "rewards/margins": 9.007925033569336, "rewards/rejected": -10.413671493530273, "step": 8207 }, { "epoch": 1.28, "learning_rate": 8.127254525649672e-06, "logits/chosen": -3.0664727687835693, "logits/rejected": -1.7155647277832031, "logps/chosen": -276.7151184082031, "logps/rejected": -55.9708251953125, "loss": 5.1205, "rewards/accuracies": 0.0, "rewards/chosen": -6.3832597732543945, "rewards/margins": -5.107072830200195, "rewards/rejected": -1.2761870622634888, "step": 8208 }, { "epoch": 1.28, "learning_rate": 8.126521085118524e-06, "logits/chosen": -1.9543583393096924, "logits/rejected": -3.010668992996216, "logps/chosen": -41.632259368896484, "logps/rejected": -218.0686798095703, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.387166976928711, "rewards/margins": 5.020970821380615, "rewards/rejected": -7.408137798309326, "step": 8209 }, { "epoch": 1.28, "learning_rate": 8.125787644587376e-06, "logits/chosen": -1.3259332180023193, "logits/rejected": -2.7352564334869385, "logps/chosen": -124.27972412109375, "logps/rejected": -465.51922607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.306969404220581, "rewards/margins": 11.830944061279297, "rewards/rejected": -13.13791275024414, "step": 8210 }, { "epoch": 1.28, "learning_rate": 8.125054204056228e-06, "logits/chosen": -2.5067243576049805, "logits/rejected": -2.44120717048645, "logps/chosen": -186.55393981933594, "logps/rejected": -316.29302978515625, "loss": 0.5297, "rewards/accuracies": 0.5, "rewards/chosen": -1.9870514869689941, "rewards/margins": 4.157567977905273, "rewards/rejected": -6.144619941711426, "step": 8211 }, { "epoch": 1.28, "learning_rate": 8.12432076352508e-06, "logits/chosen": -2.4208717346191406, "logits/rejected": -2.8513050079345703, "logps/chosen": -331.63232421875, "logps/rejected": -376.8427734375, "loss": 1.2779, "rewards/accuracies": 0.5, "rewards/chosen": -4.711559295654297, "rewards/margins": 1.0369007587432861, "rewards/rejected": -5.748459815979004, "step": 8212 }, { "epoch": 1.28, "learning_rate": 8.123587322993931e-06, "logits/chosen": -1.7135783433914185, "logits/rejected": -2.784724235534668, "logps/chosen": -145.3851318359375, "logps/rejected": -260.694091796875, "loss": 0.342, "rewards/accuracies": 1.0, "rewards/chosen": -2.2926487922668457, "rewards/margins": 3.0853397846221924, "rewards/rejected": -5.377988815307617, "step": 8213 }, { "epoch": 1.28, "learning_rate": 8.122853882462783e-06, "logits/chosen": -2.5138020515441895, "logits/rejected": -2.732377290725708, "logps/chosen": -135.16677856445312, "logps/rejected": -219.71365356445312, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -2.320770740509033, "rewards/margins": 6.814749717712402, "rewards/rejected": -9.135520935058594, "step": 8214 }, { "epoch": 1.28, "learning_rate": 8.122120441931635e-06, "logits/chosen": -2.871131658554077, "logits/rejected": -2.4104435443878174, "logps/chosen": -315.6186218261719, "logps/rejected": -218.8228759765625, "loss": 1.8366, "rewards/accuracies": 0.5, "rewards/chosen": -2.787534236907959, "rewards/margins": 2.2071573734283447, "rewards/rejected": -4.994691848754883, "step": 8215 }, { "epoch": 1.28, "learning_rate": 8.121387001400489e-06, "logits/chosen": -3.167537212371826, "logits/rejected": -2.5576648712158203, "logps/chosen": -326.8431091308594, "logps/rejected": -182.1700439453125, "loss": 2.5784, "rewards/accuracies": 0.5, "rewards/chosen": -3.951967716217041, "rewards/margins": 1.7304058074951172, "rewards/rejected": -5.682373523712158, "step": 8216 }, { "epoch": 1.28, "learning_rate": 8.12065356086934e-06, "logits/chosen": -2.704392433166504, "logits/rejected": -3.119755268096924, "logps/chosen": -265.011474609375, "logps/rejected": -372.52655029296875, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -1.2947715520858765, "rewards/margins": 5.879393577575684, "rewards/rejected": -7.174164772033691, "step": 8217 }, { "epoch": 1.28, "learning_rate": 8.119920120338192e-06, "logits/chosen": -2.0042531490325928, "logits/rejected": -2.8234846591949463, "logps/chosen": -348.5781555175781, "logps/rejected": -514.9769287109375, "loss": 1.0325, "rewards/accuracies": 0.5, "rewards/chosen": -4.774880409240723, "rewards/margins": 2.3685998916625977, "rewards/rejected": -7.14348030090332, "step": 8218 }, { "epoch": 1.28, "learning_rate": 8.119186679807044e-06, "logits/chosen": -2.7700424194335938, "logits/rejected": -2.786447763442993, "logps/chosen": -118.04991912841797, "logps/rejected": -263.049072265625, "loss": 0.7679, "rewards/accuracies": 0.5, "rewards/chosen": -5.569567680358887, "rewards/margins": 5.501201152801514, "rewards/rejected": -11.070768356323242, "step": 8219 }, { "epoch": 1.28, "learning_rate": 8.118453239275896e-06, "logits/chosen": -2.2573258876800537, "logits/rejected": -3.0227551460266113, "logps/chosen": -109.38919067382812, "logps/rejected": -374.072509765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3275113105773926, "rewards/margins": 8.911575317382812, "rewards/rejected": -10.239087104797363, "step": 8220 }, { "epoch": 1.28, "learning_rate": 8.117719798744748e-06, "logits/chosen": -2.9131715297698975, "logits/rejected": -2.7805187702178955, "logps/chosen": -140.8873291015625, "logps/rejected": -236.54969787597656, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9391921758651733, "rewards/margins": 6.902991771697998, "rewards/rejected": -7.842184066772461, "step": 8221 }, { "epoch": 1.28, "learning_rate": 8.1169863582136e-06, "logits/chosen": -2.116581439971924, "logits/rejected": -2.2480177879333496, "logps/chosen": -543.2412719726562, "logps/rejected": -426.72906494140625, "loss": 0.1974, "rewards/accuracies": 1.0, "rewards/chosen": -2.107206344604492, "rewards/margins": 3.0629425048828125, "rewards/rejected": -5.170148849487305, "step": 8222 }, { "epoch": 1.28, "learning_rate": 8.116252917682452e-06, "logits/chosen": -1.5401318073272705, "logits/rejected": -2.3455893993377686, "logps/chosen": -208.81057739257812, "logps/rejected": -334.15557861328125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -1.384743571281433, "rewards/margins": 6.087940216064453, "rewards/rejected": -7.472683906555176, "step": 8223 }, { "epoch": 1.28, "learning_rate": 8.115519477151304e-06, "logits/chosen": -3.071038246154785, "logits/rejected": -3.347318172454834, "logps/chosen": -303.7167663574219, "logps/rejected": -357.1208801269531, "loss": 0.0841, "rewards/accuracies": 1.0, "rewards/chosen": -2.3577065467834473, "rewards/margins": 4.230274677276611, "rewards/rejected": -6.587981224060059, "step": 8224 }, { "epoch": 1.28, "learning_rate": 8.114786036620157e-06, "logits/chosen": -1.9437464475631714, "logits/rejected": -2.8551626205444336, "logps/chosen": -110.49742126464844, "logps/rejected": -268.5235290527344, "loss": 1.5161, "rewards/accuracies": 0.5, "rewards/chosen": -3.395155906677246, "rewards/margins": 2.0890121459960938, "rewards/rejected": -5.48416805267334, "step": 8225 }, { "epoch": 1.28, "learning_rate": 8.114052596089009e-06, "logits/chosen": -2.616732597351074, "logits/rejected": -2.5710465908050537, "logps/chosen": -509.19146728515625, "logps/rejected": -449.8840637207031, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.3359954357147217, "rewards/margins": 6.3026123046875, "rewards/rejected": -7.638607978820801, "step": 8226 }, { "epoch": 1.28, "learning_rate": 8.113319155557861e-06, "logits/chosen": -2.419689893722534, "logits/rejected": -3.1323866844177246, "logps/chosen": -118.82687377929688, "logps/rejected": -270.62310791015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.1647188663482666, "rewards/margins": 8.410677909851074, "rewards/rejected": -11.575396537780762, "step": 8227 }, { "epoch": 1.28, "learning_rate": 8.112585715026713e-06, "logits/chosen": -1.5343421697616577, "logits/rejected": -2.8947718143463135, "logps/chosen": -141.45822143554688, "logps/rejected": -379.76483154296875, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": -2.2816104888916016, "rewards/margins": 3.216015338897705, "rewards/rejected": -5.497625827789307, "step": 8228 }, { "epoch": 1.28, "learning_rate": 8.111852274495565e-06, "logits/chosen": -2.314465045928955, "logits/rejected": -2.8740527629852295, "logps/chosen": -98.95077514648438, "logps/rejected": -218.5843505859375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.237247943878174, "rewards/margins": 5.806001663208008, "rewards/rejected": -8.043249130249023, "step": 8229 }, { "epoch": 1.28, "learning_rate": 8.111118833964417e-06, "logits/chosen": -2.480098247528076, "logits/rejected": -3.12923002243042, "logps/chosen": -61.03102111816406, "logps/rejected": -254.808349609375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.0942816734313965, "rewards/margins": 6.695833206176758, "rewards/rejected": -8.790114402770996, "step": 8230 }, { "epoch": 1.28, "learning_rate": 8.110385393433269e-06, "logits/chosen": -2.5757758617401123, "logits/rejected": -1.8635907173156738, "logps/chosen": -656.9378051757812, "logps/rejected": -499.96881103515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.46624109148979187, "rewards/margins": 8.060065269470215, "rewards/rejected": -7.59382438659668, "step": 8231 }, { "epoch": 1.28, "learning_rate": 8.10965195290212e-06, "logits/chosen": -3.141710042953491, "logits/rejected": -3.004728078842163, "logps/chosen": -540.76171875, "logps/rejected": -506.53216552734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8360260128974915, "rewards/margins": 7.958737373352051, "rewards/rejected": -8.794763565063477, "step": 8232 }, { "epoch": 1.28, "learning_rate": 8.108918512370972e-06, "logits/chosen": -1.7585797309875488, "logits/rejected": -2.861926794052124, "logps/chosen": -62.71977996826172, "logps/rejected": -412.58148193359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3693424463272095, "rewards/margins": 9.659418106079102, "rewards/rejected": -11.028759956359863, "step": 8233 }, { "epoch": 1.28, "learning_rate": 8.108185071839826e-06, "logits/chosen": -3.051525354385376, "logits/rejected": -3.257493019104004, "logps/chosen": -267.51483154296875, "logps/rejected": -233.0101318359375, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -2.1853225231170654, "rewards/margins": 3.165137767791748, "rewards/rejected": -5.350460052490234, "step": 8234 }, { "epoch": 1.28, "learning_rate": 8.107451631308678e-06, "logits/chosen": -2.7912917137145996, "logits/rejected": -2.1236841678619385, "logps/chosen": -215.7692108154297, "logps/rejected": -169.28567504882812, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.8222719430923462, "rewards/margins": 4.946593284606934, "rewards/rejected": -6.768865585327148, "step": 8235 }, { "epoch": 1.28, "learning_rate": 8.10671819077753e-06, "logits/chosen": -2.58856463432312, "logits/rejected": -3.0278890132904053, "logps/chosen": -175.66326904296875, "logps/rejected": -238.01473999023438, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.5550758242607117, "rewards/margins": 6.26485538482666, "rewards/rejected": -6.819931507110596, "step": 8236 }, { "epoch": 1.28, "learning_rate": 8.105984750246383e-06, "logits/chosen": -3.2122302055358887, "logits/rejected": -2.8484766483306885, "logps/chosen": -286.5499267578125, "logps/rejected": -290.0526123046875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": 0.36709439754486084, "rewards/margins": 6.367708206176758, "rewards/rejected": -6.000613689422607, "step": 8237 }, { "epoch": 1.28, "learning_rate": 8.105251309715235e-06, "logits/chosen": -2.7660324573516846, "logits/rejected": -2.0555272102355957, "logps/chosen": -193.7898712158203, "logps/rejected": -87.70799255371094, "loss": 2.6537, "rewards/accuracies": 0.5, "rewards/chosen": -3.7774391174316406, "rewards/margins": 0.7541234493255615, "rewards/rejected": -4.531562805175781, "step": 8238 }, { "epoch": 1.28, "learning_rate": 8.104517869184087e-06, "logits/chosen": -1.8397622108459473, "logits/rejected": -2.761347532272339, "logps/chosen": -70.4508056640625, "logps/rejected": -243.83950805664062, "loss": 1.0509, "rewards/accuracies": 0.5, "rewards/chosen": -3.7262096405029297, "rewards/margins": 2.5407907962799072, "rewards/rejected": -6.267000198364258, "step": 8239 }, { "epoch": 1.28, "learning_rate": 8.103784428652939e-06, "logits/chosen": -2.8370466232299805, "logits/rejected": -2.7685186862945557, "logps/chosen": -110.9052734375, "logps/rejected": -57.02043533325195, "loss": 2.2515, "rewards/accuracies": 0.5, "rewards/chosen": -3.4295547008514404, "rewards/margins": -0.14580535888671875, "rewards/rejected": -3.2837493419647217, "step": 8240 }, { "epoch": 1.28, "learning_rate": 8.10305098812179e-06, "logits/chosen": -2.6511189937591553, "logits/rejected": -2.911247491836548, "logps/chosen": -80.06600952148438, "logps/rejected": -265.0160827636719, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.240030765533447, "rewards/margins": 6.2188239097595215, "rewards/rejected": -10.458854675292969, "step": 8241 }, { "epoch": 1.28, "learning_rate": 8.102317547590643e-06, "logits/chosen": -1.6740436553955078, "logits/rejected": -2.986607551574707, "logps/chosen": -62.629547119140625, "logps/rejected": -380.93170166015625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.3639180660247803, "rewards/margins": 7.410677909851074, "rewards/rejected": -8.774596214294434, "step": 8242 }, { "epoch": 1.28, "learning_rate": 8.101584107059496e-06, "logits/chosen": -1.684277057647705, "logits/rejected": -2.5064780712127686, "logps/chosen": -175.85008239746094, "logps/rejected": -323.47979736328125, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -3.8967270851135254, "rewards/margins": 4.734452247619629, "rewards/rejected": -8.631178855895996, "step": 8243 }, { "epoch": 1.28, "learning_rate": 8.100850666528348e-06, "logits/chosen": -2.9204931259155273, "logits/rejected": -2.9406068325042725, "logps/chosen": -162.0167236328125, "logps/rejected": -199.65663146972656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4808311462402344, "rewards/margins": 6.36210823059082, "rewards/rejected": -7.842939376831055, "step": 8244 }, { "epoch": 1.28, "learning_rate": 8.1001172259972e-06, "logits/chosen": -1.6229546070098877, "logits/rejected": -2.8999381065368652, "logps/chosen": -219.39974975585938, "logps/rejected": -353.47906494140625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -4.421984672546387, "rewards/margins": 4.854869842529297, "rewards/rejected": -9.276854515075684, "step": 8245 }, { "epoch": 1.28, "learning_rate": 8.099383785466052e-06, "logits/chosen": -2.6220667362213135, "logits/rejected": -2.8808164596557617, "logps/chosen": -325.70318603515625, "logps/rejected": -599.5638427734375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.014484405517578, "rewards/margins": 8.989380836486816, "rewards/rejected": -11.003865242004395, "step": 8246 }, { "epoch": 1.28, "learning_rate": 8.098650344934904e-06, "logits/chosen": -2.8111696243286133, "logits/rejected": -2.0720436573028564, "logps/chosen": -214.57211303710938, "logps/rejected": -145.05801391601562, "loss": 0.1525, "rewards/accuracies": 1.0, "rewards/chosen": -2.3609275817871094, "rewards/margins": 2.277411937713623, "rewards/rejected": -4.638339519500732, "step": 8247 }, { "epoch": 1.28, "learning_rate": 8.097916904403756e-06, "logits/chosen": -2.283381700515747, "logits/rejected": -2.954049587249756, "logps/chosen": -52.93586730957031, "logps/rejected": -214.50399780273438, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.924509048461914, "rewards/margins": 5.940530776977539, "rewards/rejected": -7.865039825439453, "step": 8248 }, { "epoch": 1.28, "learning_rate": 8.097183463872607e-06, "logits/chosen": -2.349165916442871, "logits/rejected": -2.867575168609619, "logps/chosen": -104.71026611328125, "logps/rejected": -286.36053466796875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.7520787715911865, "rewards/margins": 6.816710948944092, "rewards/rejected": -8.5687894821167, "step": 8249 }, { "epoch": 1.28, "learning_rate": 8.09645002334146e-06, "logits/chosen": -2.214200735092163, "logits/rejected": -2.8822317123413086, "logps/chosen": -230.8536834716797, "logps/rejected": -478.8096008300781, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -4.5415167808532715, "rewards/margins": 7.168429374694824, "rewards/rejected": -11.709945678710938, "step": 8250 }, { "epoch": 1.28, "learning_rate": 8.095716582810311e-06, "logits/chosen": -2.006601572036743, "logits/rejected": -3.043477773666382, "logps/chosen": -291.72296142578125, "logps/rejected": -542.2810668945312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.217411756515503, "rewards/margins": 8.972854614257812, "rewards/rejected": -10.190266609191895, "step": 8251 }, { "epoch": 1.28, "learning_rate": 8.094983142279165e-06, "logits/chosen": -2.139613151550293, "logits/rejected": -2.871830463409424, "logps/chosen": -223.2914276123047, "logps/rejected": -311.6046447753906, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -3.80893611907959, "rewards/margins": 7.301935195922852, "rewards/rejected": -11.110871315002441, "step": 8252 }, { "epoch": 1.28, "learning_rate": 8.094249701748017e-06, "logits/chosen": -2.612579345703125, "logits/rejected": -2.6531596183776855, "logps/chosen": -55.19280242919922, "logps/rejected": -395.0724182128906, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.4223722219467163, "rewards/margins": 8.193029403686523, "rewards/rejected": -9.615402221679688, "step": 8253 }, { "epoch": 1.28, "learning_rate": 8.093516261216868e-06, "logits/chosen": -3.006286859512329, "logits/rejected": -3.1259162425994873, "logps/chosen": -261.5856628417969, "logps/rejected": -186.3509521484375, "loss": 0.0811, "rewards/accuracies": 1.0, "rewards/chosen": -3.246936321258545, "rewards/margins": 3.8705058097839355, "rewards/rejected": -7.1174421310424805, "step": 8254 }, { "epoch": 1.28, "learning_rate": 8.09278282068572e-06, "logits/chosen": -2.6816587448120117, "logits/rejected": -1.9784142971038818, "logps/chosen": -241.02993774414062, "logps/rejected": -315.1474304199219, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -1.5328068733215332, "rewards/margins": 4.870265007019043, "rewards/rejected": -6.403071880340576, "step": 8255 }, { "epoch": 1.28, "learning_rate": 8.092049380154572e-06, "logits/chosen": -1.3873045444488525, "logits/rejected": -2.292996883392334, "logps/chosen": -440.5635681152344, "logps/rejected": -724.8950805664062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.936499118804932, "rewards/margins": 9.642524719238281, "rewards/rejected": -14.579023361206055, "step": 8256 }, { "epoch": 1.28, "learning_rate": 8.091315939623424e-06, "logits/chosen": -1.1692290306091309, "logits/rejected": -2.9452362060546875, "logps/chosen": -90.97630310058594, "logps/rejected": -443.38037109375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -2.5516505241394043, "rewards/margins": 6.725754261016846, "rewards/rejected": -9.27740478515625, "step": 8257 }, { "epoch": 1.28, "learning_rate": 8.090582499092276e-06, "logits/chosen": -1.4151620864868164, "logits/rejected": -2.755051851272583, "logps/chosen": -126.33125305175781, "logps/rejected": -338.5916748046875, "loss": 1.2256, "rewards/accuracies": 0.5, "rewards/chosen": -3.793318748474121, "rewards/margins": 4.443326950073242, "rewards/rejected": -8.236645698547363, "step": 8258 }, { "epoch": 1.28, "learning_rate": 8.089849058561128e-06, "logits/chosen": -2.2810916900634766, "logits/rejected": -2.1102001667022705, "logps/chosen": -350.10107421875, "logps/rejected": -295.979248046875, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -2.4716408252716064, "rewards/margins": 3.784966468811035, "rewards/rejected": -6.2566070556640625, "step": 8259 }, { "epoch": 1.28, "learning_rate": 8.08911561802998e-06, "logits/chosen": -1.8210980892181396, "logits/rejected": -2.83857798576355, "logps/chosen": -248.08474731445312, "logps/rejected": -641.9143676757812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4045822620391846, "rewards/margins": 9.503150939941406, "rewards/rejected": -11.907733917236328, "step": 8260 }, { "epoch": 1.28, "learning_rate": 8.088382177498833e-06, "logits/chosen": -2.6386349201202393, "logits/rejected": -2.9825966358184814, "logps/chosen": -143.46804809570312, "logps/rejected": -390.7796325683594, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.2878124713897705, "rewards/margins": 6.04062557220459, "rewards/rejected": -8.328437805175781, "step": 8261 }, { "epoch": 1.28, "learning_rate": 8.087648736967685e-06, "logits/chosen": -2.3461508750915527, "logits/rejected": -2.9154040813446045, "logps/chosen": -415.36480712890625, "logps/rejected": -495.15533447265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.575940132141113, "rewards/margins": 7.921175956726074, "rewards/rejected": -12.497116088867188, "step": 8262 }, { "epoch": 1.29, "learning_rate": 8.086915296436537e-06, "logits/chosen": -2.1149590015411377, "logits/rejected": -3.0157549381256104, "logps/chosen": -69.22269439697266, "logps/rejected": -216.59727478027344, "loss": 0.2757, "rewards/accuracies": 1.0, "rewards/chosen": -1.817525863647461, "rewards/margins": 2.6315526962280273, "rewards/rejected": -4.449078559875488, "step": 8263 }, { "epoch": 1.29, "learning_rate": 8.086181855905389e-06, "logits/chosen": -2.052661418914795, "logits/rejected": -2.8736002445220947, "logps/chosen": -125.9100570678711, "logps/rejected": -180.9609375, "loss": 3.1683, "rewards/accuracies": 0.5, "rewards/chosen": -4.597196102142334, "rewards/margins": 1.106215000152588, "rewards/rejected": -5.703411102294922, "step": 8264 }, { "epoch": 1.29, "learning_rate": 8.085448415374241e-06, "logits/chosen": -2.613145351409912, "logits/rejected": -2.9154887199401855, "logps/chosen": -437.5594177246094, "logps/rejected": -476.0775146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2392334938049316, "rewards/margins": 9.509249687194824, "rewards/rejected": -11.748483657836914, "step": 8265 }, { "epoch": 1.29, "learning_rate": 8.084714974843093e-06, "logits/chosen": -2.075984239578247, "logits/rejected": -2.858604669570923, "logps/chosen": -295.2721252441406, "logps/rejected": -361.228515625, "loss": 1.1173, "rewards/accuracies": 0.5, "rewards/chosen": -2.5847840309143066, "rewards/margins": 0.46493470668792725, "rewards/rejected": -3.0497186183929443, "step": 8266 }, { "epoch": 1.29, "learning_rate": 8.083981534311945e-06, "logits/chosen": -3.031270742416382, "logits/rejected": -2.848676919937134, "logps/chosen": -261.82928466796875, "logps/rejected": -317.948486328125, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/chosen": -5.805961608886719, "rewards/margins": 3.9301352500915527, "rewards/rejected": -9.73609733581543, "step": 8267 }, { "epoch": 1.29, "learning_rate": 8.083248093780796e-06, "logits/chosen": -2.92397403717041, "logits/rejected": -2.1737616062164307, "logps/chosen": -291.7007751464844, "logps/rejected": -207.90396118164062, "loss": 1.0283, "rewards/accuracies": 0.5, "rewards/chosen": -2.8918700218200684, "rewards/margins": 1.7022435665130615, "rewards/rejected": -4.594113826751709, "step": 8268 }, { "epoch": 1.29, "learning_rate": 8.08251465324965e-06, "logits/chosen": -3.086493730545044, "logits/rejected": -3.2812132835388184, "logps/chosen": -442.158935546875, "logps/rejected": -541.4288940429688, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -3.385122776031494, "rewards/margins": 6.125619411468506, "rewards/rejected": -9.5107421875, "step": 8269 }, { "epoch": 1.29, "learning_rate": 8.081781212718502e-06, "logits/chosen": -0.9350230097770691, "logits/rejected": -2.930969715118408, "logps/chosen": -76.42993927001953, "logps/rejected": -408.1690368652344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.646568775177002, "rewards/margins": 10.243205070495605, "rewards/rejected": -11.889774322509766, "step": 8270 }, { "epoch": 1.29, "learning_rate": 8.081047772187356e-06, "logits/chosen": -1.7909358739852905, "logits/rejected": -2.8988730907440186, "logps/chosen": -36.920589447021484, "logps/rejected": -227.75148010253906, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -2.1978769302368164, "rewards/margins": 5.6856279373168945, "rewards/rejected": -7.883504867553711, "step": 8271 }, { "epoch": 1.29, "learning_rate": 8.080314331656207e-06, "logits/chosen": -2.4201531410217285, "logits/rejected": -2.8877954483032227, "logps/chosen": -156.0363006591797, "logps/rejected": -128.78720092773438, "loss": 1.7222, "rewards/accuracies": 0.5, "rewards/chosen": -3.296342372894287, "rewards/margins": -0.7783385515213013, "rewards/rejected": -2.5180037021636963, "step": 8272 }, { "epoch": 1.29, "learning_rate": 8.07958089112506e-06, "logits/chosen": -2.058216094970703, "logits/rejected": -2.9770052433013916, "logps/chosen": -125.82881927490234, "logps/rejected": -230.2696533203125, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": -1.7397407293319702, "rewards/margins": 4.366872787475586, "rewards/rejected": -6.1066131591796875, "step": 8273 }, { "epoch": 1.29, "learning_rate": 8.078847450593911e-06, "logits/chosen": -2.6600372791290283, "logits/rejected": -1.806033730506897, "logps/chosen": -227.77969360351562, "logps/rejected": -178.44091796875, "loss": 0.67, "rewards/accuracies": 0.5, "rewards/chosen": -3.392810344696045, "rewards/margins": 0.7981930375099182, "rewards/rejected": -4.191003322601318, "step": 8274 }, { "epoch": 1.29, "learning_rate": 8.078114010062763e-06, "logits/chosen": -2.5669543743133545, "logits/rejected": -2.9412076473236084, "logps/chosen": -36.45759582519531, "logps/rejected": -189.4453125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.157109022140503, "rewards/margins": 6.575041770935059, "rewards/rejected": -8.73215103149414, "step": 8275 }, { "epoch": 1.29, "learning_rate": 8.077380569531615e-06, "logits/chosen": -2.521118640899658, "logits/rejected": -2.840933084487915, "logps/chosen": -35.4202766418457, "logps/rejected": -153.71319580078125, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -1.5380176305770874, "rewards/margins": 5.004651069641113, "rewards/rejected": -6.54266881942749, "step": 8276 }, { "epoch": 1.29, "learning_rate": 8.076647129000467e-06, "logits/chosen": -2.6485352516174316, "logits/rejected": -2.069329261779785, "logps/chosen": -746.7968139648438, "logps/rejected": -601.86279296875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -0.9565735459327698, "rewards/margins": 7.588500022888184, "rewards/rejected": -8.545073509216309, "step": 8277 }, { "epoch": 1.29, "learning_rate": 8.075913688469319e-06, "logits/chosen": -2.6026484966278076, "logits/rejected": -2.509392261505127, "logps/chosen": -434.0065002441406, "logps/rejected": -352.59368896484375, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": -3.3248512744903564, "rewards/margins": 5.229891300201416, "rewards/rejected": -8.554742813110352, "step": 8278 }, { "epoch": 1.29, "learning_rate": 8.075180247938172e-06, "logits/chosen": -3.163475275039673, "logits/rejected": -3.1356632709503174, "logps/chosen": -405.1677551269531, "logps/rejected": -289.7784729003906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.7242267727851868, "rewards/margins": 7.2357683181762695, "rewards/rejected": -7.959995269775391, "step": 8279 }, { "epoch": 1.29, "learning_rate": 8.074446807407024e-06, "logits/chosen": -2.5741868019104004, "logits/rejected": -2.4491546154022217, "logps/chosen": -165.4143829345703, "logps/rejected": -217.7659912109375, "loss": 0.6319, "rewards/accuracies": 0.5, "rewards/chosen": -4.162447929382324, "rewards/margins": 3.708360433578491, "rewards/rejected": -7.8708086013793945, "step": 8280 }, { "epoch": 1.29, "learning_rate": 8.073713366875876e-06, "logits/chosen": -1.9929757118225098, "logits/rejected": -2.8323237895965576, "logps/chosen": -183.45350646972656, "logps/rejected": -354.2637634277344, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.2567344903945923, "rewards/margins": 6.806321144104004, "rewards/rejected": -8.063055038452148, "step": 8281 }, { "epoch": 1.29, "learning_rate": 8.072979926344728e-06, "logits/chosen": -2.5673184394836426, "logits/rejected": -2.6989505290985107, "logps/chosen": -153.82972717285156, "logps/rejected": -393.6669616699219, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.0025315284729004, "rewards/margins": 7.412162780761719, "rewards/rejected": -9.414693832397461, "step": 8282 }, { "epoch": 1.29, "learning_rate": 8.07224648581358e-06, "logits/chosen": -2.4333906173706055, "logits/rejected": -2.6491637229919434, "logps/chosen": -242.60397338867188, "logps/rejected": -312.55078125, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -1.889904260635376, "rewards/margins": 3.4882380962371826, "rewards/rejected": -5.378142356872559, "step": 8283 }, { "epoch": 1.29, "learning_rate": 8.071513045282432e-06, "logits/chosen": -2.4605355262756348, "logits/rejected": -2.9675424098968506, "logps/chosen": -189.216064453125, "logps/rejected": -401.47760009765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2435696125030518, "rewards/margins": 8.818013191223145, "rewards/rejected": -11.061582565307617, "step": 8284 }, { "epoch": 1.29, "learning_rate": 8.070779604751283e-06, "logits/chosen": -2.32926869392395, "logits/rejected": -3.1743834018707275, "logps/chosen": -130.11351013183594, "logps/rejected": -329.29693603515625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.3880763053894043, "rewards/margins": 5.661594390869141, "rewards/rejected": -8.049670219421387, "step": 8285 }, { "epoch": 1.29, "learning_rate": 8.070046164220135e-06, "logits/chosen": -1.3888537883758545, "logits/rejected": -2.923285961151123, "logps/chosen": -149.90692138671875, "logps/rejected": -505.267333984375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.365891695022583, "rewards/margins": 6.436075210571289, "rewards/rejected": -7.801967144012451, "step": 8286 }, { "epoch": 1.29, "learning_rate": 8.069312723688987e-06, "logits/chosen": -2.790928363800049, "logits/rejected": -2.754377603530884, "logps/chosen": -614.7922973632812, "logps/rejected": -764.4251708984375, "loss": 0.0448, "rewards/accuracies": 1.0, "rewards/chosen": -3.72896146774292, "rewards/margins": 5.553896903991699, "rewards/rejected": -9.282858848571777, "step": 8287 }, { "epoch": 1.29, "learning_rate": 8.06857928315784e-06, "logits/chosen": -2.702674150466919, "logits/rejected": -2.711365222930908, "logps/chosen": -94.64179992675781, "logps/rejected": -363.39776611328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.699869155883789, "rewards/margins": 8.290701866149902, "rewards/rejected": -10.990571022033691, "step": 8288 }, { "epoch": 1.29, "learning_rate": 8.067845842626693e-06, "logits/chosen": -2.5406627655029297, "logits/rejected": -1.8818485736846924, "logps/chosen": -287.718994140625, "logps/rejected": -273.2151794433594, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.8376197814941406, "rewards/margins": 5.916284561157227, "rewards/rejected": -8.753904342651367, "step": 8289 }, { "epoch": 1.29, "learning_rate": 8.067112402095545e-06, "logits/chosen": -2.747985363006592, "logits/rejected": -2.778470993041992, "logps/chosen": -79.90530395507812, "logps/rejected": -234.6544189453125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.35400390625, "rewards/margins": 6.076824188232422, "rewards/rejected": -7.430828094482422, "step": 8290 }, { "epoch": 1.29, "learning_rate": 8.066378961564396e-06, "logits/chosen": -1.4836570024490356, "logits/rejected": -2.580571174621582, "logps/chosen": -173.97023010253906, "logps/rejected": -379.91473388671875, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -3.4992361068725586, "rewards/margins": 4.1038689613342285, "rewards/rejected": -7.603104591369629, "step": 8291 }, { "epoch": 1.29, "learning_rate": 8.065645521033248e-06, "logits/chosen": -2.2044026851654053, "logits/rejected": -2.770045042037964, "logps/chosen": -154.71340942382812, "logps/rejected": -384.5544128417969, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.4055664539337158, "rewards/margins": 8.852916717529297, "rewards/rejected": -10.25848388671875, "step": 8292 }, { "epoch": 1.29, "learning_rate": 8.0649120805021e-06, "logits/chosen": -2.9476373195648193, "logits/rejected": -2.8736448287963867, "logps/chosen": -433.36138916015625, "logps/rejected": -503.96185302734375, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -2.304553985595703, "rewards/margins": 5.640162467956543, "rewards/rejected": -7.944716453552246, "step": 8293 }, { "epoch": 1.29, "learning_rate": 8.064178639970952e-06, "logits/chosen": -2.166670560836792, "logits/rejected": -2.7554004192352295, "logps/chosen": -137.66781616210938, "logps/rejected": -490.8905029296875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.6310100555419922, "rewards/margins": 7.754938125610352, "rewards/rejected": -9.385948181152344, "step": 8294 }, { "epoch": 1.29, "learning_rate": 8.063445199439804e-06, "logits/chosen": -2.5463101863861084, "logits/rejected": -2.8822805881500244, "logps/chosen": -483.92010498046875, "logps/rejected": -390.2972106933594, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -1.9044189453125, "rewards/margins": 3.73382568359375, "rewards/rejected": -5.63824462890625, "step": 8295 }, { "epoch": 1.29, "learning_rate": 8.062711758908658e-06, "logits/chosen": -2.043975591659546, "logits/rejected": -2.743224620819092, "logps/chosen": -78.701416015625, "logps/rejected": -181.52304077148438, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": -3.757781982421875, "rewards/margins": 2.7327804565429688, "rewards/rejected": -6.490562438964844, "step": 8296 }, { "epoch": 1.29, "learning_rate": 8.06197831837751e-06, "logits/chosen": -2.405959367752075, "logits/rejected": -2.7292940616607666, "logps/chosen": -467.56060791015625, "logps/rejected": -438.3379821777344, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.688401460647583, "rewards/margins": 5.262088775634766, "rewards/rejected": -7.9504899978637695, "step": 8297 }, { "epoch": 1.29, "learning_rate": 8.061244877846361e-06, "logits/chosen": -2.875749111175537, "logits/rejected": -2.032231330871582, "logps/chosen": -330.7816467285156, "logps/rejected": -216.22235107421875, "loss": 1.3778, "rewards/accuracies": 0.5, "rewards/chosen": -4.257419586181641, "rewards/margins": 1.9914703369140625, "rewards/rejected": -6.248889923095703, "step": 8298 }, { "epoch": 1.29, "learning_rate": 8.060511437315213e-06, "logits/chosen": -2.1636929512023926, "logits/rejected": -3.0838963985443115, "logps/chosen": -122.02238464355469, "logps/rejected": -215.97576904296875, "loss": 0.0884, "rewards/accuracies": 1.0, "rewards/chosen": -1.4904190301895142, "rewards/margins": 3.4500763416290283, "rewards/rejected": -4.940495491027832, "step": 8299 }, { "epoch": 1.29, "learning_rate": 8.059777996784065e-06, "logits/chosen": -2.615854024887085, "logits/rejected": -3.246621608734131, "logps/chosen": -706.3466796875, "logps/rejected": -816.609619140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.929164171218872, "rewards/margins": 10.065505027770996, "rewards/rejected": -11.994668960571289, "step": 8300 }, { "epoch": 1.29, "learning_rate": 8.059044556252917e-06, "logits/chosen": -1.3970164060592651, "logits/rejected": -2.9273364543914795, "logps/chosen": -23.818729400634766, "logps/rejected": -354.36407470703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4393676817417145, "rewards/margins": 6.499462127685547, "rewards/rejected": -6.9388298988342285, "step": 8301 }, { "epoch": 1.29, "learning_rate": 8.058311115721769e-06, "logits/chosen": -2.9967596530914307, "logits/rejected": -2.7139699459075928, "logps/chosen": -152.54281616210938, "logps/rejected": -173.22828674316406, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -2.079317092895508, "rewards/margins": 5.045819282531738, "rewards/rejected": -7.125136375427246, "step": 8302 }, { "epoch": 1.29, "learning_rate": 8.057577675190622e-06, "logits/chosen": -0.853373646736145, "logits/rejected": -2.656451463699341, "logps/chosen": -90.98885345458984, "logps/rejected": -454.179443359375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -1.8289538621902466, "rewards/margins": 5.8358001708984375, "rewards/rejected": -7.6647539138793945, "step": 8303 }, { "epoch": 1.29, "learning_rate": 8.056844234659474e-06, "logits/chosen": -1.773115634918213, "logits/rejected": -2.525485038757324, "logps/chosen": -176.54640197753906, "logps/rejected": -383.417236328125, "loss": 0.4846, "rewards/accuracies": 0.5, "rewards/chosen": -4.975715160369873, "rewards/margins": 1.2731587886810303, "rewards/rejected": -6.248874187469482, "step": 8304 }, { "epoch": 1.29, "learning_rate": 8.056110794128328e-06, "logits/chosen": -3.1509618759155273, "logits/rejected": -2.7333285808563232, "logps/chosen": -623.5772094726562, "logps/rejected": -415.7511291503906, "loss": 1.0449, "rewards/accuracies": 0.5, "rewards/chosen": -4.332230091094971, "rewards/margins": 2.8944077491760254, "rewards/rejected": -7.226637840270996, "step": 8305 }, { "epoch": 1.29, "learning_rate": 8.05537735359718e-06, "logits/chosen": -1.5526074171066284, "logits/rejected": -2.8043713569641113, "logps/chosen": -234.78219604492188, "logps/rejected": -391.1610107421875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.822039008140564, "rewards/margins": 6.380338668823242, "rewards/rejected": -8.202378273010254, "step": 8306 }, { "epoch": 1.29, "learning_rate": 8.054643913066032e-06, "logits/chosen": -2.6773464679718018, "logits/rejected": -2.0981879234313965, "logps/chosen": -302.0785827636719, "logps/rejected": -357.5162353515625, "loss": 0.1093, "rewards/accuracies": 1.0, "rewards/chosen": -3.3830554485321045, "rewards/margins": 4.377700328826904, "rewards/rejected": -7.760756015777588, "step": 8307 }, { "epoch": 1.29, "learning_rate": 8.053910472534883e-06, "logits/chosen": -2.737271308898926, "logits/rejected": -2.342435836791992, "logps/chosen": -229.87864685058594, "logps/rejected": -150.48997497558594, "loss": 0.0472, "rewards/accuracies": 1.0, "rewards/chosen": -1.4246482849121094, "rewards/margins": 5.88563871383667, "rewards/rejected": -7.310286521911621, "step": 8308 }, { "epoch": 1.29, "learning_rate": 8.053177032003735e-06, "logits/chosen": -1.7920552492141724, "logits/rejected": -2.4463305473327637, "logps/chosen": -71.1103515625, "logps/rejected": -234.29379272460938, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -1.6935575008392334, "rewards/margins": 4.602860450744629, "rewards/rejected": -6.296418190002441, "step": 8309 }, { "epoch": 1.29, "learning_rate": 8.052443591472587e-06, "logits/chosen": -2.568924903869629, "logits/rejected": -2.875986337661743, "logps/chosen": -92.87294006347656, "logps/rejected": -309.01568603515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4211831092834473, "rewards/margins": 8.899980545043945, "rewards/rejected": -10.321163177490234, "step": 8310 }, { "epoch": 1.29, "learning_rate": 8.051710150941439e-06, "logits/chosen": -1.021268606185913, "logits/rejected": -2.413571834564209, "logps/chosen": -197.266845703125, "logps/rejected": -513.2808837890625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.280740261077881, "rewards/margins": 7.196347236633301, "rewards/rejected": -10.477087020874023, "step": 8311 }, { "epoch": 1.29, "learning_rate": 8.050976710410291e-06, "logits/chosen": -2.317949056625366, "logits/rejected": -3.1017568111419678, "logps/chosen": -459.95721435546875, "logps/rejected": -760.92236328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2668604850769043, "rewards/margins": 10.291504859924316, "rewards/rejected": -12.558364868164062, "step": 8312 }, { "epoch": 1.29, "learning_rate": 8.050243269879143e-06, "logits/chosen": -1.2057174444198608, "logits/rejected": -2.7932968139648438, "logps/chosen": -52.09304428100586, "logps/rejected": -309.47222900390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.877893090248108, "rewards/margins": 7.1822590827941895, "rewards/rejected": -9.060152053833008, "step": 8313 }, { "epoch": 1.29, "learning_rate": 8.049509829347996e-06, "logits/chosen": -2.8966774940490723, "logits/rejected": -2.3818750381469727, "logps/chosen": -242.38165283203125, "logps/rejected": -138.68450927734375, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -1.486799716949463, "rewards/margins": 4.59769868850708, "rewards/rejected": -6.084498405456543, "step": 8314 }, { "epoch": 1.29, "learning_rate": 8.048776388816848e-06, "logits/chosen": -2.864564895629883, "logits/rejected": -2.464158296585083, "logps/chosen": -276.079833984375, "logps/rejected": -246.26515197753906, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.9262443780899048, "rewards/margins": 5.8560590744018555, "rewards/rejected": -7.782303333282471, "step": 8315 }, { "epoch": 1.29, "learning_rate": 8.0480429482857e-06, "logits/chosen": -2.550546884536743, "logits/rejected": -2.9811840057373047, "logps/chosen": -109.51174926757812, "logps/rejected": -177.74298095703125, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -1.3804419040679932, "rewards/margins": 4.230852127075195, "rewards/rejected": -5.611294269561768, "step": 8316 }, { "epoch": 1.29, "learning_rate": 8.047309507754552e-06, "logits/chosen": -2.723498821258545, "logits/rejected": -1.93356454372406, "logps/chosen": -342.3739929199219, "logps/rejected": -250.7702178955078, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.8486579656600952, "rewards/margins": 8.261768341064453, "rewards/rejected": -10.110426902770996, "step": 8317 }, { "epoch": 1.29, "learning_rate": 8.046576067223404e-06, "logits/chosen": -2.3884143829345703, "logits/rejected": -2.934744358062744, "logps/chosen": -97.7137680053711, "logps/rejected": -175.81884765625, "loss": 0.1147, "rewards/accuracies": 1.0, "rewards/chosen": -2.9389286041259766, "rewards/margins": 2.6640748977661133, "rewards/rejected": -5.60300350189209, "step": 8318 }, { "epoch": 1.29, "learning_rate": 8.045842626692256e-06, "logits/chosen": -2.5171566009521484, "logits/rejected": -1.5957229137420654, "logps/chosen": -208.9702606201172, "logps/rejected": -211.32302856445312, "loss": 0.0862, "rewards/accuracies": 1.0, "rewards/chosen": -2.0576090812683105, "rewards/margins": 2.475546360015869, "rewards/rejected": -4.53315544128418, "step": 8319 }, { "epoch": 1.29, "learning_rate": 8.045109186161108e-06, "logits/chosen": -2.203051805496216, "logits/rejected": -2.9604649543762207, "logps/chosen": -190.6995391845703, "logps/rejected": -432.6636962890625, "loss": 0.0608, "rewards/accuracies": 1.0, "rewards/chosen": -2.523426055908203, "rewards/margins": 5.431131362915039, "rewards/rejected": -7.954557418823242, "step": 8320 }, { "epoch": 1.29, "learning_rate": 8.04437574562996e-06, "logits/chosen": -2.9896368980407715, "logits/rejected": -2.7408323287963867, "logps/chosen": -344.97882080078125, "logps/rejected": -186.31256103515625, "loss": 0.373, "rewards/accuracies": 0.5, "rewards/chosen": -2.442190170288086, "rewards/margins": 3.306807518005371, "rewards/rejected": -5.748997688293457, "step": 8321 }, { "epoch": 1.29, "learning_rate": 8.043642305098811e-06, "logits/chosen": -2.113224983215332, "logits/rejected": -2.895627737045288, "logps/chosen": -237.39276123046875, "logps/rejected": -565.5635986328125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -2.174046277999878, "rewards/margins": 4.844474792480469, "rewards/rejected": -7.018521308898926, "step": 8322 }, { "epoch": 1.29, "learning_rate": 8.042908864567665e-06, "logits/chosen": -2.0084879398345947, "logits/rejected": -2.939356565475464, "logps/chosen": -236.75576782226562, "logps/rejected": -297.9356384277344, "loss": 0.5785, "rewards/accuracies": 0.5, "rewards/chosen": -3.79750394821167, "rewards/margins": 0.9527469873428345, "rewards/rejected": -4.750250816345215, "step": 8323 }, { "epoch": 1.29, "learning_rate": 8.042175424036517e-06, "logits/chosen": -2.7137691974639893, "logits/rejected": -3.2136070728302, "logps/chosen": -86.36985778808594, "logps/rejected": -321.0066833496094, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8240671157836914, "rewards/margins": 8.498878479003906, "rewards/rejected": -9.322946548461914, "step": 8324 }, { "epoch": 1.29, "learning_rate": 8.041441983505369e-06, "logits/chosen": -2.2878901958465576, "logits/rejected": -2.6853461265563965, "logps/chosen": -65.92173767089844, "logps/rejected": -95.21177673339844, "loss": 0.3099, "rewards/accuracies": 1.0, "rewards/chosen": -1.5326937437057495, "rewards/margins": 2.9413506984710693, "rewards/rejected": -4.474044322967529, "step": 8325 }, { "epoch": 1.29, "learning_rate": 8.04070854297422e-06, "logits/chosen": -1.743714690208435, "logits/rejected": -2.8891327381134033, "logps/chosen": -71.57429504394531, "logps/rejected": -220.4358367919922, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -1.9569220542907715, "rewards/margins": 4.564700603485107, "rewards/rejected": -6.521622657775879, "step": 8326 }, { "epoch": 1.3, "learning_rate": 8.039975102443073e-06, "logits/chosen": -1.750526785850525, "logits/rejected": -2.791147470474243, "logps/chosen": -184.87554931640625, "logps/rejected": -447.62261962890625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.6102027893066406, "rewards/margins": 6.33225154876709, "rewards/rejected": -8.942455291748047, "step": 8327 }, { "epoch": 1.3, "learning_rate": 8.039241661911924e-06, "logits/chosen": -1.6955444812774658, "logits/rejected": -2.8086116313934326, "logps/chosen": -114.3462905883789, "logps/rejected": -323.5167541503906, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.5719643831253052, "rewards/margins": 7.277121543884277, "rewards/rejected": -8.849085807800293, "step": 8328 }, { "epoch": 1.3, "learning_rate": 8.038508221380776e-06, "logits/chosen": -2.2218966484069824, "logits/rejected": -3.042137384414673, "logps/chosen": -356.8580627441406, "logps/rejected": -406.19757080078125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -2.110644578933716, "rewards/margins": 6.9728102684021, "rewards/rejected": -9.083455085754395, "step": 8329 }, { "epoch": 1.3, "learning_rate": 8.037774780849628e-06, "logits/chosen": -2.824227809906006, "logits/rejected": -2.4298532009124756, "logps/chosen": -167.71554565429688, "logps/rejected": -81.34027099609375, "loss": 1.0064, "rewards/accuracies": 0.5, "rewards/chosen": -2.582993984222412, "rewards/margins": -0.31669068336486816, "rewards/rejected": -2.266303539276123, "step": 8330 }, { "epoch": 1.3, "learning_rate": 8.03704134031848e-06, "logits/chosen": -1.6785300970077515, "logits/rejected": -2.8237757682800293, "logps/chosen": -49.01691436767578, "logps/rejected": -285.1343994140625, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.7681208252906799, "rewards/margins": 5.5489702224731445, "rewards/rejected": -6.31709098815918, "step": 8331 }, { "epoch": 1.3, "learning_rate": 8.036307899787334e-06, "logits/chosen": -1.9589451551437378, "logits/rejected": -2.880643606185913, "logps/chosen": -191.82882690429688, "logps/rejected": -551.131591796875, "loss": 2.4876, "rewards/accuracies": 0.5, "rewards/chosen": -6.685870170593262, "rewards/margins": 2.246267795562744, "rewards/rejected": -8.932138442993164, "step": 8332 }, { "epoch": 1.3, "learning_rate": 8.035574459256186e-06, "logits/chosen": -2.8633463382720947, "logits/rejected": -2.9023962020874023, "logps/chosen": -208.56784057617188, "logps/rejected": -277.7210693359375, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -1.4256634712219238, "rewards/margins": 5.444157600402832, "rewards/rejected": -6.869821071624756, "step": 8333 }, { "epoch": 1.3, "learning_rate": 8.034841018725037e-06, "logits/chosen": -2.3167922496795654, "logits/rejected": -2.947653293609619, "logps/chosen": -342.3526306152344, "logps/rejected": -428.070068359375, "loss": 0.1624, "rewards/accuracies": 1.0, "rewards/chosen": -2.772531032562256, "rewards/margins": 3.591081380844116, "rewards/rejected": -6.363612174987793, "step": 8334 }, { "epoch": 1.3, "learning_rate": 8.03410757819389e-06, "logits/chosen": -2.6085498332977295, "logits/rejected": -2.841205596923828, "logps/chosen": -75.1537094116211, "logps/rejected": -160.31533813476562, "loss": 0.2611, "rewards/accuracies": 1.0, "rewards/chosen": -3.2801263332366943, "rewards/margins": 3.624180555343628, "rewards/rejected": -6.904306888580322, "step": 8335 }, { "epoch": 1.3, "learning_rate": 8.033374137662741e-06, "logits/chosen": -2.5251500606536865, "logits/rejected": -2.639845132827759, "logps/chosen": -184.4938201904297, "logps/rejected": -207.509033203125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -2.4569625854492188, "rewards/margins": 5.2786865234375, "rewards/rejected": -7.735649108886719, "step": 8336 }, { "epoch": 1.3, "learning_rate": 8.032640697131593e-06, "logits/chosen": -1.747667908668518, "logits/rejected": -3.195920705795288, "logps/chosen": -160.12786865234375, "logps/rejected": -408.92584228515625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.4730281829833984, "rewards/margins": 6.11032772064209, "rewards/rejected": -7.583355903625488, "step": 8337 }, { "epoch": 1.3, "learning_rate": 8.031907256600447e-06, "logits/chosen": -2.9878199100494385, "logits/rejected": -3.1857707500457764, "logps/chosen": -201.32664489746094, "logps/rejected": -263.8905334472656, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.4983188509941101, "rewards/margins": 6.6206207275390625, "rewards/rejected": -7.118939399719238, "step": 8338 }, { "epoch": 1.3, "learning_rate": 8.031173816069298e-06, "logits/chosen": -2.9015650749206543, "logits/rejected": -2.5343685150146484, "logps/chosen": -300.0677490234375, "logps/rejected": -317.588134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2128236293792725, "rewards/margins": 9.631136894226074, "rewards/rejected": -11.843960762023926, "step": 8339 }, { "epoch": 1.3, "learning_rate": 8.03044037553815e-06, "logits/chosen": -2.5100960731506348, "logits/rejected": -2.8910670280456543, "logps/chosen": -27.84198760986328, "logps/rejected": -222.86663818359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2180715799331665, "rewards/margins": 8.471343040466309, "rewards/rejected": -9.689414978027344, "step": 8340 }, { "epoch": 1.3, "learning_rate": 8.029706935007004e-06, "logits/chosen": -3.15966534614563, "logits/rejected": -3.411619186401367, "logps/chosen": -73.26575469970703, "logps/rejected": -172.2896728515625, "loss": 0.1337, "rewards/accuracies": 1.0, "rewards/chosen": -1.1159963607788086, "rewards/margins": 2.743962287902832, "rewards/rejected": -3.8599586486816406, "step": 8341 }, { "epoch": 1.3, "learning_rate": 8.028973494475856e-06, "logits/chosen": -2.812741279602051, "logits/rejected": -2.670027256011963, "logps/chosen": -288.59735107421875, "logps/rejected": -343.827392578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4917103052139282, "rewards/margins": 8.435429573059082, "rewards/rejected": -9.927139282226562, "step": 8342 }, { "epoch": 1.3, "learning_rate": 8.028240053944708e-06, "logits/chosen": -2.4333789348602295, "logits/rejected": -2.980402946472168, "logps/chosen": -127.69032287597656, "logps/rejected": -267.04901123046875, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -2.355489730834961, "rewards/margins": 5.063088417053223, "rewards/rejected": -7.418578147888184, "step": 8343 }, { "epoch": 1.3, "learning_rate": 8.02750661341356e-06, "logits/chosen": -2.9825751781463623, "logits/rejected": -3.1010499000549316, "logps/chosen": -84.26029968261719, "logps/rejected": -207.94651794433594, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.847669243812561, "rewards/margins": 6.004446983337402, "rewards/rejected": -7.852116584777832, "step": 8344 }, { "epoch": 1.3, "learning_rate": 8.026773172882411e-06, "logits/chosen": -2.748990535736084, "logits/rejected": -1.4737141132354736, "logps/chosen": -537.29931640625, "logps/rejected": -379.7211608886719, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -4.164572238922119, "rewards/margins": 5.014169692993164, "rewards/rejected": -9.178741455078125, "step": 8345 }, { "epoch": 1.3, "learning_rate": 8.026039732351263e-06, "logits/chosen": -2.9310665130615234, "logits/rejected": -2.3581578731536865, "logps/chosen": -567.3291015625, "logps/rejected": -426.0889892578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6300315856933594, "rewards/margins": 6.892171382904053, "rewards/rejected": -7.52220344543457, "step": 8346 }, { "epoch": 1.3, "learning_rate": 8.025306291820115e-06, "logits/chosen": -2.7595369815826416, "logits/rejected": -2.402637243270874, "logps/chosen": -359.3423156738281, "logps/rejected": -327.5853271484375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 0.37708795070648193, "rewards/margins": 8.49528694152832, "rewards/rejected": -8.11819839477539, "step": 8347 }, { "epoch": 1.3, "learning_rate": 8.024572851288967e-06, "logits/chosen": -0.9486582279205322, "logits/rejected": -2.9014947414398193, "logps/chosen": -161.27743530273438, "logps/rejected": -348.47509765625, "loss": 0.3594, "rewards/accuracies": 0.5, "rewards/chosen": -3.179539918899536, "rewards/margins": 5.674839019775391, "rewards/rejected": -8.854378700256348, "step": 8348 }, { "epoch": 1.3, "learning_rate": 8.023839410757819e-06, "logits/chosen": -2.6662120819091797, "logits/rejected": -2.7115395069122314, "logps/chosen": -266.7276611328125, "logps/rejected": -244.7733612060547, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.407893419265747, "rewards/margins": 5.5762739181518555, "rewards/rejected": -7.984167098999023, "step": 8349 }, { "epoch": 1.3, "learning_rate": 8.023105970226673e-06, "logits/chosen": -2.1385388374328613, "logits/rejected": -2.9387011528015137, "logps/chosen": -62.3941650390625, "logps/rejected": -261.70928955078125, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -3.881181240081787, "rewards/margins": 4.277245044708252, "rewards/rejected": -8.158426284790039, "step": 8350 }, { "epoch": 1.3, "learning_rate": 8.022372529695524e-06, "logits/chosen": -2.92712664604187, "logits/rejected": -2.753455400466919, "logps/chosen": -193.85678100585938, "logps/rejected": -228.14395141601562, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.1491297483444214, "rewards/margins": 4.513369560241699, "rewards/rejected": -4.66249942779541, "step": 8351 }, { "epoch": 1.3, "learning_rate": 8.021639089164376e-06, "logits/chosen": -2.6780524253845215, "logits/rejected": -2.5141425132751465, "logps/chosen": -109.326171875, "logps/rejected": -121.73463439941406, "loss": 0.128, "rewards/accuracies": 1.0, "rewards/chosen": -0.978237509727478, "rewards/margins": 2.9459025859832764, "rewards/rejected": -3.924140214920044, "step": 8352 }, { "epoch": 1.3, "learning_rate": 8.020905648633228e-06, "logits/chosen": -1.8725812435150146, "logits/rejected": -2.6075496673583984, "logps/chosen": -263.3822937011719, "logps/rejected": -492.52105712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.282731294631958, "rewards/margins": 9.470113754272461, "rewards/rejected": -11.75284481048584, "step": 8353 }, { "epoch": 1.3, "learning_rate": 8.02017220810208e-06, "logits/chosen": -0.81232750415802, "logits/rejected": -2.14801287651062, "logps/chosen": -105.3904800415039, "logps/rejected": -541.5128173828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.6287951469421387, "rewards/margins": 8.064945220947266, "rewards/rejected": -10.693740844726562, "step": 8354 }, { "epoch": 1.3, "learning_rate": 8.019438767570932e-06, "logits/chosen": -2.9787979125976562, "logits/rejected": -1.4653348922729492, "logps/chosen": -316.9332275390625, "logps/rejected": -149.71099853515625, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": -0.09425507485866547, "rewards/margins": 3.3251919746398926, "rewards/rejected": -3.419447183609009, "step": 8355 }, { "epoch": 1.3, "learning_rate": 8.018705327039784e-06, "logits/chosen": -2.6779372692108154, "logits/rejected": -2.9318480491638184, "logps/chosen": -67.10660552978516, "logps/rejected": -233.18731689453125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8388019800186157, "rewards/margins": 7.668123722076416, "rewards/rejected": -8.506925582885742, "step": 8356 }, { "epoch": 1.3, "learning_rate": 8.017971886508636e-06, "logits/chosen": -2.786289930343628, "logits/rejected": -2.5079667568206787, "logps/chosen": -299.3427429199219, "logps/rejected": -495.41326904296875, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -2.203148603439331, "rewards/margins": 5.8042402267456055, "rewards/rejected": -8.007389068603516, "step": 8357 }, { "epoch": 1.3, "learning_rate": 8.017238445977488e-06, "logits/chosen": -2.538905143737793, "logits/rejected": -3.1356680393218994, "logps/chosen": -43.73917770385742, "logps/rejected": -364.63818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.188272476196289, "rewards/margins": 9.777663230895996, "rewards/rejected": -10.965935707092285, "step": 8358 }, { "epoch": 1.3, "learning_rate": 8.016505005446341e-06, "logits/chosen": -2.458824872970581, "logits/rejected": -2.6580851078033447, "logps/chosen": -147.272705078125, "logps/rejected": -349.1827392578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9969260692596436, "rewards/margins": 8.138427734375, "rewards/rejected": -10.135353088378906, "step": 8359 }, { "epoch": 1.3, "learning_rate": 8.015771564915193e-06, "logits/chosen": -2.937013864517212, "logits/rejected": -2.819462776184082, "logps/chosen": -606.7576904296875, "logps/rejected": -334.1852722167969, "loss": 0.5037, "rewards/accuracies": 0.5, "rewards/chosen": -1.6557068824768066, "rewards/margins": 2.6509032249450684, "rewards/rejected": -4.306610107421875, "step": 8360 }, { "epoch": 1.3, "learning_rate": 8.015038124384045e-06, "logits/chosen": -2.9676711559295654, "logits/rejected": -2.78302001953125, "logps/chosen": -329.4483642578125, "logps/rejected": -370.3205871582031, "loss": 2.1988, "rewards/accuracies": 0.5, "rewards/chosen": -5.01984167098999, "rewards/margins": 1.6868624687194824, "rewards/rejected": -6.706704139709473, "step": 8361 }, { "epoch": 1.3, "learning_rate": 8.014304683852897e-06, "logits/chosen": -1.436901569366455, "logits/rejected": -3.0711395740509033, "logps/chosen": -95.01025390625, "logps/rejected": -306.0170593261719, "loss": 0.1911, "rewards/accuracies": 1.0, "rewards/chosen": -3.242774486541748, "rewards/margins": 3.2891860008239746, "rewards/rejected": -6.531960487365723, "step": 8362 }, { "epoch": 1.3, "learning_rate": 8.013571243321749e-06, "logits/chosen": -1.2921860218048096, "logits/rejected": -2.751483917236328, "logps/chosen": -242.49668884277344, "logps/rejected": -221.92274475097656, "loss": 0.9467, "rewards/accuracies": 0.5, "rewards/chosen": -3.6648879051208496, "rewards/margins": 1.1209840774536133, "rewards/rejected": -4.785871982574463, "step": 8363 }, { "epoch": 1.3, "learning_rate": 8.0128378027906e-06, "logits/chosen": -2.6245028972625732, "logits/rejected": -3.0734751224517822, "logps/chosen": -140.61038208007812, "logps/rejected": -171.5030059814453, "loss": 0.1728, "rewards/accuracies": 1.0, "rewards/chosen": -2.6090939044952393, "rewards/margins": 1.6749380826950073, "rewards/rejected": -4.284031867980957, "step": 8364 }, { "epoch": 1.3, "learning_rate": 8.012104362259452e-06, "logits/chosen": -1.9212086200714111, "logits/rejected": -2.8611867427825928, "logps/chosen": -385.88970947265625, "logps/rejected": -541.7493896484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.703460693359375, "rewards/margins": 6.81352424621582, "rewards/rejected": -8.516984939575195, "step": 8365 }, { "epoch": 1.3, "learning_rate": 8.011370921728304e-06, "logits/chosen": -2.6944143772125244, "logits/rejected": -1.4802805185317993, "logps/chosen": -352.8050537109375, "logps/rejected": -160.65516662597656, "loss": 0.5205, "rewards/accuracies": 0.5, "rewards/chosen": -2.45546817779541, "rewards/margins": 2.6379384994506836, "rewards/rejected": -5.093406677246094, "step": 8366 }, { "epoch": 1.3, "learning_rate": 8.010637481197156e-06, "logits/chosen": -2.5501036643981934, "logits/rejected": -2.945268392562866, "logps/chosen": -548.4945068359375, "logps/rejected": -335.02789306640625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.5745372772216797, "rewards/margins": 7.060615539550781, "rewards/rejected": -9.635152816772461, "step": 8367 }, { "epoch": 1.3, "learning_rate": 8.00990404066601e-06, "logits/chosen": -2.7311270236968994, "logits/rejected": -2.503971576690674, "logps/chosen": -135.85568237304688, "logps/rejected": -274.86053466796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6732470989227295, "rewards/margins": 8.675745010375977, "rewards/rejected": -10.348991394042969, "step": 8368 }, { "epoch": 1.3, "learning_rate": 8.009170600134862e-06, "logits/chosen": -2.6577200889587402, "logits/rejected": -1.3551299571990967, "logps/chosen": -429.2138977050781, "logps/rejected": -90.94586181640625, "loss": 0.6211, "rewards/accuracies": 0.5, "rewards/chosen": 0.103363037109375, "rewards/margins": 3.0880441665649414, "rewards/rejected": -2.9846811294555664, "step": 8369 }, { "epoch": 1.3, "learning_rate": 8.008437159603713e-06, "logits/chosen": -1.9188650846481323, "logits/rejected": -2.9290223121643066, "logps/chosen": -234.14749145507812, "logps/rejected": -362.82122802734375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.033924102783203, "rewards/margins": 5.805468559265137, "rewards/rejected": -7.83939266204834, "step": 8370 }, { "epoch": 1.3, "learning_rate": 8.007703719072565e-06, "logits/chosen": -1.3789379596710205, "logits/rejected": -2.6828622817993164, "logps/chosen": -93.32986450195312, "logps/rejected": -502.5753173828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.716464638710022, "rewards/margins": 12.833250045776367, "rewards/rejected": -14.549715042114258, "step": 8371 }, { "epoch": 1.3, "learning_rate": 8.006970278541419e-06, "logits/chosen": -2.7081563472747803, "logits/rejected": -2.52213454246521, "logps/chosen": -478.3989562988281, "logps/rejected": -426.3726501464844, "loss": 0.4096, "rewards/accuracies": 0.5, "rewards/chosen": -2.061964511871338, "rewards/margins": 3.7237157821655273, "rewards/rejected": -5.785680294036865, "step": 8372 }, { "epoch": 1.3, "learning_rate": 8.00623683801027e-06, "logits/chosen": -2.1801204681396484, "logits/rejected": -2.5232882499694824, "logps/chosen": -163.48443603515625, "logps/rejected": -305.2432556152344, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.8210127949714661, "rewards/margins": 6.0799455642700195, "rewards/rejected": -6.900958061218262, "step": 8373 }, { "epoch": 1.3, "learning_rate": 8.005503397479123e-06, "logits/chosen": -2.8344638347625732, "logits/rejected": -2.9716567993164062, "logps/chosen": -437.6690979003906, "logps/rejected": -528.9080200195312, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -2.076364040374756, "rewards/margins": 5.048130512237549, "rewards/rejected": -7.124494552612305, "step": 8374 }, { "epoch": 1.3, "learning_rate": 8.004769956947975e-06, "logits/chosen": -2.639554738998413, "logits/rejected": -3.026747465133667, "logps/chosen": -461.304931640625, "logps/rejected": -642.9193115234375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.892473578453064, "rewards/margins": 6.231700897216797, "rewards/rejected": -8.124175071716309, "step": 8375 }, { "epoch": 1.3, "learning_rate": 8.004036516416826e-06, "logits/chosen": -2.5797646045684814, "logits/rejected": -2.7771830558776855, "logps/chosen": -119.52276611328125, "logps/rejected": -145.6317596435547, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/chosen": -3.4270596504211426, "rewards/margins": 4.1310133934021, "rewards/rejected": -7.558073043823242, "step": 8376 }, { "epoch": 1.3, "learning_rate": 8.00330307588568e-06, "logits/chosen": -1.0546343326568604, "logits/rejected": -2.040663719177246, "logps/chosen": -169.6407470703125, "logps/rejected": -659.1856689453125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.4348859786987305, "rewards/margins": 9.378227233886719, "rewards/rejected": -11.813112258911133, "step": 8377 }, { "epoch": 1.3, "learning_rate": 8.002569635354532e-06, "logits/chosen": -1.478852391242981, "logits/rejected": -2.698659896850586, "logps/chosen": -128.52468872070312, "logps/rejected": -401.9674072265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1392393112182617, "rewards/margins": 8.830638885498047, "rewards/rejected": -9.969878196716309, "step": 8378 }, { "epoch": 1.3, "learning_rate": 8.001836194823384e-06, "logits/chosen": -2.8067386150360107, "logits/rejected": -2.5058650970458984, "logps/chosen": -96.3696060180664, "logps/rejected": -120.86614990234375, "loss": 0.7776, "rewards/accuracies": 0.5, "rewards/chosen": -2.4237005710601807, "rewards/margins": 1.6901870965957642, "rewards/rejected": -4.113887786865234, "step": 8379 }, { "epoch": 1.3, "learning_rate": 8.001102754292236e-06, "logits/chosen": -3.0927016735076904, "logits/rejected": -2.842080593109131, "logps/chosen": -361.5420227050781, "logps/rejected": -262.0395202636719, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -2.6068100929260254, "rewards/margins": 5.251203536987305, "rewards/rejected": -7.85801362991333, "step": 8380 }, { "epoch": 1.3, "learning_rate": 8.000369313761088e-06, "logits/chosen": -1.707405924797058, "logits/rejected": -2.6553783416748047, "logps/chosen": -147.4558868408203, "logps/rejected": -424.8437805175781, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3252646923065186, "rewards/margins": 8.113951683044434, "rewards/rejected": -9.439216613769531, "step": 8381 }, { "epoch": 1.3, "learning_rate": 7.99963587322994e-06, "logits/chosen": -2.824357032775879, "logits/rejected": -2.5892202854156494, "logps/chosen": -78.05657958984375, "logps/rejected": -175.50042724609375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.7023730278015137, "rewards/margins": 6.353799819946289, "rewards/rejected": -10.056173324584961, "step": 8382 }, { "epoch": 1.3, "learning_rate": 7.998902432698791e-06, "logits/chosen": -2.8744750022888184, "logits/rejected": -2.98429012298584, "logps/chosen": -88.67695617675781, "logps/rejected": -136.84140014648438, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -0.8323562145233154, "rewards/margins": 6.251779556274414, "rewards/rejected": -7.084136009216309, "step": 8383 }, { "epoch": 1.3, "learning_rate": 7.998168992167643e-06, "logits/chosen": -2.485308885574341, "logits/rejected": -3.0318491458892822, "logps/chosen": -124.46521759033203, "logps/rejected": -344.3280029296875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.976797103881836, "rewards/margins": 5.46586275100708, "rewards/rejected": -9.442659378051758, "step": 8384 }, { "epoch": 1.3, "learning_rate": 7.997435551636495e-06, "logits/chosen": -3.077220916748047, "logits/rejected": -2.553508996963501, "logps/chosen": -232.83297729492188, "logps/rejected": -144.4788055419922, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -0.8666599988937378, "rewards/margins": 6.190890312194824, "rewards/rejected": -7.057550430297852, "step": 8385 }, { "epoch": 1.3, "learning_rate": 7.996702111105349e-06, "logits/chosen": -2.3645741939544678, "logits/rejected": -3.055454969406128, "logps/chosen": -246.60475158691406, "logps/rejected": -325.93792724609375, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -0.7624893188476562, "rewards/margins": 5.766613960266113, "rewards/rejected": -6.5291032791137695, "step": 8386 }, { "epoch": 1.3, "learning_rate": 7.9959686705742e-06, "logits/chosen": -2.595388889312744, "logits/rejected": -2.3633718490600586, "logps/chosen": -222.300048828125, "logps/rejected": -425.91717529296875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -0.9478538632392883, "rewards/margins": 8.100215911865234, "rewards/rejected": -9.048069953918457, "step": 8387 }, { "epoch": 1.3, "learning_rate": 7.995235230043052e-06, "logits/chosen": -2.5489649772644043, "logits/rejected": -3.0704095363616943, "logps/chosen": -201.7235565185547, "logps/rejected": -352.485107421875, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": -2.5693793296813965, "rewards/margins": 3.6044535636901855, "rewards/rejected": -6.173832893371582, "step": 8388 }, { "epoch": 1.3, "learning_rate": 7.994501789511904e-06, "logits/chosen": -1.8446651697158813, "logits/rejected": -3.2736454010009766, "logps/chosen": -149.20159912109375, "logps/rejected": -456.20172119140625, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": -2.8592209815979004, "rewards/margins": 3.6860105991363525, "rewards/rejected": -6.545231819152832, "step": 8389 }, { "epoch": 1.3, "learning_rate": 7.993768348980756e-06, "logits/chosen": -2.334463596343994, "logits/rejected": -2.80835223197937, "logps/chosen": -569.6279296875, "logps/rejected": -532.703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.732522487640381, "rewards/margins": 6.797598838806152, "rewards/rejected": -9.530120849609375, "step": 8390 }, { "epoch": 1.3, "learning_rate": 7.993034908449608e-06, "logits/chosen": -2.805150270462036, "logits/rejected": -1.434186577796936, "logps/chosen": -158.63491821289062, "logps/rejected": -188.61489868164062, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": 1.4183011054992676, "rewards/margins": 6.863166809082031, "rewards/rejected": -5.4448652267456055, "step": 8391 }, { "epoch": 1.31, "learning_rate": 7.99230146791846e-06, "logits/chosen": -2.0041117668151855, "logits/rejected": -2.9083361625671387, "logps/chosen": -313.0035400390625, "logps/rejected": -619.942138671875, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -3.958498954772949, "rewards/margins": 5.2679948806762695, "rewards/rejected": -9.226493835449219, "step": 8392 }, { "epoch": 1.31, "learning_rate": 7.991568027387312e-06, "logits/chosen": -1.4371236562728882, "logits/rejected": -1.9783433675765991, "logps/chosen": -108.94723510742188, "logps/rejected": -172.95387268066406, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -2.361297130584717, "rewards/margins": 3.3075413703918457, "rewards/rejected": -5.6688385009765625, "step": 8393 }, { "epoch": 1.31, "learning_rate": 7.990834586856165e-06, "logits/chosen": -3.088881731033325, "logits/rejected": -2.773540735244751, "logps/chosen": -108.7920150756836, "logps/rejected": -275.4474182128906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.7393710613250732, "rewards/margins": 7.862583637237549, "rewards/rejected": -7.123212814331055, "step": 8394 }, { "epoch": 1.31, "learning_rate": 7.990101146325017e-06, "logits/chosen": -2.2847964763641357, "logits/rejected": -3.018671751022339, "logps/chosen": -100.64215087890625, "logps/rejected": -268.8993835449219, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -3.3531556129455566, "rewards/margins": 8.238258361816406, "rewards/rejected": -11.591413497924805, "step": 8395 }, { "epoch": 1.31, "learning_rate": 7.989367705793869e-06, "logits/chosen": -1.59559965133667, "logits/rejected": -2.769139051437378, "logps/chosen": -106.4688949584961, "logps/rejected": -252.5694580078125, "loss": 0.9827, "rewards/accuracies": 0.5, "rewards/chosen": -5.855774402618408, "rewards/margins": 3.4787778854370117, "rewards/rejected": -9.334552764892578, "step": 8396 }, { "epoch": 1.31, "learning_rate": 7.988634265262721e-06, "logits/chosen": -2.515622615814209, "logits/rejected": -3.0741078853607178, "logps/chosen": -629.5195922851562, "logps/rejected": -562.1729125976562, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.1065292358398438, "rewards/margins": 6.8711018562316895, "rewards/rejected": -9.977630615234375, "step": 8397 }, { "epoch": 1.31, "learning_rate": 7.987900824731573e-06, "logits/chosen": -2.2693188190460205, "logits/rejected": -2.8544394969940186, "logps/chosen": -120.52130126953125, "logps/rejected": -310.29620361328125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.658862829208374, "rewards/margins": 6.371525764465332, "rewards/rejected": -8.030388832092285, "step": 8398 }, { "epoch": 1.31, "learning_rate": 7.987167384200425e-06, "logits/chosen": -2.3371663093566895, "logits/rejected": -3.2251529693603516, "logps/chosen": -114.52027893066406, "logps/rejected": -412.00897216796875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.522414445877075, "rewards/margins": 6.409541606903076, "rewards/rejected": -8.93195629119873, "step": 8399 }, { "epoch": 1.31, "learning_rate": 7.986433943669277e-06, "logits/chosen": -3.1195571422576904, "logits/rejected": -3.0106523036956787, "logps/chosen": -403.39453125, "logps/rejected": -385.6578369140625, "loss": 0.1366, "rewards/accuracies": 1.0, "rewards/chosen": -1.2617995738983154, "rewards/margins": 4.150371551513672, "rewards/rejected": -5.412171363830566, "step": 8400 }, { "epoch": 1.31, "learning_rate": 7.985700503138128e-06, "logits/chosen": -2.5384714603424072, "logits/rejected": -2.8688933849334717, "logps/chosen": -328.17767333984375, "logps/rejected": -391.37884521484375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.5394408702850342, "rewards/margins": 6.080313205718994, "rewards/rejected": -7.619753837585449, "step": 8401 }, { "epoch": 1.31, "learning_rate": 7.98496706260698e-06, "logits/chosen": -2.1857097148895264, "logits/rejected": -2.8471271991729736, "logps/chosen": -63.13071823120117, "logps/rejected": -261.7893371582031, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.8011852502822876, "rewards/margins": 7.516239643096924, "rewards/rejected": -9.317424774169922, "step": 8402 }, { "epoch": 1.31, "learning_rate": 7.984233622075834e-06, "logits/chosen": -3.0913023948669434, "logits/rejected": -2.9363088607788086, "logps/chosen": -114.58998107910156, "logps/rejected": -90.0796890258789, "loss": 1.7471, "rewards/accuracies": 0.5, "rewards/chosen": -5.913008689880371, "rewards/margins": -0.002581357955932617, "rewards/rejected": -5.910427093505859, "step": 8403 }, { "epoch": 1.31, "learning_rate": 7.983500181544686e-06, "logits/chosen": -2.624748945236206, "logits/rejected": -2.065781593322754, "logps/chosen": -179.3484344482422, "logps/rejected": -180.5614471435547, "loss": 0.8384, "rewards/accuracies": 0.5, "rewards/chosen": -5.520010471343994, "rewards/margins": 2.9335439205169678, "rewards/rejected": -8.453554153442383, "step": 8404 }, { "epoch": 1.31, "learning_rate": 7.982766741013538e-06, "logits/chosen": -2.986302137374878, "logits/rejected": -2.848954677581787, "logps/chosen": -81.55548095703125, "logps/rejected": -92.68592071533203, "loss": 0.2697, "rewards/accuracies": 1.0, "rewards/chosen": -2.1544017791748047, "rewards/margins": 3.8477888107299805, "rewards/rejected": -6.002190589904785, "step": 8405 }, { "epoch": 1.31, "learning_rate": 7.982033300482391e-06, "logits/chosen": -3.1378724575042725, "logits/rejected": -2.9628849029541016, "logps/chosen": -281.3748474121094, "logps/rejected": -213.8433837890625, "loss": 0.6795, "rewards/accuracies": 0.5, "rewards/chosen": -1.6108330488204956, "rewards/margins": 2.3385958671569824, "rewards/rejected": -3.9494290351867676, "step": 8406 }, { "epoch": 1.31, "learning_rate": 7.981299859951243e-06, "logits/chosen": -1.8335570096969604, "logits/rejected": -2.9298224449157715, "logps/chosen": -168.48263549804688, "logps/rejected": -330.3989562988281, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": -2.112182140350342, "rewards/margins": 4.2474164962768555, "rewards/rejected": -6.359598636627197, "step": 8407 }, { "epoch": 1.31, "learning_rate": 7.980566419420095e-06, "logits/chosen": -1.9029865264892578, "logits/rejected": -2.5863800048828125, "logps/chosen": -424.65423583984375, "logps/rejected": -512.6788330078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.6135838031768799, "rewards/margins": 11.276853561401367, "rewards/rejected": -9.66326904296875, "step": 8408 }, { "epoch": 1.31, "learning_rate": 7.979832978888947e-06, "logits/chosen": -1.7962250709533691, "logits/rejected": -2.5769383907318115, "logps/chosen": -168.86073303222656, "logps/rejected": -236.6837158203125, "loss": 0.1829, "rewards/accuracies": 1.0, "rewards/chosen": -3.8082315921783447, "rewards/margins": 3.9275448322296143, "rewards/rejected": -7.735776424407959, "step": 8409 }, { "epoch": 1.31, "learning_rate": 7.979099538357799e-06, "logits/chosen": -2.386223316192627, "logits/rejected": -2.974790334701538, "logps/chosen": -63.08796310424805, "logps/rejected": -280.42047119140625, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -2.6710917949676514, "rewards/margins": 5.232521057128906, "rewards/rejected": -7.903613090515137, "step": 8410 }, { "epoch": 1.31, "learning_rate": 7.97836609782665e-06, "logits/chosen": -2.8795433044433594, "logits/rejected": -2.926952838897705, "logps/chosen": -235.26364135742188, "logps/rejected": -381.8894348144531, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": 0.736960232257843, "rewards/margins": 7.280527114868164, "rewards/rejected": -6.543567180633545, "step": 8411 }, { "epoch": 1.31, "learning_rate": 7.977632657295504e-06, "logits/chosen": -2.6300671100616455, "logits/rejected": -2.5538010597229004, "logps/chosen": -183.69219970703125, "logps/rejected": -253.77658081054688, "loss": 0.153, "rewards/accuracies": 1.0, "rewards/chosen": -3.7947824001312256, "rewards/margins": 4.0968756675720215, "rewards/rejected": -7.891657829284668, "step": 8412 }, { "epoch": 1.31, "learning_rate": 7.976899216764356e-06, "logits/chosen": -2.8477227687835693, "logits/rejected": -2.9002845287323, "logps/chosen": -261.7968444824219, "logps/rejected": -270.60223388671875, "loss": 0.4992, "rewards/accuracies": 0.5, "rewards/chosen": -1.9083137512207031, "rewards/margins": 2.886788845062256, "rewards/rejected": -4.795103073120117, "step": 8413 }, { "epoch": 1.31, "learning_rate": 7.976165776233208e-06, "logits/chosen": -2.0065534114837646, "logits/rejected": -2.6018013954162598, "logps/chosen": -180.39744567871094, "logps/rejected": -445.3358154296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0738115310668945, "rewards/margins": 9.716196060180664, "rewards/rejected": -11.790006637573242, "step": 8414 }, { "epoch": 1.31, "learning_rate": 7.97543233570206e-06, "logits/chosen": -1.6300407648086548, "logits/rejected": -2.861867904663086, "logps/chosen": -192.26708984375, "logps/rejected": -342.7884216308594, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.015758991241455, "rewards/margins": 5.688146114349365, "rewards/rejected": -7.70390510559082, "step": 8415 }, { "epoch": 1.31, "learning_rate": 7.974698895170912e-06, "logits/chosen": -2.9352855682373047, "logits/rejected": -2.9631752967834473, "logps/chosen": -161.37966918945312, "logps/rejected": -176.28982543945312, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": 0.7081108093261719, "rewards/margins": 5.623361587524414, "rewards/rejected": -4.915250778198242, "step": 8416 }, { "epoch": 1.31, "learning_rate": 7.973965454639764e-06, "logits/chosen": -2.766148805618286, "logits/rejected": -2.040358781814575, "logps/chosen": -372.9918518066406, "logps/rejected": -182.62472534179688, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -3.42498779296875, "rewards/margins": 5.093147277832031, "rewards/rejected": -8.518135070800781, "step": 8417 }, { "epoch": 1.31, "learning_rate": 7.973232014108615e-06, "logits/chosen": -2.3853142261505127, "logits/rejected": -2.6725378036499023, "logps/chosen": -162.3599853515625, "logps/rejected": -210.9166717529297, "loss": 1.3681, "rewards/accuracies": 0.5, "rewards/chosen": -3.640550136566162, "rewards/margins": 1.7158279418945312, "rewards/rejected": -5.356378078460693, "step": 8418 }, { "epoch": 1.31, "learning_rate": 7.972498573577467e-06, "logits/chosen": -2.7718865871429443, "logits/rejected": -2.9935522079467773, "logps/chosen": -162.85922241210938, "logps/rejected": -259.015869140625, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -3.6717379093170166, "rewards/margins": 4.724218368530273, "rewards/rejected": -8.395956039428711, "step": 8419 }, { "epoch": 1.31, "learning_rate": 7.97176513304632e-06, "logits/chosen": -2.085034132003784, "logits/rejected": -3.0653083324432373, "logps/chosen": -213.36370849609375, "logps/rejected": -344.44317626953125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.276740312576294, "rewards/margins": 5.346179962158203, "rewards/rejected": -7.622920513153076, "step": 8420 }, { "epoch": 1.31, "learning_rate": 7.971031692515173e-06, "logits/chosen": -2.9068715572357178, "logits/rejected": -3.270047187805176, "logps/chosen": -170.6851043701172, "logps/rejected": -436.5841064453125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.6274868249893188, "rewards/margins": 6.39984130859375, "rewards/rejected": -8.027327537536621, "step": 8421 }, { "epoch": 1.31, "learning_rate": 7.970298251984025e-06, "logits/chosen": -2.814720869064331, "logits/rejected": -2.7734975814819336, "logps/chosen": -279.6064758300781, "logps/rejected": -226.14193725585938, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -1.3636727333068848, "rewards/margins": 4.335679054260254, "rewards/rejected": -5.699351787567139, "step": 8422 }, { "epoch": 1.31, "learning_rate": 7.969564811452877e-06, "logits/chosen": -2.73000431060791, "logits/rejected": -3.2785534858703613, "logps/chosen": -74.40928649902344, "logps/rejected": -289.9958190917969, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.4062485694885254, "rewards/margins": 7.199489116668701, "rewards/rejected": -9.605737686157227, "step": 8423 }, { "epoch": 1.31, "learning_rate": 7.968831370921728e-06, "logits/chosen": -2.7245776653289795, "logits/rejected": -2.641347646713257, "logps/chosen": -176.3954620361328, "logps/rejected": -188.84051513671875, "loss": 0.2524, "rewards/accuracies": 1.0, "rewards/chosen": -2.573641300201416, "rewards/margins": 1.4626519680023193, "rewards/rejected": -4.036293029785156, "step": 8424 }, { "epoch": 1.31, "learning_rate": 7.96809793039058e-06, "logits/chosen": -2.211519479751587, "logits/rejected": -2.9192538261413574, "logps/chosen": -198.14523315429688, "logps/rejected": -348.7812805175781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4749221801757812, "rewards/margins": 9.528748512268066, "rewards/rejected": -11.003671646118164, "step": 8425 }, { "epoch": 1.31, "learning_rate": 7.967364489859432e-06, "logits/chosen": -1.5738966464996338, "logits/rejected": -2.9430179595947266, "logps/chosen": -95.8648452758789, "logps/rejected": -259.6694030761719, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -3.077263116836548, "rewards/margins": 5.836418151855469, "rewards/rejected": -8.913681030273438, "step": 8426 }, { "epoch": 1.31, "learning_rate": 7.966631049328284e-06, "logits/chosen": -2.533216714859009, "logits/rejected": -2.997915267944336, "logps/chosen": -51.859004974365234, "logps/rejected": -266.16107177734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5128631591796875, "rewards/margins": 7.495555877685547, "rewards/rejected": -9.008419036865234, "step": 8427 }, { "epoch": 1.31, "learning_rate": 7.965897608797136e-06, "logits/chosen": -2.4621756076812744, "logits/rejected": -2.9730281829833984, "logps/chosen": -317.92498779296875, "logps/rejected": -350.5220947265625, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -2.849656581878662, "rewards/margins": 7.01683235168457, "rewards/rejected": -9.86648941040039, "step": 8428 }, { "epoch": 1.31, "learning_rate": 7.965164168265988e-06, "logits/chosen": -2.4623401165008545, "logits/rejected": -2.7567100524902344, "logps/chosen": -69.49444580078125, "logps/rejected": -163.39736938476562, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -2.4215476512908936, "rewards/margins": 4.549535274505615, "rewards/rejected": -6.97108268737793, "step": 8429 }, { "epoch": 1.31, "learning_rate": 7.964430727734841e-06, "logits/chosen": -2.4643900394439697, "logits/rejected": -2.90285325050354, "logps/chosen": -59.529747009277344, "logps/rejected": -317.0417785644531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.3453108072280884, "rewards/margins": 8.602158546447754, "rewards/rejected": -8.947469711303711, "step": 8430 }, { "epoch": 1.31, "learning_rate": 7.963697287203693e-06, "logits/chosen": -2.743678569793701, "logits/rejected": -2.3773012161254883, "logps/chosen": -361.4259033203125, "logps/rejected": -491.49578857421875, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -2.3002634048461914, "rewards/margins": 4.728485107421875, "rewards/rejected": -7.028748512268066, "step": 8431 }, { "epoch": 1.31, "learning_rate": 7.962963846672545e-06, "logits/chosen": -1.649609088897705, "logits/rejected": -2.9465203285217285, "logps/chosen": -75.92921447753906, "logps/rejected": -341.1510925292969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.2784897089004517, "rewards/margins": 8.040550231933594, "rewards/rejected": -9.319040298461914, "step": 8432 }, { "epoch": 1.31, "learning_rate": 7.962230406141397e-06, "logits/chosen": -1.6663963794708252, "logits/rejected": -2.668119430541992, "logps/chosen": -376.7825927734375, "logps/rejected": -441.36199951171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.5583114624023438, "rewards/margins": 9.560840606689453, "rewards/rejected": -12.119152069091797, "step": 8433 }, { "epoch": 1.31, "learning_rate": 7.961496965610249e-06, "logits/chosen": -2.287987470626831, "logits/rejected": -2.70760178565979, "logps/chosen": -133.05274963378906, "logps/rejected": -187.739501953125, "loss": 0.1251, "rewards/accuracies": 1.0, "rewards/chosen": -1.9951213598251343, "rewards/margins": 5.183000564575195, "rewards/rejected": -7.178122043609619, "step": 8434 }, { "epoch": 1.31, "learning_rate": 7.9607635250791e-06, "logits/chosen": -2.282552480697632, "logits/rejected": -2.751211404800415, "logps/chosen": -286.2464904785156, "logps/rejected": -388.04449462890625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.6284449100494385, "rewards/margins": 4.874043941497803, "rewards/rejected": -7.50248908996582, "step": 8435 }, { "epoch": 1.31, "learning_rate": 7.960030084547953e-06, "logits/chosen": -2.0303444862365723, "logits/rejected": -2.735069751739502, "logps/chosen": -71.2660903930664, "logps/rejected": -256.96746826171875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1263642311096191, "rewards/margins": 5.270852565765381, "rewards/rejected": -6.397216796875, "step": 8436 }, { "epoch": 1.31, "learning_rate": 7.959296644016805e-06, "logits/chosen": -1.932801604270935, "logits/rejected": -2.368042230606079, "logps/chosen": -138.31369018554688, "logps/rejected": -224.75709533691406, "loss": 0.1537, "rewards/accuracies": 1.0, "rewards/chosen": -5.059008598327637, "rewards/margins": 1.808342695236206, "rewards/rejected": -6.867351531982422, "step": 8437 }, { "epoch": 1.31, "learning_rate": 7.958563203485658e-06, "logits/chosen": -2.346017360687256, "logits/rejected": -2.94476056098938, "logps/chosen": -482.72100830078125, "logps/rejected": -613.41259765625, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -3.4695115089416504, "rewards/margins": 5.307662010192871, "rewards/rejected": -8.777173042297363, "step": 8438 }, { "epoch": 1.31, "learning_rate": 7.95782976295451e-06, "logits/chosen": -2.436075448989868, "logits/rejected": -3.0387520790100098, "logps/chosen": -488.7593994140625, "logps/rejected": -632.23193359375, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": -1.595413327217102, "rewards/margins": 7.251095294952393, "rewards/rejected": -8.846508979797363, "step": 8439 }, { "epoch": 1.31, "learning_rate": 7.957096322423364e-06, "logits/chosen": -2.5565524101257324, "logits/rejected": -2.838759183883667, "logps/chosen": -160.99224853515625, "logps/rejected": -120.87193298339844, "loss": 1.4169, "rewards/accuracies": 0.5, "rewards/chosen": -3.308116912841797, "rewards/margins": -0.15183579921722412, "rewards/rejected": -3.1562812328338623, "step": 8440 }, { "epoch": 1.31, "learning_rate": 7.956362881892215e-06, "logits/chosen": -2.8351759910583496, "logits/rejected": -2.803270101547241, "logps/chosen": -709.1574096679688, "logps/rejected": -508.748046875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.376293659210205, "rewards/margins": 5.7673234939575195, "rewards/rejected": -8.143617630004883, "step": 8441 }, { "epoch": 1.31, "learning_rate": 7.955629441361067e-06, "logits/chosen": -2.710615396499634, "logits/rejected": -3.0513339042663574, "logps/chosen": -137.73153686523438, "logps/rejected": -249.4571533203125, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -3.22499942779541, "rewards/margins": 4.472280502319336, "rewards/rejected": -7.697279930114746, "step": 8442 }, { "epoch": 1.31, "learning_rate": 7.95489600082992e-06, "logits/chosen": -1.649778127670288, "logits/rejected": -2.746270179748535, "logps/chosen": -188.28305053710938, "logps/rejected": -503.712646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.112648367881775, "rewards/margins": 10.21517562866211, "rewards/rejected": -11.327824592590332, "step": 8443 }, { "epoch": 1.31, "learning_rate": 7.954162560298771e-06, "logits/chosen": -2.5460121631622314, "logits/rejected": -2.831462860107422, "logps/chosen": -201.69244384765625, "logps/rejected": -308.69256591796875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.4238884449005127, "rewards/margins": 7.479775428771973, "rewards/rejected": -8.903663635253906, "step": 8444 }, { "epoch": 1.31, "learning_rate": 7.953429119767623e-06, "logits/chosen": -2.860401153564453, "logits/rejected": -2.9355874061584473, "logps/chosen": -95.26802062988281, "logps/rejected": -205.02960205078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.6670505404472351, "rewards/margins": 7.5737714767456055, "rewards/rejected": -8.240821838378906, "step": 8445 }, { "epoch": 1.31, "learning_rate": 7.952695679236475e-06, "logits/chosen": -1.75486159324646, "logits/rejected": -2.7982614040374756, "logps/chosen": -165.36973571777344, "logps/rejected": -326.5377502441406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.3511909544467926, "rewards/margins": 8.414517402648926, "rewards/rejected": -8.063326835632324, "step": 8446 }, { "epoch": 1.31, "learning_rate": 7.951962238705327e-06, "logits/chosen": -2.7661964893341064, "logits/rejected": -2.458224296569824, "logps/chosen": -87.09934997558594, "logps/rejected": -118.80648040771484, "loss": 0.3511, "rewards/accuracies": 1.0, "rewards/chosen": -3.6272311210632324, "rewards/margins": 1.533024787902832, "rewards/rejected": -5.1602559089660645, "step": 8447 }, { "epoch": 1.31, "learning_rate": 7.95122879817418e-06, "logits/chosen": -2.802644968032837, "logits/rejected": -2.0941898822784424, "logps/chosen": -351.0089416503906, "logps/rejected": -290.6734313964844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8635477423667908, "rewards/margins": 6.54472541809082, "rewards/rejected": -7.408273220062256, "step": 8448 }, { "epoch": 1.31, "learning_rate": 7.950495357643032e-06, "logits/chosen": -3.135674476623535, "logits/rejected": -2.491342306137085, "logps/chosen": -532.8960571289062, "logps/rejected": -309.18438720703125, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -4.13248348236084, "rewards/margins": 5.001508712768555, "rewards/rejected": -9.133992195129395, "step": 8449 }, { "epoch": 1.31, "learning_rate": 7.949761917111884e-06, "logits/chosen": -1.6299495697021484, "logits/rejected": -2.69256591796875, "logps/chosen": -219.38504028320312, "logps/rejected": -626.5768432617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.122129201889038, "rewards/margins": 11.597383499145508, "rewards/rejected": -13.719512939453125, "step": 8450 }, { "epoch": 1.31, "learning_rate": 7.949028476580736e-06, "logits/chosen": -2.385648250579834, "logits/rejected": -3.084049940109253, "logps/chosen": -532.9692993164062, "logps/rejected": -275.2574462890625, "loss": 0.7388, "rewards/accuracies": 0.5, "rewards/chosen": -3.3912792205810547, "rewards/margins": 1.8541030883789062, "rewards/rejected": -5.245382308959961, "step": 8451 }, { "epoch": 1.31, "learning_rate": 7.948295036049588e-06, "logits/chosen": -2.8164641857147217, "logits/rejected": -2.899226665496826, "logps/chosen": -386.3939514160156, "logps/rejected": -360.9010009765625, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -4.098205089569092, "rewards/margins": 3.7413105964660645, "rewards/rejected": -7.839515686035156, "step": 8452 }, { "epoch": 1.31, "learning_rate": 7.94756159551844e-06, "logits/chosen": -2.8450241088867188, "logits/rejected": -3.1827592849731445, "logps/chosen": -270.02197265625, "logps/rejected": -470.7375183105469, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.6134026050567627, "rewards/margins": 7.869330406188965, "rewards/rejected": -10.482732772827148, "step": 8453 }, { "epoch": 1.31, "learning_rate": 7.946828154987292e-06, "logits/chosen": -1.8720593452453613, "logits/rejected": -2.9276626110076904, "logps/chosen": -214.45799255371094, "logps/rejected": -610.0092163085938, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.1010749340057373, "rewards/margins": 8.280739784240723, "rewards/rejected": -10.381814956665039, "step": 8454 }, { "epoch": 1.31, "learning_rate": 7.946094714456143e-06, "logits/chosen": -2.010676383972168, "logits/rejected": -2.9238693714141846, "logps/chosen": -151.69577026367188, "logps/rejected": -260.78558349609375, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -2.717423677444458, "rewards/margins": 6.07806396484375, "rewards/rejected": -8.795487403869629, "step": 8455 }, { "epoch": 1.32, "learning_rate": 7.945361273924995e-06, "logits/chosen": -2.9511120319366455, "logits/rejected": -1.7363215684890747, "logps/chosen": -563.1989135742188, "logps/rejected": -298.51812744140625, "loss": 0.0888, "rewards/accuracies": 1.0, "rewards/chosen": -0.6285766959190369, "rewards/margins": 7.252360820770264, "rewards/rejected": -7.880937576293945, "step": 8456 }, { "epoch": 1.32, "learning_rate": 7.944627833393849e-06, "logits/chosen": -1.761025071144104, "logits/rejected": -1.8332334756851196, "logps/chosen": -135.5055389404297, "logps/rejected": -234.9608154296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.016341395676136017, "rewards/margins": 8.361154556274414, "rewards/rejected": -8.377495765686035, "step": 8457 }, { "epoch": 1.32, "learning_rate": 7.9438943928627e-06, "logits/chosen": -2.8817851543426514, "logits/rejected": -2.721179723739624, "logps/chosen": -205.5382080078125, "logps/rejected": -356.97271728515625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -1.5296013355255127, "rewards/margins": 7.416859149932861, "rewards/rejected": -8.946460723876953, "step": 8458 }, { "epoch": 1.32, "learning_rate": 7.943160952331553e-06, "logits/chosen": -2.1987485885620117, "logits/rejected": -2.7960398197174072, "logps/chosen": -34.851593017578125, "logps/rejected": -237.58026123046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.696000576019287, "rewards/margins": 7.5550384521484375, "rewards/rejected": -9.251039505004883, "step": 8459 }, { "epoch": 1.32, "learning_rate": 7.942427511800405e-06, "logits/chosen": -1.9369555711746216, "logits/rejected": -2.8858418464660645, "logps/chosen": -199.27456665039062, "logps/rejected": -472.599609375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.6893532276153564, "rewards/margins": 6.409609794616699, "rewards/rejected": -8.098962783813477, "step": 8460 }, { "epoch": 1.32, "learning_rate": 7.941694071269256e-06, "logits/chosen": -2.2183873653411865, "logits/rejected": -2.7140414714813232, "logps/chosen": -50.85593032836914, "logps/rejected": -332.83221435546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9955422878265381, "rewards/margins": 8.619060516357422, "rewards/rejected": -9.614603042602539, "step": 8461 }, { "epoch": 1.32, "learning_rate": 7.940960630738108e-06, "logits/chosen": -1.2933571338653564, "logits/rejected": -2.5413405895233154, "logps/chosen": -201.83657836914062, "logps/rejected": -378.5675354003906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.0498886108398438, "rewards/margins": 7.033789157867432, "rewards/rejected": -9.083677291870117, "step": 8462 }, { "epoch": 1.32, "learning_rate": 7.94022719020696e-06, "logits/chosen": -2.698082208633423, "logits/rejected": -1.9954873323440552, "logps/chosen": -232.1593017578125, "logps/rejected": -259.2041015625, "loss": 0.2182, "rewards/accuracies": 1.0, "rewards/chosen": -0.5664306879043579, "rewards/margins": 6.374699592590332, "rewards/rejected": -6.9411301612854, "step": 8463 }, { "epoch": 1.32, "learning_rate": 7.939493749675812e-06, "logits/chosen": -2.4715678691864014, "logits/rejected": -3.0029637813568115, "logps/chosen": -160.6617889404297, "logps/rejected": -247.37527465820312, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.2794777154922485, "rewards/margins": 5.818086624145508, "rewards/rejected": -7.097564220428467, "step": 8464 }, { "epoch": 1.32, "learning_rate": 7.938760309144664e-06, "logits/chosen": -2.2722105979919434, "logits/rejected": -1.6827480792999268, "logps/chosen": -243.50906372070312, "logps/rejected": -288.6142883300781, "loss": 1.145, "rewards/accuracies": 0.5, "rewards/chosen": -1.7590912580490112, "rewards/margins": 4.231868743896484, "rewards/rejected": -5.990960121154785, "step": 8465 }, { "epoch": 1.32, "learning_rate": 7.938026868613518e-06, "logits/chosen": -2.0570948123931885, "logits/rejected": -3.09187388420105, "logps/chosen": -171.97329711914062, "logps/rejected": -536.5467529296875, "loss": 0.7731, "rewards/accuracies": 0.5, "rewards/chosen": -5.989409923553467, "rewards/margins": 4.816946983337402, "rewards/rejected": -10.806356430053711, "step": 8466 }, { "epoch": 1.32, "learning_rate": 7.93729342808237e-06, "logits/chosen": -2.8407821655273438, "logits/rejected": -2.495418071746826, "logps/chosen": -120.04978942871094, "logps/rejected": -120.52532196044922, "loss": 1.9075, "rewards/accuracies": 0.5, "rewards/chosen": -3.1282291412353516, "rewards/margins": 1.5488646030426025, "rewards/rejected": -4.677093505859375, "step": 8467 }, { "epoch": 1.32, "learning_rate": 7.936559987551221e-06, "logits/chosen": -1.7742732763290405, "logits/rejected": -2.9391040802001953, "logps/chosen": -109.03253173828125, "logps/rejected": -304.9906005859375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -1.5792756080627441, "rewards/margins": 6.472331523895264, "rewards/rejected": -8.051607131958008, "step": 8468 }, { "epoch": 1.32, "learning_rate": 7.935826547020073e-06, "logits/chosen": -1.8175971508026123, "logits/rejected": -2.997316598892212, "logps/chosen": -48.02159881591797, "logps/rejected": -413.27398681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.001527190208435, "rewards/margins": 12.288790702819824, "rewards/rejected": -13.29031753540039, "step": 8469 }, { "epoch": 1.32, "learning_rate": 7.935093106488925e-06, "logits/chosen": -2.3925788402557373, "logits/rejected": -2.6362974643707275, "logps/chosen": -346.7579345703125, "logps/rejected": -357.98309326171875, "loss": 2.0902, "rewards/accuracies": 0.5, "rewards/chosen": -5.096153259277344, "rewards/margins": 1.1205434799194336, "rewards/rejected": -6.216696739196777, "step": 8470 }, { "epoch": 1.32, "learning_rate": 7.934359665957777e-06, "logits/chosen": -2.176459789276123, "logits/rejected": -2.6406846046447754, "logps/chosen": -181.22219848632812, "logps/rejected": -413.4976501464844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.6977219581604004, "rewards/margins": 8.767553329467773, "rewards/rejected": -11.465275764465332, "step": 8471 }, { "epoch": 1.32, "learning_rate": 7.93362622542663e-06, "logits/chosen": -2.8920154571533203, "logits/rejected": -2.1396689414978027, "logps/chosen": -553.5435791015625, "logps/rejected": -353.689208984375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.6222702264785767, "rewards/margins": 8.690202713012695, "rewards/rejected": -10.31247329711914, "step": 8472 }, { "epoch": 1.32, "learning_rate": 7.932892784895482e-06, "logits/chosen": -2.977782726287842, "logits/rejected": -1.2335350513458252, "logps/chosen": -800.9251708984375, "logps/rejected": -362.69482421875, "loss": 0.435, "rewards/accuracies": 0.5, "rewards/chosen": -1.7298675775527954, "rewards/margins": 5.379682540893555, "rewards/rejected": -7.1095499992370605, "step": 8473 }, { "epoch": 1.32, "learning_rate": 7.932159344364334e-06, "logits/chosen": -2.5020508766174316, "logits/rejected": -2.8884999752044678, "logps/chosen": -101.25822448730469, "logps/rejected": -232.96267700195312, "loss": 2.021, "rewards/accuracies": 0.5, "rewards/chosen": -4.6486687660217285, "rewards/margins": -1.3201947212219238, "rewards/rejected": -3.3284740447998047, "step": 8474 }, { "epoch": 1.32, "learning_rate": 7.931425903833188e-06, "logits/chosen": -2.9272429943084717, "logits/rejected": -2.7675282955169678, "logps/chosen": -158.59214782714844, "logps/rejected": -233.81646728515625, "loss": 0.2021, "rewards/accuracies": 1.0, "rewards/chosen": -3.10184645652771, "rewards/margins": 4.347975254058838, "rewards/rejected": -7.449821472167969, "step": 8475 }, { "epoch": 1.32, "learning_rate": 7.93069246330204e-06, "logits/chosen": -2.854926586151123, "logits/rejected": -2.973381757736206, "logps/chosen": -287.4942626953125, "logps/rejected": -309.8666687011719, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.5594606399536133, "rewards/margins": 6.602593421936035, "rewards/rejected": -10.162054061889648, "step": 8476 }, { "epoch": 1.32, "learning_rate": 7.929959022770892e-06, "logits/chosen": -2.4043188095092773, "logits/rejected": -2.6090574264526367, "logps/chosen": -173.93621826171875, "logps/rejected": -401.51483154296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3561592102050781, "rewards/margins": 9.935782432556152, "rewards/rejected": -11.29194164276123, "step": 8477 }, { "epoch": 1.32, "learning_rate": 7.929225582239743e-06, "logits/chosen": -1.526874303817749, "logits/rejected": -2.2272675037384033, "logps/chosen": -442.292724609375, "logps/rejected": -440.848876953125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -4.528973579406738, "rewards/margins": 7.299529075622559, "rewards/rejected": -11.828502655029297, "step": 8478 }, { "epoch": 1.32, "learning_rate": 7.928492141708595e-06, "logits/chosen": -2.51922345161438, "logits/rejected": -3.0187337398529053, "logps/chosen": -55.809967041015625, "logps/rejected": -195.4305877685547, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": -1.4342879056930542, "rewards/margins": 4.262310981750488, "rewards/rejected": -5.696599006652832, "step": 8479 }, { "epoch": 1.32, "learning_rate": 7.927758701177447e-06, "logits/chosen": -1.660208821296692, "logits/rejected": -2.2715795040130615, "logps/chosen": -460.4423828125, "logps/rejected": -543.6959228515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.1023454666137695, "rewards/margins": 7.696112632751465, "rewards/rejected": -10.798458099365234, "step": 8480 }, { "epoch": 1.32, "learning_rate": 7.927025260646299e-06, "logits/chosen": -2.4154200553894043, "logits/rejected": -3.1077587604522705, "logps/chosen": -91.68168640136719, "logps/rejected": -243.01602172851562, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.847679376602173, "rewards/margins": 6.042714595794678, "rewards/rejected": -8.89039421081543, "step": 8481 }, { "epoch": 1.32, "learning_rate": 7.926291820115151e-06, "logits/chosen": -2.167487859725952, "logits/rejected": -2.687394142150879, "logps/chosen": -125.49717712402344, "logps/rejected": -268.2085266113281, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -0.521714448928833, "rewards/margins": 7.194462299346924, "rewards/rejected": -7.716176986694336, "step": 8482 }, { "epoch": 1.32, "learning_rate": 7.925558379584005e-06, "logits/chosen": -1.6045584678649902, "logits/rejected": -2.7279229164123535, "logps/chosen": -55.76274871826172, "logps/rejected": -335.6507873535156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1737614870071411, "rewards/margins": 9.440738677978516, "rewards/rejected": -10.614500045776367, "step": 8483 }, { "epoch": 1.32, "learning_rate": 7.924824939052856e-06, "logits/chosen": -1.6345473527908325, "logits/rejected": -2.995582103729248, "logps/chosen": -80.3169174194336, "logps/rejected": -523.262939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7193644046783447, "rewards/margins": 13.952926635742188, "rewards/rejected": -14.672290802001953, "step": 8484 }, { "epoch": 1.32, "learning_rate": 7.924091498521708e-06, "logits/chosen": -2.681784152984619, "logits/rejected": -2.9090545177459717, "logps/chosen": -34.5009651184082, "logps/rejected": -301.55682373046875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.365107536315918, "rewards/margins": 6.527353763580322, "rewards/rejected": -7.89246129989624, "step": 8485 }, { "epoch": 1.32, "learning_rate": 7.92335805799056e-06, "logits/chosen": -2.269456386566162, "logits/rejected": -2.6557350158691406, "logps/chosen": -109.40904235839844, "logps/rejected": -354.78082275390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.99708890914917, "rewards/margins": 8.563884735107422, "rewards/rejected": -10.56097412109375, "step": 8486 }, { "epoch": 1.32, "learning_rate": 7.922624617459412e-06, "logits/chosen": -1.9106285572052002, "logits/rejected": -2.922975778579712, "logps/chosen": -67.02542877197266, "logps/rejected": -225.77593994140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.5145273208618164, "rewards/margins": 7.005507469177246, "rewards/rejected": -7.5200347900390625, "step": 8487 }, { "epoch": 1.32, "learning_rate": 7.921891176928264e-06, "logits/chosen": -2.9065439701080322, "logits/rejected": -2.11922287940979, "logps/chosen": -366.3531494140625, "logps/rejected": -304.03399658203125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -3.321124315261841, "rewards/margins": 4.848292350769043, "rewards/rejected": -8.169416427612305, "step": 8488 }, { "epoch": 1.32, "learning_rate": 7.921157736397116e-06, "logits/chosen": -2.8739898204803467, "logits/rejected": -3.059248447418213, "logps/chosen": -514.3744506835938, "logps/rejected": -556.9060668945312, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.1889610290527344, "rewards/margins": 6.904104232788086, "rewards/rejected": -10.09306526184082, "step": 8489 }, { "epoch": 1.32, "learning_rate": 7.920424295865968e-06, "logits/chosen": -2.8347907066345215, "logits/rejected": -1.3386470079421997, "logps/chosen": -491.8179016113281, "logps/rejected": -170.14218139648438, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": -2.114666700363159, "rewards/margins": 5.426582336425781, "rewards/rejected": -7.5412492752075195, "step": 8490 }, { "epoch": 1.32, "learning_rate": 7.91969085533482e-06, "logits/chosen": -2.8859262466430664, "logits/rejected": -3.0048511028289795, "logps/chosen": -121.79899597167969, "logps/rejected": -194.451171875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -0.5273994207382202, "rewards/margins": 4.392390251159668, "rewards/rejected": -4.919789791107178, "step": 8491 }, { "epoch": 1.32, "learning_rate": 7.918957414803673e-06, "logits/chosen": -2.102604389190674, "logits/rejected": -2.981158971786499, "logps/chosen": -132.96240234375, "logps/rejected": -287.8888244628906, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -0.4608585238456726, "rewards/margins": 6.036084175109863, "rewards/rejected": -6.49694299697876, "step": 8492 }, { "epoch": 1.32, "learning_rate": 7.918223974272525e-06, "logits/chosen": -1.457029104232788, "logits/rejected": -2.726748466491699, "logps/chosen": -87.26947021484375, "logps/rejected": -294.8712158203125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.49505352973938, "rewards/margins": 6.043767929077148, "rewards/rejected": -8.53882122039795, "step": 8493 }, { "epoch": 1.32, "learning_rate": 7.917490533741377e-06, "logits/chosen": -2.8054964542388916, "logits/rejected": -3.1683926582336426, "logps/chosen": -144.03321838378906, "logps/rejected": -242.85791015625, "loss": 0.1949, "rewards/accuracies": 1.0, "rewards/chosen": -1.3513370752334595, "rewards/margins": 4.317943096160889, "rewards/rejected": -5.669280529022217, "step": 8494 }, { "epoch": 1.32, "learning_rate": 7.916757093210229e-06, "logits/chosen": -1.6114442348480225, "logits/rejected": -2.6959471702575684, "logps/chosen": -70.04764556884766, "logps/rejected": -245.27944946289062, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.9909688830375671, "rewards/margins": 6.695472717285156, "rewards/rejected": -7.686441421508789, "step": 8495 }, { "epoch": 1.32, "learning_rate": 7.91602365267908e-06, "logits/chosen": -2.7703232765197754, "logits/rejected": -2.736076593399048, "logps/chosen": -196.75953674316406, "logps/rejected": -150.39146423339844, "loss": 0.146, "rewards/accuracies": 1.0, "rewards/chosen": -2.011157989501953, "rewards/margins": 2.2436227798461914, "rewards/rejected": -4.2547807693481445, "step": 8496 }, { "epoch": 1.32, "learning_rate": 7.915290212147933e-06, "logits/chosen": -1.4179493188858032, "logits/rejected": -2.631469964981079, "logps/chosen": -100.086181640625, "logps/rejected": -314.0673828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.9846997261047363, "rewards/margins": 6.027606964111328, "rewards/rejected": -8.012307167053223, "step": 8497 }, { "epoch": 1.32, "learning_rate": 7.914556771616784e-06, "logits/chosen": -2.4536731243133545, "logits/rejected": -3.161471366882324, "logps/chosen": -388.81219482421875, "logps/rejected": -484.5896301269531, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.0131843090057373, "rewards/margins": 6.67881441116333, "rewards/rejected": -8.691998481750488, "step": 8498 }, { "epoch": 1.32, "learning_rate": 7.913823331085636e-06, "logits/chosen": -2.9920737743377686, "logits/rejected": -2.5771780014038086, "logps/chosen": -146.2910919189453, "logps/rejected": -282.76275634765625, "loss": 0.0718, "rewards/accuracies": 1.0, "rewards/chosen": -2.7529635429382324, "rewards/margins": 7.490578651428223, "rewards/rejected": -10.243541717529297, "step": 8499 }, { "epoch": 1.32, "learning_rate": 7.913089890554488e-06, "logits/chosen": -2.864514112472534, "logits/rejected": -2.510209560394287, "logps/chosen": -287.8983154296875, "logps/rejected": -155.27691650390625, "loss": 3.2801, "rewards/accuracies": 0.0, "rewards/chosen": -8.148109436035156, "rewards/margins": -3.159038782119751, "rewards/rejected": -4.989070415496826, "step": 8500 }, { "epoch": 1.32, "learning_rate": 7.912356450023342e-06, "logits/chosen": -2.0531270503997803, "logits/rejected": -2.963839530944824, "logps/chosen": -148.09422302246094, "logps/rejected": -516.6460571289062, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.466224193572998, "rewards/margins": 7.325547218322754, "rewards/rejected": -9.791770935058594, "step": 8501 }, { "epoch": 1.32, "learning_rate": 7.911623009492194e-06, "logits/chosen": -2.740300416946411, "logits/rejected": -2.085094928741455, "logps/chosen": -119.78556823730469, "logps/rejected": -136.64865112304688, "loss": 0.5927, "rewards/accuracies": 0.5, "rewards/chosen": -1.7709159851074219, "rewards/margins": 3.0808491706848145, "rewards/rejected": -4.851765155792236, "step": 8502 }, { "epoch": 1.32, "learning_rate": 7.910889568961045e-06, "logits/chosen": -2.813476085662842, "logits/rejected": -1.9942355155944824, "logps/chosen": -212.80255126953125, "logps/rejected": -212.99574279785156, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.3904749155044556, "rewards/margins": 6.783616065979004, "rewards/rejected": -8.174091339111328, "step": 8503 }, { "epoch": 1.32, "learning_rate": 7.910156128429897e-06, "logits/chosen": -1.752939224243164, "logits/rejected": -2.889069080352783, "logps/chosen": -133.01187133789062, "logps/rejected": -362.3523864746094, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.9621685743331909, "rewards/margins": 6.387212753295898, "rewards/rejected": -7.349381446838379, "step": 8504 }, { "epoch": 1.32, "learning_rate": 7.90942268789875e-06, "logits/chosen": -2.1712117195129395, "logits/rejected": -2.0001513957977295, "logps/chosen": -630.4797973632812, "logps/rejected": -604.6429443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6899673938751221, "rewards/margins": 12.825439453125, "rewards/rejected": -13.51540756225586, "step": 8505 }, { "epoch": 1.32, "learning_rate": 7.908689247367603e-06, "logits/chosen": -2.7882437705993652, "logits/rejected": -2.797292470932007, "logps/chosen": -130.44595336914062, "logps/rejected": -305.10699462890625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.9581737518310547, "rewards/margins": 9.54275131225586, "rewards/rejected": -11.500925064086914, "step": 8506 }, { "epoch": 1.32, "learning_rate": 7.907955806836455e-06, "logits/chosen": -2.6509273052215576, "logits/rejected": -3.1166486740112305, "logps/chosen": -61.34342575073242, "logps/rejected": -244.8134765625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.0286879539489746, "rewards/margins": 5.903257369995117, "rewards/rejected": -7.931944847106934, "step": 8507 }, { "epoch": 1.32, "learning_rate": 7.907222366305307e-06, "logits/chosen": -2.7526493072509766, "logits/rejected": -2.540299654006958, "logps/chosen": -84.23030853271484, "logps/rejected": -219.4373779296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8402184247970581, "rewards/margins": 7.813802719116211, "rewards/rejected": -8.654020309448242, "step": 8508 }, { "epoch": 1.32, "learning_rate": 7.906488925774158e-06, "logits/chosen": -2.5990028381347656, "logits/rejected": -2.862194299697876, "logps/chosen": -253.315185546875, "logps/rejected": -405.19525146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8356690406799316, "rewards/margins": 11.35739517211914, "rewards/rejected": -13.19306468963623, "step": 8509 }, { "epoch": 1.32, "learning_rate": 7.905755485243012e-06, "logits/chosen": -2.7655370235443115, "logits/rejected": -2.445119857788086, "logps/chosen": -260.69647216796875, "logps/rejected": -243.0444793701172, "loss": 0.3053, "rewards/accuracies": 1.0, "rewards/chosen": -1.9727462530136108, "rewards/margins": 4.934252738952637, "rewards/rejected": -6.906998634338379, "step": 8510 }, { "epoch": 1.32, "learning_rate": 7.905022044711864e-06, "logits/chosen": -2.694603443145752, "logits/rejected": -1.148338794708252, "logps/chosen": -497.189208984375, "logps/rejected": -79.85594177246094, "loss": 3.2849, "rewards/accuracies": 0.0, "rewards/chosen": -6.2590227127075195, "rewards/margins": -3.2435994148254395, "rewards/rejected": -3.01542329788208, "step": 8511 }, { "epoch": 1.32, "learning_rate": 7.904288604180716e-06, "logits/chosen": -2.438910722732544, "logits/rejected": -2.783202886581421, "logps/chosen": -63.27745056152344, "logps/rejected": -239.01962280273438, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -1.31562340259552, "rewards/margins": 5.292293548583984, "rewards/rejected": -6.607916831970215, "step": 8512 }, { "epoch": 1.32, "learning_rate": 7.903555163649568e-06, "logits/chosen": -2.919395685195923, "logits/rejected": -2.2924439907073975, "logps/chosen": -483.5548400878906, "logps/rejected": -407.95733642578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6843235492706299, "rewards/margins": 8.95913028717041, "rewards/rejected": -9.643453598022461, "step": 8513 }, { "epoch": 1.32, "learning_rate": 7.90282172311842e-06, "logits/chosen": -2.3325862884521484, "logits/rejected": -2.7177791595458984, "logps/chosen": -125.90768432617188, "logps/rejected": -262.434814453125, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -3.0750744342803955, "rewards/margins": 6.824487686157227, "rewards/rejected": -9.899561882019043, "step": 8514 }, { "epoch": 1.32, "learning_rate": 7.902088282587271e-06, "logits/chosen": -2.436255931854248, "logits/rejected": -3.108138084411621, "logps/chosen": -314.96636962890625, "logps/rejected": -664.62451171875, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -2.5911903381347656, "rewards/margins": 6.159200668334961, "rewards/rejected": -8.750391006469727, "step": 8515 }, { "epoch": 1.32, "learning_rate": 7.901354842056123e-06, "logits/chosen": -1.8779125213623047, "logits/rejected": -2.5527215003967285, "logps/chosen": -67.22320556640625, "logps/rejected": -338.1940612792969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5650246143341064, "rewards/margins": 9.748678207397461, "rewards/rejected": -11.313703536987305, "step": 8516 }, { "epoch": 1.32, "learning_rate": 7.900621401524975e-06, "logits/chosen": -2.899702310562134, "logits/rejected": -1.608620524406433, "logps/chosen": -526.314453125, "logps/rejected": -207.95156860351562, "loss": 0.5876, "rewards/accuracies": 1.0, "rewards/chosen": -3.2161476612091064, "rewards/margins": 0.23389434814453125, "rewards/rejected": -3.4500420093536377, "step": 8517 }, { "epoch": 1.32, "learning_rate": 7.899887960993827e-06, "logits/chosen": -2.325590133666992, "logits/rejected": -3.067950963973999, "logps/chosen": -339.5036926269531, "logps/rejected": -541.8516845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.43452760577201843, "rewards/margins": 10.107322692871094, "rewards/rejected": -10.541851043701172, "step": 8518 }, { "epoch": 1.32, "learning_rate": 7.89915452046268e-06, "logits/chosen": -2.171701192855835, "logits/rejected": -3.183288335800171, "logps/chosen": -396.43841552734375, "logps/rejected": -525.5225219726562, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -1.2068878412246704, "rewards/margins": 6.055527210235596, "rewards/rejected": -7.262414932250977, "step": 8519 }, { "epoch": 1.33, "learning_rate": 7.898421079931532e-06, "logits/chosen": -1.5481477975845337, "logits/rejected": -2.909756660461426, "logps/chosen": -201.82493591308594, "logps/rejected": -582.2276611328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.5107994079589844, "rewards/margins": 9.408685684204102, "rewards/rejected": -10.919485092163086, "step": 8520 }, { "epoch": 1.33, "learning_rate": 7.897687639400384e-06, "logits/chosen": -1.1591131687164307, "logits/rejected": -2.6664507389068604, "logps/chosen": -204.20263671875, "logps/rejected": -475.9776611328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -0.814190685749054, "rewards/margins": 7.010250091552734, "rewards/rejected": -7.824440956115723, "step": 8521 }, { "epoch": 1.33, "learning_rate": 7.896954198869236e-06, "logits/chosen": -2.8591763973236084, "logits/rejected": -2.5965287685394287, "logps/chosen": -185.8472900390625, "logps/rejected": -201.43959045410156, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.9182945489883423, "rewards/margins": 6.385067939758301, "rewards/rejected": -8.303362846374512, "step": 8522 }, { "epoch": 1.33, "learning_rate": 7.896220758338088e-06, "logits/chosen": -2.45330810546875, "logits/rejected": -2.893707752227783, "logps/chosen": -605.9242553710938, "logps/rejected": -469.8659973144531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.2152085304260254, "rewards/margins": 6.722123146057129, "rewards/rejected": -7.937331199645996, "step": 8523 }, { "epoch": 1.33, "learning_rate": 7.89548731780694e-06, "logits/chosen": -2.8832430839538574, "logits/rejected": -2.7167563438415527, "logps/chosen": -162.77606201171875, "logps/rejected": -141.77035522460938, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.6600190997123718, "rewards/margins": 5.519366264343262, "rewards/rejected": -6.179385185241699, "step": 8524 }, { "epoch": 1.33, "learning_rate": 7.894753877275792e-06, "logits/chosen": -2.7634546756744385, "logits/rejected": -2.2435801029205322, "logps/chosen": -179.99594116210938, "logps/rejected": -204.8910369873047, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.4973430633544922, "rewards/margins": 7.363632678985596, "rewards/rejected": -8.86097526550293, "step": 8525 }, { "epoch": 1.33, "learning_rate": 7.894020436744644e-06, "logits/chosen": -2.2543387413024902, "logits/rejected": -2.649641752243042, "logps/chosen": -623.4756469726562, "logps/rejected": -591.9508056640625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.38776969909668, "rewards/margins": 6.005575180053711, "rewards/rejected": -10.39334487915039, "step": 8526 }, { "epoch": 1.33, "learning_rate": 7.893286996213496e-06, "logits/chosen": -2.321024179458618, "logits/rejected": -2.9835164546966553, "logps/chosen": -346.17718505859375, "logps/rejected": -549.8768310546875, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -1.5181496143341064, "rewards/margins": 5.7471466064453125, "rewards/rejected": -7.26529598236084, "step": 8527 }, { "epoch": 1.33, "learning_rate": 7.89255355568235e-06, "logits/chosen": -2.883993625640869, "logits/rejected": -2.6612229347229004, "logps/chosen": -171.0135040283203, "logps/rejected": -301.9697265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.0960350036621094, "rewards/margins": 7.944512367248535, "rewards/rejected": -11.040547370910645, "step": 8528 }, { "epoch": 1.33, "learning_rate": 7.891820115151201e-06, "logits/chosen": -2.8366167545318604, "logits/rejected": -2.747694492340088, "logps/chosen": -338.8180847167969, "logps/rejected": -230.0221710205078, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -1.8663361072540283, "rewards/margins": 4.949165344238281, "rewards/rejected": -6.8155012130737305, "step": 8529 }, { "epoch": 1.33, "learning_rate": 7.891086674620053e-06, "logits/chosen": -1.9516117572784424, "logits/rejected": -3.0447998046875, "logps/chosen": -80.57862854003906, "logps/rejected": -233.07144165039062, "loss": 0.1172, "rewards/accuracies": 1.0, "rewards/chosen": -3.3505640029907227, "rewards/margins": 2.185610294342041, "rewards/rejected": -5.5361738204956055, "step": 8530 }, { "epoch": 1.33, "learning_rate": 7.890353234088905e-06, "logits/chosen": -2.950793981552124, "logits/rejected": -2.885603427886963, "logps/chosen": -408.0785827636719, "logps/rejected": -402.97857666015625, "loss": 1.0801, "rewards/accuracies": 0.5, "rewards/chosen": -5.056610107421875, "rewards/margins": 1.4124343395233154, "rewards/rejected": -6.4690446853637695, "step": 8531 }, { "epoch": 1.33, "learning_rate": 7.889619793557757e-06, "logits/chosen": -2.682004928588867, "logits/rejected": -0.7980414032936096, "logps/chosen": -588.1163330078125, "logps/rejected": -159.11798095703125, "loss": 1.1664, "rewards/accuracies": 0.5, "rewards/chosen": -4.397709846496582, "rewards/margins": 2.8286566734313965, "rewards/rejected": -7.22636604309082, "step": 8532 }, { "epoch": 1.33, "learning_rate": 7.888886353026609e-06, "logits/chosen": -2.6307027339935303, "logits/rejected": -3.072802782058716, "logps/chosen": -181.46844482421875, "logps/rejected": -362.44635009765625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -3.9295167922973633, "rewards/margins": 6.461017608642578, "rewards/rejected": -10.390534400939941, "step": 8533 }, { "epoch": 1.33, "learning_rate": 7.88815291249546e-06, "logits/chosen": -2.4276058673858643, "logits/rejected": -2.846672296524048, "logps/chosen": -147.63369750976562, "logps/rejected": -298.07891845703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.7376468181610107, "rewards/margins": 8.058099746704102, "rewards/rejected": -10.795745849609375, "step": 8534 }, { "epoch": 1.33, "learning_rate": 7.887419471964312e-06, "logits/chosen": -2.501276731491089, "logits/rejected": -3.033564329147339, "logps/chosen": -376.4577941894531, "logps/rejected": -580.8287353515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.6697328090667725, "rewards/margins": 7.297329425811768, "rewards/rejected": -8.967061996459961, "step": 8535 }, { "epoch": 1.33, "learning_rate": 7.886686031433164e-06, "logits/chosen": -2.751134157180786, "logits/rejected": -1.7788869142532349, "logps/chosen": -840.39501953125, "logps/rejected": -426.9435119628906, "loss": 0.033, "rewards/accuracies": 1.0, "rewards/chosen": -5.461457252502441, "rewards/margins": 3.8258628845214844, "rewards/rejected": -9.287320137023926, "step": 8536 }, { "epoch": 1.33, "learning_rate": 7.885952590902018e-06, "logits/chosen": -2.3126392364501953, "logits/rejected": -2.8776073455810547, "logps/chosen": -91.95480346679688, "logps/rejected": -204.4096221923828, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -0.9478054046630859, "rewards/margins": 6.377169609069824, "rewards/rejected": -7.32497501373291, "step": 8537 }, { "epoch": 1.33, "learning_rate": 7.88521915037087e-06, "logits/chosen": -2.8585290908813477, "logits/rejected": -2.886880397796631, "logps/chosen": -138.00885009765625, "logps/rejected": -154.4034423828125, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": -3.028503894805908, "rewards/margins": 4.549015045166016, "rewards/rejected": -7.577518463134766, "step": 8538 }, { "epoch": 1.33, "learning_rate": 7.884485709839722e-06, "logits/chosen": -3.0121049880981445, "logits/rejected": -2.5190024375915527, "logps/chosen": -519.05078125, "logps/rejected": -389.69146728515625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.6739566326141357, "rewards/margins": 6.27528190612793, "rewards/rejected": -8.949237823486328, "step": 8539 }, { "epoch": 1.33, "learning_rate": 7.883752269308575e-06, "logits/chosen": -2.457754373550415, "logits/rejected": -2.81738018989563, "logps/chosen": -1001.989990234375, "logps/rejected": -1292.0059814453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1862716674804688, "rewards/margins": 9.353446960449219, "rewards/rejected": -12.539718627929688, "step": 8540 }, { "epoch": 1.33, "learning_rate": 7.883018828777427e-06, "logits/chosen": -2.9379684925079346, "logits/rejected": -2.7211501598358154, "logps/chosen": -173.86337280273438, "logps/rejected": -212.03546142578125, "loss": 0.3378, "rewards/accuracies": 1.0, "rewards/chosen": -2.0504443645477295, "rewards/margins": 3.206505298614502, "rewards/rejected": -5.2569499015808105, "step": 8541 }, { "epoch": 1.33, "learning_rate": 7.882285388246279e-06, "logits/chosen": -2.5827243328094482, "logits/rejected": -2.1365506649017334, "logps/chosen": -266.7857971191406, "logps/rejected": -435.705810546875, "loss": 0.2058, "rewards/accuracies": 1.0, "rewards/chosen": -4.660489082336426, "rewards/margins": 2.8669679164886475, "rewards/rejected": -7.527457237243652, "step": 8542 }, { "epoch": 1.33, "learning_rate": 7.88155194771513e-06, "logits/chosen": -2.5818982124328613, "logits/rejected": -2.8916850090026855, "logps/chosen": -68.67567443847656, "logps/rejected": -223.523193359375, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -3.244866371154785, "rewards/margins": 5.8467912673950195, "rewards/rejected": -9.091657638549805, "step": 8543 }, { "epoch": 1.33, "learning_rate": 7.880818507183983e-06, "logits/chosen": -1.5665011405944824, "logits/rejected": -2.597299814224243, "logps/chosen": -157.89990234375, "logps/rejected": -304.540771484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.1736841201782227, "rewards/margins": 7.650162696838379, "rewards/rejected": -9.823846817016602, "step": 8544 }, { "epoch": 1.33, "learning_rate": 7.880085066652835e-06, "logits/chosen": -2.5744469165802, "logits/rejected": -2.766955852508545, "logps/chosen": -510.71087646484375, "logps/rejected": -656.6510009765625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.5656371116638184, "rewards/margins": 7.764140605926514, "rewards/rejected": -10.329777717590332, "step": 8545 }, { "epoch": 1.33, "learning_rate": 7.879351626121688e-06, "logits/chosen": -2.3087308406829834, "logits/rejected": -3.089639902114868, "logps/chosen": -45.762664794921875, "logps/rejected": -325.91168212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9892827272415161, "rewards/margins": 9.724520683288574, "rewards/rejected": -11.7138032913208, "step": 8546 }, { "epoch": 1.33, "learning_rate": 7.87861818559054e-06, "logits/chosen": -2.5702857971191406, "logits/rejected": -2.9899773597717285, "logps/chosen": -82.40170288085938, "logps/rejected": -199.959716796875, "loss": 0.8154, "rewards/accuracies": 0.5, "rewards/chosen": -5.026834964752197, "rewards/margins": 3.196711778640747, "rewards/rejected": -8.223546981811523, "step": 8547 }, { "epoch": 1.33, "learning_rate": 7.877884745059392e-06, "logits/chosen": -2.4918906688690186, "logits/rejected": -3.063228130340576, "logps/chosen": -315.52154541015625, "logps/rejected": -417.9248046875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.020977973937988, "rewards/margins": 7.114095687866211, "rewards/rejected": -11.1350736618042, "step": 8548 }, { "epoch": 1.33, "learning_rate": 7.877151304528244e-06, "logits/chosen": -2.740130662918091, "logits/rejected": -1.1329370737075806, "logps/chosen": -455.2872009277344, "logps/rejected": -416.1009521484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": 0.8238250613212585, "rewards/margins": 7.870366096496582, "rewards/rejected": -7.046541213989258, "step": 8549 }, { "epoch": 1.33, "learning_rate": 7.876417863997096e-06, "logits/chosen": -2.9017345905303955, "logits/rejected": -1.7789267301559448, "logps/chosen": -325.62396240234375, "logps/rejected": -276.8722229003906, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.070603847503662, "rewards/margins": 7.757453918457031, "rewards/rejected": -8.828058242797852, "step": 8550 }, { "epoch": 1.33, "learning_rate": 7.875684423465947e-06, "logits/chosen": -2.2799620628356934, "logits/rejected": -2.7583138942718506, "logps/chosen": -192.08322143554688, "logps/rejected": -311.88604736328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.8608299493789673, "rewards/margins": 7.019759654998779, "rewards/rejected": -8.880589485168457, "step": 8551 }, { "epoch": 1.33, "learning_rate": 7.8749509829348e-06, "logits/chosen": -2.1752991676330566, "logits/rejected": -3.004150152206421, "logps/chosen": -89.1034927368164, "logps/rejected": -260.388427734375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -2.1699392795562744, "rewards/margins": 5.544567108154297, "rewards/rejected": -7.714506149291992, "step": 8552 }, { "epoch": 1.33, "learning_rate": 7.874217542403651e-06, "logits/chosen": -1.80341637134552, "logits/rejected": -2.8977015018463135, "logps/chosen": -71.27708435058594, "logps/rejected": -200.15231323242188, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -2.0554981231689453, "rewards/margins": 5.789510726928711, "rewards/rejected": -7.8450093269348145, "step": 8553 }, { "epoch": 1.33, "learning_rate": 7.873484101872503e-06, "logits/chosen": -1.6761596202850342, "logits/rejected": -3.0329205989837646, "logps/chosen": -84.11285400390625, "logps/rejected": -242.9915313720703, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -2.5154638290405273, "rewards/margins": 3.528402328491211, "rewards/rejected": -6.043866157531738, "step": 8554 }, { "epoch": 1.33, "learning_rate": 7.872750661341357e-06, "logits/chosen": -2.7975587844848633, "logits/rejected": -2.1717731952667236, "logps/chosen": -273.13385009765625, "logps/rejected": -277.4512939453125, "loss": 0.8806, "rewards/accuracies": 0.5, "rewards/chosen": -6.782273292541504, "rewards/margins": 3.278705596923828, "rewards/rejected": -10.060978889465332, "step": 8555 }, { "epoch": 1.33, "learning_rate": 7.872017220810209e-06, "logits/chosen": -2.589738368988037, "logits/rejected": -2.7937443256378174, "logps/chosen": -333.79010009765625, "logps/rejected": -395.54327392578125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -1.2365249395370483, "rewards/margins": 5.009035587310791, "rewards/rejected": -6.245560646057129, "step": 8556 }, { "epoch": 1.33, "learning_rate": 7.87128378027906e-06, "logits/chosen": -2.3192293643951416, "logits/rejected": -2.8057572841644287, "logps/chosen": -88.0948257446289, "logps/rejected": -193.3542938232422, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -2.9122085571289062, "rewards/margins": 5.802028656005859, "rewards/rejected": -8.714237213134766, "step": 8557 }, { "epoch": 1.33, "learning_rate": 7.870550339747912e-06, "logits/chosen": -3.025327444076538, "logits/rejected": -2.691829204559326, "logps/chosen": -93.22933197021484, "logps/rejected": -368.26123046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.5584732294082642, "rewards/margins": 9.522223472595215, "rewards/rejected": -10.080696105957031, "step": 8558 }, { "epoch": 1.33, "learning_rate": 7.869816899216764e-06, "logits/chosen": -2.563070058822632, "logits/rejected": -2.8252065181732178, "logps/chosen": -92.43029022216797, "logps/rejected": -270.2387390136719, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -1.6767207384109497, "rewards/margins": 6.599574089050293, "rewards/rejected": -8.276294708251953, "step": 8559 }, { "epoch": 1.33, "learning_rate": 7.869083458685616e-06, "logits/chosen": -2.9626967906951904, "logits/rejected": -3.1271886825561523, "logps/chosen": -228.76980590820312, "logps/rejected": -319.66217041015625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -3.626107692718506, "rewards/margins": 5.523759841918945, "rewards/rejected": -9.14986801147461, "step": 8560 }, { "epoch": 1.33, "learning_rate": 7.868350018154468e-06, "logits/chosen": -0.3509400188922882, "logits/rejected": -1.7013040781021118, "logps/chosen": -144.54354858398438, "logps/rejected": -522.1983642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.203880548477173, "rewards/margins": 12.793266296386719, "rewards/rejected": -15.997146606445312, "step": 8561 }, { "epoch": 1.33, "learning_rate": 7.86761657762332e-06, "logits/chosen": -1.7739402055740356, "logits/rejected": -2.7340667247772217, "logps/chosen": -98.83065795898438, "logps/rejected": -376.36944580078125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.178886890411377, "rewards/margins": 5.898758888244629, "rewards/rejected": -8.077646255493164, "step": 8562 }, { "epoch": 1.33, "learning_rate": 7.866883137092172e-06, "logits/chosen": -2.231726884841919, "logits/rejected": -3.0420515537261963, "logps/chosen": -85.8272705078125, "logps/rejected": -243.66632080078125, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -3.6225156784057617, "rewards/margins": 5.652949333190918, "rewards/rejected": -9.27546501159668, "step": 8563 }, { "epoch": 1.33, "learning_rate": 7.866149696561025e-06, "logits/chosen": -2.72617769241333, "logits/rejected": -2.5351414680480957, "logps/chosen": -536.2286376953125, "logps/rejected": -379.83038330078125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -3.2994203567504883, "rewards/margins": 6.229674816131592, "rewards/rejected": -9.529094696044922, "step": 8564 }, { "epoch": 1.33, "learning_rate": 7.865416256029877e-06, "logits/chosen": -2.2541263103485107, "logits/rejected": -2.882084846496582, "logps/chosen": -147.9207000732422, "logps/rejected": -349.8030700683594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.129357099533081, "rewards/margins": 8.379096031188965, "rewards/rejected": -11.508453369140625, "step": 8565 }, { "epoch": 1.33, "learning_rate": 7.864682815498729e-06, "logits/chosen": -2.6669204235076904, "logits/rejected": -1.1388789415359497, "logps/chosen": -173.95692443847656, "logps/rejected": -173.10980224609375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.8336342573165894, "rewards/margins": 8.9268159866333, "rewards/rejected": -10.76045036315918, "step": 8566 }, { "epoch": 1.33, "learning_rate": 7.863949374967581e-06, "logits/chosen": -2.285496711730957, "logits/rejected": -2.043611764907837, "logps/chosen": -152.69631958007812, "logps/rejected": -283.0519714355469, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4446892738342285, "rewards/margins": 7.083115577697754, "rewards/rejected": -8.527804374694824, "step": 8567 }, { "epoch": 1.33, "learning_rate": 7.863215934436433e-06, "logits/chosen": -1.7876243591308594, "logits/rejected": -2.5485470294952393, "logps/chosen": -125.30061340332031, "logps/rejected": -437.6797790527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4033347964286804, "rewards/margins": 10.543661117553711, "rewards/rejected": -10.946995735168457, "step": 8568 }, { "epoch": 1.33, "learning_rate": 7.862482493905285e-06, "logits/chosen": -2.768709421157837, "logits/rejected": -2.737745523452759, "logps/chosen": -772.9453125, "logps/rejected": -602.997314453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.5164257287979126, "rewards/margins": 8.496786117553711, "rewards/rejected": -10.013211250305176, "step": 8569 }, { "epoch": 1.33, "learning_rate": 7.861749053374137e-06, "logits/chosen": -2.6549136638641357, "logits/rejected": -3.058436632156372, "logps/chosen": -216.09825134277344, "logps/rejected": -275.806640625, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -1.0859076976776123, "rewards/margins": 6.57675838470459, "rewards/rejected": -7.662666320800781, "step": 8570 }, { "epoch": 1.33, "learning_rate": 7.861015612842988e-06, "logits/chosen": -2.6125552654266357, "logits/rejected": -1.392423152923584, "logps/chosen": -379.9741516113281, "logps/rejected": -198.85499572753906, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.306117534637451, "rewards/margins": 6.08563232421875, "rewards/rejected": -10.39175033569336, "step": 8571 }, { "epoch": 1.33, "learning_rate": 7.860282172311842e-06, "logits/chosen": -2.9128520488739014, "logits/rejected": -3.0325331687927246, "logps/chosen": -97.83219909667969, "logps/rejected": -320.02215576171875, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": -0.5049583315849304, "rewards/margins": 5.510732650756836, "rewards/rejected": -6.01569128036499, "step": 8572 }, { "epoch": 1.33, "learning_rate": 7.859548731780694e-06, "logits/chosen": -2.7161214351654053, "logits/rejected": -2.7158827781677246, "logps/chosen": -131.47271728515625, "logps/rejected": -178.64854431152344, "loss": 0.1073, "rewards/accuracies": 1.0, "rewards/chosen": -2.4872288703918457, "rewards/margins": 4.215577602386475, "rewards/rejected": -6.70280647277832, "step": 8573 }, { "epoch": 1.33, "learning_rate": 7.858815291249547e-06, "logits/chosen": -2.348057985305786, "logits/rejected": -2.8354156017303467, "logps/chosen": -132.89393615722656, "logps/rejected": -191.3578338623047, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.0140490531921387, "rewards/margins": 7.122946739196777, "rewards/rejected": -8.136996269226074, "step": 8574 }, { "epoch": 1.33, "learning_rate": 7.8580818507184e-06, "logits/chosen": -2.8269479274749756, "logits/rejected": -2.841475009918213, "logps/chosen": -251.27655029296875, "logps/rejected": -305.52764892578125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.6384086608886719, "rewards/margins": 7.4177751541137695, "rewards/rejected": -8.056183815002441, "step": 8575 }, { "epoch": 1.33, "learning_rate": 7.857348410187251e-06, "logits/chosen": -1.9102741479873657, "logits/rejected": -2.936983585357666, "logps/chosen": -572.3685302734375, "logps/rejected": -659.9664306640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.3621978759765625, "rewards/margins": 8.117212295532227, "rewards/rejected": -10.479410171508789, "step": 8576 }, { "epoch": 1.33, "learning_rate": 7.856614969656103e-06, "logits/chosen": -2.3321025371551514, "logits/rejected": -2.9915354251861572, "logps/chosen": -52.85294723510742, "logps/rejected": -214.41493225097656, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -2.4401633739471436, "rewards/margins": 5.38948917388916, "rewards/rejected": -7.829652786254883, "step": 8577 }, { "epoch": 1.33, "learning_rate": 7.855881529124955e-06, "logits/chosen": -2.8317623138427734, "logits/rejected": -2.0413358211517334, "logps/chosen": -170.8838653564453, "logps/rejected": -167.18212890625, "loss": 0.1868, "rewards/accuracies": 1.0, "rewards/chosen": -5.110569000244141, "rewards/margins": 2.9997806549072266, "rewards/rejected": -8.110349655151367, "step": 8578 }, { "epoch": 1.33, "learning_rate": 7.855148088593807e-06, "logits/chosen": -2.8763303756713867, "logits/rejected": -2.1101884841918945, "logps/chosen": -209.21170043945312, "logps/rejected": -159.5782470703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.9128131866455078, "rewards/margins": 7.610527992248535, "rewards/rejected": -8.523341178894043, "step": 8579 }, { "epoch": 1.33, "learning_rate": 7.854414648062659e-06, "logits/chosen": -2.866431474685669, "logits/rejected": -2.7084197998046875, "logps/chosen": -267.0587463378906, "logps/rejected": -410.63348388671875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.8855185508728027, "rewards/margins": 7.04226016998291, "rewards/rejected": -8.927778244018555, "step": 8580 }, { "epoch": 1.33, "learning_rate": 7.853681207531512e-06, "logits/chosen": -2.3761637210845947, "logits/rejected": -2.933295249938965, "logps/chosen": -176.35301208496094, "logps/rejected": -326.8631286621094, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -1.835523247718811, "rewards/margins": 5.7965850830078125, "rewards/rejected": -7.632108688354492, "step": 8581 }, { "epoch": 1.33, "learning_rate": 7.852947767000364e-06, "logits/chosen": -2.315502882003784, "logits/rejected": -2.896700143814087, "logps/chosen": -92.80741119384766, "logps/rejected": -255.46263122558594, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.867598295211792, "rewards/margins": 7.654494285583496, "rewards/rejected": -10.522092819213867, "step": 8582 }, { "epoch": 1.33, "learning_rate": 7.852214326469216e-06, "logits/chosen": -2.607914686203003, "logits/rejected": -2.0452065467834473, "logps/chosen": -833.0164184570312, "logps/rejected": -508.6341552734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.523590087890625, "rewards/margins": 7.53420352935791, "rewards/rejected": -9.057793617248535, "step": 8583 }, { "epoch": 1.33, "learning_rate": 7.851480885938068e-06, "logits/chosen": -2.74721360206604, "logits/rejected": -2.708968162536621, "logps/chosen": -410.7823486328125, "logps/rejected": -196.9005584716797, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -3.7146522998809814, "rewards/margins": 4.973043441772461, "rewards/rejected": -8.68769645690918, "step": 8584 }, { "epoch": 1.34, "learning_rate": 7.85074744540692e-06, "logits/chosen": -1.651346206665039, "logits/rejected": -2.2260043621063232, "logps/chosen": -273.2746276855469, "logps/rejected": -445.333984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.458040237426758, "rewards/margins": 8.364076614379883, "rewards/rejected": -12.82211685180664, "step": 8585 }, { "epoch": 1.34, "learning_rate": 7.850014004875772e-06, "logits/chosen": -2.6796531677246094, "logits/rejected": -2.935636043548584, "logps/chosen": -176.05813598632812, "logps/rejected": -193.049560546875, "loss": 0.0734, "rewards/accuracies": 1.0, "rewards/chosen": -3.595747947692871, "rewards/margins": 4.338147163391113, "rewards/rejected": -7.933895111083984, "step": 8586 }, { "epoch": 1.34, "learning_rate": 7.849280564344624e-06, "logits/chosen": -2.976209878921509, "logits/rejected": -2.899630308151245, "logps/chosen": -411.41387939453125, "logps/rejected": -384.2499084472656, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -1.0416748523712158, "rewards/margins": 5.890142440795898, "rewards/rejected": -6.931817531585693, "step": 8587 }, { "epoch": 1.34, "learning_rate": 7.848547123813475e-06, "logits/chosen": -2.1183059215545654, "logits/rejected": -2.7428698539733887, "logps/chosen": -357.3712158203125, "logps/rejected": -454.38714599609375, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": -2.653059959411621, "rewards/margins": 3.903869152069092, "rewards/rejected": -6.556929111480713, "step": 8588 }, { "epoch": 1.34, "learning_rate": 7.847813683282327e-06, "logits/chosen": -2.9158949851989746, "logits/rejected": -2.894280433654785, "logps/chosen": -188.4356231689453, "logps/rejected": -205.01397705078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.3182437419891357, "rewards/margins": 7.125818729400635, "rewards/rejected": -9.444062232971191, "step": 8589 }, { "epoch": 1.34, "learning_rate": 7.847080242751181e-06, "logits/chosen": -0.8268251419067383, "logits/rejected": -2.2954390048980713, "logps/chosen": -108.7808837890625, "logps/rejected": -401.60870361328125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -2.6253724098205566, "rewards/margins": 7.634555816650391, "rewards/rejected": -10.259928703308105, "step": 8590 }, { "epoch": 1.34, "learning_rate": 7.846346802220033e-06, "logits/chosen": -3.163374900817871, "logits/rejected": -2.7300987243652344, "logps/chosen": -453.4298400878906, "logps/rejected": -416.791748046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.8941131830215454, "rewards/margins": 7.88602352142334, "rewards/rejected": -9.780136108398438, "step": 8591 }, { "epoch": 1.34, "learning_rate": 7.845613361688885e-06, "logits/chosen": -1.8819465637207031, "logits/rejected": -2.4144747257232666, "logps/chosen": -647.9456787109375, "logps/rejected": -715.710693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2071990966796875, "rewards/margins": 10.400708198547363, "rewards/rejected": -11.60790729522705, "step": 8592 }, { "epoch": 1.34, "learning_rate": 7.844879921157737e-06, "logits/chosen": -2.4996089935302734, "logits/rejected": -2.90632963180542, "logps/chosen": -378.3132629394531, "logps/rejected": -419.5510559082031, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/chosen": -1.5725033283233643, "rewards/margins": 6.35530948638916, "rewards/rejected": -7.927812576293945, "step": 8593 }, { "epoch": 1.34, "learning_rate": 7.844146480626588e-06, "logits/chosen": -2.4195637702941895, "logits/rejected": -2.8684470653533936, "logps/chosen": -579.2166748046875, "logps/rejected": -682.378173828125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -1.8109092712402344, "rewards/margins": 5.133826732635498, "rewards/rejected": -6.944736003875732, "step": 8594 }, { "epoch": 1.34, "learning_rate": 7.84341304009544e-06, "logits/chosen": -2.239511013031006, "logits/rejected": -2.5514609813690186, "logps/chosen": -230.8330841064453, "logps/rejected": -330.51812744140625, "loss": 0.3217, "rewards/accuracies": 1.0, "rewards/chosen": -3.72483229637146, "rewards/margins": 4.237496852874756, "rewards/rejected": -7.962328910827637, "step": 8595 }, { "epoch": 1.34, "learning_rate": 7.842679599564292e-06, "logits/chosen": -2.136453628540039, "logits/rejected": -2.715289831161499, "logps/chosen": -640.2583618164062, "logps/rejected": -622.8868408203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2253308296203613, "rewards/margins": 7.1168694496154785, "rewards/rejected": -9.34220027923584, "step": 8596 }, { "epoch": 1.34, "learning_rate": 7.841946159033144e-06, "logits/chosen": -1.4819680452346802, "logits/rejected": -2.9543569087982178, "logps/chosen": -364.57220458984375, "logps/rejected": -433.0206298828125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.274426221847534, "rewards/margins": 6.368840217590332, "rewards/rejected": -8.643266677856445, "step": 8597 }, { "epoch": 1.34, "learning_rate": 7.841212718501996e-06, "logits/chosen": -1.9438265562057495, "logits/rejected": -1.244826316833496, "logps/chosen": -173.28880310058594, "logps/rejected": -352.51080322265625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -3.495471954345703, "rewards/margins": 5.739189147949219, "rewards/rejected": -9.234661102294922, "step": 8598 }, { "epoch": 1.34, "learning_rate": 7.84047927797085e-06, "logits/chosen": -3.0585105419158936, "logits/rejected": -2.1872923374176025, "logps/chosen": -302.3431091308594, "logps/rejected": -188.71209716796875, "loss": 0.4102, "rewards/accuracies": 0.5, "rewards/chosen": -3.731182336807251, "rewards/margins": 4.26718282699585, "rewards/rejected": -7.99836540222168, "step": 8599 }, { "epoch": 1.34, "learning_rate": 7.839745837439701e-06, "logits/chosen": -3.0739099979400635, "logits/rejected": -2.7759084701538086, "logps/chosen": -627.9449462890625, "logps/rejected": -463.89642333984375, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -2.080249309539795, "rewards/margins": 5.5032877922058105, "rewards/rejected": -7.5835371017456055, "step": 8600 }, { "epoch": 1.34, "learning_rate": 7.839012396908553e-06, "logits/chosen": -2.7906532287597656, "logits/rejected": -2.7303943634033203, "logps/chosen": -450.6596374511719, "logps/rejected": -599.4318237304688, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -5.433371543884277, "rewards/margins": 8.178643226623535, "rewards/rejected": -13.612014770507812, "step": 8601 }, { "epoch": 1.34, "learning_rate": 7.838278956377405e-06, "logits/chosen": -1.92440664768219, "logits/rejected": -2.9163997173309326, "logps/chosen": -98.71610260009766, "logps/rejected": -299.1802978515625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.223846912384033, "rewards/margins": 6.030884265899658, "rewards/rejected": -8.254731178283691, "step": 8602 }, { "epoch": 1.34, "learning_rate": 7.837545515846257e-06, "logits/chosen": -1.6874594688415527, "logits/rejected": -2.9282097816467285, "logps/chosen": -77.93994140625, "logps/rejected": -435.51226806640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1701889038085938, "rewards/margins": 8.968246459960938, "rewards/rejected": -11.138435363769531, "step": 8603 }, { "epoch": 1.34, "learning_rate": 7.836812075315109e-06, "logits/chosen": -1.633188009262085, "logits/rejected": -2.6210389137268066, "logps/chosen": -211.9239501953125, "logps/rejected": -378.8787841796875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.6216111183166504, "rewards/margins": 6.099165439605713, "rewards/rejected": -8.720776557922363, "step": 8604 }, { "epoch": 1.34, "learning_rate": 7.83607863478396e-06, "logits/chosen": -1.9318888187408447, "logits/rejected": -2.6240336894989014, "logps/chosen": -79.41285705566406, "logps/rejected": -234.6181182861328, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.34658509492874146, "rewards/margins": 8.786487579345703, "rewards/rejected": -9.133071899414062, "step": 8605 }, { "epoch": 1.34, "learning_rate": 7.835345194252814e-06, "logits/chosen": -2.458378314971924, "logits/rejected": -2.6759631633758545, "logps/chosen": -213.81591796875, "logps/rejected": -393.3818664550781, "loss": 0.8674, "rewards/accuracies": 0.5, "rewards/chosen": -6.488279342651367, "rewards/margins": 4.953294277191162, "rewards/rejected": -11.441574096679688, "step": 8606 }, { "epoch": 1.34, "learning_rate": 7.834611753721666e-06, "logits/chosen": -2.294135570526123, "logits/rejected": -3.116818428039551, "logps/chosen": -171.7516632080078, "logps/rejected": -456.9951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.687988758087158, "rewards/margins": 11.826393127441406, "rewards/rejected": -14.514381408691406, "step": 8607 }, { "epoch": 1.34, "learning_rate": 7.83387831319052e-06, "logits/chosen": -2.4508731365203857, "logits/rejected": -2.512709140777588, "logps/chosen": -283.14117431640625, "logps/rejected": -268.31146240234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2884337902069092, "rewards/margins": 7.275059700012207, "rewards/rejected": -8.563493728637695, "step": 8608 }, { "epoch": 1.34, "learning_rate": 7.833144872659372e-06, "logits/chosen": -2.4069063663482666, "logits/rejected": -2.967149496078491, "logps/chosen": -240.6628875732422, "logps/rejected": -415.793212890625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -4.067805290222168, "rewards/margins": 4.638671875, "rewards/rejected": -8.706477165222168, "step": 8609 }, { "epoch": 1.34, "learning_rate": 7.832411432128224e-06, "logits/chosen": -1.6505942344665527, "logits/rejected": -2.2860515117645264, "logps/chosen": -117.46676635742188, "logps/rejected": -301.03741455078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.273224353790283, "rewards/margins": 7.804588317871094, "rewards/rejected": -11.077813148498535, "step": 8610 }, { "epoch": 1.34, "learning_rate": 7.831677991597075e-06, "logits/chosen": -2.773587226867676, "logits/rejected": -2.7650296688079834, "logps/chosen": -108.4795913696289, "logps/rejected": -166.47996520996094, "loss": 0.663, "rewards/accuracies": 0.5, "rewards/chosen": -5.727862358093262, "rewards/margins": 2.784397602081299, "rewards/rejected": -8.512260437011719, "step": 8611 }, { "epoch": 1.34, "learning_rate": 7.830944551065927e-06, "logits/chosen": -2.945463180541992, "logits/rejected": -2.6982219219207764, "logps/chosen": -165.3852996826172, "logps/rejected": -302.5628662109375, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -1.9497486352920532, "rewards/margins": 5.8190717697143555, "rewards/rejected": -7.768820285797119, "step": 8612 }, { "epoch": 1.34, "learning_rate": 7.83021111053478e-06, "logits/chosen": -2.811662197113037, "logits/rejected": -2.4738881587982178, "logps/chosen": -393.3174133300781, "logps/rejected": -462.2508850097656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.712573766708374, "rewards/margins": 8.37027359008789, "rewards/rejected": -11.082847595214844, "step": 8613 }, { "epoch": 1.34, "learning_rate": 7.829477670003631e-06, "logits/chosen": -2.5123846530914307, "logits/rejected": -1.811824917793274, "logps/chosen": -551.8334350585938, "logps/rejected": -282.4802551269531, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.1941521167755127, "rewards/margins": 8.178031921386719, "rewards/rejected": -10.372182846069336, "step": 8614 }, { "epoch": 1.34, "learning_rate": 7.828744229472483e-06, "logits/chosen": -2.542832136154175, "logits/rejected": -2.7609434127807617, "logps/chosen": -102.3171615600586, "logps/rejected": -301.4403076171875, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": -2.330063819885254, "rewards/margins": 5.684098720550537, "rewards/rejected": -8.01416301727295, "step": 8615 }, { "epoch": 1.34, "learning_rate": 7.828010788941335e-06, "logits/chosen": -2.609745740890503, "logits/rejected": -2.3702280521392822, "logps/chosen": -618.1390991210938, "logps/rejected": -455.52783203125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -0.6984378695487976, "rewards/margins": 8.192533493041992, "rewards/rejected": -8.890971183776855, "step": 8616 }, { "epoch": 1.34, "learning_rate": 7.827277348410188e-06, "logits/chosen": -2.3832461833953857, "logits/rejected": -2.8022351264953613, "logps/chosen": -108.59069061279297, "logps/rejected": -201.94515991210938, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -3.1100380420684814, "rewards/margins": 4.2549543380737305, "rewards/rejected": -7.364992618560791, "step": 8617 }, { "epoch": 1.34, "learning_rate": 7.82654390787904e-06, "logits/chosen": -2.408297300338745, "logits/rejected": -2.720367908477783, "logps/chosen": -218.342529296875, "logps/rejected": -234.23800659179688, "loss": 0.3733, "rewards/accuracies": 0.5, "rewards/chosen": -4.876387596130371, "rewards/margins": 2.327802896499634, "rewards/rejected": -7.204190254211426, "step": 8618 }, { "epoch": 1.34, "learning_rate": 7.825810467347892e-06, "logits/chosen": -2.3153810501098633, "logits/rejected": -2.715392827987671, "logps/chosen": -201.99957275390625, "logps/rejected": -505.3255920410156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.794003009796143, "rewards/margins": 8.933675765991211, "rewards/rejected": -13.727678298950195, "step": 8619 }, { "epoch": 1.34, "learning_rate": 7.825077026816744e-06, "logits/chosen": -2.4099032878875732, "logits/rejected": -2.9167978763580322, "logps/chosen": -331.204345703125, "logps/rejected": -441.8067932128906, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.6504268646240234, "rewards/margins": 7.845992088317871, "rewards/rejected": -10.496418952941895, "step": 8620 }, { "epoch": 1.34, "learning_rate": 7.824343586285596e-06, "logits/chosen": -1.0409107208251953, "logits/rejected": -2.7070469856262207, "logps/chosen": -135.1035919189453, "logps/rejected": -603.99072265625, "loss": 0.727, "rewards/accuracies": 0.5, "rewards/chosen": -2.448371648788452, "rewards/margins": 7.596501350402832, "rewards/rejected": -10.044873237609863, "step": 8621 }, { "epoch": 1.34, "learning_rate": 7.823610145754448e-06, "logits/chosen": -2.4680445194244385, "logits/rejected": -3.024461269378662, "logps/chosen": -226.23043823242188, "logps/rejected": -297.3200988769531, "loss": 1.7592, "rewards/accuracies": 0.5, "rewards/chosen": -5.806813716888428, "rewards/margins": 2.326406717300415, "rewards/rejected": -8.133220672607422, "step": 8622 }, { "epoch": 1.34, "learning_rate": 7.8228767052233e-06, "logits/chosen": -1.783897042274475, "logits/rejected": -2.5216970443725586, "logps/chosen": -248.61691284179688, "logps/rejected": -280.50775146484375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -3.7130470275878906, "rewards/margins": 4.239628791809082, "rewards/rejected": -7.952675819396973, "step": 8623 }, { "epoch": 1.34, "learning_rate": 7.822143264692152e-06, "logits/chosen": -2.3254847526550293, "logits/rejected": -2.920668125152588, "logps/chosen": -190.03204345703125, "logps/rejected": -524.8416748046875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.1310402154922485, "rewards/margins": 6.182605743408203, "rewards/rejected": -7.31364631652832, "step": 8624 }, { "epoch": 1.34, "learning_rate": 7.821409824161003e-06, "logits/chosen": -1.860142707824707, "logits/rejected": -2.7700858116149902, "logps/chosen": -87.29976654052734, "logps/rejected": -428.79840087890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9109807014465332, "rewards/margins": 9.634672164916992, "rewards/rejected": -11.545652389526367, "step": 8625 }, { "epoch": 1.34, "learning_rate": 7.820676383629857e-06, "logits/chosen": -0.2982402443885803, "logits/rejected": -2.3425159454345703, "logps/chosen": -97.9106674194336, "logps/rejected": -886.2176513671875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.919877052307129, "rewards/margins": 10.548942565917969, "rewards/rejected": -13.468819618225098, "step": 8626 }, { "epoch": 1.34, "learning_rate": 7.819942943098709e-06, "logits/chosen": -1.8792543411254883, "logits/rejected": -2.825282573699951, "logps/chosen": -220.28480529785156, "logps/rejected": -524.4767456054688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8170108795166016, "rewards/margins": 9.927485466003418, "rewards/rejected": -13.74449634552002, "step": 8627 }, { "epoch": 1.34, "learning_rate": 7.81920950256756e-06, "logits/chosen": -1.283464789390564, "logits/rejected": -2.7389495372772217, "logps/chosen": -201.39451599121094, "logps/rejected": -525.1677856445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.689206838607788, "rewards/margins": 12.95809555053711, "rewards/rejected": -14.647302627563477, "step": 8628 }, { "epoch": 1.34, "learning_rate": 7.818476062036413e-06, "logits/chosen": -2.4858577251434326, "logits/rejected": -2.9296464920043945, "logps/chosen": -85.97124481201172, "logps/rejected": -200.58837890625, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.8102622032165527, "rewards/margins": 5.385447025299072, "rewards/rejected": -7.195709228515625, "step": 8629 }, { "epoch": 1.34, "learning_rate": 7.817742621505265e-06, "logits/chosen": -3.0663986206054688, "logits/rejected": -3.0791337490081787, "logps/chosen": -148.2699737548828, "logps/rejected": -211.29966735839844, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.1369056701660156, "rewards/margins": 6.732366561889648, "rewards/rejected": -7.869272232055664, "step": 8630 }, { "epoch": 1.34, "learning_rate": 7.817009180974116e-06, "logits/chosen": -1.8486065864562988, "logits/rejected": -2.7799429893493652, "logps/chosen": -119.5858154296875, "logps/rejected": -429.2020263671875, "loss": 0.3819, "rewards/accuracies": 0.5, "rewards/chosen": -4.180893898010254, "rewards/margins": 4.415740966796875, "rewards/rejected": -8.596634864807129, "step": 8631 }, { "epoch": 1.34, "learning_rate": 7.816275740442968e-06, "logits/chosen": -2.692181348800659, "logits/rejected": -3.1791064739227295, "logps/chosen": -435.2095642089844, "logps/rejected": -459.4776611328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.738943576812744, "rewards/margins": 6.978349685668945, "rewards/rejected": -9.717292785644531, "step": 8632 }, { "epoch": 1.34, "learning_rate": 7.81554229991182e-06, "logits/chosen": -2.47751784324646, "logits/rejected": -3.235137701034546, "logps/chosen": -153.82534790039062, "logps/rejected": -438.85357666015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.1337099075317383, "rewards/margins": 9.07463264465332, "rewards/rejected": -11.208342552185059, "step": 8633 }, { "epoch": 1.34, "learning_rate": 7.814808859380672e-06, "logits/chosen": -1.9563809633255005, "logits/rejected": -2.466242551803589, "logps/chosen": -188.5413055419922, "logps/rejected": -184.16639709472656, "loss": 0.07, "rewards/accuracies": 1.0, "rewards/chosen": -3.303603172302246, "rewards/margins": 4.928249359130859, "rewards/rejected": -8.231852531433105, "step": 8634 }, { "epoch": 1.34, "learning_rate": 7.814075418849526e-06, "logits/chosen": -2.912308931350708, "logits/rejected": -2.064258337020874, "logps/chosen": -176.4093017578125, "logps/rejected": -89.75975799560547, "loss": 2.4651, "rewards/accuracies": 0.5, "rewards/chosen": -3.8638997077941895, "rewards/margins": -0.13948535919189453, "rewards/rejected": -3.724414348602295, "step": 8635 }, { "epoch": 1.34, "learning_rate": 7.813341978318377e-06, "logits/chosen": -2.0674991607666016, "logits/rejected": -2.832789897918701, "logps/chosen": -105.64423370361328, "logps/rejected": -353.15716552734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.8192152976989746, "rewards/margins": 8.059782028198242, "rewards/rejected": -10.878997802734375, "step": 8636 }, { "epoch": 1.34, "learning_rate": 7.81260853778723e-06, "logits/chosen": -1.492856502532959, "logits/rejected": -2.198476791381836, "logps/chosen": -240.49981689453125, "logps/rejected": -316.72540283203125, "loss": 1.7494, "rewards/accuracies": 0.5, "rewards/chosen": -5.6160149574279785, "rewards/margins": 4.40931510925293, "rewards/rejected": -10.02532958984375, "step": 8637 }, { "epoch": 1.34, "learning_rate": 7.811875097256081e-06, "logits/chosen": -2.7008681297302246, "logits/rejected": -2.8185172080993652, "logps/chosen": -79.26261901855469, "logps/rejected": -232.13140869140625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.947551965713501, "rewards/margins": 7.518633842468262, "rewards/rejected": -8.4661865234375, "step": 8638 }, { "epoch": 1.34, "learning_rate": 7.811141656724933e-06, "logits/chosen": -2.6037919521331787, "logits/rejected": -2.073408842086792, "logps/chosen": -194.00640869140625, "logps/rejected": -148.7820587158203, "loss": 0.1356, "rewards/accuracies": 1.0, "rewards/chosen": -3.108466386795044, "rewards/margins": 4.25520133972168, "rewards/rejected": -7.3636674880981445, "step": 8639 }, { "epoch": 1.34, "learning_rate": 7.810408216193787e-06, "logits/chosen": -2.7053399085998535, "logits/rejected": -3.0170507431030273, "logps/chosen": -938.9892578125, "logps/rejected": -893.9161376953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.332191467285156, "rewards/margins": 8.345537185668945, "rewards/rejected": -12.677728652954102, "step": 8640 }, { "epoch": 1.34, "learning_rate": 7.809674775662639e-06, "logits/chosen": -2.894780158996582, "logits/rejected": -2.881974697113037, "logps/chosen": -446.1087646484375, "logps/rejected": -461.15399169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5663299560546875, "rewards/margins": 10.33676528930664, "rewards/rejected": -12.903095245361328, "step": 8641 }, { "epoch": 1.34, "learning_rate": 7.80894133513149e-06, "logits/chosen": -2.900618314743042, "logits/rejected": -1.911320447921753, "logps/chosen": -381.666748046875, "logps/rejected": -225.0243682861328, "loss": 2.3239, "rewards/accuracies": 0.5, "rewards/chosen": -4.513128757476807, "rewards/margins": -0.7238156795501709, "rewards/rejected": -3.7893130779266357, "step": 8642 }, { "epoch": 1.34, "learning_rate": 7.808207894600342e-06, "logits/chosen": -3.0588877201080322, "logits/rejected": -2.487483263015747, "logps/chosen": -376.3129577636719, "logps/rejected": -244.89007568359375, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -3.2129039764404297, "rewards/margins": 4.454213619232178, "rewards/rejected": -7.667117595672607, "step": 8643 }, { "epoch": 1.34, "learning_rate": 7.807474454069196e-06, "logits/chosen": -2.753660202026367, "logits/rejected": -2.3716254234313965, "logps/chosen": -541.693603515625, "logps/rejected": -586.9949340820312, "loss": 0.8376, "rewards/accuracies": 0.5, "rewards/chosen": -6.815840244293213, "rewards/margins": 0.31558823585510254, "rewards/rejected": -7.1314287185668945, "step": 8644 }, { "epoch": 1.34, "learning_rate": 7.806741013538048e-06, "logits/chosen": -2.739884614944458, "logits/rejected": -2.9289486408233643, "logps/chosen": -116.16627502441406, "logps/rejected": -270.49560546875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.28233224153518677, "rewards/margins": 6.699979782104492, "rewards/rejected": -6.982312202453613, "step": 8645 }, { "epoch": 1.34, "learning_rate": 7.8060075730069e-06, "logits/chosen": -2.5185511112213135, "logits/rejected": -2.6870737075805664, "logps/chosen": -97.8355712890625, "logps/rejected": -224.72616577148438, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.3186452388763428, "rewards/margins": 5.730927467346191, "rewards/rejected": -8.049572944641113, "step": 8646 }, { "epoch": 1.34, "learning_rate": 7.805274132475752e-06, "logits/chosen": -2.7225141525268555, "logits/rejected": -1.8658415079116821, "logps/chosen": -336.9129943847656, "logps/rejected": -237.14529418945312, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.6124998331069946, "rewards/margins": 6.455732345581055, "rewards/rejected": -8.068232536315918, "step": 8647 }, { "epoch": 1.34, "learning_rate": 7.804540691944603e-06, "logits/chosen": -1.9640414714813232, "logits/rejected": -2.706411361694336, "logps/chosen": -49.857398986816406, "logps/rejected": -292.2379150390625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.9610817432403564, "rewards/margins": 7.647872447967529, "rewards/rejected": -9.608954429626465, "step": 8648 }, { "epoch": 1.35, "learning_rate": 7.803807251413455e-06, "logits/chosen": -1.6771936416625977, "logits/rejected": -2.9158132076263428, "logps/chosen": -250.9928741455078, "logps/rejected": -580.3492431640625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -2.9299564361572266, "rewards/margins": 5.1573052406311035, "rewards/rejected": -8.087262153625488, "step": 8649 }, { "epoch": 1.35, "learning_rate": 7.803073810882307e-06, "logits/chosen": -2.866058349609375, "logits/rejected": -2.0580027103424072, "logps/chosen": -516.2611694335938, "logps/rejected": -256.3611145019531, "loss": 0.1585, "rewards/accuracies": 1.0, "rewards/chosen": -3.2397704124450684, "rewards/margins": 5.472159385681152, "rewards/rejected": -8.711930274963379, "step": 8650 }, { "epoch": 1.35, "learning_rate": 7.802340370351159e-06, "logits/chosen": -1.8019415140151978, "logits/rejected": -2.4899001121520996, "logps/chosen": -92.18997192382812, "logps/rejected": -398.33648681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.38197213411331177, "rewards/margins": 10.019247055053711, "rewards/rejected": -10.401219367980957, "step": 8651 }, { "epoch": 1.35, "learning_rate": 7.801606929820011e-06, "logits/chosen": -2.6087794303894043, "logits/rejected": -2.6511642932891846, "logps/chosen": -597.0430297851562, "logps/rejected": -514.8228149414062, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.5738754272460938, "rewards/margins": 7.95394229888916, "rewards/rejected": -9.527817726135254, "step": 8652 }, { "epoch": 1.35, "learning_rate": 7.800873489288864e-06, "logits/chosen": -2.303739547729492, "logits/rejected": -2.730844497680664, "logps/chosen": -127.36109161376953, "logps/rejected": -332.2919006347656, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.0813374519348145, "rewards/margins": 8.25632381439209, "rewards/rejected": -11.337661743164062, "step": 8653 }, { "epoch": 1.35, "learning_rate": 7.800140048757716e-06, "logits/chosen": -2.585449695587158, "logits/rejected": -2.0564310550689697, "logps/chosen": -183.8740997314453, "logps/rejected": -291.3990478515625, "loss": 3.1644, "rewards/accuracies": 0.5, "rewards/chosen": -4.949494361877441, "rewards/margins": 2.519115447998047, "rewards/rejected": -7.468609809875488, "step": 8654 }, { "epoch": 1.35, "learning_rate": 7.799406608226568e-06, "logits/chosen": -1.5298787355422974, "logits/rejected": -2.739125967025757, "logps/chosen": -119.29879760742188, "logps/rejected": -454.6724853515625, "loss": 0.1294, "rewards/accuracies": 1.0, "rewards/chosen": -3.4483771324157715, "rewards/margins": 7.252216815948486, "rewards/rejected": -10.700593948364258, "step": 8655 }, { "epoch": 1.35, "learning_rate": 7.79867316769542e-06, "logits/chosen": -2.6769015789031982, "logits/rejected": -2.766757011413574, "logps/chosen": -94.08772277832031, "logps/rejected": -248.43362426757812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9989840984344482, "rewards/margins": 9.591533660888672, "rewards/rejected": -11.590517044067383, "step": 8656 }, { "epoch": 1.35, "learning_rate": 7.797939727164272e-06, "logits/chosen": -1.2519664764404297, "logits/rejected": -2.632525682449341, "logps/chosen": -82.09466552734375, "logps/rejected": -470.86865234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.1250922679901123, "rewards/margins": 9.411383628845215, "rewards/rejected": -10.536476135253906, "step": 8657 }, { "epoch": 1.35, "learning_rate": 7.797206286633124e-06, "logits/chosen": -2.3970606327056885, "logits/rejected": -2.6944146156311035, "logps/chosen": -278.74188232421875, "logps/rejected": -355.8103942871094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.3788261413574219, "rewards/margins": 8.923251152038574, "rewards/rejected": -10.302077293395996, "step": 8658 }, { "epoch": 1.35, "learning_rate": 7.796472846101976e-06, "logits/chosen": -1.92388117313385, "logits/rejected": -2.6565399169921875, "logps/chosen": -189.58135986328125, "logps/rejected": -426.5187072753906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.0076820850372314, "rewards/margins": 8.52798843383789, "rewards/rejected": -11.53567123413086, "step": 8659 }, { "epoch": 1.35, "learning_rate": 7.795739405570828e-06, "logits/chosen": -1.9404605627059937, "logits/rejected": -2.7234785556793213, "logps/chosen": -174.49752807617188, "logps/rejected": -325.290771484375, "loss": 0.288, "rewards/accuracies": 1.0, "rewards/chosen": -4.236124515533447, "rewards/margins": 4.000195503234863, "rewards/rejected": -8.236320495605469, "step": 8660 }, { "epoch": 1.35, "learning_rate": 7.79500596503968e-06, "logits/chosen": -2.5863354206085205, "logits/rejected": -2.342010736465454, "logps/chosen": -675.9732666015625, "logps/rejected": -556.60888671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1602401733398438, "rewards/margins": 7.95252799987793, "rewards/rejected": -9.112768173217773, "step": 8661 }, { "epoch": 1.35, "learning_rate": 7.794272524508533e-06, "logits/chosen": -1.6466708183288574, "logits/rejected": -2.752295970916748, "logps/chosen": -149.64833068847656, "logps/rejected": -369.8046875, "loss": 0.5339, "rewards/accuracies": 0.5, "rewards/chosen": -3.6531615257263184, "rewards/margins": 3.2108633518218994, "rewards/rejected": -6.864025115966797, "step": 8662 }, { "epoch": 1.35, "learning_rate": 7.793539083977385e-06, "logits/chosen": -1.9437103271484375, "logits/rejected": -2.5430569648742676, "logps/chosen": -68.5847396850586, "logps/rejected": -206.85231018066406, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.12664270401001, "rewards/margins": 7.386898994445801, "rewards/rejected": -11.513542175292969, "step": 8663 }, { "epoch": 1.35, "learning_rate": 7.792805643446237e-06, "logits/chosen": -2.758211612701416, "logits/rejected": -2.4384162425994873, "logps/chosen": -968.7085571289062, "logps/rejected": -767.9515380859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.277270555496216, "rewards/margins": 8.24659538269043, "rewards/rejected": -10.523866653442383, "step": 8664 }, { "epoch": 1.35, "learning_rate": 7.792072202915089e-06, "logits/chosen": -2.92276930809021, "logits/rejected": -3.1171841621398926, "logps/chosen": -53.519691467285156, "logps/rejected": -147.70550537109375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.4851298332214355, "rewards/margins": 5.68272590637207, "rewards/rejected": -9.167856216430664, "step": 8665 }, { "epoch": 1.35, "learning_rate": 7.79133876238394e-06, "logits/chosen": -2.990424156188965, "logits/rejected": -2.411663293838501, "logps/chosen": -138.22140502929688, "logps/rejected": -228.23260498046875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -3.5248804092407227, "rewards/margins": 5.5949296951293945, "rewards/rejected": -9.119810104370117, "step": 8666 }, { "epoch": 1.35, "learning_rate": 7.790605321852792e-06, "logits/chosen": -2.331960916519165, "logits/rejected": -2.948873519897461, "logps/chosen": -277.42230224609375, "logps/rejected": -348.648681640625, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": -3.7960851192474365, "rewards/margins": 3.5615062713623047, "rewards/rejected": -7.35759162902832, "step": 8667 }, { "epoch": 1.35, "learning_rate": 7.789871881321644e-06, "logits/chosen": -1.5774742364883423, "logits/rejected": -2.700072765350342, "logps/chosen": -128.53648376464844, "logps/rejected": -275.467041015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.318511962890625, "rewards/margins": 6.997317790985107, "rewards/rejected": -10.31583023071289, "step": 8668 }, { "epoch": 1.35, "learning_rate": 7.789138440790496e-06, "logits/chosen": -2.385807752609253, "logits/rejected": -1.532371997833252, "logps/chosen": -182.28302001953125, "logps/rejected": -127.71981811523438, "loss": 1.5748, "rewards/accuracies": 0.0, "rewards/chosen": -5.131402969360352, "rewards/margins": -1.2822266817092896, "rewards/rejected": -3.8491759300231934, "step": 8669 }, { "epoch": 1.35, "learning_rate": 7.78840500025935e-06, "logits/chosen": -2.8019018173217773, "logits/rejected": -2.8210227489471436, "logps/chosen": -106.73005676269531, "logps/rejected": -217.92718505859375, "loss": 1.5524, "rewards/accuracies": 0.5, "rewards/chosen": -7.106594085693359, "rewards/margins": 1.180265188217163, "rewards/rejected": -8.286859512329102, "step": 8670 }, { "epoch": 1.35, "learning_rate": 7.787671559728202e-06, "logits/chosen": -2.6709814071655273, "logits/rejected": -3.1530821323394775, "logps/chosen": -639.5916137695312, "logps/rejected": -628.6301879882812, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.5828866958618164, "rewards/margins": 5.740734100341797, "rewards/rejected": -9.323620796203613, "step": 8671 }, { "epoch": 1.35, "learning_rate": 7.786938119197054e-06, "logits/chosen": -2.6551473140716553, "logits/rejected": -3.0046725273132324, "logps/chosen": -67.86569213867188, "logps/rejected": -200.27088928222656, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.2409324645996094, "rewards/margins": 5.881685256958008, "rewards/rejected": -9.122617721557617, "step": 8672 }, { "epoch": 1.35, "learning_rate": 7.786204678665905e-06, "logits/chosen": -1.5289939641952515, "logits/rejected": -2.1090831756591797, "logps/chosen": -136.5352325439453, "logps/rejected": -118.79498291015625, "loss": 0.4149, "rewards/accuracies": 0.5, "rewards/chosen": -1.559810996055603, "rewards/margins": 3.4832515716552734, "rewards/rejected": -5.043062210083008, "step": 8673 }, { "epoch": 1.35, "learning_rate": 7.785471238134759e-06, "logits/chosen": -2.1000301837921143, "logits/rejected": -2.5753650665283203, "logps/chosen": -175.7482147216797, "logps/rejected": -236.01882934570312, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.5735465288162231, "rewards/margins": 7.164236068725586, "rewards/rejected": -8.73778247833252, "step": 8674 }, { "epoch": 1.35, "learning_rate": 7.784737797603611e-06, "logits/chosen": -1.1857928037643433, "logits/rejected": -2.8034920692443848, "logps/chosen": -165.5668182373047, "logps/rejected": -508.67193603515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2824158668518066, "rewards/margins": 8.156765937805176, "rewards/rejected": -11.43918228149414, "step": 8675 }, { "epoch": 1.35, "learning_rate": 7.784004357072463e-06, "logits/chosen": -2.819772958755493, "logits/rejected": -2.8277411460876465, "logps/chosen": -335.03717041015625, "logps/rejected": -510.98846435546875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -3.1764907836914062, "rewards/margins": 5.620083332061768, "rewards/rejected": -8.796574592590332, "step": 8676 }, { "epoch": 1.35, "learning_rate": 7.783270916541315e-06, "logits/chosen": -2.8086907863616943, "logits/rejected": -2.8909952640533447, "logps/chosen": -192.34091186523438, "logps/rejected": -223.44265747070312, "loss": 0.3608, "rewards/accuracies": 1.0, "rewards/chosen": -4.856141090393066, "rewards/margins": 0.9291398525238037, "rewards/rejected": -5.785280704498291, "step": 8677 }, { "epoch": 1.35, "learning_rate": 7.782537476010167e-06, "logits/chosen": -1.8453519344329834, "logits/rejected": -2.869215250015259, "logps/chosen": -110.24939727783203, "logps/rejected": -388.1488037109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.609873652458191, "rewards/margins": 9.578140258789062, "rewards/rejected": -11.188013076782227, "step": 8678 }, { "epoch": 1.35, "learning_rate": 7.78180403547902e-06, "logits/chosen": -2.5072057247161865, "logits/rejected": -3.295924663543701, "logps/chosen": -90.3788833618164, "logps/rejected": -359.080810546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.7425400018692017, "rewards/margins": 6.816618919372559, "rewards/rejected": -8.559158325195312, "step": 8679 }, { "epoch": 1.35, "learning_rate": 7.781070594947872e-06, "logits/chosen": -2.342170238494873, "logits/rejected": -2.8700599670410156, "logps/chosen": -191.44027709960938, "logps/rejected": -380.81695556640625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.6836609840393066, "rewards/margins": 9.836246490478516, "rewards/rejected": -11.51990795135498, "step": 8680 }, { "epoch": 1.35, "learning_rate": 7.780337154416724e-06, "logits/chosen": -2.6373071670532227, "logits/rejected": -2.847574234008789, "logps/chosen": -373.6601867675781, "logps/rejected": -392.218994140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.5762230157852173, "rewards/margins": 7.577715873718262, "rewards/rejected": -9.153938293457031, "step": 8681 }, { "epoch": 1.35, "learning_rate": 7.779603713885576e-06, "logits/chosen": -2.8520092964172363, "logits/rejected": -2.790621757507324, "logps/chosen": -116.7403793334961, "logps/rejected": -121.01866912841797, "loss": 0.105, "rewards/accuracies": 1.0, "rewards/chosen": -4.079612731933594, "rewards/margins": 2.2142093181610107, "rewards/rejected": -6.293821811676025, "step": 8682 }, { "epoch": 1.35, "learning_rate": 7.778870273354428e-06, "logits/chosen": -3.02858829498291, "logits/rejected": -2.2907214164733887, "logps/chosen": -380.45574951171875, "logps/rejected": -290.85443115234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.2945525646209717, "rewards/margins": 8.261043548583984, "rewards/rejected": -10.555595397949219, "step": 8683 }, { "epoch": 1.35, "learning_rate": 7.77813683282328e-06, "logits/chosen": -1.77852463722229, "logits/rejected": -2.6635866165161133, "logps/chosen": -118.32609558105469, "logps/rejected": -456.7315368652344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.186812162399292, "rewards/margins": 7.902036666870117, "rewards/rejected": -10.088849067687988, "step": 8684 }, { "epoch": 1.35, "learning_rate": 7.777403392292131e-06, "logits/chosen": -2.9275965690612793, "logits/rejected": -2.333629608154297, "logps/chosen": -521.0916748046875, "logps/rejected": -314.5478515625, "loss": 0.573, "rewards/accuracies": 0.5, "rewards/chosen": -2.4679417610168457, "rewards/margins": 2.7507071495056152, "rewards/rejected": -5.218648910522461, "step": 8685 }, { "epoch": 1.35, "learning_rate": 7.776669951760983e-06, "logits/chosen": -2.8996212482452393, "logits/rejected": -2.835538148880005, "logps/chosen": -196.75759887695312, "logps/rejected": -174.31222534179688, "loss": 0.1665, "rewards/accuracies": 1.0, "rewards/chosen": -2.6251540184020996, "rewards/margins": 5.7463226318359375, "rewards/rejected": -8.371477127075195, "step": 8686 }, { "epoch": 1.35, "learning_rate": 7.775936511229835e-06, "logits/chosen": -2.8877601623535156, "logits/rejected": -2.6180243492126465, "logps/chosen": -142.57858276367188, "logps/rejected": -174.5193328857422, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.8266079425811768, "rewards/margins": 6.094580173492432, "rewards/rejected": -8.921188354492188, "step": 8687 }, { "epoch": 1.35, "learning_rate": 7.775203070698689e-06, "logits/chosen": -2.7055346965789795, "logits/rejected": -2.9326047897338867, "logps/chosen": -238.98388671875, "logps/rejected": -412.202392578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9505783319473267, "rewards/margins": 8.255287170410156, "rewards/rejected": -9.205865859985352, "step": 8688 }, { "epoch": 1.35, "learning_rate": 7.77446963016754e-06, "logits/chosen": -2.74106502532959, "logits/rejected": -2.910433053970337, "logps/chosen": -135.32089233398438, "logps/rejected": -233.70724487304688, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -2.2625885009765625, "rewards/margins": 4.248488426208496, "rewards/rejected": -6.511076927185059, "step": 8689 }, { "epoch": 1.35, "learning_rate": 7.773736189636392e-06, "logits/chosen": -2.8266825675964355, "logits/rejected": -3.0373175144195557, "logps/chosen": -519.3411254882812, "logps/rejected": -506.29095458984375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.444798231124878, "rewards/margins": 7.961064338684082, "rewards/rejected": -9.405862808227539, "step": 8690 }, { "epoch": 1.35, "learning_rate": 7.773002749105244e-06, "logits/chosen": -3.197467088699341, "logits/rejected": -3.231447696685791, "logps/chosen": -330.96661376953125, "logps/rejected": -332.7359313964844, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.974729299545288, "rewards/margins": 6.703316688537598, "rewards/rejected": -10.678045272827148, "step": 8691 }, { "epoch": 1.35, "learning_rate": 7.772269308574096e-06, "logits/chosen": -2.425572156906128, "logits/rejected": -2.738892078399658, "logps/chosen": -68.60692596435547, "logps/rejected": -290.3860168457031, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.231191873550415, "rewards/margins": 6.368905067443848, "rewards/rejected": -9.60009765625, "step": 8692 }, { "epoch": 1.35, "learning_rate": 7.771535868042948e-06, "logits/chosen": -1.7692792415618896, "logits/rejected": -2.9081554412841797, "logps/chosen": -119.2503662109375, "logps/rejected": -478.9194030761719, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.684516191482544, "rewards/margins": 7.208014965057373, "rewards/rejected": -9.89253044128418, "step": 8693 }, { "epoch": 1.35, "learning_rate": 7.7708024275118e-06, "logits/chosen": -2.7510924339294434, "logits/rejected": -3.20639967918396, "logps/chosen": -155.26095581054688, "logps/rejected": -343.9955749511719, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -4.179632186889648, "rewards/margins": 4.534221649169922, "rewards/rejected": -8.71385383605957, "step": 8694 }, { "epoch": 1.35, "learning_rate": 7.770068986980652e-06, "logits/chosen": -1.6568013429641724, "logits/rejected": -2.9883549213409424, "logps/chosen": -108.81044006347656, "logps/rejected": -317.73223876953125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.09018364548683167, "rewards/margins": 5.947567939758301, "rewards/rejected": -6.037752151489258, "step": 8695 }, { "epoch": 1.35, "learning_rate": 7.769335546449504e-06, "logits/chosen": -1.6326156854629517, "logits/rejected": -2.36706280708313, "logps/chosen": -115.51577758789062, "logps/rejected": -246.28860473632812, "loss": 0.238, "rewards/accuracies": 1.0, "rewards/chosen": -4.533295154571533, "rewards/margins": 3.6980998516082764, "rewards/rejected": -8.23139476776123, "step": 8696 }, { "epoch": 1.35, "learning_rate": 7.768602105918357e-06, "logits/chosen": -2.639066457748413, "logits/rejected": -3.1105754375457764, "logps/chosen": -52.58888244628906, "logps/rejected": -336.9712219238281, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.2802369594573975, "rewards/margins": 8.168514251708984, "rewards/rejected": -10.448751449584961, "step": 8697 }, { "epoch": 1.35, "learning_rate": 7.76786866538721e-06, "logits/chosen": -2.5963573455810547, "logits/rejected": -3.013646364212036, "logps/chosen": -134.02035522460938, "logps/rejected": -221.56350708007812, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -2.781916618347168, "rewards/margins": 4.352020263671875, "rewards/rejected": -7.133936882019043, "step": 8698 }, { "epoch": 1.35, "learning_rate": 7.767135224856061e-06, "logits/chosen": -2.9915013313293457, "logits/rejected": -1.9452825784683228, "logps/chosen": -388.24407958984375, "logps/rejected": -240.339111328125, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": -3.226925849914551, "rewards/margins": 5.22847843170166, "rewards/rejected": -8.455404281616211, "step": 8699 }, { "epoch": 1.35, "learning_rate": 7.766401784324913e-06, "logits/chosen": -2.9621334075927734, "logits/rejected": -2.7944111824035645, "logps/chosen": -321.96826171875, "logps/rejected": -327.2897644042969, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -4.644302845001221, "rewards/margins": 6.122076034545898, "rewards/rejected": -10.766378402709961, "step": 8700 }, { "epoch": 1.35, "learning_rate": 7.765668343793765e-06, "logits/chosen": -1.9112215042114258, "logits/rejected": -2.8022971153259277, "logps/chosen": -134.73841857910156, "logps/rejected": -324.513427734375, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": -3.4662821292877197, "rewards/margins": 4.299092769622803, "rewards/rejected": -7.765375137329102, "step": 8701 }, { "epoch": 1.35, "learning_rate": 7.764934903262617e-06, "logits/chosen": -2.1561896800994873, "logits/rejected": -2.9231925010681152, "logps/chosen": -394.69866943359375, "logps/rejected": -689.4900512695312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.309674024581909, "rewards/margins": 9.241983413696289, "rewards/rejected": -11.551657676696777, "step": 8702 }, { "epoch": 1.35, "learning_rate": 7.764201462731469e-06, "logits/chosen": -2.764906406402588, "logits/rejected": -2.9194929599761963, "logps/chosen": -662.1148681640625, "logps/rejected": -311.9488525390625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6676888465881348, "rewards/margins": 6.288360595703125, "rewards/rejected": -7.956049919128418, "step": 8703 }, { "epoch": 1.35, "learning_rate": 7.76346802220032e-06, "logits/chosen": -2.63493275642395, "logits/rejected": -2.902819871902466, "logps/chosen": -128.308837890625, "logps/rejected": -213.61683654785156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.192115306854248, "rewards/margins": 7.994429588317871, "rewards/rejected": -10.186544418334961, "step": 8704 }, { "epoch": 1.35, "learning_rate": 7.762734581669172e-06, "logits/chosen": -2.3850107192993164, "logits/rejected": -2.906458854675293, "logps/chosen": -70.07221984863281, "logps/rejected": -324.6802978515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.1614482402801514, "rewards/margins": 9.547988891601562, "rewards/rejected": -11.709436416625977, "step": 8705 }, { "epoch": 1.35, "learning_rate": 7.762001141138026e-06, "logits/chosen": -2.70237398147583, "logits/rejected": -3.092175006866455, "logps/chosen": -321.9493408203125, "logps/rejected": -426.9217834472656, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.38215446472168, "rewards/margins": 6.884486198425293, "rewards/rejected": -11.266641616821289, "step": 8706 }, { "epoch": 1.35, "learning_rate": 7.761267700606878e-06, "logits/chosen": -2.486186981201172, "logits/rejected": -2.7895185947418213, "logps/chosen": -70.13900756835938, "logps/rejected": -186.18408203125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.228531360626221, "rewards/margins": 6.55348539352417, "rewards/rejected": -11.78201675415039, "step": 8707 }, { "epoch": 1.35, "learning_rate": 7.760534260075731e-06, "logits/chosen": -3.2750864028930664, "logits/rejected": -3.0934929847717285, "logps/chosen": -236.81106567382812, "logps/rejected": -193.10952758789062, "loss": 0.1146, "rewards/accuracies": 1.0, "rewards/chosen": -3.3928985595703125, "rewards/margins": 6.921533584594727, "rewards/rejected": -10.314432144165039, "step": 8708 }, { "epoch": 1.35, "learning_rate": 7.759800819544583e-06, "logits/chosen": -1.0684548616409302, "logits/rejected": -2.1439788341522217, "logps/chosen": -663.092041015625, "logps/rejected": -329.8793029785156, "loss": 2.5751, "rewards/accuracies": 0.5, "rewards/chosen": -8.880074501037598, "rewards/margins": -0.2674434185028076, "rewards/rejected": -8.612630844116211, "step": 8709 }, { "epoch": 1.35, "learning_rate": 7.759067379013435e-06, "logits/chosen": -2.9265005588531494, "logits/rejected": -2.422287702560425, "logps/chosen": -231.40965270996094, "logps/rejected": -246.56622314453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5890820026397705, "rewards/margins": 7.3460693359375, "rewards/rejected": -8.935151100158691, "step": 8710 }, { "epoch": 1.35, "learning_rate": 7.758333938482287e-06, "logits/chosen": -1.3567882776260376, "logits/rejected": -2.519636392593384, "logps/chosen": -127.8126449584961, "logps/rejected": -538.898681640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.6878113746643066, "rewards/margins": 8.799386978149414, "rewards/rejected": -11.487197875976562, "step": 8711 }, { "epoch": 1.35, "learning_rate": 7.757600497951139e-06, "logits/chosen": -1.7828648090362549, "logits/rejected": -2.8763065338134766, "logps/chosen": -322.10601806640625, "logps/rejected": -654.2353515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.788633823394775, "rewards/margins": 9.848672866821289, "rewards/rejected": -14.637306213378906, "step": 8712 }, { "epoch": 1.36, "learning_rate": 7.75686705741999e-06, "logits/chosen": -1.3851723670959473, "logits/rejected": -2.655811071395874, "logps/chosen": -170.791015625, "logps/rejected": -498.952392578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.7025951147079468, "rewards/margins": 9.381430625915527, "rewards/rejected": -11.084025382995605, "step": 8713 }, { "epoch": 1.36, "learning_rate": 7.756133616888843e-06, "logits/chosen": -2.428535223007202, "logits/rejected": -2.534665822982788, "logps/chosen": -222.80081176757812, "logps/rejected": -310.05419921875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.4000039100646973, "rewards/margins": 5.45487117767334, "rewards/rejected": -6.854874610900879, "step": 8714 }, { "epoch": 1.36, "learning_rate": 7.755400176357696e-06, "logits/chosen": -2.5587024688720703, "logits/rejected": -2.0074479579925537, "logps/chosen": -285.13140869140625, "logps/rejected": -222.24072265625, "loss": 0.5665, "rewards/accuracies": 0.5, "rewards/chosen": -5.301305770874023, "rewards/margins": 3.025195598602295, "rewards/rejected": -8.326501846313477, "step": 8715 }, { "epoch": 1.36, "learning_rate": 7.754666735826548e-06, "logits/chosen": -2.15281081199646, "logits/rejected": -2.5241127014160156, "logps/chosen": -359.11395263671875, "logps/rejected": -283.29095458984375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -3.8050050735473633, "rewards/margins": 4.914945125579834, "rewards/rejected": -8.719949722290039, "step": 8716 }, { "epoch": 1.36, "learning_rate": 7.7539332952954e-06, "logits/chosen": -1.5941686630249023, "logits/rejected": -2.5153019428253174, "logps/chosen": -68.685791015625, "logps/rejected": -130.30056762695312, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -3.8154993057250977, "rewards/margins": 3.958091974258423, "rewards/rejected": -7.773591041564941, "step": 8717 }, { "epoch": 1.36, "learning_rate": 7.753199854764252e-06, "logits/chosen": -2.140535831451416, "logits/rejected": -2.876030445098877, "logps/chosen": -116.88700103759766, "logps/rejected": -253.206787109375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.56727933883667, "rewards/margins": 6.880738735198975, "rewards/rejected": -9.448018074035645, "step": 8718 }, { "epoch": 1.36, "learning_rate": 7.752466414233104e-06, "logits/chosen": -1.9995877742767334, "logits/rejected": -3.0409016609191895, "logps/chosen": -134.06915283203125, "logps/rejected": -308.0893859863281, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.1906230449676514, "rewards/margins": 7.065443992614746, "rewards/rejected": -10.256067276000977, "step": 8719 }, { "epoch": 1.36, "learning_rate": 7.751732973701956e-06, "logits/chosen": -3.05853009223938, "logits/rejected": -2.727219820022583, "logps/chosen": -312.9474182128906, "logps/rejected": -222.27426147460938, "loss": 0.5713, "rewards/accuracies": 0.5, "rewards/chosen": -4.636597633361816, "rewards/margins": 1.36721932888031, "rewards/rejected": -6.003817081451416, "step": 8720 }, { "epoch": 1.36, "learning_rate": 7.750999533170807e-06, "logits/chosen": -1.9103156328201294, "logits/rejected": -2.529191255569458, "logps/chosen": -162.34017944335938, "logps/rejected": -328.61224365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5443034172058105, "rewards/margins": 12.521379470825195, "rewards/rejected": -15.065683364868164, "step": 8721 }, { "epoch": 1.36, "learning_rate": 7.75026609263966e-06, "logits/chosen": -2.325955867767334, "logits/rejected": -2.718202829360962, "logps/chosen": -113.5013656616211, "logps/rejected": -276.28643798828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.7207834720611572, "rewards/margins": 8.770856857299805, "rewards/rejected": -11.4916410446167, "step": 8722 }, { "epoch": 1.36, "learning_rate": 7.749532652108511e-06, "logits/chosen": -2.758899211883545, "logits/rejected": -1.372429609298706, "logps/chosen": -552.5716552734375, "logps/rejected": -431.5440979003906, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.5694451332092285, "rewards/margins": 7.2010674476623535, "rewards/rejected": -9.770512580871582, "step": 8723 }, { "epoch": 1.36, "learning_rate": 7.748799211577365e-06, "logits/chosen": -2.1009011268615723, "logits/rejected": -2.77744197845459, "logps/chosen": -129.4563446044922, "logps/rejected": -416.8188781738281, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -3.1478731632232666, "rewards/margins": 3.9848251342773438, "rewards/rejected": -7.132698059082031, "step": 8724 }, { "epoch": 1.36, "learning_rate": 7.748065771046217e-06, "logits/chosen": -2.0542571544647217, "logits/rejected": -2.754162311553955, "logps/chosen": -610.80908203125, "logps/rejected": -469.771484375, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -3.4276185035705566, "rewards/margins": 5.154430389404297, "rewards/rejected": -8.582049369812012, "step": 8725 }, { "epoch": 1.36, "learning_rate": 7.747332330515069e-06, "logits/chosen": -1.846861720085144, "logits/rejected": -2.856996774673462, "logps/chosen": -97.29362487792969, "logps/rejected": -369.8495178222656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.803859233856201, "rewards/margins": 10.14730453491211, "rewards/rejected": -12.951164245605469, "step": 8726 }, { "epoch": 1.36, "learning_rate": 7.74659888998392e-06, "logits/chosen": -1.774910569190979, "logits/rejected": -3.112769365310669, "logps/chosen": -195.4096221923828, "logps/rejected": -418.0294189453125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.5240561962127686, "rewards/margins": 6.9622979164123535, "rewards/rejected": -8.48635482788086, "step": 8727 }, { "epoch": 1.36, "learning_rate": 7.745865449452772e-06, "logits/chosen": -2.775546073913574, "logits/rejected": -2.1174299716949463, "logps/chosen": -604.0005493164062, "logps/rejected": -463.3427429199219, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -5.59794807434082, "rewards/margins": 3.8428726196289062, "rewards/rejected": -9.440820693969727, "step": 8728 }, { "epoch": 1.36, "learning_rate": 7.745132008921624e-06, "logits/chosen": -2.792043447494507, "logits/rejected": -2.471588134765625, "logps/chosen": -244.57235717773438, "logps/rejected": -210.49624633789062, "loss": 1.8998, "rewards/accuracies": 0.5, "rewards/chosen": -4.435389518737793, "rewards/margins": 1.2967432737350464, "rewards/rejected": -5.732132911682129, "step": 8729 }, { "epoch": 1.36, "learning_rate": 7.744398568390476e-06, "logits/chosen": -2.6878302097320557, "logits/rejected": -2.748915433883667, "logps/chosen": -409.5848693847656, "logps/rejected": -368.81103515625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.646557331085205, "rewards/margins": 7.557931423187256, "rewards/rejected": -12.204488754272461, "step": 8730 }, { "epoch": 1.36, "learning_rate": 7.743665127859328e-06, "logits/chosen": -1.7773923873901367, "logits/rejected": -2.8254148960113525, "logps/chosen": -193.8022003173828, "logps/rejected": -334.4294738769531, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.21337890625, "rewards/margins": 3.863339424133301, "rewards/rejected": -7.076718330383301, "step": 8731 }, { "epoch": 1.36, "learning_rate": 7.74293168732818e-06, "logits/chosen": -2.6755712032318115, "logits/rejected": -2.8293793201446533, "logps/chosen": -141.57266235351562, "logps/rejected": -160.29171752929688, "loss": 0.2766, "rewards/accuracies": 1.0, "rewards/chosen": -5.612603187561035, "rewards/margins": 2.296854257583618, "rewards/rejected": -7.909457206726074, "step": 8732 }, { "epoch": 1.36, "learning_rate": 7.742198246797033e-06, "logits/chosen": -2.593600034713745, "logits/rejected": -2.971446990966797, "logps/chosen": -422.65106201171875, "logps/rejected": -401.9299011230469, "loss": 0.1419, "rewards/accuracies": 1.0, "rewards/chosen": -4.70733642578125, "rewards/margins": 5.061326503753662, "rewards/rejected": -9.76866340637207, "step": 8733 }, { "epoch": 1.36, "learning_rate": 7.741464806265885e-06, "logits/chosen": -2.3498594760894775, "logits/rejected": -2.911623001098633, "logps/chosen": -142.38275146484375, "logps/rejected": -387.20330810546875, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -1.882939100265503, "rewards/margins": 5.926828384399414, "rewards/rejected": -7.809767723083496, "step": 8734 }, { "epoch": 1.36, "learning_rate": 7.740731365734737e-06, "logits/chosen": -3.1445188522338867, "logits/rejected": -3.174124240875244, "logps/chosen": -417.56695556640625, "logps/rejected": -608.9055786132812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.140510559082031, "rewards/margins": 6.027910232543945, "rewards/rejected": -10.168420791625977, "step": 8735 }, { "epoch": 1.36, "learning_rate": 7.739997925203589e-06, "logits/chosen": -1.7944766283035278, "logits/rejected": -2.7145514488220215, "logps/chosen": -242.69163513183594, "logps/rejected": -350.9887390136719, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4104690551757812, "rewards/margins": 6.738666534423828, "rewards/rejected": -8.14913558959961, "step": 8736 }, { "epoch": 1.36, "learning_rate": 7.739264484672441e-06, "logits/chosen": -1.6840306520462036, "logits/rejected": -2.361067533493042, "logps/chosen": -91.2993392944336, "logps/rejected": -395.1546325683594, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -4.131319046020508, "rewards/margins": 7.147977828979492, "rewards/rejected": -11.279296875, "step": 8737 }, { "epoch": 1.36, "learning_rate": 7.738531044141293e-06, "logits/chosen": -0.7811620831489563, "logits/rejected": -2.2461981773376465, "logps/chosen": -172.25656127929688, "logps/rejected": -467.81451416015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.287977933883667, "rewards/margins": 7.8666911125183105, "rewards/rejected": -10.154668807983398, "step": 8738 }, { "epoch": 1.36, "learning_rate": 7.737797603610145e-06, "logits/chosen": -2.5236427783966064, "logits/rejected": -2.4874751567840576, "logps/chosen": -124.19956970214844, "logps/rejected": -278.9835205078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.304999351501465, "rewards/margins": 7.632504463195801, "rewards/rejected": -11.937503814697266, "step": 8739 }, { "epoch": 1.36, "learning_rate": 7.737064163078997e-06, "logits/chosen": -2.2826898097991943, "logits/rejected": -2.467122793197632, "logps/chosen": -122.58403778076172, "logps/rejected": -170.85183715820312, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -3.2657995223999023, "rewards/margins": 4.844847679138184, "rewards/rejected": -8.110647201538086, "step": 8740 }, { "epoch": 1.36, "learning_rate": 7.73633072254785e-06, "logits/chosen": -2.0189216136932373, "logits/rejected": -2.9800074100494385, "logps/chosen": -214.12908935546875, "logps/rejected": -453.9011535644531, "loss": 0.4228, "rewards/accuracies": 0.5, "rewards/chosen": -3.367898464202881, "rewards/margins": 5.246495246887207, "rewards/rejected": -8.61439323425293, "step": 8741 }, { "epoch": 1.36, "learning_rate": 7.735597282016702e-06, "logits/chosen": -1.9514031410217285, "logits/rejected": -3.16507887840271, "logps/chosen": -138.27096557617188, "logps/rejected": -345.12969970703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.6205801963806152, "rewards/margins": 6.6136298179626465, "rewards/rejected": -10.234210014343262, "step": 8742 }, { "epoch": 1.36, "learning_rate": 7.734863841485556e-06, "logits/chosen": -2.613640308380127, "logits/rejected": -2.6954126358032227, "logps/chosen": -343.1430358886719, "logps/rejected": -342.95477294921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.870713233947754, "rewards/margins": 6.657527923583984, "rewards/rejected": -12.528241157531738, "step": 8743 }, { "epoch": 1.36, "learning_rate": 7.734130400954407e-06, "logits/chosen": -2.47572660446167, "logits/rejected": -2.8688318729400635, "logps/chosen": -51.68812942504883, "logps/rejected": -293.88671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.591552257537842, "rewards/margins": 8.671390533447266, "rewards/rejected": -11.262943267822266, "step": 8744 }, { "epoch": 1.36, "learning_rate": 7.73339696042326e-06, "logits/chosen": -0.9036728143692017, "logits/rejected": -2.3214967250823975, "logps/chosen": -144.91738891601562, "logps/rejected": -499.692626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7838332653045654, "rewards/margins": 11.000068664550781, "rewards/rejected": -12.78390121459961, "step": 8745 }, { "epoch": 1.36, "learning_rate": 7.732663519892111e-06, "logits/chosen": -1.8026931285858154, "logits/rejected": -2.5503110885620117, "logps/chosen": -150.11611938476562, "logps/rejected": -433.0406188964844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.063500165939331, "rewards/margins": 11.614215850830078, "rewards/rejected": -13.677716255187988, "step": 8746 }, { "epoch": 1.36, "learning_rate": 7.731930079360963e-06, "logits/chosen": -1.7512130737304688, "logits/rejected": -2.7388055324554443, "logps/chosen": -500.5927734375, "logps/rejected": -617.005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2603317499160767, "rewards/margins": 9.04533576965332, "rewards/rejected": -10.30566692352295, "step": 8747 }, { "epoch": 1.36, "learning_rate": 7.731196638829815e-06, "logits/chosen": -2.998988151550293, "logits/rejected": -2.0549492835998535, "logps/chosen": -194.97743225097656, "logps/rejected": -161.514892578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8225822448730469, "rewards/margins": 8.206897735595703, "rewards/rejected": -10.02947998046875, "step": 8748 }, { "epoch": 1.36, "learning_rate": 7.730463198298667e-06, "logits/chosen": -2.380352020263672, "logits/rejected": -2.8391032218933105, "logps/chosen": -85.72723388671875, "logps/rejected": -334.21221923828125, "loss": 0.1092, "rewards/accuracies": 1.0, "rewards/chosen": -1.6935490369796753, "rewards/margins": 3.388148307800293, "rewards/rejected": -5.081697463989258, "step": 8749 }, { "epoch": 1.36, "learning_rate": 7.729729757767519e-06, "logits/chosen": -2.471637725830078, "logits/rejected": -2.842075824737549, "logps/chosen": -90.70706176757812, "logps/rejected": -153.04676818847656, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -4.503458023071289, "rewards/margins": 4.379350662231445, "rewards/rejected": -8.882808685302734, "step": 8750 }, { "epoch": 1.36, "learning_rate": 7.728996317236372e-06, "logits/chosen": -2.088153839111328, "logits/rejected": -2.9245049953460693, "logps/chosen": -79.98347473144531, "logps/rejected": -318.5267639160156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.3417177200317383, "rewards/margins": 7.590842247009277, "rewards/rejected": -10.932559967041016, "step": 8751 }, { "epoch": 1.36, "learning_rate": 7.728262876705224e-06, "logits/chosen": -2.556645154953003, "logits/rejected": -2.750321626663208, "logps/chosen": -207.9078826904297, "logps/rejected": -251.5706787109375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.3755335807800293, "rewards/margins": 6.5834503173828125, "rewards/rejected": -9.958984375, "step": 8752 }, { "epoch": 1.36, "learning_rate": 7.727529436174076e-06, "logits/chosen": -2.191265106201172, "logits/rejected": -2.735504627227783, "logps/chosen": -391.71441650390625, "logps/rejected": -531.8282470703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.8688292503356934, "rewards/margins": 7.81348180770874, "rewards/rejected": -11.682311058044434, "step": 8753 }, { "epoch": 1.36, "learning_rate": 7.726795995642928e-06, "logits/chosen": -3.150416374206543, "logits/rejected": -2.9491159915924072, "logps/chosen": -557.99951171875, "logps/rejected": -339.9905700683594, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.2301619052886963, "rewards/margins": 6.603644847869873, "rewards/rejected": -8.833806991577148, "step": 8754 }, { "epoch": 1.36, "learning_rate": 7.72606255511178e-06, "logits/chosen": -2.3040523529052734, "logits/rejected": -2.940789222717285, "logps/chosen": -95.70100402832031, "logps/rejected": -494.44561767578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1083905696868896, "rewards/margins": 8.606010437011719, "rewards/rejected": -10.714401245117188, "step": 8755 }, { "epoch": 1.36, "learning_rate": 7.725329114580632e-06, "logits/chosen": -2.5905826091766357, "logits/rejected": -3.1271798610687256, "logps/chosen": -156.1570281982422, "logps/rejected": -356.13482666015625, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -4.2298431396484375, "rewards/margins": 7.947368621826172, "rewards/rejected": -12.17721176147461, "step": 8756 }, { "epoch": 1.36, "learning_rate": 7.724595674049484e-06, "logits/chosen": -2.7536518573760986, "logits/rejected": -2.766512155532837, "logps/chosen": -87.08879852294922, "logps/rejected": -159.9525146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1602609157562256, "rewards/margins": 9.11671257019043, "rewards/rejected": -11.276973724365234, "step": 8757 }, { "epoch": 1.36, "learning_rate": 7.723862233518335e-06, "logits/chosen": -1.8183027505874634, "logits/rejected": -2.5246028900146484, "logps/chosen": -96.38801574707031, "logps/rejected": -388.95355224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.135880708694458, "rewards/margins": 11.969103813171387, "rewards/rejected": -14.104984283447266, "step": 8758 }, { "epoch": 1.36, "learning_rate": 7.723128792987187e-06, "logits/chosen": -2.6034765243530273, "logits/rejected": -2.9537527561187744, "logps/chosen": -675.59228515625, "logps/rejected": -661.0670776367188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.064302444458008, "rewards/margins": 7.8842291831970215, "rewards/rejected": -10.948532104492188, "step": 8759 }, { "epoch": 1.36, "learning_rate": 7.722395352456041e-06, "logits/chosen": -2.6456685066223145, "logits/rejected": -3.0467886924743652, "logps/chosen": -74.48428344726562, "logps/rejected": -230.31002807617188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.8432915210723877, "rewards/margins": 7.546438217163086, "rewards/rejected": -10.389730453491211, "step": 8760 }, { "epoch": 1.36, "learning_rate": 7.721661911924893e-06, "logits/chosen": -1.765350580215454, "logits/rejected": -2.593482494354248, "logps/chosen": -224.0137939453125, "logps/rejected": -290.43792724609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.1899940967559814, "rewards/margins": 7.323436737060547, "rewards/rejected": -10.51343059539795, "step": 8761 }, { "epoch": 1.36, "learning_rate": 7.720928471393745e-06, "logits/chosen": -2.3548104763031006, "logits/rejected": -2.7896432876586914, "logps/chosen": -105.13275146484375, "logps/rejected": -184.07913208007812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.7417991161346436, "rewards/margins": 6.923364162445068, "rewards/rejected": -9.665163040161133, "step": 8762 }, { "epoch": 1.36, "learning_rate": 7.720195030862597e-06, "logits/chosen": -1.465227484703064, "logits/rejected": -2.937776565551758, "logps/chosen": -101.4864501953125, "logps/rejected": -403.1510009765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.122192144393921, "rewards/margins": 7.7558488845825195, "rewards/rejected": -9.87804126739502, "step": 8763 }, { "epoch": 1.36, "learning_rate": 7.719461590331448e-06, "logits/chosen": -2.4650301933288574, "logits/rejected": -2.715409278869629, "logps/chosen": -102.82893371582031, "logps/rejected": -199.32781982421875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -4.705573558807373, "rewards/margins": 4.6514763832092285, "rewards/rejected": -9.357049942016602, "step": 8764 }, { "epoch": 1.36, "learning_rate": 7.7187281498003e-06, "logits/chosen": -1.819846510887146, "logits/rejected": -2.939059019088745, "logps/chosen": -64.58419799804688, "logps/rejected": -383.91400146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.347550868988037, "rewards/margins": 10.559715270996094, "rewards/rejected": -12.907266616821289, "step": 8765 }, { "epoch": 1.36, "learning_rate": 7.717994709269152e-06, "logits/chosen": -2.4091367721557617, "logits/rejected": -2.7537102699279785, "logps/chosen": -273.7409362792969, "logps/rejected": -382.2183532714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.583491563796997, "rewards/margins": 12.285974502563477, "rewards/rejected": -13.869465827941895, "step": 8766 }, { "epoch": 1.36, "learning_rate": 7.717261268738004e-06, "logits/chosen": -2.4621055126190186, "logits/rejected": -2.7058372497558594, "logps/chosen": -253.05938720703125, "logps/rejected": -355.340087890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.403721570968628, "rewards/margins": 9.942012786865234, "rewards/rejected": -11.345733642578125, "step": 8767 }, { "epoch": 1.36, "learning_rate": 7.716527828206858e-06, "logits/chosen": -2.804516315460205, "logits/rejected": -3.079624652862549, "logps/chosen": -513.3651733398438, "logps/rejected": -505.73809814453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6051318645477295, "rewards/margins": 10.722221374511719, "rewards/rejected": -14.327353477478027, "step": 8768 }, { "epoch": 1.36, "learning_rate": 7.71579438767571e-06, "logits/chosen": -1.6121985912322998, "logits/rejected": -2.5532257556915283, "logps/chosen": -138.4773712158203, "logps/rejected": -456.7662658691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6392173767089844, "rewards/margins": 11.14768123626709, "rewards/rejected": -14.786898612976074, "step": 8769 }, { "epoch": 1.36, "learning_rate": 7.715060947144561e-06, "logits/chosen": -2.9208390712738037, "logits/rejected": -3.0714619159698486, "logps/chosen": -105.0643310546875, "logps/rejected": -213.2099151611328, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.4123001098632812, "rewards/margins": 5.5398664474487305, "rewards/rejected": -7.9521660804748535, "step": 8770 }, { "epoch": 1.36, "learning_rate": 7.714327506613413e-06, "logits/chosen": -2.07133412361145, "logits/rejected": -2.697537660598755, "logps/chosen": -113.72343444824219, "logps/rejected": -338.7276306152344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.331814765930176, "rewards/margins": 8.402931213378906, "rewards/rejected": -11.734745979309082, "step": 8771 }, { "epoch": 1.36, "learning_rate": 7.713594066082265e-06, "logits/chosen": -1.933361530303955, "logits/rejected": -3.028142213821411, "logps/chosen": -169.1727294921875, "logps/rejected": -659.996826171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8064098358154297, "rewards/margins": 10.216876983642578, "rewards/rejected": -12.023286819458008, "step": 8772 }, { "epoch": 1.36, "learning_rate": 7.712860625551117e-06, "logits/chosen": -2.668041706085205, "logits/rejected": -3.1655824184417725, "logps/chosen": -292.60357666015625, "logps/rejected": -376.3771057128906, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.930314064025879, "rewards/margins": 6.468819618225098, "rewards/rejected": -9.399133682250977, "step": 8773 }, { "epoch": 1.36, "learning_rate": 7.712127185019969e-06, "logits/chosen": -2.050424575805664, "logits/rejected": -2.897099256515503, "logps/chosen": -107.84098052978516, "logps/rejected": -334.70440673828125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.067662000656128, "rewards/margins": 8.663874626159668, "rewards/rejected": -11.731536865234375, "step": 8774 }, { "epoch": 1.36, "learning_rate": 7.711393744488822e-06, "logits/chosen": -2.8761658668518066, "logits/rejected": -2.2311861515045166, "logps/chosen": -240.76467895507812, "logps/rejected": -219.75042724609375, "loss": 0.3266, "rewards/accuracies": 1.0, "rewards/chosen": -4.880213737487793, "rewards/margins": 3.5602381229400635, "rewards/rejected": -8.440452575683594, "step": 8775 }, { "epoch": 1.36, "learning_rate": 7.710660303957674e-06, "logits/chosen": -2.3234410285949707, "logits/rejected": -2.77176833152771, "logps/chosen": -169.7594451904297, "logps/rejected": -263.94061279296875, "loss": 1.381, "rewards/accuracies": 0.5, "rewards/chosen": -6.3184990882873535, "rewards/margins": 2.662182092666626, "rewards/rejected": -8.980681419372559, "step": 8776 }, { "epoch": 1.37, "learning_rate": 7.709926863426528e-06, "logits/chosen": -1.7585668563842773, "logits/rejected": -2.5818891525268555, "logps/chosen": -89.2120361328125, "logps/rejected": -364.4505920410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.888885498046875, "rewards/margins": 10.805728912353516, "rewards/rejected": -12.69461441040039, "step": 8777 }, { "epoch": 1.37, "learning_rate": 7.70919342289538e-06, "logits/chosen": -2.1233584880828857, "logits/rejected": -2.77225399017334, "logps/chosen": -165.25872802734375, "logps/rejected": -397.2427978515625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.6116724014282227, "rewards/margins": 7.464252471923828, "rewards/rejected": -11.07592487335205, "step": 8778 }, { "epoch": 1.37, "learning_rate": 7.708459982364232e-06, "logits/chosen": -2.7756974697113037, "logits/rejected": -1.4602257013320923, "logps/chosen": -329.7298583984375, "logps/rejected": -162.17262268066406, "loss": 0.1934, "rewards/accuracies": 1.0, "rewards/chosen": -6.041184902191162, "rewards/margins": 3.851938486099243, "rewards/rejected": -9.893123626708984, "step": 8779 }, { "epoch": 1.37, "learning_rate": 7.707726541833084e-06, "logits/chosen": -2.5467028617858887, "logits/rejected": -2.891012191772461, "logps/chosen": -104.84077453613281, "logps/rejected": -175.80319213867188, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": -5.238900661468506, "rewards/margins": 3.8081483840942383, "rewards/rejected": -9.047048568725586, "step": 8780 }, { "epoch": 1.37, "learning_rate": 7.706993101301935e-06, "logits/chosen": -1.6588239669799805, "logits/rejected": -2.8183085918426514, "logps/chosen": -108.74085998535156, "logps/rejected": -353.9852294921875, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -3.8905482292175293, "rewards/margins": 5.356317520141602, "rewards/rejected": -9.246866226196289, "step": 8781 }, { "epoch": 1.37, "learning_rate": 7.706259660770787e-06, "logits/chosen": -2.6511154174804688, "logits/rejected": -2.128624439239502, "logps/chosen": -199.79702758789062, "logps/rejected": -212.44595336914062, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.7206542491912842, "rewards/margins": 5.85075044631958, "rewards/rejected": -7.571404457092285, "step": 8782 }, { "epoch": 1.37, "learning_rate": 7.70552622023964e-06, "logits/chosen": -2.822014808654785, "logits/rejected": -2.171539068222046, "logps/chosen": -201.6040496826172, "logps/rejected": -151.329833984375, "loss": 0.8013, "rewards/accuracies": 0.5, "rewards/chosen": -2.6635940074920654, "rewards/margins": 3.352130889892578, "rewards/rejected": -6.015725135803223, "step": 8783 }, { "epoch": 1.37, "learning_rate": 7.704792779708491e-06, "logits/chosen": -2.534252882003784, "logits/rejected": -2.8410093784332275, "logps/chosen": -275.7180480957031, "logps/rejected": -337.18170166015625, "loss": 0.1917, "rewards/accuracies": 1.0, "rewards/chosen": -6.021531105041504, "rewards/margins": 7.109256744384766, "rewards/rejected": -13.13078784942627, "step": 8784 }, { "epoch": 1.37, "learning_rate": 7.704059339177343e-06, "logits/chosen": -2.6774044036865234, "logits/rejected": -2.6385345458984375, "logps/chosen": -217.14125061035156, "logps/rejected": -281.09600830078125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.7055726051330566, "rewards/margins": 6.064326286315918, "rewards/rejected": -8.769899368286133, "step": 8785 }, { "epoch": 1.37, "learning_rate": 7.703325898646196e-06, "logits/chosen": -2.6840415000915527, "logits/rejected": -2.8927183151245117, "logps/chosen": -110.71153259277344, "logps/rejected": -374.39874267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5641586780548096, "rewards/margins": 10.302848815917969, "rewards/rejected": -13.867008209228516, "step": 8786 }, { "epoch": 1.37, "learning_rate": 7.702592458115048e-06, "logits/chosen": -2.2262558937072754, "logits/rejected": -2.631659507751465, "logps/chosen": -89.51539611816406, "logps/rejected": -204.9010009765625, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -3.737095355987549, "rewards/margins": 5.269432067871094, "rewards/rejected": -9.006526947021484, "step": 8787 }, { "epoch": 1.37, "learning_rate": 7.7018590175839e-06, "logits/chosen": -1.39116370677948, "logits/rejected": -2.7057669162750244, "logps/chosen": -142.25970458984375, "logps/rejected": -338.3365783691406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.7911767959594727, "rewards/margins": 10.138219833374023, "rewards/rejected": -13.929396629333496, "step": 8788 }, { "epoch": 1.37, "learning_rate": 7.701125577052752e-06, "logits/chosen": -2.5424089431762695, "logits/rejected": -2.937490701675415, "logps/chosen": -119.67183685302734, "logps/rejected": -345.2619934082031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.071782112121582, "rewards/margins": 10.383638381958008, "rewards/rejected": -13.455421447753906, "step": 8789 }, { "epoch": 1.37, "learning_rate": 7.700392136521604e-06, "logits/chosen": -2.8307876586914062, "logits/rejected": -2.5182418823242188, "logps/chosen": -321.3341064453125, "logps/rejected": -256.8835754394531, "loss": 2.1907, "rewards/accuracies": 0.5, "rewards/chosen": -7.134787082672119, "rewards/margins": 2.0912156105041504, "rewards/rejected": -9.22600269317627, "step": 8790 }, { "epoch": 1.37, "learning_rate": 7.699658695990456e-06, "logits/chosen": -2.8854167461395264, "logits/rejected": -2.908154010772705, "logps/chosen": -279.72100830078125, "logps/rejected": -315.7275390625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9712181091308594, "rewards/margins": 8.008377075195312, "rewards/rejected": -9.979595184326172, "step": 8791 }, { "epoch": 1.37, "learning_rate": 7.698925255459308e-06, "logits/chosen": -2.415151596069336, "logits/rejected": -2.6800785064697266, "logps/chosen": -218.92559814453125, "logps/rejected": -364.4173278808594, "loss": 0.2001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2100274562835693, "rewards/margins": 6.2718987464904785, "rewards/rejected": -9.481925964355469, "step": 8792 }, { "epoch": 1.37, "learning_rate": 7.69819181492816e-06, "logits/chosen": -1.1075330972671509, "logits/rejected": -2.5357751846313477, "logps/chosen": -100.75411987304688, "logps/rejected": -244.76502990722656, "loss": 1.7711, "rewards/accuracies": 0.5, "rewards/chosen": -5.046161651611328, "rewards/margins": 4.103864669799805, "rewards/rejected": -9.150026321411133, "step": 8793 }, { "epoch": 1.37, "learning_rate": 7.697458374397012e-06, "logits/chosen": -2.1905429363250732, "logits/rejected": -2.6018919944763184, "logps/chosen": -255.01498413085938, "logps/rejected": -260.6219177246094, "loss": 2.6721, "rewards/accuracies": 0.5, "rewards/chosen": -6.9839911460876465, "rewards/margins": 3.6472671031951904, "rewards/rejected": -10.631258010864258, "step": 8794 }, { "epoch": 1.37, "learning_rate": 7.696724933865865e-06, "logits/chosen": -2.506448268890381, "logits/rejected": -2.759948968887329, "logps/chosen": -106.12406158447266, "logps/rejected": -414.067626953125, "loss": 0.4172, "rewards/accuracies": 0.5, "rewards/chosen": -3.878842353820801, "rewards/margins": 5.0172271728515625, "rewards/rejected": -8.896069526672363, "step": 8795 }, { "epoch": 1.37, "learning_rate": 7.695991493334717e-06, "logits/chosen": -1.4822101593017578, "logits/rejected": -2.725050210952759, "logps/chosen": -138.39390563964844, "logps/rejected": -528.5059814453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9590598344802856, "rewards/margins": 11.047601699829102, "rewards/rejected": -13.006660461425781, "step": 8796 }, { "epoch": 1.37, "learning_rate": 7.695258052803569e-06, "logits/chosen": -1.914627194404602, "logits/rejected": -2.665029764175415, "logps/chosen": -404.6235046386719, "logps/rejected": -470.2762451171875, "loss": 1.8159, "rewards/accuracies": 0.5, "rewards/chosen": -5.762451648712158, "rewards/margins": 5.153263092041016, "rewards/rejected": -10.915715217590332, "step": 8797 }, { "epoch": 1.37, "learning_rate": 7.69452461227242e-06, "logits/chosen": -2.7206902503967285, "logits/rejected": -2.5855276584625244, "logps/chosen": -103.85221862792969, "logps/rejected": -208.0886993408203, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.96376371383667, "rewards/margins": 7.548697471618652, "rewards/rejected": -11.51246166229248, "step": 8798 }, { "epoch": 1.37, "learning_rate": 7.693791171741273e-06, "logits/chosen": -2.825327157974243, "logits/rejected": -2.264218330383301, "logps/chosen": -401.6580810546875, "logps/rejected": -357.80072021484375, "loss": 1.0266, "rewards/accuracies": 0.5, "rewards/chosen": -7.4030985832214355, "rewards/margins": 3.4374775886535645, "rewards/rejected": -10.840576171875, "step": 8799 }, { "epoch": 1.37, "learning_rate": 7.693057731210124e-06, "logits/chosen": -2.922579288482666, "logits/rejected": -2.5756242275238037, "logps/chosen": -229.28309631347656, "logps/rejected": -263.4314880371094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.225257873535156, "rewards/margins": 7.129189491271973, "rewards/rejected": -11.354447364807129, "step": 8800 }, { "epoch": 1.37, "learning_rate": 7.692324290678976e-06, "logits/chosen": -1.9518226385116577, "logits/rejected": -2.57342267036438, "logps/chosen": -167.35147094726562, "logps/rejected": -196.38796997070312, "loss": 0.2965, "rewards/accuracies": 1.0, "rewards/chosen": -4.148428916931152, "rewards/margins": 3.9302642345428467, "rewards/rejected": -8.078693389892578, "step": 8801 }, { "epoch": 1.37, "learning_rate": 7.691590850147828e-06, "logits/chosen": -2.3611197471618652, "logits/rejected": -2.9544153213500977, "logps/chosen": -202.7383270263672, "logps/rejected": -340.7158508300781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9079933166503906, "rewards/margins": 10.342198371887207, "rewards/rejected": -12.250191688537598, "step": 8802 }, { "epoch": 1.37, "learning_rate": 7.69085740961668e-06, "logits/chosen": -2.6734697818756104, "logits/rejected": -1.9460984468460083, "logps/chosen": -174.7377471923828, "logps/rejected": -167.2581787109375, "loss": 2.1964, "rewards/accuracies": 0.5, "rewards/chosen": -6.207231044769287, "rewards/margins": -0.23940157890319824, "rewards/rejected": -5.967829704284668, "step": 8803 }, { "epoch": 1.37, "learning_rate": 7.690123969085534e-06, "logits/chosen": -2.9928629398345947, "logits/rejected": -2.9200692176818848, "logps/chosen": -151.68064880371094, "logps/rejected": -219.07579040527344, "loss": 1.3297, "rewards/accuracies": 0.5, "rewards/chosen": -3.967513084411621, "rewards/margins": 4.181775093078613, "rewards/rejected": -8.149288177490234, "step": 8804 }, { "epoch": 1.37, "learning_rate": 7.689390528554386e-06, "logits/chosen": -0.6046106815338135, "logits/rejected": -2.333042860031128, "logps/chosen": -48.56739807128906, "logps/rejected": -441.3721008300781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3975586891174316, "rewards/margins": 9.636301040649414, "rewards/rejected": -12.033859252929688, "step": 8805 }, { "epoch": 1.37, "learning_rate": 7.688657088023237e-06, "logits/chosen": -1.7927203178405762, "logits/rejected": -2.7796132564544678, "logps/chosen": -66.2803955078125, "logps/rejected": -249.82199096679688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.835996150970459, "rewards/margins": 7.938216686248779, "rewards/rejected": -10.774212837219238, "step": 8806 }, { "epoch": 1.37, "learning_rate": 7.68792364749209e-06, "logits/chosen": -3.0178511142730713, "logits/rejected": -2.40871262550354, "logps/chosen": -371.9176025390625, "logps/rejected": -451.6025085449219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.14044451713562, "rewards/margins": 8.290383338928223, "rewards/rejected": -10.430828094482422, "step": 8807 }, { "epoch": 1.37, "learning_rate": 7.687190206960941e-06, "logits/chosen": -2.7605037689208984, "logits/rejected": -3.050736427307129, "logps/chosen": -221.28701782226562, "logps/rejected": -273.51263427734375, "loss": 2.2412, "rewards/accuracies": 0.5, "rewards/chosen": -6.018569469451904, "rewards/margins": 0.7762658596038818, "rewards/rejected": -6.794835090637207, "step": 8808 }, { "epoch": 1.37, "learning_rate": 7.686456766429795e-06, "logits/chosen": -3.0297811031341553, "logits/rejected": -2.4229896068573, "logps/chosen": -288.06121826171875, "logps/rejected": -244.50123596191406, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.652510166168213, "rewards/margins": 7.534700393676758, "rewards/rejected": -10.187210083007812, "step": 8809 }, { "epoch": 1.37, "learning_rate": 7.685723325898647e-06, "logits/chosen": -2.8166146278381348, "logits/rejected": -2.477348566055298, "logps/chosen": -306.03369140625, "logps/rejected": -335.79071044921875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -4.5916748046875, "rewards/margins": 5.33967399597168, "rewards/rejected": -9.93134880065918, "step": 8810 }, { "epoch": 1.37, "learning_rate": 7.684989885367499e-06, "logits/chosen": -2.1396119594573975, "logits/rejected": -3.0981838703155518, "logps/chosen": -148.1184539794922, "logps/rejected": -516.8623046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.035174608230591, "rewards/margins": 7.83938455581665, "rewards/rejected": -9.87455940246582, "step": 8811 }, { "epoch": 1.37, "learning_rate": 7.68425644483635e-06, "logits/chosen": -2.5054118633270264, "logits/rejected": -2.923614978790283, "logps/chosen": -253.3282012939453, "logps/rejected": -377.06561279296875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -3.7735414505004883, "rewards/margins": 4.788599967956543, "rewards/rejected": -8.562141418457031, "step": 8812 }, { "epoch": 1.37, "learning_rate": 7.683523004305204e-06, "logits/chosen": -2.517282485961914, "logits/rejected": -2.669114112854004, "logps/chosen": -526.968994140625, "logps/rejected": -415.1710205078125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -4.011569976806641, "rewards/margins": 7.98637580871582, "rewards/rejected": -11.997945785522461, "step": 8813 }, { "epoch": 1.37, "learning_rate": 7.682789563774056e-06, "logits/chosen": -1.9633874893188477, "logits/rejected": -2.800614356994629, "logps/chosen": -296.2002868652344, "logps/rejected": -326.6754150390625, "loss": 2.3357, "rewards/accuracies": 0.5, "rewards/chosen": -5.8629913330078125, "rewards/margins": 0.6596455574035645, "rewards/rejected": -6.522636890411377, "step": 8814 }, { "epoch": 1.37, "learning_rate": 7.682056123242908e-06, "logits/chosen": -2.307337999343872, "logits/rejected": -2.7109835147857666, "logps/chosen": -43.1562385559082, "logps/rejected": -307.80828857421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.152040481567383, "rewards/margins": 10.36648178100586, "rewards/rejected": -12.518522262573242, "step": 8815 }, { "epoch": 1.37, "learning_rate": 7.68132268271176e-06, "logits/chosen": -1.9733387231826782, "logits/rejected": -2.8095128536224365, "logps/chosen": -49.79201889038086, "logps/rejected": -226.76602172851562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3380612134933472, "rewards/margins": 9.412130355834961, "rewards/rejected": -10.750192642211914, "step": 8816 }, { "epoch": 1.37, "learning_rate": 7.680589242180611e-06, "logits/chosen": -2.4954335689544678, "logits/rejected": -2.155787467956543, "logps/chosen": -312.20941162109375, "logps/rejected": -206.4095458984375, "loss": 1.3358, "rewards/accuracies": 0.5, "rewards/chosen": -3.6611366271972656, "rewards/margins": 1.6390154361724854, "rewards/rejected": -5.300151824951172, "step": 8817 }, { "epoch": 1.37, "learning_rate": 7.679855801649463e-06, "logits/chosen": -1.9528450965881348, "logits/rejected": -2.9481983184814453, "logps/chosen": -104.51869201660156, "logps/rejected": -388.0341796875, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -5.31535530090332, "rewards/margins": 8.096854209899902, "rewards/rejected": -13.412208557128906, "step": 8818 }, { "epoch": 1.37, "learning_rate": 7.679122361118315e-06, "logits/chosen": -2.3498799800872803, "logits/rejected": -2.721259832382202, "logps/chosen": -351.95904541015625, "logps/rejected": -421.06512451171875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.8347280025482178, "rewards/margins": 5.342425346374512, "rewards/rejected": -9.177153587341309, "step": 8819 }, { "epoch": 1.37, "learning_rate": 7.678388920587167e-06, "logits/chosen": -2.939732313156128, "logits/rejected": -2.407543897628784, "logps/chosen": -299.7377014160156, "logps/rejected": -424.12744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.193215370178223, "rewards/margins": 10.201861381530762, "rewards/rejected": -15.395076751708984, "step": 8820 }, { "epoch": 1.37, "learning_rate": 7.677655480056019e-06, "logits/chosen": -2.0985920429229736, "logits/rejected": -2.843766212463379, "logps/chosen": -126.55404663085938, "logps/rejected": -373.7015380859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.0612070560455322, "rewards/margins": 8.83549690246582, "rewards/rejected": -11.89670467376709, "step": 8821 }, { "epoch": 1.37, "learning_rate": 7.676922039524873e-06, "logits/chosen": -0.9969488978385925, "logits/rejected": -2.833740711212158, "logps/chosen": -176.27366638183594, "logps/rejected": -532.79248046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9201364517211914, "rewards/margins": 7.137579917907715, "rewards/rejected": -10.057716369628906, "step": 8822 }, { "epoch": 1.37, "learning_rate": 7.676188598993724e-06, "logits/chosen": -1.7759788036346436, "logits/rejected": -2.78574275970459, "logps/chosen": -122.05650329589844, "logps/rejected": -290.7799987792969, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.795235633850098, "rewards/margins": 6.774781227111816, "rewards/rejected": -11.570016860961914, "step": 8823 }, { "epoch": 1.37, "learning_rate": 7.675455158462576e-06, "logits/chosen": -2.8031671047210693, "logits/rejected": -2.2009799480438232, "logps/chosen": -174.7271728515625, "logps/rejected": -111.75167846679688, "loss": 3.9256, "rewards/accuracies": 0.5, "rewards/chosen": -8.141745567321777, "rewards/margins": -1.294532299041748, "rewards/rejected": -6.8472137451171875, "step": 8824 }, { "epoch": 1.37, "learning_rate": 7.674721717931428e-06, "logits/chosen": -2.8238167762756348, "logits/rejected": -2.2434823513031006, "logps/chosen": -382.2299499511719, "logps/rejected": -284.73126220703125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.587695837020874, "rewards/margins": 6.845669746398926, "rewards/rejected": -10.433364868164062, "step": 8825 }, { "epoch": 1.37, "learning_rate": 7.67398827740028e-06, "logits/chosen": -2.865057945251465, "logits/rejected": -2.803408622741699, "logps/chosen": -296.4110107421875, "logps/rejected": -289.5755310058594, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.58153772354126, "rewards/margins": 6.26593542098999, "rewards/rejected": -11.84747314453125, "step": 8826 }, { "epoch": 1.37, "learning_rate": 7.673254836869132e-06, "logits/chosen": -1.98444664478302, "logits/rejected": -2.7737526893615723, "logps/chosen": -71.45438385009766, "logps/rejected": -322.3537902832031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.539729595184326, "rewards/margins": 8.330242156982422, "rewards/rejected": -11.869972229003906, "step": 8827 }, { "epoch": 1.37, "learning_rate": 7.672521396337984e-06, "logits/chosen": -2.800663709640503, "logits/rejected": -3.1054396629333496, "logps/chosen": -152.04351806640625, "logps/rejected": -296.752197265625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.1322813034057617, "rewards/margins": 7.161477088928223, "rewards/rejected": -10.293758392333984, "step": 8828 }, { "epoch": 1.37, "learning_rate": 7.671787955806836e-06, "logits/chosen": -1.336490511894226, "logits/rejected": -2.759509325027466, "logps/chosen": -99.90097045898438, "logps/rejected": -527.2452392578125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -5.977236747741699, "rewards/margins": 6.22603702545166, "rewards/rejected": -12.20327377319336, "step": 8829 }, { "epoch": 1.37, "learning_rate": 7.671054515275688e-06, "logits/chosen": -2.6787045001983643, "logits/rejected": -2.423529863357544, "logps/chosen": -388.470703125, "logps/rejected": -402.4453430175781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.888916015625, "rewards/margins": 10.848429679870605, "rewards/rejected": -15.737345695495605, "step": 8830 }, { "epoch": 1.37, "learning_rate": 7.670321074744541e-06, "logits/chosen": -2.8359696865081787, "logits/rejected": -2.905683994293213, "logps/chosen": -148.3660430908203, "logps/rejected": -182.02032470703125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.3491134643554688, "rewards/margins": 5.833049774169922, "rewards/rejected": -8.18216323852539, "step": 8831 }, { "epoch": 1.37, "learning_rate": 7.669587634213393e-06, "logits/chosen": -2.36905837059021, "logits/rejected": -2.773527145385742, "logps/chosen": -242.63568115234375, "logps/rejected": -208.1562042236328, "loss": 0.6659, "rewards/accuracies": 0.5, "rewards/chosen": -7.097458839416504, "rewards/margins": 1.7291159629821777, "rewards/rejected": -8.826574325561523, "step": 8832 }, { "epoch": 1.37, "learning_rate": 7.668854193682245e-06, "logits/chosen": -2.3283863067626953, "logits/rejected": -2.8320605754852295, "logps/chosen": -134.86050415039062, "logps/rejected": -343.20440673828125, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -5.579883575439453, "rewards/margins": 5.695631980895996, "rewards/rejected": -11.275516510009766, "step": 8833 }, { "epoch": 1.37, "learning_rate": 7.668120753151097e-06, "logits/chosen": -1.59235680103302, "logits/rejected": -2.8449811935424805, "logps/chosen": -107.52334594726562, "logps/rejected": -306.602783203125, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/chosen": -1.7160011529922485, "rewards/margins": 5.838653564453125, "rewards/rejected": -7.554655075073242, "step": 8834 }, { "epoch": 1.37, "learning_rate": 7.667387312619949e-06, "logits/chosen": -2.554407835006714, "logits/rejected": -2.7818307876586914, "logps/chosen": -135.81820678710938, "logps/rejected": -252.07589721679688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.413729190826416, "rewards/margins": 7.104197025299072, "rewards/rejected": -8.517926216125488, "step": 8835 }, { "epoch": 1.37, "learning_rate": 7.6666538720888e-06, "logits/chosen": -2.4669370651245117, "logits/rejected": -2.7162036895751953, "logps/chosen": -232.23814392089844, "logps/rejected": -238.86483764648438, "loss": 0.842, "rewards/accuracies": 0.5, "rewards/chosen": -3.664419651031494, "rewards/margins": 3.3733935356140137, "rewards/rejected": -7.037813186645508, "step": 8836 }, { "epoch": 1.37, "learning_rate": 7.665920431557652e-06, "logits/chosen": -3.087397575378418, "logits/rejected": -2.871516466140747, "logps/chosen": -206.8519744873047, "logps/rejected": -202.029296875, "loss": 0.1971, "rewards/accuracies": 1.0, "rewards/chosen": -1.8190842866897583, "rewards/margins": 5.897970199584961, "rewards/rejected": -7.71705436706543, "step": 8837 }, { "epoch": 1.37, "learning_rate": 7.665186991026504e-06, "logits/chosen": -2.903597831726074, "logits/rejected": -2.2030205726623535, "logps/chosen": -264.4960632324219, "logps/rejected": -235.53619384765625, "loss": 0.3892, "rewards/accuracies": 0.5, "rewards/chosen": -3.6446309089660645, "rewards/margins": 4.174092769622803, "rewards/rejected": -7.818723201751709, "step": 8838 }, { "epoch": 1.37, "learning_rate": 7.664453550495356e-06, "logits/chosen": -2.4986209869384766, "logits/rejected": -3.095963954925537, "logps/chosen": -105.66798400878906, "logps/rejected": -274.5545959472656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.5415143966674805, "rewards/margins": 7.084914207458496, "rewards/rejected": -10.626428604125977, "step": 8839 }, { "epoch": 1.37, "learning_rate": 7.66372010996421e-06, "logits/chosen": -2.550550937652588, "logits/rejected": -2.8342785835266113, "logps/chosen": -72.72449493408203, "logps/rejected": -253.67132568359375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7228715419769287, "rewards/margins": 8.148916244506836, "rewards/rejected": -10.871788024902344, "step": 8840 }, { "epoch": 1.37, "learning_rate": 7.662986669433062e-06, "logits/chosen": -1.44568932056427, "logits/rejected": -2.8087260723114014, "logps/chosen": -159.63023376464844, "logps/rejected": -336.87103271484375, "loss": 0.0358, "rewards/accuracies": 1.0, "rewards/chosen": -4.7345290184021, "rewards/margins": 3.3134548664093018, "rewards/rejected": -8.04798412322998, "step": 8841 }, { "epoch": 1.38, "learning_rate": 7.662253228901914e-06, "logits/chosen": -1.5562664270401, "logits/rejected": -2.84350323677063, "logps/chosen": -166.987060546875, "logps/rejected": -389.30328369140625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -2.4466662406921387, "rewards/margins": 5.195505619049072, "rewards/rejected": -7.642171859741211, "step": 8842 }, { "epoch": 1.38, "learning_rate": 7.661519788370767e-06, "logits/chosen": -2.1289706230163574, "logits/rejected": -3.01798415184021, "logps/chosen": -139.93142700195312, "logps/rejected": -419.85443115234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.924355149269104, "rewards/margins": 8.624184608459473, "rewards/rejected": -10.548540115356445, "step": 8843 }, { "epoch": 1.38, "learning_rate": 7.660786347839619e-06, "logits/chosen": -1.9322896003723145, "logits/rejected": -2.7359459400177, "logps/chosen": -219.0632781982422, "logps/rejected": -332.2632751464844, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -3.1680450439453125, "rewards/margins": 5.180532455444336, "rewards/rejected": -8.348577499389648, "step": 8844 }, { "epoch": 1.38, "learning_rate": 7.660052907308471e-06, "logits/chosen": -2.860091209411621, "logits/rejected": -3.2288782596588135, "logps/chosen": -213.6061248779297, "logps/rejected": -384.69134521484375, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -2.179548978805542, "rewards/margins": 5.784475326538086, "rewards/rejected": -7.964023590087891, "step": 8845 }, { "epoch": 1.38, "learning_rate": 7.659319466777323e-06, "logits/chosen": -2.761768341064453, "logits/rejected": -2.3407680988311768, "logps/chosen": -338.51641845703125, "logps/rejected": -314.61383056640625, "loss": 0.3758, "rewards/accuracies": 0.5, "rewards/chosen": -4.813558578491211, "rewards/margins": 4.93412971496582, "rewards/rejected": -9.747688293457031, "step": 8846 }, { "epoch": 1.38, "learning_rate": 7.658586026246175e-06, "logits/chosen": -1.7083394527435303, "logits/rejected": -2.2270286083221436, "logps/chosen": -121.56375122070312, "logps/rejected": -430.9517822265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4643555879592896, "rewards/margins": 8.822580337524414, "rewards/rejected": -10.286935806274414, "step": 8847 }, { "epoch": 1.38, "learning_rate": 7.657852585715027e-06, "logits/chosen": -2.3273003101348877, "logits/rejected": -2.881239175796509, "logps/chosen": -35.559104919433594, "logps/rejected": -323.4378356933594, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.6713855266571045, "rewards/margins": 7.3705010414123535, "rewards/rejected": -9.041887283325195, "step": 8848 }, { "epoch": 1.38, "learning_rate": 7.65711914518388e-06, "logits/chosen": -2.9072582721710205, "logits/rejected": -2.9641082286834717, "logps/chosen": -422.8002624511719, "logps/rejected": -411.3719177246094, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -3.276949405670166, "rewards/margins": 5.23198938369751, "rewards/rejected": -8.508938789367676, "step": 8849 }, { "epoch": 1.38, "learning_rate": 7.656385704652732e-06, "logits/chosen": -2.9668567180633545, "logits/rejected": -2.6574254035949707, "logps/chosen": -326.1885070800781, "logps/rejected": -300.49853515625, "loss": 0.5688, "rewards/accuracies": 1.0, "rewards/chosen": -7.275160312652588, "rewards/margins": 0.271085262298584, "rewards/rejected": -7.546245574951172, "step": 8850 }, { "epoch": 1.38, "learning_rate": 7.655652264121584e-06, "logits/chosen": -1.4544007778167725, "logits/rejected": -2.641889810562134, "logps/chosen": -91.25076293945312, "logps/rejected": -299.6048583984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.491238832473755, "rewards/margins": 7.502160549163818, "rewards/rejected": -10.993398666381836, "step": 8851 }, { "epoch": 1.38, "learning_rate": 7.654918823590436e-06, "logits/chosen": -2.2693090438842773, "logits/rejected": -2.957723617553711, "logps/chosen": -78.18138122558594, "logps/rejected": -326.23370361328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.7315878868103027, "rewards/margins": 10.446540832519531, "rewards/rejected": -14.178128242492676, "step": 8852 }, { "epoch": 1.38, "learning_rate": 7.654185383059288e-06, "logits/chosen": -2.201953887939453, "logits/rejected": -2.4264297485351562, "logps/chosen": -207.70819091796875, "logps/rejected": -334.298095703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.8438045978546143, "rewards/margins": 6.877895355224609, "rewards/rejected": -10.721699714660645, "step": 8853 }, { "epoch": 1.38, "learning_rate": 7.65345194252814e-06, "logits/chosen": -2.6621644496917725, "logits/rejected": -3.244861125946045, "logps/chosen": -55.299293518066406, "logps/rejected": -202.58583068847656, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -3.1797890663146973, "rewards/margins": 4.065464019775391, "rewards/rejected": -7.245253562927246, "step": 8854 }, { "epoch": 1.38, "learning_rate": 7.652718501996991e-06, "logits/chosen": -1.560390830039978, "logits/rejected": -2.314671754837036, "logps/chosen": -106.8147964477539, "logps/rejected": -359.64752197265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.620906114578247, "rewards/margins": 7.991945743560791, "rewards/rejected": -10.612852096557617, "step": 8855 }, { "epoch": 1.38, "learning_rate": 7.651985061465843e-06, "logits/chosen": -3.151416063308716, "logits/rejected": -2.7264528274536133, "logps/chosen": -614.3519287109375, "logps/rejected": -469.58526611328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3318939208984375, "rewards/margins": 9.462201118469238, "rewards/rejected": -11.794095039367676, "step": 8856 }, { "epoch": 1.38, "learning_rate": 7.651251620934697e-06, "logits/chosen": -2.822117567062378, "logits/rejected": -2.7426486015319824, "logps/chosen": -571.5841064453125, "logps/rejected": -575.2847900390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.128570556640625, "rewards/margins": 9.3761568069458, "rewards/rejected": -10.504727363586426, "step": 8857 }, { "epoch": 1.38, "learning_rate": 7.650518180403549e-06, "logits/chosen": -2.5968329906463623, "logits/rejected": -2.5703508853912354, "logps/chosen": -159.74261474609375, "logps/rejected": -268.1245422363281, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.72723126411438, "rewards/margins": 7.993781089782715, "rewards/rejected": -10.721012115478516, "step": 8858 }, { "epoch": 1.38, "learning_rate": 7.6497847398724e-06, "logits/chosen": -2.803281545639038, "logits/rejected": -2.1624269485473633, "logps/chosen": -389.86151123046875, "logps/rejected": -333.27545166015625, "loss": 0.3964, "rewards/accuracies": 0.5, "rewards/chosen": -4.663821220397949, "rewards/margins": 1.241804599761963, "rewards/rejected": -5.90562629699707, "step": 8859 }, { "epoch": 1.38, "learning_rate": 7.649051299341252e-06, "logits/chosen": -2.5769870281219482, "logits/rejected": -2.7553789615631104, "logps/chosen": -350.76922607421875, "logps/rejected": -531.336181640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.681938171386719, "rewards/margins": 7.317615509033203, "rewards/rejected": -11.999553680419922, "step": 8860 }, { "epoch": 1.38, "learning_rate": 7.648317858810104e-06, "logits/chosen": -2.731450319290161, "logits/rejected": -2.743018865585327, "logps/chosen": -551.0491943359375, "logps/rejected": -452.2259826660156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.7066190242767334, "rewards/margins": 9.391610145568848, "rewards/rejected": -13.098228454589844, "step": 8861 }, { "epoch": 1.38, "learning_rate": 7.647584418278956e-06, "logits/chosen": -2.6311633586883545, "logits/rejected": -1.9803404808044434, "logps/chosen": -388.45159912109375, "logps/rejected": -403.1978759765625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -2.510910987854004, "rewards/margins": 9.401714324951172, "rewards/rejected": -11.912626266479492, "step": 8862 }, { "epoch": 1.38, "learning_rate": 7.646850977747808e-06, "logits/chosen": -3.0007221698760986, "logits/rejected": -3.183077096939087, "logps/chosen": -174.78652954101562, "logps/rejected": -216.74658203125, "loss": 0.3264, "rewards/accuracies": 1.0, "rewards/chosen": -4.418257713317871, "rewards/margins": 3.5998616218566895, "rewards/rejected": -8.018119812011719, "step": 8863 }, { "epoch": 1.38, "learning_rate": 7.64611753721666e-06, "logits/chosen": -2.6066224575042725, "logits/rejected": -2.7342870235443115, "logps/chosen": -58.44464874267578, "logps/rejected": -200.8319854736328, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.354609966278076, "rewards/margins": 5.859848976135254, "rewards/rejected": -8.214459419250488, "step": 8864 }, { "epoch": 1.38, "learning_rate": 7.645384096685512e-06, "logits/chosen": -2.643436908721924, "logits/rejected": -2.818598508834839, "logps/chosen": -149.09335327148438, "logps/rejected": -185.7373504638672, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -3.081240653991699, "rewards/margins": 6.195558547973633, "rewards/rejected": -9.276798248291016, "step": 8865 }, { "epoch": 1.38, "learning_rate": 7.644650656154365e-06, "logits/chosen": -2.6052980422973633, "logits/rejected": -2.6547234058380127, "logps/chosen": -293.2201843261719, "logps/rejected": -487.993408203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.176607131958008, "rewards/margins": 8.47594165802002, "rewards/rejected": -11.652548789978027, "step": 8866 }, { "epoch": 1.38, "learning_rate": 7.643917215623217e-06, "logits/chosen": -1.752150535583496, "logits/rejected": -2.6963865756988525, "logps/chosen": -252.91751098632812, "logps/rejected": -340.5279846191406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.9473228454589844, "rewards/margins": 8.582354545593262, "rewards/rejected": -11.529677391052246, "step": 8867 }, { "epoch": 1.38, "learning_rate": 7.643183775092069e-06, "logits/chosen": -2.717353105545044, "logits/rejected": -2.8409883975982666, "logps/chosen": -109.34481811523438, "logps/rejected": -214.220947265625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.9866447448730469, "rewards/margins": 6.669508934020996, "rewards/rejected": -8.656153678894043, "step": 8868 }, { "epoch": 1.38, "learning_rate": 7.642450334560921e-06, "logits/chosen": -2.3280181884765625, "logits/rejected": -2.8285892009735107, "logps/chosen": -284.16571044921875, "logps/rejected": -356.8677062988281, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -4.428936958312988, "rewards/margins": 6.934150695800781, "rewards/rejected": -11.36308765411377, "step": 8869 }, { "epoch": 1.38, "learning_rate": 7.641716894029773e-06, "logits/chosen": -2.94438099861145, "logits/rejected": -3.0079612731933594, "logps/chosen": -260.1059875488281, "logps/rejected": -355.4334716796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.34037971496582, "rewards/margins": 9.08016586303711, "rewards/rejected": -14.42054557800293, "step": 8870 }, { "epoch": 1.38, "learning_rate": 7.640983453498625e-06, "logits/chosen": -1.07981538772583, "logits/rejected": -2.08095121383667, "logps/chosen": -148.10504150390625, "logps/rejected": -267.5331115722656, "loss": 0.2045, "rewards/accuracies": 1.0, "rewards/chosen": -4.218877792358398, "rewards/margins": 4.288922309875488, "rewards/rejected": -8.507800102233887, "step": 8871 }, { "epoch": 1.38, "learning_rate": 7.640250012967477e-06, "logits/chosen": -2.0590462684631348, "logits/rejected": -2.860384464263916, "logps/chosen": -104.10755920410156, "logps/rejected": -347.7802734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.0399391651153564, "rewards/margins": 8.61484432220459, "rewards/rejected": -11.654783248901367, "step": 8872 }, { "epoch": 1.38, "learning_rate": 7.639516572436329e-06, "logits/chosen": -2.839595079421997, "logits/rejected": -3.1206395626068115, "logps/chosen": -98.7008056640625, "logps/rejected": -258.18798828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.5944534540176392, "rewards/margins": 6.2598185539245605, "rewards/rejected": -7.85427188873291, "step": 8873 }, { "epoch": 1.38, "learning_rate": 7.63878313190518e-06, "logits/chosen": -2.265023946762085, "logits/rejected": -2.813714027404785, "logps/chosen": -162.39869689941406, "logps/rejected": -358.73712158203125, "loss": 0.8097, "rewards/accuracies": 0.5, "rewards/chosen": -3.5016510486602783, "rewards/margins": 8.088338851928711, "rewards/rejected": -11.589990615844727, "step": 8874 }, { "epoch": 1.38, "learning_rate": 7.638049691374034e-06, "logits/chosen": -2.5756092071533203, "logits/rejected": -2.726146697998047, "logps/chosen": -96.62751007080078, "logps/rejected": -259.64654541015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3103713989257812, "rewards/margins": 9.348638534545898, "rewards/rejected": -10.65900993347168, "step": 8875 }, { "epoch": 1.38, "learning_rate": 7.637316250842886e-06, "logits/chosen": -3.0353543758392334, "logits/rejected": -2.785813808441162, "logps/chosen": -172.54861450195312, "logps/rejected": -186.24050903320312, "loss": 0.5685, "rewards/accuracies": 0.5, "rewards/chosen": -4.559329509735107, "rewards/margins": 2.796449899673462, "rewards/rejected": -7.355779647827148, "step": 8876 }, { "epoch": 1.38, "learning_rate": 7.63658281031174e-06, "logits/chosen": -2.1174843311309814, "logits/rejected": -2.770402193069458, "logps/chosen": -503.2225646972656, "logps/rejected": -612.369384765625, "loss": 0.3746, "rewards/accuracies": 0.5, "rewards/chosen": -5.984325408935547, "rewards/margins": 2.9090089797973633, "rewards/rejected": -8.89333438873291, "step": 8877 }, { "epoch": 1.38, "learning_rate": 7.635849369780591e-06, "logits/chosen": -1.9621881246566772, "logits/rejected": -2.7569620609283447, "logps/chosen": -230.41995239257812, "logps/rejected": -435.9681396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5199360847473145, "rewards/margins": 10.037040710449219, "rewards/rejected": -11.556976318359375, "step": 8878 }, { "epoch": 1.38, "learning_rate": 7.635115929249443e-06, "logits/chosen": -1.7452218532562256, "logits/rejected": -2.814831495285034, "logps/chosen": -106.48792266845703, "logps/rejected": -347.45770263671875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -1.624406099319458, "rewards/margins": 7.234367370605469, "rewards/rejected": -8.858774185180664, "step": 8879 }, { "epoch": 1.38, "learning_rate": 7.634382488718295e-06, "logits/chosen": -2.2020926475524902, "logits/rejected": -2.555680274963379, "logps/chosen": -135.96246337890625, "logps/rejected": -215.6651611328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.6440062522888184, "rewards/margins": 6.87351131439209, "rewards/rejected": -9.517518043518066, "step": 8880 }, { "epoch": 1.38, "learning_rate": 7.633649048187147e-06, "logits/chosen": -2.2128653526306152, "logits/rejected": -2.769627809524536, "logps/chosen": -131.9287567138672, "logps/rejected": -433.62628173828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.088806629180908, "rewards/margins": 8.19861125946045, "rewards/rejected": -12.287418365478516, "step": 8881 }, { "epoch": 1.38, "learning_rate": 7.632915607655999e-06, "logits/chosen": -2.646578788757324, "logits/rejected": -2.161679983139038, "logps/chosen": -492.5064697265625, "logps/rejected": -511.9691162109375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.7790679931640625, "rewards/margins": 6.898683547973633, "rewards/rejected": -8.677751541137695, "step": 8882 }, { "epoch": 1.38, "learning_rate": 7.63218216712485e-06, "logits/chosen": -2.0420570373535156, "logits/rejected": -3.099161386489868, "logps/chosen": -57.90422058105469, "logps/rejected": -346.62823486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4711644649505615, "rewards/margins": 10.780574798583984, "rewards/rejected": -12.251739501953125, "step": 8883 }, { "epoch": 1.38, "learning_rate": 7.631448726593704e-06, "logits/chosen": -2.6469812393188477, "logits/rejected": -2.8757588863372803, "logps/chosen": -193.75308227539062, "logps/rejected": -382.1834411621094, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0251518487930298, "rewards/margins": 7.772958278656006, "rewards/rejected": -8.798110008239746, "step": 8884 }, { "epoch": 1.38, "learning_rate": 7.630715286062556e-06, "logits/chosen": -2.7202982902526855, "logits/rejected": -2.1089589595794678, "logps/chosen": -302.96832275390625, "logps/rejected": -507.66107177734375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.737715244293213, "rewards/margins": 7.185990333557129, "rewards/rejected": -10.9237060546875, "step": 8885 }, { "epoch": 1.38, "learning_rate": 7.629981845531408e-06, "logits/chosen": -2.545835256576538, "logits/rejected": -3.050088405609131, "logps/chosen": -113.0885009765625, "logps/rejected": -229.8525848388672, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.0536081790924072, "rewards/margins": 6.042788505554199, "rewards/rejected": -8.096397399902344, "step": 8886 }, { "epoch": 1.38, "learning_rate": 7.62924840500026e-06, "logits/chosen": -2.115252733230591, "logits/rejected": -2.9164512157440186, "logps/chosen": -80.43809509277344, "logps/rejected": -279.8701171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.262066125869751, "rewards/margins": 7.48896598815918, "rewards/rejected": -10.751031875610352, "step": 8887 }, { "epoch": 1.38, "learning_rate": 7.628514964469112e-06, "logits/chosen": -1.3217440843582153, "logits/rejected": -2.540562152862549, "logps/chosen": -100.20503234863281, "logps/rejected": -241.16180419921875, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.935587167739868, "rewards/margins": 5.140486240386963, "rewards/rejected": -8.07607364654541, "step": 8888 }, { "epoch": 1.38, "learning_rate": 7.627781523937964e-06, "logits/chosen": -3.235396385192871, "logits/rejected": -3.2091352939605713, "logps/chosen": -403.7602844238281, "logps/rejected": -400.67254638671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.240795135498047, "rewards/margins": 8.446281433105469, "rewards/rejected": -12.6870756149292, "step": 8889 }, { "epoch": 1.38, "learning_rate": 7.6270480834068156e-06, "logits/chosen": -2.156250238418579, "logits/rejected": -2.647158622741699, "logps/chosen": -767.1115112304688, "logps/rejected": -592.4944458007812, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -4.865523338317871, "rewards/margins": 5.794686317443848, "rewards/rejected": -10.660209655761719, "step": 8890 }, { "epoch": 1.38, "learning_rate": 7.6263146428756674e-06, "logits/chosen": -3.0266242027282715, "logits/rejected": -2.692781686782837, "logps/chosen": -265.1240539550781, "logps/rejected": -280.73846435546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9724624156951904, "rewards/margins": 8.971564292907715, "rewards/rejected": -10.944026947021484, "step": 8891 }, { "epoch": 1.38, "learning_rate": 7.625581202344519e-06, "logits/chosen": -2.3696258068084717, "logits/rejected": -2.6361446380615234, "logps/chosen": -199.6837158203125, "logps/rejected": -275.32965087890625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -5.102906227111816, "rewards/margins": 6.163120269775391, "rewards/rejected": -11.266026496887207, "step": 8892 }, { "epoch": 1.38, "learning_rate": 7.624847761813373e-06, "logits/chosen": -1.930717945098877, "logits/rejected": -3.059373617172241, "logps/chosen": -76.25013732910156, "logps/rejected": -368.0467529296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.483667850494385, "rewards/margins": 7.8804497718811035, "rewards/rejected": -13.364117622375488, "step": 8893 }, { "epoch": 1.38, "learning_rate": 7.624114321282225e-06, "logits/chosen": -2.727332830429077, "logits/rejected": -2.5234436988830566, "logps/chosen": -183.84841918945312, "logps/rejected": -219.92567443847656, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.918407440185547, "rewards/margins": 6.598690986633301, "rewards/rejected": -9.517098426818848, "step": 8894 }, { "epoch": 1.38, "learning_rate": 7.623380880751077e-06, "logits/chosen": -2.0329926013946533, "logits/rejected": -2.812270402908325, "logps/chosen": -83.6479263305664, "logps/rejected": -308.0555419921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.184912919998169, "rewards/margins": 8.869731903076172, "rewards/rejected": -11.054643630981445, "step": 8895 }, { "epoch": 1.38, "learning_rate": 7.6226474402199285e-06, "logits/chosen": -2.83024525642395, "logits/rejected": -2.180148124694824, "logps/chosen": -301.47247314453125, "logps/rejected": -114.20858764648438, "loss": 2.9607, "rewards/accuracies": 0.5, "rewards/chosen": -8.307355880737305, "rewards/margins": -2.592226028442383, "rewards/rejected": -5.715129852294922, "step": 8896 }, { "epoch": 1.38, "learning_rate": 7.62191399968878e-06, "logits/chosen": -0.994192898273468, "logits/rejected": -2.131574869155884, "logps/chosen": -141.122314453125, "logps/rejected": -596.3033447265625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.4506893157958984, "rewards/margins": 11.308298110961914, "rewards/rejected": -14.758987426757812, "step": 8897 }, { "epoch": 1.38, "learning_rate": 7.621180559157632e-06, "logits/chosen": -1.9782992601394653, "logits/rejected": -2.682579278945923, "logps/chosen": -172.32705688476562, "logps/rejected": -342.0809326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.130742073059082, "rewards/margins": 9.764714241027832, "rewards/rejected": -12.895456314086914, "step": 8898 }, { "epoch": 1.38, "learning_rate": 7.620447118626484e-06, "logits/chosen": -2.028737783432007, "logits/rejected": -2.5615477561950684, "logps/chosen": -272.0087890625, "logps/rejected": -322.19757080078125, "loss": 1.589, "rewards/accuracies": 0.5, "rewards/chosen": -6.356596946716309, "rewards/margins": 0.6621464490890503, "rewards/rejected": -7.018743515014648, "step": 8899 }, { "epoch": 1.38, "learning_rate": 7.619713678095336e-06, "logits/chosen": -1.66455078125, "logits/rejected": -2.8547117710113525, "logps/chosen": -135.97796630859375, "logps/rejected": -464.531494140625, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": -3.931334972381592, "rewards/margins": 5.418732643127441, "rewards/rejected": -9.350068092346191, "step": 8900 }, { "epoch": 1.38, "learning_rate": 7.618980237564189e-06, "logits/chosen": -2.5637364387512207, "logits/rejected": -2.8037784099578857, "logps/chosen": -97.27511596679688, "logps/rejected": -365.84698486328125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.264643669128418, "rewards/margins": 8.15737533569336, "rewards/rejected": -10.422019004821777, "step": 8901 }, { "epoch": 1.38, "learning_rate": 7.618246797033042e-06, "logits/chosen": -2.8967723846435547, "logits/rejected": -2.9525201320648193, "logps/chosen": -356.8583984375, "logps/rejected": -299.1053466796875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -1.5843391418457031, "rewards/margins": 4.8964643478393555, "rewards/rejected": -6.480803489685059, "step": 8902 }, { "epoch": 1.38, "learning_rate": 7.617513356501894e-06, "logits/chosen": -2.7808196544647217, "logits/rejected": -2.51033616065979, "logps/chosen": -345.628662109375, "logps/rejected": -267.65606689453125, "loss": 1.1307, "rewards/accuracies": 0.5, "rewards/chosen": -4.103576183319092, "rewards/margins": 2.970205307006836, "rewards/rejected": -7.0737810134887695, "step": 8903 }, { "epoch": 1.38, "learning_rate": 7.616779915970746e-06, "logits/chosen": -1.2265952825546265, "logits/rejected": -2.8812003135681152, "logps/chosen": -53.715858459472656, "logps/rejected": -342.089599609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.794376850128174, "rewards/margins": 8.072446823120117, "rewards/rejected": -10.866823196411133, "step": 8904 }, { "epoch": 1.38, "learning_rate": 7.616046475439598e-06, "logits/chosen": -2.2465157508850098, "logits/rejected": -2.809810161590576, "logps/chosen": -112.40550994873047, "logps/rejected": -245.69912719726562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9736825227737427, "rewards/margins": 8.311569213867188, "rewards/rejected": -9.28525161743164, "step": 8905 }, { "epoch": 1.39, "learning_rate": 7.61531303490845e-06, "logits/chosen": -2.481151819229126, "logits/rejected": -2.8379337787628174, "logps/chosen": -79.82032012939453, "logps/rejected": -190.94891357421875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -2.6518328189849854, "rewards/margins": 4.710020065307617, "rewards/rejected": -7.361853122711182, "step": 8906 }, { "epoch": 1.39, "learning_rate": 7.614579594377302e-06, "logits/chosen": -2.352532386779785, "logits/rejected": -3.047799587249756, "logps/chosen": -41.270931243896484, "logps/rejected": -282.7918701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.6364035606384277, "rewards/margins": 9.206342697143555, "rewards/rejected": -11.842745780944824, "step": 8907 }, { "epoch": 1.39, "learning_rate": 7.613846153846154e-06, "logits/chosen": -2.7259631156921387, "logits/rejected": -1.852824091911316, "logps/chosen": -369.2940673828125, "logps/rejected": -287.16796875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.0909409523010254, "rewards/margins": 7.623816013336182, "rewards/rejected": -10.714756965637207, "step": 8908 }, { "epoch": 1.39, "learning_rate": 7.6131127133150055e-06, "logits/chosen": -2.612520933151245, "logits/rejected": -2.528573751449585, "logps/chosen": -201.47525024414062, "logps/rejected": -345.771484375, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -2.88912296295166, "rewards/margins": 5.426970481872559, "rewards/rejected": -8.316093444824219, "step": 8909 }, { "epoch": 1.39, "learning_rate": 7.612379272783857e-06, "logits/chosen": -2.359686851501465, "logits/rejected": -2.3805017471313477, "logps/chosen": -406.5009765625, "logps/rejected": -360.50341796875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.6710562705993652, "rewards/margins": 7.0553059577941895, "rewards/rejected": -10.726362228393555, "step": 8910 }, { "epoch": 1.39, "learning_rate": 7.611645832252711e-06, "logits/chosen": -2.284857988357544, "logits/rejected": -2.765566110610962, "logps/chosen": -84.79130554199219, "logps/rejected": -303.1469421386719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.859349489212036, "rewards/margins": 9.59889030456543, "rewards/rejected": -12.458240509033203, "step": 8911 }, { "epoch": 1.39, "learning_rate": 7.610912391721563e-06, "logits/chosen": -1.8847148418426514, "logits/rejected": -2.6364011764526367, "logps/chosen": -220.01353454589844, "logps/rejected": -307.354248046875, "loss": 1.5924, "rewards/accuracies": 0.5, "rewards/chosen": -4.380041599273682, "rewards/margins": 2.8180551528930664, "rewards/rejected": -7.198096752166748, "step": 8912 }, { "epoch": 1.39, "learning_rate": 7.610178951190415e-06, "logits/chosen": -2.029301166534424, "logits/rejected": -2.733891248703003, "logps/chosen": -87.4652328491211, "logps/rejected": -321.5702209472656, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.8832099437713623, "rewards/margins": 6.476809501647949, "rewards/rejected": -8.36001968383789, "step": 8913 }, { "epoch": 1.39, "learning_rate": 7.6094455106592666e-06, "logits/chosen": -1.6607497930526733, "logits/rejected": -2.789506673812866, "logps/chosen": -118.23931121826172, "logps/rejected": -414.3800048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.919853925704956, "rewards/margins": 9.50750732421875, "rewards/rejected": -12.427360534667969, "step": 8914 }, { "epoch": 1.39, "learning_rate": 7.6087120701281184e-06, "logits/chosen": -2.7439658641815186, "logits/rejected": -2.2061283588409424, "logps/chosen": -492.6929626464844, "logps/rejected": -415.0748291015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.3809866905212402, "rewards/margins": 7.518782615661621, "rewards/rejected": -10.899768829345703, "step": 8915 }, { "epoch": 1.39, "learning_rate": 7.60797862959697e-06, "logits/chosen": -2.847792387008667, "logits/rejected": -2.8411858081817627, "logps/chosen": -468.9775695800781, "logps/rejected": -564.570556640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.251795291900635, "rewards/margins": 8.585665702819824, "rewards/rejected": -13.8374605178833, "step": 8916 }, { "epoch": 1.39, "learning_rate": 7.607245189065822e-06, "logits/chosen": -2.129070281982422, "logits/rejected": -2.781545639038086, "logps/chosen": -310.4740295410156, "logps/rejected": -518.1625366210938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.423196792602539, "rewards/margins": 9.835464477539062, "rewards/rejected": -13.258661270141602, "step": 8917 }, { "epoch": 1.39, "learning_rate": 7.606511748534675e-06, "logits/chosen": -1.4932572841644287, "logits/rejected": -2.6319332122802734, "logps/chosen": -182.8256378173828, "logps/rejected": -316.8279724121094, "loss": 1.3962, "rewards/accuracies": 0.5, "rewards/chosen": -3.910433292388916, "rewards/margins": 3.785895586013794, "rewards/rejected": -7.696329116821289, "step": 8918 }, { "epoch": 1.39, "learning_rate": 7.605778308003527e-06, "logits/chosen": -3.2086222171783447, "logits/rejected": -3.159120798110962, "logps/chosen": -405.4381408691406, "logps/rejected": -408.4510498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7973666191101074, "rewards/margins": 9.792623519897461, "rewards/rejected": -12.589990615844727, "step": 8919 }, { "epoch": 1.39, "learning_rate": 7.60504486747238e-06, "logits/chosen": -1.6866451501846313, "logits/rejected": -2.5466837882995605, "logps/chosen": -206.30728149414062, "logps/rejected": -448.5848083496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8590099811553955, "rewards/margins": 9.583209991455078, "rewards/rejected": -12.442219734191895, "step": 8920 }, { "epoch": 1.39, "learning_rate": 7.604311426941232e-06, "logits/chosen": -2.6210877895355225, "logits/rejected": -2.7824220657348633, "logps/chosen": -81.4290542602539, "logps/rejected": -222.26710510253906, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -3.205855131149292, "rewards/margins": 6.730128288269043, "rewards/rejected": -9.935983657836914, "step": 8921 }, { "epoch": 1.39, "learning_rate": 7.603577986410084e-06, "logits/chosen": -2.4954421520233154, "logits/rejected": -2.6342976093292236, "logps/chosen": -103.89505004882812, "logps/rejected": -337.13885498046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.4881012439727783, "rewards/margins": 8.703213691711426, "rewards/rejected": -11.191314697265625, "step": 8922 }, { "epoch": 1.39, "learning_rate": 7.602844545878936e-06, "logits/chosen": -2.7095954418182373, "logits/rejected": -2.6683871746063232, "logps/chosen": -84.92166900634766, "logps/rejected": -280.48480224609375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -0.8514766693115234, "rewards/margins": 6.269330978393555, "rewards/rejected": -7.120807647705078, "step": 8923 }, { "epoch": 1.39, "learning_rate": 7.602111105347788e-06, "logits/chosen": -1.6551841497421265, "logits/rejected": -2.7405707836151123, "logps/chosen": -177.58172607421875, "logps/rejected": -396.8369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5413383841514587, "rewards/margins": 10.722795486450195, "rewards/rejected": -11.26413345336914, "step": 8924 }, { "epoch": 1.39, "learning_rate": 7.60137766481664e-06, "logits/chosen": -1.7918226718902588, "logits/rejected": -2.741642713546753, "logps/chosen": -207.94427490234375, "logps/rejected": -418.64129638671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7332584857940674, "rewards/margins": 8.870119094848633, "rewards/rejected": -10.603378295898438, "step": 8925 }, { "epoch": 1.39, "learning_rate": 7.600644224285492e-06, "logits/chosen": -2.35516357421875, "logits/rejected": -2.814297914505005, "logps/chosen": -93.72206115722656, "logps/rejected": -363.0752258300781, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.3365349769592285, "rewards/margins": 7.509377479553223, "rewards/rejected": -9.84591293334961, "step": 8926 }, { "epoch": 1.39, "learning_rate": 7.5999107837543435e-06, "logits/chosen": -2.0894691944122314, "logits/rejected": -2.5481162071228027, "logps/chosen": -124.87905883789062, "logps/rejected": -279.1449279785156, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -3.0785725116729736, "rewards/margins": 7.6579132080078125, "rewards/rejected": -10.736486434936523, "step": 8927 }, { "epoch": 1.39, "learning_rate": 7.599177343223195e-06, "logits/chosen": -2.4257819652557373, "logits/rejected": -2.9011521339416504, "logps/chosen": -563.70654296875, "logps/rejected": -633.6976318359375, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": -4.6696457862854, "rewards/margins": 6.9346022605896, "rewards/rejected": -11.604248046875, "step": 8928 }, { "epoch": 1.39, "learning_rate": 7.598443902692049e-06, "logits/chosen": -2.8433055877685547, "logits/rejected": -2.550555467605591, "logps/chosen": -181.49667358398438, "logps/rejected": -136.95643615722656, "loss": 0.2399, "rewards/accuracies": 1.0, "rewards/chosen": -3.7301602363586426, "rewards/margins": 3.989335536956787, "rewards/rejected": -7.71949577331543, "step": 8929 }, { "epoch": 1.39, "learning_rate": 7.597710462160901e-06, "logits/chosen": -2.4012045860290527, "logits/rejected": -3.110386610031128, "logps/chosen": -286.7010192871094, "logps/rejected": -353.1298522949219, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.2753639221191406, "rewards/margins": 5.297536849975586, "rewards/rejected": -7.572900772094727, "step": 8930 }, { "epoch": 1.39, "learning_rate": 7.596977021629753e-06, "logits/chosen": -2.5404794216156006, "logits/rejected": -2.6932616233825684, "logps/chosen": -89.81719207763672, "logps/rejected": -241.0301971435547, "loss": 0.0343, "rewards/accuracies": 1.0, "rewards/chosen": -4.193589210510254, "rewards/margins": 4.562710762023926, "rewards/rejected": -8.75629997253418, "step": 8931 }, { "epoch": 1.39, "learning_rate": 7.596243581098605e-06, "logits/chosen": -2.7392690181732178, "logits/rejected": -2.7494266033172607, "logps/chosen": -610.4857177734375, "logps/rejected": -954.1068725585938, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -5.032224655151367, "rewards/margins": 5.893141746520996, "rewards/rejected": -10.925366401672363, "step": 8932 }, { "epoch": 1.39, "learning_rate": 7.5955101405674565e-06, "logits/chosen": -1.7763729095458984, "logits/rejected": -2.4141006469726562, "logps/chosen": -222.68197631835938, "logps/rejected": -337.7359619140625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.3303275108337402, "rewards/margins": 7.879465103149414, "rewards/rejected": -10.209793090820312, "step": 8933 }, { "epoch": 1.39, "learning_rate": 7.594776700036308e-06, "logits/chosen": -2.3535125255584717, "logits/rejected": -2.842233180999756, "logps/chosen": -246.49774169921875, "logps/rejected": -437.859130859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2151435613632202, "rewards/margins": 8.793207168579102, "rewards/rejected": -10.00835132598877, "step": 8934 }, { "epoch": 1.39, "learning_rate": 7.594043259505161e-06, "logits/chosen": -2.6264090538024902, "logits/rejected": -1.9854787588119507, "logps/chosen": -490.68927001953125, "logps/rejected": -136.16725158691406, "loss": 0.7401, "rewards/accuracies": 0.5, "rewards/chosen": -4.872094631195068, "rewards/margins": 0.4995049238204956, "rewards/rejected": -5.3715996742248535, "step": 8935 }, { "epoch": 1.39, "learning_rate": 7.593309818974013e-06, "logits/chosen": -3.126962661743164, "logits/rejected": -2.4195122718811035, "logps/chosen": -304.5642395019531, "logps/rejected": -148.5606689453125, "loss": 0.6234, "rewards/accuracies": 0.5, "rewards/chosen": -2.091963291168213, "rewards/margins": 3.9735724925994873, "rewards/rejected": -6.065535545349121, "step": 8936 }, { "epoch": 1.39, "learning_rate": 7.592576378442865e-06, "logits/chosen": -2.6446359157562256, "logits/rejected": -2.774198293685913, "logps/chosen": -108.57051849365234, "logps/rejected": -224.03811645507812, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.6073014736175537, "rewards/margins": 6.136425018310547, "rewards/rejected": -9.74372673034668, "step": 8937 }, { "epoch": 1.39, "learning_rate": 7.591842937911718e-06, "logits/chosen": -2.884847402572632, "logits/rejected": -2.827883005142212, "logps/chosen": -122.08507537841797, "logps/rejected": -114.40534210205078, "loss": 0.7597, "rewards/accuracies": 0.5, "rewards/chosen": -5.507694244384766, "rewards/margins": 2.120896100997925, "rewards/rejected": -7.6285905838012695, "step": 8938 }, { "epoch": 1.39, "learning_rate": 7.59110949738057e-06, "logits/chosen": -1.8019520044326782, "logits/rejected": -2.922102689743042, "logps/chosen": -197.72561645507812, "logps/rejected": -534.24462890625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.7313473224639893, "rewards/margins": 8.593731880187988, "rewards/rejected": -11.325078964233398, "step": 8939 }, { "epoch": 1.39, "learning_rate": 7.590376056849422e-06, "logits/chosen": -1.9325644969940186, "logits/rejected": -2.7635819911956787, "logps/chosen": -99.56777954101562, "logps/rejected": -302.16363525390625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -5.2206010818481445, "rewards/margins": 4.909151077270508, "rewards/rejected": -10.129752159118652, "step": 8940 }, { "epoch": 1.39, "learning_rate": 7.589642616318274e-06, "logits/chosen": -2.828350067138672, "logits/rejected": -2.7311291694641113, "logps/chosen": -103.42140197753906, "logps/rejected": -183.470703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.9717888832092285, "rewards/margins": 7.768511772155762, "rewards/rejected": -9.740301132202148, "step": 8941 }, { "epoch": 1.39, "learning_rate": 7.588909175787126e-06, "logits/chosen": -0.4492669105529785, "logits/rejected": -2.7196481227874756, "logps/chosen": -107.46922302246094, "logps/rejected": -631.8803100585938, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.7541730403900146, "rewards/margins": 7.1480913162231445, "rewards/rejected": -10.902263641357422, "step": 8942 }, { "epoch": 1.39, "learning_rate": 7.588175735255978e-06, "logits/chosen": -2.504432201385498, "logits/rejected": -2.9481160640716553, "logps/chosen": -239.9318389892578, "logps/rejected": -331.83917236328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.719879627227783, "rewards/margins": 8.684579849243164, "rewards/rejected": -12.404458999633789, "step": 8943 }, { "epoch": 1.39, "learning_rate": 7.58744229472483e-06, "logits/chosen": -2.321378469467163, "logits/rejected": -2.8764331340789795, "logps/chosen": -110.33377838134766, "logps/rejected": -461.7499084472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3400726318359375, "rewards/margins": 12.080530166625977, "rewards/rejected": -13.420602798461914, "step": 8944 }, { "epoch": 1.39, "learning_rate": 7.5867088541936816e-06, "logits/chosen": -1.6265619993209839, "logits/rejected": -2.905808210372925, "logps/chosen": -184.60971069335938, "logps/rejected": -791.056640625, "loss": 0.8676, "rewards/accuracies": 0.5, "rewards/chosen": -6.1544342041015625, "rewards/margins": 1.3699615001678467, "rewards/rejected": -7.524395942687988, "step": 8945 }, { "epoch": 1.39, "learning_rate": 7.5859754136625334e-06, "logits/chosen": -2.7259037494659424, "logits/rejected": -2.1836676597595215, "logps/chosen": -221.853515625, "logps/rejected": -206.8412628173828, "loss": 1.0463, "rewards/accuracies": 0.5, "rewards/chosen": -6.324967384338379, "rewards/margins": 2.902735471725464, "rewards/rejected": -9.227703094482422, "step": 8946 }, { "epoch": 1.39, "learning_rate": 7.585241973131387e-06, "logits/chosen": -2.105260133743286, "logits/rejected": -2.7238566875457764, "logps/chosen": -76.06655883789062, "logps/rejected": -292.7579040527344, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7465147972106934, "rewards/margins": 7.700161933898926, "rewards/rejected": -9.446676254272461, "step": 8947 }, { "epoch": 1.39, "learning_rate": 7.584508532600239e-06, "logits/chosen": -2.9495232105255127, "logits/rejected": -2.680968999862671, "logps/chosen": -176.98806762695312, "logps/rejected": -230.10101318359375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.7789642810821533, "rewards/margins": 6.856810092926025, "rewards/rejected": -8.635774612426758, "step": 8948 }, { "epoch": 1.39, "learning_rate": 7.583775092069091e-06, "logits/chosen": -2.144688844680786, "logits/rejected": -2.6079623699188232, "logps/chosen": -174.0004425048828, "logps/rejected": -318.74371337890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.3755149841308594, "rewards/margins": 10.291280746459961, "rewards/rejected": -11.66679573059082, "step": 8949 }, { "epoch": 1.39, "learning_rate": 7.583041651537943e-06, "logits/chosen": -1.6832797527313232, "logits/rejected": -2.2621939182281494, "logps/chosen": -40.59198760986328, "logps/rejected": -216.9456787109375, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": -2.4204089641571045, "rewards/margins": 5.494673728942871, "rewards/rejected": -7.915081977844238, "step": 8950 }, { "epoch": 1.39, "learning_rate": 7.5823082110067945e-06, "logits/chosen": -2.51751708984375, "logits/rejected": -2.3902904987335205, "logps/chosen": -139.59487915039062, "logps/rejected": -174.94671630859375, "loss": 0.5089, "rewards/accuracies": 0.5, "rewards/chosen": -3.40177059173584, "rewards/margins": 2.2719008922576904, "rewards/rejected": -5.673671722412109, "step": 8951 }, { "epoch": 1.39, "learning_rate": 7.581574770475647e-06, "logits/chosen": -2.2817928791046143, "logits/rejected": -2.951472043991089, "logps/chosen": -154.6977996826172, "logps/rejected": -262.9219970703125, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -2.901771306991577, "rewards/margins": 4.731222629547119, "rewards/rejected": -7.632993698120117, "step": 8952 }, { "epoch": 1.39, "learning_rate": 7.580841329944499e-06, "logits/chosen": -2.802048921585083, "logits/rejected": -2.7935678958892822, "logps/chosen": -234.07705688476562, "logps/rejected": -348.54742431640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.7955210208892822, "rewards/margins": 7.927199363708496, "rewards/rejected": -10.722721099853516, "step": 8953 }, { "epoch": 1.39, "learning_rate": 7.580107889413351e-06, "logits/chosen": -2.9024498462677, "logits/rejected": -1.8980679512023926, "logps/chosen": -502.7631530761719, "logps/rejected": -325.82391357421875, "loss": 0.2839, "rewards/accuracies": 1.0, "rewards/chosen": -5.555414199829102, "rewards/margins": 3.6819992065429688, "rewards/rejected": -9.23741340637207, "step": 8954 }, { "epoch": 1.39, "learning_rate": 7.579374448882205e-06, "logits/chosen": -2.75339937210083, "logits/rejected": -3.1991050243377686, "logps/chosen": -78.25447082519531, "logps/rejected": -293.35028076171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.103814125061035, "rewards/margins": 8.204137802124023, "rewards/rejected": -11.307951927185059, "step": 8955 }, { "epoch": 1.39, "learning_rate": 7.5786410083510565e-06, "logits/chosen": -1.7716346979141235, "logits/rejected": -3.176107406616211, "logps/chosen": -287.0481872558594, "logps/rejected": -562.1114501953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.6847542524337769, "rewards/margins": 9.4871244430542, "rewards/rejected": -11.171878814697266, "step": 8956 }, { "epoch": 1.39, "learning_rate": 7.577907567819908e-06, "logits/chosen": -1.4925142526626587, "logits/rejected": -2.429295539855957, "logps/chosen": -66.7275390625, "logps/rejected": -461.58172607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3938484191894531, "rewards/margins": 9.51143741607666, "rewards/rejected": -10.905285835266113, "step": 8957 }, { "epoch": 1.39, "learning_rate": 7.57717412728876e-06, "logits/chosen": -2.2148172855377197, "logits/rejected": -3.0028421878814697, "logps/chosen": -156.4276885986328, "logps/rejected": -391.8499755859375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.707530975341797, "rewards/margins": 6.111654281616211, "rewards/rejected": -8.819185256958008, "step": 8958 }, { "epoch": 1.39, "learning_rate": 7.576440686757612e-06, "logits/chosen": -1.633485198020935, "logits/rejected": -2.5620319843292236, "logps/chosen": -224.71343994140625, "logps/rejected": -380.9124755859375, "loss": 0.2934, "rewards/accuracies": 1.0, "rewards/chosen": -7.47654390335083, "rewards/margins": 1.084505558013916, "rewards/rejected": -8.561049461364746, "step": 8959 }, { "epoch": 1.39, "learning_rate": 7.575707246226464e-06, "logits/chosen": -2.9235615730285645, "logits/rejected": -3.0012385845184326, "logps/chosen": -414.996337890625, "logps/rejected": -522.6351318359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.762465715408325, "rewards/margins": 6.981959342956543, "rewards/rejected": -10.744424819946289, "step": 8960 }, { "epoch": 1.39, "learning_rate": 7.574973805695316e-06, "logits/chosen": -1.5128505229949951, "logits/rejected": -2.9689507484436035, "logps/chosen": -71.36558532714844, "logps/rejected": -453.76458740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8835864067077637, "rewards/margins": 10.228692054748535, "rewards/rejected": -13.112278938293457, "step": 8961 }, { "epoch": 1.39, "learning_rate": 7.574240365164168e-06, "logits/chosen": -2.5742087364196777, "logits/rejected": -1.7839914560317993, "logps/chosen": -259.6493835449219, "logps/rejected": -269.85565185546875, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/chosen": -0.7966592907905579, "rewards/margins": 6.987752914428711, "rewards/rejected": -7.784412384033203, "step": 8962 }, { "epoch": 1.39, "learning_rate": 7.57350692463302e-06, "logits/chosen": -2.844494342803955, "logits/rejected": -2.842259407043457, "logps/chosen": -311.6279296875, "logps/rejected": -285.35943603515625, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -3.125312566757202, "rewards/margins": 4.662398338317871, "rewards/rejected": -7.787710666656494, "step": 8963 }, { "epoch": 1.39, "learning_rate": 7.572773484101873e-06, "logits/chosen": -2.8130507469177246, "logits/rejected": -2.8033440113067627, "logps/chosen": -129.4480438232422, "logps/rejected": -194.76248168945312, "loss": 1.6402, "rewards/accuracies": 0.5, "rewards/chosen": -7.186150550842285, "rewards/margins": 0.8179738521575928, "rewards/rejected": -8.004124641418457, "step": 8964 }, { "epoch": 1.39, "learning_rate": 7.572040043570725e-06, "logits/chosen": -2.9411346912384033, "logits/rejected": -2.5125863552093506, "logps/chosen": -212.84518432617188, "logps/rejected": -242.27114868164062, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/chosen": -2.7765188217163086, "rewards/margins": 4.591497421264648, "rewards/rejected": -7.368016242980957, "step": 8965 }, { "epoch": 1.39, "learning_rate": 7.571306603039577e-06, "logits/chosen": -2.7681500911712646, "logits/rejected": -2.5619494915008545, "logps/chosen": -686.578857421875, "logps/rejected": -469.96124267578125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.2796051502227783, "rewards/margins": 6.99564266204834, "rewards/rejected": -8.275247573852539, "step": 8966 }, { "epoch": 1.39, "learning_rate": 7.570573162508429e-06, "logits/chosen": -1.2585986852645874, "logits/rejected": -2.788412570953369, "logps/chosen": -144.30889892578125, "logps/rejected": -606.488525390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3997070789337158, "rewards/margins": 10.439437866210938, "rewards/rejected": -11.83914566040039, "step": 8967 }, { "epoch": 1.39, "learning_rate": 7.569839721977281e-06, "logits/chosen": -1.7486753463745117, "logits/rejected": -2.5930068492889404, "logps/chosen": -118.60137939453125, "logps/rejected": -368.34375, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.3531737327575684, "rewards/margins": 6.3014116287231445, "rewards/rejected": -8.654585838317871, "step": 8968 }, { "epoch": 1.39, "learning_rate": 7.569106281446133e-06, "logits/chosen": -1.619795322418213, "logits/rejected": -2.671844005584717, "logps/chosen": -66.34612274169922, "logps/rejected": -261.79986572265625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -3.140620708465576, "rewards/margins": 4.946743488311768, "rewards/rejected": -8.087364196777344, "step": 8969 }, { "epoch": 1.4, "learning_rate": 7.568372840914985e-06, "logits/chosen": -2.860413074493408, "logits/rejected": -1.936057209968567, "logps/chosen": -457.7069091796875, "logps/rejected": -395.90399169921875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.792410373687744, "rewards/margins": 7.398073196411133, "rewards/rejected": -11.190484046936035, "step": 8970 }, { "epoch": 1.4, "learning_rate": 7.567639400383837e-06, "logits/chosen": -2.8774099349975586, "logits/rejected": -2.9755589962005615, "logps/chosen": -260.343505859375, "logps/rejected": -303.5771789550781, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.4353069067001343, "rewards/margins": 5.622872352600098, "rewards/rejected": -7.05817985534668, "step": 8971 }, { "epoch": 1.4, "learning_rate": 7.566905959852689e-06, "logits/chosen": -2.2948107719421387, "logits/rejected": -2.4908103942871094, "logps/chosen": -121.42451477050781, "logps/rejected": -215.5858917236328, "loss": 0.0204, "rewards/accuracies": 1.0, "rewards/chosen": -3.9397149085998535, "rewards/margins": 4.712558746337891, "rewards/rejected": -8.652274131774902, "step": 8972 }, { "epoch": 1.4, "learning_rate": 7.566172519321543e-06, "logits/chosen": -2.340527057647705, "logits/rejected": -2.682408332824707, "logps/chosen": -120.90744018554688, "logps/rejected": -268.5931701660156, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.1790666580200195, "rewards/margins": 6.813206672668457, "rewards/rejected": -9.992273330688477, "step": 8973 }, { "epoch": 1.4, "learning_rate": 7.5654390787903945e-06, "logits/chosen": -0.6350752711296082, "logits/rejected": -0.8684680461883545, "logps/chosen": -155.96612548828125, "logps/rejected": -250.2599639892578, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -1.7463829517364502, "rewards/margins": 5.336522579193115, "rewards/rejected": -7.0829057693481445, "step": 8974 }, { "epoch": 1.4, "learning_rate": 7.564705638259246e-06, "logits/chosen": -2.0079526901245117, "logits/rejected": -2.7516536712646484, "logps/chosen": -188.35682678222656, "logps/rejected": -334.48663330078125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.1422057151794434, "rewards/margins": 6.403750419616699, "rewards/rejected": -9.5459566116333, "step": 8975 }, { "epoch": 1.4, "learning_rate": 7.563972197728098e-06, "logits/chosen": -1.6538597345352173, "logits/rejected": -2.5528078079223633, "logps/chosen": -181.28155517578125, "logps/rejected": -423.95001220703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.6680748462677, "rewards/margins": 7.188600540161133, "rewards/rejected": -9.85667610168457, "step": 8976 }, { "epoch": 1.4, "learning_rate": 7.56323875719695e-06, "logits/chosen": -2.841688632965088, "logits/rejected": -2.4862771034240723, "logps/chosen": -198.99700927734375, "logps/rejected": -96.06024932861328, "loss": 2.0119, "rewards/accuracies": 0.5, "rewards/chosen": -3.736182451248169, "rewards/margins": 0.6382014751434326, "rewards/rejected": -4.374383926391602, "step": 8977 }, { "epoch": 1.4, "learning_rate": 7.562505316665802e-06, "logits/chosen": -0.7211755514144897, "logits/rejected": -2.2089364528656006, "logps/chosen": -135.75973510742188, "logps/rejected": -409.3234558105469, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -3.090683937072754, "rewards/margins": 7.440680980682373, "rewards/rejected": -10.531364440917969, "step": 8978 }, { "epoch": 1.4, "learning_rate": 7.561771876134654e-06, "logits/chosen": -1.7985765933990479, "logits/rejected": -3.0380032062530518, "logps/chosen": -130.3861083984375, "logps/rejected": -267.154541015625, "loss": 0.2678, "rewards/accuracies": 1.0, "rewards/chosen": -6.0499677658081055, "rewards/margins": 1.7935665845870972, "rewards/rejected": -7.843534469604492, "step": 8979 }, { "epoch": 1.4, "learning_rate": 7.561038435603506e-06, "logits/chosen": -2.1338651180267334, "logits/rejected": -2.840914249420166, "logps/chosen": -123.780517578125, "logps/rejected": -438.2908020019531, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -3.7919301986694336, "rewards/margins": 7.734375, "rewards/rejected": -11.526305198669434, "step": 8980 }, { "epoch": 1.4, "learning_rate": 7.560304995072358e-06, "logits/chosen": -2.0260190963745117, "logits/rejected": -2.8772060871124268, "logps/chosen": -88.88832092285156, "logps/rejected": -310.1025390625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.172969341278076, "rewards/margins": 8.953659057617188, "rewards/rejected": -11.126628875732422, "step": 8981 }, { "epoch": 1.4, "learning_rate": 7.559571554541211e-06, "logits/chosen": -2.713609457015991, "logits/rejected": -2.6209733486175537, "logps/chosen": -275.66754150390625, "logps/rejected": -354.1710510253906, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.60392427444458, "rewards/margins": 5.525786399841309, "rewards/rejected": -8.129711151123047, "step": 8982 }, { "epoch": 1.4, "learning_rate": 7.558838114010063e-06, "logits/chosen": -1.9962220191955566, "logits/rejected": -2.4975597858428955, "logps/chosen": -324.9667053222656, "logps/rejected": -252.32107543945312, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": -2.6015257835388184, "rewards/margins": 4.09675407409668, "rewards/rejected": -6.69827938079834, "step": 8983 }, { "epoch": 1.4, "learning_rate": 7.558104673478915e-06, "logits/chosen": -1.9508581161499023, "logits/rejected": -2.8210787773132324, "logps/chosen": -128.93878173828125, "logps/rejected": -571.229736328125, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -3.071259021759033, "rewards/margins": 5.937137603759766, "rewards/rejected": -9.008397102355957, "step": 8984 }, { "epoch": 1.4, "learning_rate": 7.557371232947767e-06, "logits/chosen": -2.6588385105133057, "logits/rejected": -2.736553192138672, "logps/chosen": -132.98162841796875, "logps/rejected": -172.1009521484375, "loss": 0.2153, "rewards/accuracies": 1.0, "rewards/chosen": -2.844221591949463, "rewards/margins": 2.366942882537842, "rewards/rejected": -5.211164474487305, "step": 8985 }, { "epoch": 1.4, "learning_rate": 7.55663779241662e-06, "logits/chosen": -1.6382359266281128, "logits/rejected": -1.582066297531128, "logps/chosen": -300.76007080078125, "logps/rejected": -162.42681884765625, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/chosen": -2.992650270462036, "rewards/margins": 5.382181644439697, "rewards/rejected": -8.374832153320312, "step": 8986 }, { "epoch": 1.4, "learning_rate": 7.5559043518854715e-06, "logits/chosen": -2.9321041107177734, "logits/rejected": -2.936678409576416, "logps/chosen": -220.56832885742188, "logps/rejected": -216.47235107421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.799147129058838, "rewards/margins": 9.118240356445312, "rewards/rejected": -11.917387008666992, "step": 8987 }, { "epoch": 1.4, "learning_rate": 7.555170911354323e-06, "logits/chosen": -1.56171452999115, "logits/rejected": -2.411864995956421, "logps/chosen": -214.5327911376953, "logps/rejected": -235.34396362304688, "loss": 1.2281, "rewards/accuracies": 0.5, "rewards/chosen": -6.036709785461426, "rewards/margins": 1.59196138381958, "rewards/rejected": -7.628670692443848, "step": 8988 }, { "epoch": 1.4, "learning_rate": 7.554437470823175e-06, "logits/chosen": -2.455531597137451, "logits/rejected": -2.029355764389038, "logps/chosen": -158.37901306152344, "logps/rejected": -100.54871368408203, "loss": 1.3664, "rewards/accuracies": 0.5, "rewards/chosen": -4.259423732757568, "rewards/margins": 0.30770421028137207, "rewards/rejected": -4.5671281814575195, "step": 8989 }, { "epoch": 1.4, "learning_rate": 7.553704030292027e-06, "logits/chosen": -2.5730702877044678, "logits/rejected": -3.0264315605163574, "logps/chosen": -70.00799560546875, "logps/rejected": -253.99395751953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.7605791091918945, "rewards/margins": 8.40006160736084, "rewards/rejected": -11.160640716552734, "step": 8990 }, { "epoch": 1.4, "learning_rate": 7.552970589760881e-06, "logits/chosen": -2.5448975563049316, "logits/rejected": -2.7841506004333496, "logps/chosen": -103.39567565917969, "logps/rejected": -238.0738067626953, "loss": 0.0668, "rewards/accuracies": 1.0, "rewards/chosen": -1.164982795715332, "rewards/margins": 3.721219778060913, "rewards/rejected": -4.886202335357666, "step": 8991 }, { "epoch": 1.4, "learning_rate": 7.5522371492297325e-06, "logits/chosen": -2.3067214488983154, "logits/rejected": -2.815181016921997, "logps/chosen": -127.93986511230469, "logps/rejected": -248.4783935546875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.8424854278564453, "rewards/margins": 5.204179763793945, "rewards/rejected": -8.04666519165039, "step": 8992 }, { "epoch": 1.4, "learning_rate": 7.5515037086985844e-06, "logits/chosen": -2.8750252723693848, "logits/rejected": -2.7103445529937744, "logps/chosen": -576.2125244140625, "logps/rejected": -915.1114501953125, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -3.241819143295288, "rewards/margins": 5.531266212463379, "rewards/rejected": -8.77308464050293, "step": 8993 }, { "epoch": 1.4, "learning_rate": 7.550770268167436e-06, "logits/chosen": -2.612847328186035, "logits/rejected": -2.287336826324463, "logps/chosen": -321.473388671875, "logps/rejected": -312.2205810546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -3.4660022258758545, "rewards/margins": 7.347408294677734, "rewards/rejected": -10.813409805297852, "step": 8994 }, { "epoch": 1.4, "learning_rate": 7.550036827636288e-06, "logits/chosen": -2.304353713989258, "logits/rejected": -2.957064151763916, "logps/chosen": -145.3078155517578, "logps/rejected": -415.4712219238281, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.0937275886535645, "rewards/margins": 12.414880752563477, "rewards/rejected": -15.508609771728516, "step": 8995 }, { "epoch": 1.4, "learning_rate": 7.54930338710514e-06, "logits/chosen": -2.712448835372925, "logits/rejected": -2.080216646194458, "logps/chosen": -269.2035217285156, "logps/rejected": -286.11285400390625, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -2.6983582973480225, "rewards/margins": 5.227801322937012, "rewards/rejected": -7.926159858703613, "step": 8996 }, { "epoch": 1.4, "learning_rate": 7.548569946573992e-06, "logits/chosen": -1.8542448282241821, "logits/rejected": -2.731870651245117, "logps/chosen": -273.3129577636719, "logps/rejected": -472.59051513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6165895462036133, "rewards/margins": 13.182903289794922, "rewards/rejected": -15.799491882324219, "step": 8997 }, { "epoch": 1.4, "learning_rate": 7.547836506042844e-06, "logits/chosen": -2.5055222511291504, "logits/rejected": -2.713841438293457, "logps/chosen": -493.32220458984375, "logps/rejected": -494.3199462890625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.9839307069778442, "rewards/margins": 8.123778343200684, "rewards/rejected": -10.107708930969238, "step": 8998 }, { "epoch": 1.4, "learning_rate": 7.547103065511696e-06, "logits/chosen": -1.3894609212875366, "logits/rejected": -2.6669647693634033, "logps/chosen": -96.04701232910156, "logps/rejected": -219.40101623535156, "loss": 0.2495, "rewards/accuracies": 1.0, "rewards/chosen": -5.7024383544921875, "rewards/margins": 3.214171886444092, "rewards/rejected": -8.916610717773438, "step": 8999 }, { "epoch": 1.4, "learning_rate": 7.546369624980549e-06, "logits/chosen": -2.792584180831909, "logits/rejected": -3.0567448139190674, "logps/chosen": -56.88491439819336, "logps/rejected": -282.7833251953125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.1037418842315674, "rewards/margins": 6.363672256469727, "rewards/rejected": -9.467414855957031, "step": 9000 }, { "epoch": 1.4, "learning_rate": 7.545636184449401e-06, "logits/chosen": -2.6703155040740967, "logits/rejected": -2.1972360610961914, "logps/chosen": -307.8116455078125, "logps/rejected": -351.2683410644531, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -2.905559539794922, "rewards/margins": 3.308077335357666, "rewards/rejected": -6.213636875152588, "step": 9001 }, { "epoch": 1.4, "learning_rate": 7.544902743918253e-06, "logits/chosen": -2.8414268493652344, "logits/rejected": -2.2784647941589355, "logps/chosen": -123.45475769042969, "logps/rejected": -323.9623107910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7430568337440491, "rewards/margins": 11.568925857543945, "rewards/rejected": -12.311983108520508, "step": 9002 }, { "epoch": 1.4, "learning_rate": 7.544169303387106e-06, "logits/chosen": -0.9506307244300842, "logits/rejected": -1.36268150806427, "logps/chosen": -164.5657958984375, "logps/rejected": -411.8603210449219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6132373809814453, "rewards/margins": 9.81991195678711, "rewards/rejected": -13.433149337768555, "step": 9003 }, { "epoch": 1.4, "learning_rate": 7.543435862855958e-06, "logits/chosen": -2.2683420181274414, "logits/rejected": -3.1460981369018555, "logps/chosen": -150.51507568359375, "logps/rejected": -396.38873291015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.0790313482284546, "rewards/margins": 8.688570022583008, "rewards/rejected": -9.767601013183594, "step": 9004 }, { "epoch": 1.4, "learning_rate": 7.5427024223248095e-06, "logits/chosen": -2.6099369525909424, "logits/rejected": -1.8268245458602905, "logps/chosen": -205.24676513671875, "logps/rejected": -219.76820373535156, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -3.251584053039551, "rewards/margins": 5.62691593170166, "rewards/rejected": -8.878499984741211, "step": 9005 }, { "epoch": 1.4, "learning_rate": 7.541968981793661e-06, "logits/chosen": -2.4957921504974365, "logits/rejected": -2.899831533432007, "logps/chosen": -547.412841796875, "logps/rejected": -541.540283203125, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -5.57504940032959, "rewards/margins": 3.3977913856506348, "rewards/rejected": -8.972841262817383, "step": 9006 }, { "epoch": 1.4, "learning_rate": 7.541235541262513e-06, "logits/chosen": -2.3930141925811768, "logits/rejected": -3.0049259662628174, "logps/chosen": -165.541015625, "logps/rejected": -427.1280212402344, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -1.355140209197998, "rewards/margins": 7.810268402099609, "rewards/rejected": -9.165409088134766, "step": 9007 }, { "epoch": 1.4, "learning_rate": 7.540502100731365e-06, "logits/chosen": -1.6891155242919922, "logits/rejected": -2.9214980602264404, "logps/chosen": -80.21640014648438, "logps/rejected": -366.6387939453125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -1.5077978372573853, "rewards/margins": 7.671083450317383, "rewards/rejected": -9.17888069152832, "step": 9008 }, { "epoch": 1.4, "learning_rate": 7.539768660200219e-06, "logits/chosen": -1.449836015701294, "logits/rejected": -2.741328001022339, "logps/chosen": -300.09686279296875, "logps/rejected": -661.896728515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.707139730453491, "rewards/margins": 8.845823287963867, "rewards/rejected": -12.552963256835938, "step": 9009 }, { "epoch": 1.4, "learning_rate": 7.539035219669071e-06, "logits/chosen": -2.2408053874969482, "logits/rejected": -2.7310400009155273, "logps/chosen": -396.1434631347656, "logps/rejected": -380.92010498046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.131085157394409, "rewards/margins": 7.513803482055664, "rewards/rejected": -9.644887924194336, "step": 9010 }, { "epoch": 1.4, "learning_rate": 7.5383017791379225e-06, "logits/chosen": -2.395932912826538, "logits/rejected": -2.969434976577759, "logps/chosen": -100.1798324584961, "logps/rejected": -355.4846496582031, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.654224395751953, "rewards/margins": 6.640735626220703, "rewards/rejected": -9.294960021972656, "step": 9011 }, { "epoch": 1.4, "learning_rate": 7.537568338606774e-06, "logits/chosen": -3.2618179321289062, "logits/rejected": -2.872537136077881, "logps/chosen": -483.9263610839844, "logps/rejected": -427.0025634765625, "loss": 1.3521, "rewards/accuracies": 0.5, "rewards/chosen": -5.3976149559021, "rewards/margins": 2.6149673461914062, "rewards/rejected": -8.012582778930664, "step": 9012 }, { "epoch": 1.4, "learning_rate": 7.536834898075626e-06, "logits/chosen": -2.4136154651641846, "logits/rejected": -2.7060153484344482, "logps/chosen": -601.755859375, "logps/rejected": -600.6090698242188, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -3.127742052078247, "rewards/margins": 4.716180801391602, "rewards/rejected": -7.8439226150512695, "step": 9013 }, { "epoch": 1.4, "learning_rate": 7.536101457544478e-06, "logits/chosen": -2.502925157546997, "logits/rejected": -1.4786220788955688, "logps/chosen": -585.5608520507812, "logps/rejected": -427.35675048828125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -4.070409774780273, "rewards/margins": 6.916713714599609, "rewards/rejected": -10.987123489379883, "step": 9014 }, { "epoch": 1.4, "learning_rate": 7.53536801701333e-06, "logits/chosen": -1.9465357065200806, "logits/rejected": -2.980879306793213, "logps/chosen": -187.9584503173828, "logps/rejected": -361.72711181640625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -5.711977958679199, "rewards/margins": 6.078303337097168, "rewards/rejected": -11.790281295776367, "step": 9015 }, { "epoch": 1.4, "learning_rate": 7.534634576482182e-06, "logits/chosen": -2.5703928470611572, "logits/rejected": -3.0090627670288086, "logps/chosen": -85.99412536621094, "logps/rejected": -332.30267333984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.649040937423706, "rewards/margins": 10.59693717956543, "rewards/rejected": -12.245979309082031, "step": 9016 }, { "epoch": 1.4, "learning_rate": 7.533901135951034e-06, "logits/chosen": -2.8825392723083496, "logits/rejected": -2.5355520248413086, "logps/chosen": -398.29656982421875, "logps/rejected": -260.8416442871094, "loss": 0.2528, "rewards/accuracies": 1.0, "rewards/chosen": -3.5326309204101562, "rewards/margins": 3.600882053375244, "rewards/rejected": -7.1335129737854, "step": 9017 }, { "epoch": 1.4, "learning_rate": 7.533167695419887e-06, "logits/chosen": -2.6326663494110107, "logits/rejected": -2.1859188079833984, "logps/chosen": -257.96502685546875, "logps/rejected": -379.6711730957031, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -4.773496627807617, "rewards/margins": 7.501963138580322, "rewards/rejected": -12.275459289550781, "step": 9018 }, { "epoch": 1.4, "learning_rate": 7.532434254888739e-06, "logits/chosen": -2.769993305206299, "logits/rejected": -2.384592056274414, "logps/chosen": -197.88525390625, "logps/rejected": -225.78810119628906, "loss": 0.804, "rewards/accuracies": 0.5, "rewards/chosen": -3.8313112258911133, "rewards/margins": 2.8138625621795654, "rewards/rejected": -6.645174026489258, "step": 9019 }, { "epoch": 1.4, "learning_rate": 7.531700814357592e-06, "logits/chosen": -1.794782042503357, "logits/rejected": -2.749762535095215, "logps/chosen": -193.4778289794922, "logps/rejected": -525.9470825195312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5096733570098877, "rewards/margins": 13.22711181640625, "rewards/rejected": -16.736785888671875, "step": 9020 }, { "epoch": 1.4, "learning_rate": 7.530967373826444e-06, "logits/chosen": -3.0083227157592773, "logits/rejected": -2.8131227493286133, "logps/chosen": -186.92376708984375, "logps/rejected": -159.6288299560547, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -3.4427123069763184, "rewards/margins": 3.7258341312408447, "rewards/rejected": -7.168546676635742, "step": 9021 }, { "epoch": 1.4, "learning_rate": 7.530233933295296e-06, "logits/chosen": -2.5416226387023926, "logits/rejected": -2.204200267791748, "logps/chosen": -120.329833984375, "logps/rejected": -113.00013732910156, "loss": 2.0877, "rewards/accuracies": 0.5, "rewards/chosen": -5.249619960784912, "rewards/margins": 0.6770572662353516, "rewards/rejected": -5.926677227020264, "step": 9022 }, { "epoch": 1.4, "learning_rate": 7.5295004927641476e-06, "logits/chosen": -1.4582059383392334, "logits/rejected": -2.5092670917510986, "logps/chosen": -169.41786193847656, "logps/rejected": -609.8802490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.579861640930176, "rewards/margins": 11.608563423156738, "rewards/rejected": -14.188425064086914, "step": 9023 }, { "epoch": 1.4, "learning_rate": 7.5287670522329994e-06, "logits/chosen": -2.2792422771453857, "logits/rejected": -2.695094585418701, "logps/chosen": -104.79705810546875, "logps/rejected": -242.91842651367188, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -1.0838865041732788, "rewards/margins": 7.39108419418335, "rewards/rejected": -8.474970817565918, "step": 9024 }, { "epoch": 1.4, "learning_rate": 7.528033611701851e-06, "logits/chosen": -2.684706211090088, "logits/rejected": -2.787655830383301, "logps/chosen": -347.77008056640625, "logps/rejected": -519.2874755859375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.502814292907715, "rewards/margins": 6.914515495300293, "rewards/rejected": -9.417329788208008, "step": 9025 }, { "epoch": 1.4, "learning_rate": 7.527300171170703e-06, "logits/chosen": -1.592853307723999, "logits/rejected": -2.595106840133667, "logps/chosen": -167.30703735351562, "logps/rejected": -369.8882141113281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5046466588974, "rewards/margins": 9.913739204406738, "rewards/rejected": -11.41838550567627, "step": 9026 }, { "epoch": 1.4, "learning_rate": 7.526566730639557e-06, "logits/chosen": -2.1467251777648926, "logits/rejected": -2.8525023460388184, "logps/chosen": -168.08367919921875, "logps/rejected": -325.4753723144531, "loss": 0.8057, "rewards/accuracies": 0.5, "rewards/chosen": -3.2366881370544434, "rewards/margins": 4.704340934753418, "rewards/rejected": -7.9410295486450195, "step": 9027 }, { "epoch": 1.4, "learning_rate": 7.525833290108409e-06, "logits/chosen": -2.66774582862854, "logits/rejected": -2.955810785293579, "logps/chosen": -108.12665557861328, "logps/rejected": -142.97084045410156, "loss": 2.5781, "rewards/accuracies": 0.5, "rewards/chosen": -6.170077323913574, "rewards/margins": 1.642843246459961, "rewards/rejected": -7.812920570373535, "step": 9028 }, { "epoch": 1.4, "learning_rate": 7.5250998495772605e-06, "logits/chosen": -2.5333359241485596, "logits/rejected": -2.725989580154419, "logps/chosen": -528.8705444335938, "logps/rejected": -523.5958251953125, "loss": 0.1327, "rewards/accuracies": 1.0, "rewards/chosen": -3.0323410034179688, "rewards/margins": 5.835212707519531, "rewards/rejected": -8.8675537109375, "step": 9029 }, { "epoch": 1.4, "learning_rate": 7.524366409046112e-06, "logits/chosen": -2.6790413856506348, "logits/rejected": -2.8882906436920166, "logps/chosen": -77.6250991821289, "logps/rejected": -169.40948486328125, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -1.9936996698379517, "rewards/margins": 5.8857645988464355, "rewards/rejected": -7.879464149475098, "step": 9030 }, { "epoch": 1.4, "learning_rate": 7.523632968514964e-06, "logits/chosen": -1.0807920694351196, "logits/rejected": -2.16841197013855, "logps/chosen": -108.29991912841797, "logps/rejected": -307.46966552734375, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -2.8495030403137207, "rewards/margins": 5.11763858795166, "rewards/rejected": -7.967141151428223, "step": 9031 }, { "epoch": 1.4, "learning_rate": 7.522899527983816e-06, "logits/chosen": -2.9902966022491455, "logits/rejected": -2.6875197887420654, "logps/chosen": -597.4865112304688, "logps/rejected": -434.11920166015625, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -5.772076606750488, "rewards/margins": 5.475137233734131, "rewards/rejected": -11.247213363647461, "step": 9032 }, { "epoch": 1.4, "learning_rate": 7.522166087452668e-06, "logits/chosen": -1.0739130973815918, "logits/rejected": -2.5834362506866455, "logps/chosen": -72.36888122558594, "logps/rejected": -287.3343505859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.8831472396850586, "rewards/margins": 8.495548248291016, "rewards/rejected": -10.378694534301758, "step": 9033 }, { "epoch": 1.4, "learning_rate": 7.52143264692152e-06, "logits/chosen": -2.7112057209014893, "logits/rejected": -2.8242850303649902, "logps/chosen": -245.7532958984375, "logps/rejected": -270.1241760253906, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.9644241333007812, "rewards/margins": 6.288728713989258, "rewards/rejected": -10.253152847290039, "step": 9034 }, { "epoch": 1.41, "learning_rate": 7.520699206390373e-06, "logits/chosen": -2.042875289916992, "logits/rejected": -2.5576446056365967, "logps/chosen": -159.41990661621094, "logps/rejected": -307.90679931640625, "loss": 1.1169, "rewards/accuracies": 0.5, "rewards/chosen": -4.2416815757751465, "rewards/margins": 3.7687931060791016, "rewards/rejected": -8.010475158691406, "step": 9035 }, { "epoch": 1.41, "learning_rate": 7.519965765859225e-06, "logits/chosen": -2.1420340538024902, "logits/rejected": -3.1485228538513184, "logps/chosen": -136.15151977539062, "logps/rejected": -275.12890625, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -4.718692779541016, "rewards/margins": 3.6913390159606934, "rewards/rejected": -8.410032272338867, "step": 9036 }, { "epoch": 1.41, "learning_rate": 7.519232325328078e-06, "logits/chosen": -1.426544427871704, "logits/rejected": -2.835296392440796, "logps/chosen": -145.11279296875, "logps/rejected": -433.1289367675781, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.1912550926208496, "rewards/margins": 6.401745796203613, "rewards/rejected": -8.593000411987305, "step": 9037 }, { "epoch": 1.41, "learning_rate": 7.51849888479693e-06, "logits/chosen": -3.0870914459228516, "logits/rejected": -2.9690446853637695, "logps/chosen": -503.8553771972656, "logps/rejected": -277.72991943359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.6303436756134033, "rewards/margins": 8.246004104614258, "rewards/rejected": -10.876347541809082, "step": 9038 }, { "epoch": 1.41, "learning_rate": 7.517765444265782e-06, "logits/chosen": -2.6051485538482666, "logits/rejected": -0.7033117413520813, "logps/chosen": -334.9289855957031, "logps/rejected": -202.52476501464844, "loss": 1.2055, "rewards/accuracies": 0.5, "rewards/chosen": -3.5058281421661377, "rewards/margins": 2.4690284729003906, "rewards/rejected": -5.974856853485107, "step": 9039 }, { "epoch": 1.41, "learning_rate": 7.517032003734634e-06, "logits/chosen": -2.0536348819732666, "logits/rejected": -2.545015335083008, "logps/chosen": -154.60975646972656, "logps/rejected": -335.1446228027344, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -2.7917163372039795, "rewards/margins": 6.970495223999023, "rewards/rejected": -9.762211799621582, "step": 9040 }, { "epoch": 1.41, "learning_rate": 7.516298563203486e-06, "logits/chosen": -3.0774166584014893, "logits/rejected": -2.391191005706787, "logps/chosen": -205.79327392578125, "logps/rejected": -113.72056579589844, "loss": 0.6004, "rewards/accuracies": 0.5, "rewards/chosen": -3.6793015003204346, "rewards/margins": 2.9965224266052246, "rewards/rejected": -6.675824165344238, "step": 9041 }, { "epoch": 1.41, "learning_rate": 7.5155651226723375e-06, "logits/chosen": -2.4215242862701416, "logits/rejected": -2.6812868118286133, "logps/chosen": -289.6378173828125, "logps/rejected": -411.3330383300781, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -1.6612651348114014, "rewards/margins": 9.91656494140625, "rewards/rejected": -11.57783031463623, "step": 9042 }, { "epoch": 1.41, "learning_rate": 7.514831682141189e-06, "logits/chosen": -2.4246487617492676, "logits/rejected": -3.084782361984253, "logps/chosen": -128.38929748535156, "logps/rejected": -301.22930908203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1831932067871094, "rewards/margins": 9.521347999572754, "rewards/rejected": -10.704541206359863, "step": 9043 }, { "epoch": 1.41, "learning_rate": 7.514098241610043e-06, "logits/chosen": -2.9841787815093994, "logits/rejected": -2.666670083999634, "logps/chosen": -279.5177001953125, "logps/rejected": -270.7064514160156, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.3887481689453125, "rewards/margins": 6.636970520019531, "rewards/rejected": -9.025718688964844, "step": 9044 }, { "epoch": 1.41, "learning_rate": 7.513364801078895e-06, "logits/chosen": -2.0063531398773193, "logits/rejected": -2.905548334121704, "logps/chosen": -325.60101318359375, "logps/rejected": -686.923095703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.218325138092041, "rewards/margins": 10.21280288696289, "rewards/rejected": -13.431127548217773, "step": 9045 }, { "epoch": 1.41, "learning_rate": 7.512631360547747e-06, "logits/chosen": -1.54013991355896, "logits/rejected": -2.423389196395874, "logps/chosen": -74.54010772705078, "logps/rejected": -319.5277099609375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.839716911315918, "rewards/margins": 7.896650314331055, "rewards/rejected": -10.736367225646973, "step": 9046 }, { "epoch": 1.41, "learning_rate": 7.5118979200165986e-06, "logits/chosen": -1.402357816696167, "logits/rejected": -2.8370914459228516, "logps/chosen": -184.6729736328125, "logps/rejected": -389.8555908203125, "loss": 2.2899, "rewards/accuracies": 0.5, "rewards/chosen": -5.014282703399658, "rewards/margins": 1.1144740581512451, "rewards/rejected": -6.128756523132324, "step": 9047 }, { "epoch": 1.41, "learning_rate": 7.5111644794854504e-06, "logits/chosen": -2.8696680068969727, "logits/rejected": -2.9998276233673096, "logps/chosen": -103.38277435302734, "logps/rejected": -226.98037719726562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8534208536148071, "rewards/margins": 8.756698608398438, "rewards/rejected": -9.610118865966797, "step": 9048 }, { "epoch": 1.41, "learning_rate": 7.510431038954302e-06, "logits/chosen": -1.969132900238037, "logits/rejected": -2.612107038497925, "logps/chosen": -165.615966796875, "logps/rejected": -223.19964599609375, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -2.701815605163574, "rewards/margins": 3.8959875106811523, "rewards/rejected": -6.597803115844727, "step": 9049 }, { "epoch": 1.41, "learning_rate": 7.509697598423154e-06, "logits/chosen": -1.3987500667572021, "logits/rejected": -1.8512895107269287, "logps/chosen": -260.9530944824219, "logps/rejected": -364.7098083496094, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.0775539875030518, "rewards/margins": 5.422128677368164, "rewards/rejected": -8.499683380126953, "step": 9050 }, { "epoch": 1.41, "learning_rate": 7.508964157892006e-06, "logits/chosen": -2.990548610687256, "logits/rejected": -2.2568938732147217, "logps/chosen": -230.44515991210938, "logps/rejected": -95.73648834228516, "loss": 0.8179, "rewards/accuracies": 0.5, "rewards/chosen": -4.532720565795898, "rewards/margins": -0.21193921566009521, "rewards/rejected": -4.320781230926514, "step": 9051 }, { "epoch": 1.41, "learning_rate": 7.508230717360859e-06, "logits/chosen": -3.216660737991333, "logits/rejected": -3.3092610836029053, "logps/chosen": -68.91499328613281, "logps/rejected": -218.85488891601562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.173980712890625, "rewards/margins": 8.70772647857666, "rewards/rejected": -9.881707191467285, "step": 9052 }, { "epoch": 1.41, "learning_rate": 7.5074972768297115e-06, "logits/chosen": -2.21612548828125, "logits/rejected": -2.898012638092041, "logps/chosen": -115.43739318847656, "logps/rejected": -243.0624237060547, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.4170432090759277, "rewards/margins": 6.171346664428711, "rewards/rejected": -8.588390350341797, "step": 9053 }, { "epoch": 1.41, "learning_rate": 7.506763836298564e-06, "logits/chosen": -1.3780640363693237, "logits/rejected": -2.7793686389923096, "logps/chosen": -94.05424499511719, "logps/rejected": -293.9282531738281, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.1192634105682373, "rewards/margins": 6.91071891784668, "rewards/rejected": -10.029982566833496, "step": 9054 }, { "epoch": 1.41, "learning_rate": 7.506030395767416e-06, "logits/chosen": -1.1325747966766357, "logits/rejected": -2.616459608078003, "logps/chosen": -61.39751434326172, "logps/rejected": -386.9521789550781, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.9818177223205566, "rewards/margins": 8.244985580444336, "rewards/rejected": -10.22680377960205, "step": 9055 }, { "epoch": 1.41, "learning_rate": 7.505296955236268e-06, "logits/chosen": -2.9376375675201416, "logits/rejected": -2.937326192855835, "logps/chosen": -303.64312744140625, "logps/rejected": -306.5473327636719, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -4.01864767074585, "rewards/margins": 6.376568794250488, "rewards/rejected": -10.39521598815918, "step": 9056 }, { "epoch": 1.41, "learning_rate": 7.50456351470512e-06, "logits/chosen": -2.627748489379883, "logits/rejected": -2.9151394367218018, "logps/chosen": -82.54328155517578, "logps/rejected": -207.6095428466797, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.2646262645721436, "rewards/margins": 6.5059356689453125, "rewards/rejected": -7.770562171936035, "step": 9057 }, { "epoch": 1.41, "learning_rate": 7.503830074173972e-06, "logits/chosen": -1.0539207458496094, "logits/rejected": -2.772615432739258, "logps/chosen": -107.08609008789062, "logps/rejected": -405.9665832519531, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.0993094444274902, "rewards/margins": 5.49398136138916, "rewards/rejected": -8.593290328979492, "step": 9058 }, { "epoch": 1.41, "learning_rate": 7.503096633642824e-06, "logits/chosen": -1.8425511121749878, "logits/rejected": -1.0236979722976685, "logps/chosen": -1006.6740112304688, "logps/rejected": -431.3855895996094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 0.8329932689666748, "rewards/margins": 12.495763778686523, "rewards/rejected": -11.66277027130127, "step": 9059 }, { "epoch": 1.41, "learning_rate": 7.5023631931116755e-06, "logits/chosen": -2.0354998111724854, "logits/rejected": -2.7480838298797607, "logps/chosen": -229.1644287109375, "logps/rejected": -250.28350830078125, "loss": 2.8628, "rewards/accuracies": 0.5, "rewards/chosen": -5.786520957946777, "rewards/margins": 1.9057323932647705, "rewards/rejected": -7.692253112792969, "step": 9060 }, { "epoch": 1.41, "learning_rate": 7.501629752580527e-06, "logits/chosen": -2.8372116088867188, "logits/rejected": -2.2289295196533203, "logps/chosen": -301.2651672363281, "logps/rejected": -242.69827270507812, "loss": 1.0247, "rewards/accuracies": 0.5, "rewards/chosen": -1.8101272583007812, "rewards/margins": 4.102574348449707, "rewards/rejected": -5.912701606750488, "step": 9061 }, { "epoch": 1.41, "learning_rate": 7.500896312049381e-06, "logits/chosen": -2.826246738433838, "logits/rejected": -3.0348446369171143, "logps/chosen": -67.78019714355469, "logps/rejected": -178.33456420898438, "loss": 0.7421, "rewards/accuracies": 0.5, "rewards/chosen": -5.2137556076049805, "rewards/margins": 2.644672155380249, "rewards/rejected": -7.858428001403809, "step": 9062 }, { "epoch": 1.41, "learning_rate": 7.500162871518233e-06, "logits/chosen": -2.8345108032226562, "logits/rejected": -2.968657970428467, "logps/chosen": -168.86131286621094, "logps/rejected": -148.49847412109375, "loss": 1.4961, "rewards/accuracies": 0.5, "rewards/chosen": -5.514503479003906, "rewards/margins": 2.4929158687591553, "rewards/rejected": -8.00741958618164, "step": 9063 }, { "epoch": 1.41, "learning_rate": 7.499429430987085e-06, "logits/chosen": -2.9567959308624268, "logits/rejected": -3.2480287551879883, "logps/chosen": -216.01568603515625, "logps/rejected": -360.85260009765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.023481369018555, "rewards/margins": 6.55814266204834, "rewards/rejected": -10.581624031066895, "step": 9064 }, { "epoch": 1.41, "learning_rate": 7.498695990455937e-06, "logits/chosen": -2.4830753803253174, "logits/rejected": -2.92848539352417, "logps/chosen": -479.26385498046875, "logps/rejected": -535.8005981445312, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.3806705474853516, "rewards/margins": 5.850032806396484, "rewards/rejected": -9.230703353881836, "step": 9065 }, { "epoch": 1.41, "learning_rate": 7.4979625499247885e-06, "logits/chosen": -2.3068089485168457, "logits/rejected": -3.0239264965057373, "logps/chosen": -35.51806640625, "logps/rejected": -308.37200927734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6174347400665283, "rewards/margins": 10.215755462646484, "rewards/rejected": -10.833189964294434, "step": 9066 }, { "epoch": 1.41, "learning_rate": 7.49722910939364e-06, "logits/chosen": -1.6313376426696777, "logits/rejected": -2.84087872505188, "logps/chosen": -230.35231018066406, "logps/rejected": -507.193603515625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.7597824335098267, "rewards/margins": 6.798748970031738, "rewards/rejected": -7.558531761169434, "step": 9067 }, { "epoch": 1.41, "learning_rate": 7.496495668862492e-06, "logits/chosen": -2.421598196029663, "logits/rejected": -2.914759635925293, "logps/chosen": -236.9001922607422, "logps/rejected": -319.8392028808594, "loss": 0.0908, "rewards/accuracies": 1.0, "rewards/chosen": -2.2591466903686523, "rewards/margins": 3.8411450386047363, "rewards/rejected": -6.100291728973389, "step": 9068 }, { "epoch": 1.41, "learning_rate": 7.495762228331345e-06, "logits/chosen": -2.7992470264434814, "logits/rejected": -2.019700050354004, "logps/chosen": -245.8380126953125, "logps/rejected": -468.5045166015625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.193506956100464, "rewards/margins": 8.494998931884766, "rewards/rejected": -10.688505172729492, "step": 9069 }, { "epoch": 1.41, "learning_rate": 7.495028787800197e-06, "logits/chosen": -0.6535860896110535, "logits/rejected": -1.5559866428375244, "logps/chosen": -195.11920166015625, "logps/rejected": -502.1641845703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.9855453968048096, "rewards/margins": 10.561874389648438, "rewards/rejected": -13.547420501708984, "step": 9070 }, { "epoch": 1.41, "learning_rate": 7.49429534726905e-06, "logits/chosen": -2.0172393321990967, "logits/rejected": -2.5127997398376465, "logps/chosen": -374.18463134765625, "logps/rejected": -482.8282165527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3333334922790527, "rewards/margins": 12.496461868286133, "rewards/rejected": -14.829795837402344, "step": 9071 }, { "epoch": 1.41, "learning_rate": 7.493561906737902e-06, "logits/chosen": -2.4146571159362793, "logits/rejected": -2.8426473140716553, "logps/chosen": -69.50836944580078, "logps/rejected": -288.5018310546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.3198537826538086, "rewards/margins": 8.726821899414062, "rewards/rejected": -10.046676635742188, "step": 9072 }, { "epoch": 1.41, "learning_rate": 7.492828466206754e-06, "logits/chosen": -2.620859146118164, "logits/rejected": -2.941420316696167, "logps/chosen": -267.710693359375, "logps/rejected": -400.05316162109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.2610504627227783, "rewards/margins": 8.357370376586914, "rewards/rejected": -10.618420600891113, "step": 9073 }, { "epoch": 1.41, "learning_rate": 7.492095025675606e-06, "logits/chosen": -2.9164657592773438, "logits/rejected": -2.812246799468994, "logps/chosen": -428.85662841796875, "logps/rejected": -298.2843017578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.857635974884033, "rewards/margins": 7.635671615600586, "rewards/rejected": -11.493307113647461, "step": 9074 }, { "epoch": 1.41, "learning_rate": 7.491361585144458e-06, "logits/chosen": -2.586212158203125, "logits/rejected": -1.6562312841415405, "logps/chosen": -830.357421875, "logps/rejected": -390.7446594238281, "loss": 0.0887, "rewards/accuracies": 1.0, "rewards/chosen": -4.355696678161621, "rewards/margins": 2.6971662044525146, "rewards/rejected": -7.052863121032715, "step": 9075 }, { "epoch": 1.41, "learning_rate": 7.49062814461331e-06, "logits/chosen": -2.396193027496338, "logits/rejected": -2.3685309886932373, "logps/chosen": -158.98388671875, "logps/rejected": -206.63204956054688, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -4.134721279144287, "rewards/margins": 6.78867769241333, "rewards/rejected": -10.923398971557617, "step": 9076 }, { "epoch": 1.41, "learning_rate": 7.489894704082162e-06, "logits/chosen": -2.7046897411346436, "logits/rejected": -2.7442305088043213, "logps/chosen": -253.16810607910156, "logps/rejected": -325.6846923828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.464402914047241, "rewards/margins": 6.03954553604126, "rewards/rejected": -8.503948211669922, "step": 9077 }, { "epoch": 1.41, "learning_rate": 7.4891612635510136e-06, "logits/chosen": -2.6807007789611816, "logits/rejected": -3.0046942234039307, "logps/chosen": -144.72250366210938, "logps/rejected": -407.5849914550781, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.7528533935546875, "rewards/margins": 6.467874526977539, "rewards/rejected": -9.220727920532227, "step": 9078 }, { "epoch": 1.41, "learning_rate": 7.4884278230198654e-06, "logits/chosen": -2.3789007663726807, "logits/rejected": -2.803378105163574, "logps/chosen": -132.8807830810547, "logps/rejected": -372.13037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3005433082580566, "rewards/margins": 9.411662101745605, "rewards/rejected": -11.712204933166504, "step": 9079 }, { "epoch": 1.41, "learning_rate": 7.487694382488719e-06, "logits/chosen": -2.959182024002075, "logits/rejected": -2.85009503364563, "logps/chosen": -143.0315704345703, "logps/rejected": -195.17977905273438, "loss": 0.0268, "rewards/accuracies": 1.0, "rewards/chosen": -2.6917195320129395, "rewards/margins": 4.415614604949951, "rewards/rejected": -7.107334136962891, "step": 9080 }, { "epoch": 1.41, "learning_rate": 7.486960941957571e-06, "logits/chosen": -2.6031060218811035, "logits/rejected": -2.7581186294555664, "logps/chosen": -439.0429382324219, "logps/rejected": -402.9300231933594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.013572692871094, "rewards/margins": 9.88427734375, "rewards/rejected": -13.897850036621094, "step": 9081 }, { "epoch": 1.41, "learning_rate": 7.486227501426423e-06, "logits/chosen": -2.165602207183838, "logits/rejected": -2.6741323471069336, "logps/chosen": -331.49786376953125, "logps/rejected": -349.39569091796875, "loss": 1.0184, "rewards/accuracies": 0.5, "rewards/chosen": -4.14455509185791, "rewards/margins": 3.6149847507476807, "rewards/rejected": -7.759539604187012, "step": 9082 }, { "epoch": 1.41, "learning_rate": 7.485494060895275e-06, "logits/chosen": -1.878891110420227, "logits/rejected": -2.785537004470825, "logps/chosen": -72.06244659423828, "logps/rejected": -178.12612915039062, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.615631580352783, "rewards/margins": 5.1522626876831055, "rewards/rejected": -8.76789379119873, "step": 9083 }, { "epoch": 1.41, "learning_rate": 7.4847606203641265e-06, "logits/chosen": -2.7781596183776855, "logits/rejected": -1.185847520828247, "logps/chosen": -264.53607177734375, "logps/rejected": -125.41033935546875, "loss": 1.0452, "rewards/accuracies": 0.5, "rewards/chosen": -4.518712043762207, "rewards/margins": 0.13261497020721436, "rewards/rejected": -4.651327133178711, "step": 9084 }, { "epoch": 1.41, "learning_rate": 7.484027179832978e-06, "logits/chosen": -2.9581191539764404, "logits/rejected": -2.0950586795806885, "logps/chosen": -665.860595703125, "logps/rejected": -455.95843505859375, "loss": 0.8602, "rewards/accuracies": 0.5, "rewards/chosen": -4.082623481750488, "rewards/margins": 3.0431764125823975, "rewards/rejected": -7.125800132751465, "step": 9085 }, { "epoch": 1.41, "learning_rate": 7.483293739301831e-06, "logits/chosen": -2.0073306560516357, "logits/rejected": -2.9343068599700928, "logps/chosen": -381.24951171875, "logps/rejected": -532.9808349609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.346432209014893, "rewards/margins": 8.704536437988281, "rewards/rejected": -13.050969123840332, "step": 9086 }, { "epoch": 1.41, "learning_rate": 7.482560298770683e-06, "logits/chosen": -2.083199977874756, "logits/rejected": -2.8939342498779297, "logps/chosen": -118.82405090332031, "logps/rejected": -344.276611328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5054974555969238, "rewards/margins": 8.416792869567871, "rewards/rejected": -9.922289848327637, "step": 9087 }, { "epoch": 1.41, "learning_rate": 7.481826858239535e-06, "logits/chosen": -2.1870954036712646, "logits/rejected": -2.206690788269043, "logps/chosen": -338.6165466308594, "logps/rejected": -334.6765441894531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.147109031677246, "rewards/margins": 8.638252258300781, "rewards/rejected": -11.785362243652344, "step": 9088 }, { "epoch": 1.41, "learning_rate": 7.4810934177083885e-06, "logits/chosen": -2.4002816677093506, "logits/rejected": -2.737067699432373, "logps/chosen": -71.75078582763672, "logps/rejected": -423.9359130859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7878869771957397, "rewards/margins": 9.98941421508789, "rewards/rejected": -11.777300834655762, "step": 9089 }, { "epoch": 1.41, "learning_rate": 7.48035997717724e-06, "logits/chosen": -1.2088713645935059, "logits/rejected": -2.392106771469116, "logps/chosen": -122.7315673828125, "logps/rejected": -336.00146484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5095268487930298, "rewards/margins": 11.705801963806152, "rewards/rejected": -13.21532917022705, "step": 9090 }, { "epoch": 1.41, "learning_rate": 7.479626536646092e-06, "logits/chosen": -2.2075212001800537, "logits/rejected": -2.9956305027008057, "logps/chosen": -69.27168273925781, "logps/rejected": -313.6545104980469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.810693085193634, "rewards/margins": 8.405021667480469, "rewards/rejected": -9.215714454650879, "step": 9091 }, { "epoch": 1.41, "learning_rate": 7.478893096114944e-06, "logits/chosen": -2.2914187908172607, "logits/rejected": -2.757571220397949, "logps/chosen": -313.2206726074219, "logps/rejected": -483.031982421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.52787446975708, "rewards/margins": 9.526199340820312, "rewards/rejected": -12.05407428741455, "step": 9092 }, { "epoch": 1.41, "learning_rate": 7.478159655583796e-06, "logits/chosen": -2.6277527809143066, "logits/rejected": -2.569758892059326, "logps/chosen": -228.2335968017578, "logps/rejected": -253.83792114257812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5236256122589111, "rewards/margins": 7.876324653625488, "rewards/rejected": -8.39995002746582, "step": 9093 }, { "epoch": 1.41, "learning_rate": 7.477426215052648e-06, "logits/chosen": -1.723584532737732, "logits/rejected": -2.078023910522461, "logps/chosen": -258.55157470703125, "logps/rejected": -253.85577392578125, "loss": 0.5543, "rewards/accuracies": 0.5, "rewards/chosen": -4.264142036437988, "rewards/margins": 3.3009591102600098, "rewards/rejected": -7.565101146697998, "step": 9094 }, { "epoch": 1.41, "learning_rate": 7.4766927745215e-06, "logits/chosen": -1.997393250465393, "logits/rejected": -2.9056499004364014, "logps/chosen": -108.61955261230469, "logps/rejected": -404.910888671875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.4259166717529297, "rewards/margins": 7.437764644622803, "rewards/rejected": -9.86368179321289, "step": 9095 }, { "epoch": 1.41, "learning_rate": 7.475959333990352e-06, "logits/chosen": -2.2448294162750244, "logits/rejected": -3.0345723628997803, "logps/chosen": -101.65969848632812, "logps/rejected": -272.1976318359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.2247915267944336, "rewards/margins": 8.588481903076172, "rewards/rejected": -10.813272476196289, "step": 9096 }, { "epoch": 1.41, "learning_rate": 7.4752258934592035e-06, "logits/chosen": -2.5741279125213623, "logits/rejected": -3.0476436614990234, "logps/chosen": -92.09282684326172, "logps/rejected": -269.51031494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3878785371780396, "rewards/margins": 10.663997650146484, "rewards/rejected": -12.051876068115234, "step": 9097 }, { "epoch": 1.41, "learning_rate": 7.474492452928057e-06, "logits/chosen": -2.4779152870178223, "logits/rejected": -2.8935773372650146, "logps/chosen": -45.830535888671875, "logps/rejected": -174.0482635498047, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.150418758392334, "rewards/margins": 7.8158721923828125, "rewards/rejected": -8.966290473937988, "step": 9098 }, { "epoch": 1.42, "learning_rate": 7.473759012396909e-06, "logits/chosen": -2.779163122177124, "logits/rejected": -1.69259774684906, "logps/chosen": -302.5235595703125, "logps/rejected": -312.3482666015625, "loss": 0.8151, "rewards/accuracies": 0.5, "rewards/chosen": -4.409811973571777, "rewards/margins": 2.9109959602355957, "rewards/rejected": -7.320808410644531, "step": 9099 }, { "epoch": 1.42, "learning_rate": 7.473025571865761e-06, "logits/chosen": -2.1209006309509277, "logits/rejected": -2.8846559524536133, "logps/chosen": -151.79087829589844, "logps/rejected": -290.1719970703125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.0953212976455688, "rewards/margins": 7.514403343200684, "rewards/rejected": -8.609724044799805, "step": 9100 }, { "epoch": 1.42, "learning_rate": 7.472292131334613e-06, "logits/chosen": -1.7028999328613281, "logits/rejected": -2.6095118522644043, "logps/chosen": -184.28817749023438, "logps/rejected": -376.459716796875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.787116050720215, "rewards/margins": 7.352089881896973, "rewards/rejected": -10.139205932617188, "step": 9101 }, { "epoch": 1.42, "learning_rate": 7.4715586908034646e-06, "logits/chosen": -3.0015225410461426, "logits/rejected": -3.0765793323516846, "logps/chosen": -102.55428314208984, "logps/rejected": -325.7671203613281, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.444216251373291, "rewards/margins": 7.444345474243164, "rewards/rejected": -9.888561248779297, "step": 9102 }, { "epoch": 1.42, "learning_rate": 7.470825250272317e-06, "logits/chosen": -2.4301393032073975, "logits/rejected": -3.102675437927246, "logps/chosen": -93.97633361816406, "logps/rejected": -397.65936279296875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.7062597274780273, "rewards/margins": 7.586038112640381, "rewards/rejected": -10.292298316955566, "step": 9103 }, { "epoch": 1.42, "learning_rate": 7.470091809741169e-06, "logits/chosen": -2.5476841926574707, "logits/rejected": -2.7452008724212646, "logps/chosen": -139.87718200683594, "logps/rejected": -329.71209716796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1654531955718994, "rewards/margins": 9.550395965576172, "rewards/rejected": -11.715849876403809, "step": 9104 }, { "epoch": 1.42, "learning_rate": 7.469358369210021e-06, "logits/chosen": -2.92612624168396, "logits/rejected": -1.9714630842208862, "logps/chosen": -183.83712768554688, "logps/rejected": -189.07337951660156, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.9370434284210205, "rewards/margins": 4.568940162658691, "rewards/rejected": -6.505983829498291, "step": 9105 }, { "epoch": 1.42, "learning_rate": 7.468624928678873e-06, "logits/chosen": -2.7563412189483643, "logits/rejected": -2.2494773864746094, "logps/chosen": -397.9853210449219, "logps/rejected": -457.02960205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2705698013305664, "rewards/margins": 10.852584838867188, "rewards/rejected": -13.12315559387207, "step": 9106 }, { "epoch": 1.42, "learning_rate": 7.4678914881477265e-06, "logits/chosen": -2.766390800476074, "logits/rejected": -2.0304806232452393, "logps/chosen": -251.6436309814453, "logps/rejected": -214.19004821777344, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.7706215381622314, "rewards/margins": 7.295657157897949, "rewards/rejected": -8.066278457641602, "step": 9107 }, { "epoch": 1.42, "learning_rate": 7.467158047616578e-06, "logits/chosen": -2.120450258255005, "logits/rejected": -2.6154210567474365, "logps/chosen": -237.22341918945312, "logps/rejected": -448.4213562011719, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.048497200012207, "rewards/margins": 6.226531982421875, "rewards/rejected": -9.275029182434082, "step": 9108 }, { "epoch": 1.42, "learning_rate": 7.46642460708543e-06, "logits/chosen": -2.6142220497131348, "logits/rejected": -2.8298590183258057, "logps/chosen": -283.72369384765625, "logps/rejected": -252.72142028808594, "loss": 1.7061, "rewards/accuracies": 0.5, "rewards/chosen": -2.7945618629455566, "rewards/margins": 1.7617223262786865, "rewards/rejected": -4.556284427642822, "step": 9109 }, { "epoch": 1.42, "learning_rate": 7.465691166554282e-06, "logits/chosen": -2.6093499660491943, "logits/rejected": -3.0559237003326416, "logps/chosen": -127.69798278808594, "logps/rejected": -240.58889770507812, "loss": 0.0256, "rewards/accuracies": 1.0, "rewards/chosen": -2.013831377029419, "rewards/margins": 4.384568214416504, "rewards/rejected": -6.398399353027344, "step": 9110 }, { "epoch": 1.42, "learning_rate": 7.464957726023134e-06, "logits/chosen": -1.1121248006820679, "logits/rejected": -2.8754210472106934, "logps/chosen": -156.94061279296875, "logps/rejected": -723.4141845703125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -4.503607749938965, "rewards/margins": 8.937397003173828, "rewards/rejected": -13.441003799438477, "step": 9111 }, { "epoch": 1.42, "learning_rate": 7.464224285491986e-06, "logits/chosen": -1.8385857343673706, "logits/rejected": -2.383852243423462, "logps/chosen": -72.35157775878906, "logps/rejected": -236.80654907226562, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.3225336074829102, "rewards/margins": 5.7956743240356445, "rewards/rejected": -7.118207931518555, "step": 9112 }, { "epoch": 1.42, "learning_rate": 7.463490844960838e-06, "logits/chosen": -2.7524402141571045, "logits/rejected": -2.1154427528381348, "logps/chosen": -712.3626098632812, "logps/rejected": -498.7376708984375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.146800994873047, "rewards/margins": 8.009774208068848, "rewards/rejected": -10.156575202941895, "step": 9113 }, { "epoch": 1.42, "learning_rate": 7.46275740442969e-06, "logits/chosen": -1.8591042757034302, "logits/rejected": -3.0166738033294678, "logps/chosen": -116.08120727539062, "logps/rejected": -361.3134765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.412466526031494, "rewards/margins": 8.23253345489502, "rewards/rejected": -11.645000457763672, "step": 9114 }, { "epoch": 1.42, "learning_rate": 7.4620239638985415e-06, "logits/chosen": -2.2028648853302, "logits/rejected": -3.0511717796325684, "logps/chosen": -113.31918334960938, "logps/rejected": -591.6624145507812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.11528816819190979, "rewards/margins": 12.449136734008789, "rewards/rejected": -12.33384895324707, "step": 9115 }, { "epoch": 1.42, "learning_rate": 7.461290523367395e-06, "logits/chosen": -1.1169207096099854, "logits/rejected": -2.4390413761138916, "logps/chosen": -63.92776870727539, "logps/rejected": -367.60009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5185072422027588, "rewards/margins": 12.160621643066406, "rewards/rejected": -13.679128646850586, "step": 9116 }, { "epoch": 1.42, "learning_rate": 7.460557082836247e-06, "logits/chosen": -2.772289752960205, "logits/rejected": -2.6706721782684326, "logps/chosen": -111.74947357177734, "logps/rejected": -115.21810150146484, "loss": 0.1603, "rewards/accuracies": 1.0, "rewards/chosen": -1.4820082187652588, "rewards/margins": 2.8666186332702637, "rewards/rejected": -4.348627090454102, "step": 9117 }, { "epoch": 1.42, "learning_rate": 7.459823642305099e-06, "logits/chosen": -3.3700220584869385, "logits/rejected": -2.6352174282073975, "logps/chosen": -160.99728393554688, "logps/rejected": -79.40152740478516, "loss": 1.3357, "rewards/accuracies": 0.5, "rewards/chosen": -6.330597400665283, "rewards/margins": -0.41376793384552, "rewards/rejected": -5.9168291091918945, "step": 9118 }, { "epoch": 1.42, "learning_rate": 7.459090201773951e-06, "logits/chosen": -2.370499610900879, "logits/rejected": -2.891265869140625, "logps/chosen": -147.76158142089844, "logps/rejected": -258.9569091796875, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -3.0342369079589844, "rewards/margins": 4.2561259269714355, "rewards/rejected": -7.290362358093262, "step": 9119 }, { "epoch": 1.42, "learning_rate": 7.4583567612428035e-06, "logits/chosen": -2.0552945137023926, "logits/rejected": -2.7262563705444336, "logps/chosen": -162.670654296875, "logps/rejected": -307.29278564453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.238344669342041, "rewards/margins": 8.933297157287598, "rewards/rejected": -11.171642303466797, "step": 9120 }, { "epoch": 1.42, "learning_rate": 7.457623320711655e-06, "logits/chosen": -1.8420016765594482, "logits/rejected": -2.885183334350586, "logps/chosen": -149.53961181640625, "logps/rejected": -321.4228515625, "loss": 1.1134, "rewards/accuracies": 0.5, "rewards/chosen": -4.499600410461426, "rewards/margins": 2.314749240875244, "rewards/rejected": -6.81434965133667, "step": 9121 }, { "epoch": 1.42, "learning_rate": 7.456889880180507e-06, "logits/chosen": -2.583012819290161, "logits/rejected": -2.872556447982788, "logps/chosen": -106.7586441040039, "logps/rejected": -164.04087829589844, "loss": 1.8876, "rewards/accuracies": 0.5, "rewards/chosen": -3.1986398696899414, "rewards/margins": 1.9189797639846802, "rewards/rejected": -5.117619514465332, "step": 9122 }, { "epoch": 1.42, "learning_rate": 7.456156439649359e-06, "logits/chosen": -2.1693084239959717, "logits/rejected": -2.9306914806365967, "logps/chosen": -162.99728393554688, "logps/rejected": -357.526611328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2774932384490967, "rewards/margins": 7.1081037521362305, "rewards/rejected": -8.385597229003906, "step": 9123 }, { "epoch": 1.42, "learning_rate": 7.455422999118211e-06, "logits/chosen": -2.749941110610962, "logits/rejected": -2.633754014968872, "logps/chosen": -83.91992950439453, "logps/rejected": -156.71875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -4.158270835876465, "rewards/margins": 4.803926467895508, "rewards/rejected": -8.962197303771973, "step": 9124 }, { "epoch": 1.42, "learning_rate": 7.4546895585870645e-06, "logits/chosen": -2.2481606006622314, "logits/rejected": -1.5378239154815674, "logps/chosen": -440.8438415527344, "logps/rejected": -213.00010681152344, "loss": 1.3926, "rewards/accuracies": 0.5, "rewards/chosen": -3.8626534938812256, "rewards/margins": 3.12205171585083, "rewards/rejected": -6.984704971313477, "step": 9125 }, { "epoch": 1.42, "learning_rate": 7.4539561180559164e-06, "logits/chosen": -2.9318461418151855, "logits/rejected": -2.4209184646606445, "logps/chosen": -230.80946350097656, "logps/rejected": -173.3660888671875, "loss": 0.332, "rewards/accuracies": 1.0, "rewards/chosen": -5.25884485244751, "rewards/margins": 4.013363838195801, "rewards/rejected": -9.272209167480469, "step": 9126 }, { "epoch": 1.42, "learning_rate": 7.453222677524768e-06, "logits/chosen": -2.0522634983062744, "logits/rejected": -1.9419883489608765, "logps/chosen": -477.62774658203125, "logps/rejected": -456.8569030761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7501349449157715, "rewards/margins": 11.001187324523926, "rewards/rejected": -13.751322746276855, "step": 9127 }, { "epoch": 1.42, "learning_rate": 7.45248923699362e-06, "logits/chosen": -2.5776634216308594, "logits/rejected": -2.679924726486206, "logps/chosen": -85.50498962402344, "logps/rejected": -182.61666870117188, "loss": 0.0802, "rewards/accuracies": 1.0, "rewards/chosen": -2.3004050254821777, "rewards/margins": 4.408114433288574, "rewards/rejected": -6.708519458770752, "step": 9128 }, { "epoch": 1.42, "learning_rate": 7.451755796462472e-06, "logits/chosen": -1.5068312883377075, "logits/rejected": -2.563263416290283, "logps/chosen": -140.02163696289062, "logps/rejected": -559.85009765625, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": -2.2469356060028076, "rewards/margins": 8.687763214111328, "rewards/rejected": -10.934698104858398, "step": 9129 }, { "epoch": 1.42, "learning_rate": 7.451022355931324e-06, "logits/chosen": -3.074798822402954, "logits/rejected": -1.279187560081482, "logps/chosen": -638.5352783203125, "logps/rejected": -302.13446044921875, "loss": 1.7941, "rewards/accuracies": 0.5, "rewards/chosen": -4.169659614562988, "rewards/margins": 0.03229665756225586, "rewards/rejected": -4.201956272125244, "step": 9130 }, { "epoch": 1.42, "learning_rate": 7.450288915400176e-06, "logits/chosen": -2.7308454513549805, "logits/rejected": -1.1977812051773071, "logps/chosen": -178.799560546875, "logps/rejected": -132.59579467773438, "loss": 0.9307, "rewards/accuracies": 0.5, "rewards/chosen": -3.1406970024108887, "rewards/margins": 2.902219772338867, "rewards/rejected": -6.042916774749756, "step": 9131 }, { "epoch": 1.42, "learning_rate": 7.449555474869028e-06, "logits/chosen": -2.9366962909698486, "logits/rejected": -2.1735494136810303, "logps/chosen": -404.2070007324219, "logps/rejected": -339.1989440917969, "loss": 1.7322, "rewards/accuracies": 0.5, "rewards/chosen": -3.891439914703369, "rewards/margins": 2.3850603103637695, "rewards/rejected": -6.2764997482299805, "step": 9132 }, { "epoch": 1.42, "learning_rate": 7.4488220343378796e-06, "logits/chosen": -2.234311819076538, "logits/rejected": -2.639930486679077, "logps/chosen": -634.8650512695312, "logps/rejected": -495.4476013183594, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.556255340576172, "rewards/margins": 7.768521308898926, "rewards/rejected": -10.324776649475098, "step": 9133 }, { "epoch": 1.42, "learning_rate": 7.448088593806733e-06, "logits/chosen": -2.8908207416534424, "logits/rejected": -2.690122604370117, "logps/chosen": -262.7508544921875, "logps/rejected": -320.35101318359375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.203761339187622, "rewards/margins": 6.292544841766357, "rewards/rejected": -7.4963059425354, "step": 9134 }, { "epoch": 1.42, "learning_rate": 7.447355153275585e-06, "logits/chosen": -1.9594484567642212, "logits/rejected": -2.833099126815796, "logps/chosen": -178.2047576904297, "logps/rejected": -367.64794921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.9699519872665405, "rewards/margins": 6.664794445037842, "rewards/rejected": -8.634746551513672, "step": 9135 }, { "epoch": 1.42, "learning_rate": 7.446621712744437e-06, "logits/chosen": -2.0581765174865723, "logits/rejected": -2.6890013217926025, "logps/chosen": -157.95079040527344, "logps/rejected": -239.328369140625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.7266204357147217, "rewards/margins": 6.462276935577393, "rewards/rejected": -9.188898086547852, "step": 9136 }, { "epoch": 1.42, "learning_rate": 7.44588827221329e-06, "logits/chosen": -2.7183713912963867, "logits/rejected": -2.6081926822662354, "logps/chosen": -144.68710327148438, "logps/rejected": -408.68719482421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9479255676269531, "rewards/margins": 9.660102844238281, "rewards/rejected": -11.608028411865234, "step": 9137 }, { "epoch": 1.42, "learning_rate": 7.4451548316821415e-06, "logits/chosen": -2.8101251125335693, "logits/rejected": -3.1092989444732666, "logps/chosen": -137.31466674804688, "logps/rejected": -376.54669189453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.3860645294189453, "rewards/margins": 8.653901100158691, "rewards/rejected": -11.039965629577637, "step": 9138 }, { "epoch": 1.42, "learning_rate": 7.444421391150993e-06, "logits/chosen": -2.172987222671509, "logits/rejected": -3.0589728355407715, "logps/chosen": -180.46173095703125, "logps/rejected": -383.38916015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.2634308338165283, "rewards/margins": 7.360656261444092, "rewards/rejected": -9.6240873336792, "step": 9139 }, { "epoch": 1.42, "learning_rate": 7.443687950619845e-06, "logits/chosen": -1.4794162511825562, "logits/rejected": -3.0822253227233887, "logps/chosen": -93.49932098388672, "logps/rejected": -326.814453125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -2.8167014122009277, "rewards/margins": 5.647681713104248, "rewards/rejected": -8.464383125305176, "step": 9140 }, { "epoch": 1.42, "learning_rate": 7.442954510088697e-06, "logits/chosen": -2.2580363750457764, "logits/rejected": -2.638158082962036, "logps/chosen": -233.16513061523438, "logps/rejected": -297.6346435546875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.433345079421997, "rewards/margins": 8.480636596679688, "rewards/rejected": -10.913982391357422, "step": 9141 }, { "epoch": 1.42, "learning_rate": 7.442221069557551e-06, "logits/chosen": -2.5266034603118896, "logits/rejected": -2.6305530071258545, "logps/chosen": -196.927734375, "logps/rejected": -313.57000732421875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -3.411456823348999, "rewards/margins": 5.3710832595825195, "rewards/rejected": -8.782540321350098, "step": 9142 }, { "epoch": 1.42, "learning_rate": 7.441487629026403e-06, "logits/chosen": -2.722039222717285, "logits/rejected": -1.2310900688171387, "logps/chosen": -642.6812133789062, "logps/rejected": -429.37310791015625, "loss": 0.679, "rewards/accuracies": 0.5, "rewards/chosen": -5.639653205871582, "rewards/margins": 5.192273139953613, "rewards/rejected": -10.831926345825195, "step": 9143 }, { "epoch": 1.42, "learning_rate": 7.4407541884952545e-06, "logits/chosen": -2.9420435428619385, "logits/rejected": -2.527738571166992, "logps/chosen": -231.00064086914062, "logps/rejected": -206.64109802246094, "loss": 0.0906, "rewards/accuracies": 1.0, "rewards/chosen": -1.7328553199768066, "rewards/margins": 5.061773300170898, "rewards/rejected": -6.794629096984863, "step": 9144 }, { "epoch": 1.42, "learning_rate": 7.440020747964106e-06, "logits/chosen": -2.417304039001465, "logits/rejected": -3.1538422107696533, "logps/chosen": -261.3864440917969, "logps/rejected": -409.1854553222656, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.7594738006591797, "rewards/margins": 6.563895225524902, "rewards/rejected": -8.323369026184082, "step": 9145 }, { "epoch": 1.42, "learning_rate": 7.439287307432958e-06, "logits/chosen": -2.637453317642212, "logits/rejected": -2.803020715713501, "logps/chosen": -117.64909362792969, "logps/rejected": -227.67385864257812, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -2.3857147693634033, "rewards/margins": 4.096240043640137, "rewards/rejected": -6.481955051422119, "step": 9146 }, { "epoch": 1.42, "learning_rate": 7.43855386690181e-06, "logits/chosen": -2.2328810691833496, "logits/rejected": -2.8670225143432617, "logps/chosen": -67.94270324707031, "logps/rejected": -256.71502685546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.285706639289856, "rewards/margins": 9.80698299407959, "rewards/rejected": -11.092689514160156, "step": 9147 }, { "epoch": 1.42, "learning_rate": 7.437820426370662e-06, "logits/chosen": -2.660663604736328, "logits/rejected": -2.3295817375183105, "logps/chosen": -795.4126586914062, "logps/rejected": -584.0045776367188, "loss": 0.1298, "rewards/accuracies": 1.0, "rewards/chosen": -2.929858684539795, "rewards/margins": 6.087513446807861, "rewards/rejected": -9.017372131347656, "step": 9148 }, { "epoch": 1.42, "learning_rate": 7.437086985839514e-06, "logits/chosen": -2.8803558349609375, "logits/rejected": -2.888596296310425, "logps/chosen": -620.4599609375, "logps/rejected": -430.1019592285156, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.6918351650238037, "rewards/margins": 5.137001991271973, "rewards/rejected": -7.8288373947143555, "step": 9149 }, { "epoch": 1.42, "learning_rate": 7.436353545308366e-06, "logits/chosen": -2.5099077224731445, "logits/rejected": -2.6589033603668213, "logps/chosen": -312.0154724121094, "logps/rejected": -341.1318359375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.6002755165100098, "rewards/margins": 5.146356582641602, "rewards/rejected": -8.746631622314453, "step": 9150 }, { "epoch": 1.42, "learning_rate": 7.435620104777219e-06, "logits/chosen": -3.117016315460205, "logits/rejected": -2.844877243041992, "logps/chosen": -615.8565673828125, "logps/rejected": -745.8475341796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.328784227371216, "rewards/margins": 8.242939949035645, "rewards/rejected": -11.571723937988281, "step": 9151 }, { "epoch": 1.42, "learning_rate": 7.434886664246071e-06, "logits/chosen": -2.5666799545288086, "logits/rejected": -3.173389196395874, "logps/chosen": -51.38713073730469, "logps/rejected": -328.73095703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.7234970331192017, "rewards/margins": 7.2056884765625, "rewards/rejected": -7.92918586730957, "step": 9152 }, { "epoch": 1.42, "learning_rate": 7.434153223714923e-06, "logits/chosen": -2.625094175338745, "logits/rejected": -3.231847047805786, "logps/chosen": -25.18218421936035, "logps/rejected": -157.59628295898438, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -0.9044398665428162, "rewards/margins": 6.647153854370117, "rewards/rejected": -7.551593780517578, "step": 9153 }, { "epoch": 1.42, "learning_rate": 7.433419783183776e-06, "logits/chosen": -1.7920492887496948, "logits/rejected": -2.6863956451416016, "logps/chosen": -115.90406799316406, "logps/rejected": -208.38404846191406, "loss": 0.1291, "rewards/accuracies": 1.0, "rewards/chosen": -3.8327229022979736, "rewards/margins": 2.0656723976135254, "rewards/rejected": -5.898395538330078, "step": 9154 }, { "epoch": 1.42, "learning_rate": 7.432686342652628e-06, "logits/chosen": -2.995651960372925, "logits/rejected": -2.538853645324707, "logps/chosen": -377.97906494140625, "logps/rejected": -401.3927001953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.8293571472167969, "rewards/margins": 9.723998069763184, "rewards/rejected": -10.55335521697998, "step": 9155 }, { "epoch": 1.42, "learning_rate": 7.4319529021214796e-06, "logits/chosen": -1.7318350076675415, "logits/rejected": -2.80997633934021, "logps/chosen": -28.41500473022461, "logps/rejected": -352.9859924316406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.3039301633834839, "rewards/margins": 7.645723819732666, "rewards/rejected": -8.949653625488281, "step": 9156 }, { "epoch": 1.42, "learning_rate": 7.4312194615903314e-06, "logits/chosen": -2.5842363834381104, "logits/rejected": -2.8770954608917236, "logps/chosen": -217.04177856445312, "logps/rejected": -328.974365234375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -2.3368239402770996, "rewards/margins": 6.48571252822876, "rewards/rejected": -8.82253646850586, "step": 9157 }, { "epoch": 1.42, "learning_rate": 7.430486021059183e-06, "logits/chosen": -2.9096083641052246, "logits/rejected": -2.5959742069244385, "logps/chosen": -198.3984375, "logps/rejected": -253.21556091308594, "loss": 1.6502, "rewards/accuracies": 0.5, "rewards/chosen": -3.8382315635681152, "rewards/margins": 1.7965025901794434, "rewards/rejected": -5.634734153747559, "step": 9158 }, { "epoch": 1.42, "learning_rate": 7.429752580528035e-06, "logits/chosen": -2.6681692600250244, "logits/rejected": -2.6189255714416504, "logps/chosen": -195.01852416992188, "logps/rejected": -255.51043701171875, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -2.97090220451355, "rewards/margins": 5.527881145477295, "rewards/rejected": -8.498783111572266, "step": 9159 }, { "epoch": 1.42, "learning_rate": 7.429019139996889e-06, "logits/chosen": -2.7384324073791504, "logits/rejected": -2.680166244506836, "logps/chosen": -492.03094482421875, "logps/rejected": -698.2969970703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5205986499786377, "rewards/margins": 10.265511512756348, "rewards/rejected": -11.786109924316406, "step": 9160 }, { "epoch": 1.42, "learning_rate": 7.428285699465741e-06, "logits/chosen": -2.1304664611816406, "logits/rejected": -3.1099984645843506, "logps/chosen": -49.683937072753906, "logps/rejected": -354.5861511230469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9756149053573608, "rewards/margins": 8.726943016052246, "rewards/rejected": -9.702557563781738, "step": 9161 }, { "epoch": 1.42, "learning_rate": 7.4275522589345925e-06, "logits/chosen": -1.9840080738067627, "logits/rejected": -2.8723485469818115, "logps/chosen": -302.16558837890625, "logps/rejected": -389.6779479980469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.4034652709960938, "rewards/margins": 9.157146453857422, "rewards/rejected": -11.560611724853516, "step": 9162 }, { "epoch": 1.43, "learning_rate": 7.426818818403444e-06, "logits/chosen": -1.3826018571853638, "logits/rejected": -2.59080171585083, "logps/chosen": -242.00262451171875, "logps/rejected": -559.3026123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8214573860168457, "rewards/margins": 11.259742736816406, "rewards/rejected": -14.081199645996094, "step": 9163 }, { "epoch": 1.43, "learning_rate": 7.426085377872296e-06, "logits/chosen": -2.8539435863494873, "logits/rejected": -1.7583012580871582, "logps/chosen": -311.84539794921875, "logps/rejected": -170.3140869140625, "loss": 0.2148, "rewards/accuracies": 1.0, "rewards/chosen": -2.1144638061523438, "rewards/margins": 3.939521312713623, "rewards/rejected": -6.053985118865967, "step": 9164 }, { "epoch": 1.43, "learning_rate": 7.425351937341148e-06, "logits/chosen": -2.5294137001037598, "logits/rejected": -2.758925437927246, "logps/chosen": -215.24588012695312, "logps/rejected": -277.32318115234375, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -3.5028939247131348, "rewards/margins": 4.88710880279541, "rewards/rejected": -8.390002250671387, "step": 9165 }, { "epoch": 1.43, "learning_rate": 7.42461849681e-06, "logits/chosen": -2.076841115951538, "logits/rejected": -2.6462488174438477, "logps/chosen": -63.850704193115234, "logps/rejected": -292.3620300292969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5338096618652344, "rewards/margins": 9.715580940246582, "rewards/rejected": -11.249390602111816, "step": 9166 }, { "epoch": 1.43, "learning_rate": 7.423885056278852e-06, "logits/chosen": -2.7363088130950928, "logits/rejected": -3.0576670169830322, "logps/chosen": -159.6602325439453, "logps/rejected": -155.3235321044922, "loss": 1.3933, "rewards/accuracies": 0.5, "rewards/chosen": -4.420094013214111, "rewards/margins": 2.0686593055725098, "rewards/rejected": -6.488753318786621, "step": 9167 }, { "epoch": 1.43, "learning_rate": 7.423151615747704e-06, "logits/chosen": -2.6847472190856934, "logits/rejected": -1.9876545667648315, "logps/chosen": -116.23965454101562, "logps/rejected": -305.4788818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.947403907775879, "rewards/margins": 11.515754699707031, "rewards/rejected": -13.463159561157227, "step": 9168 }, { "epoch": 1.43, "learning_rate": 7.422418175216557e-06, "logits/chosen": -2.5904171466827393, "logits/rejected": -2.112621545791626, "logps/chosen": -156.67291259765625, "logps/rejected": -281.63299560546875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.7590007781982422, "rewards/margins": 6.758542537689209, "rewards/rejected": -8.51754379272461, "step": 9169 }, { "epoch": 1.43, "learning_rate": 7.421684734685409e-06, "logits/chosen": -2.8297133445739746, "logits/rejected": -2.5816078186035156, "logps/chosen": -112.6003189086914, "logps/rejected": -330.21661376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5605126619338989, "rewards/margins": 11.90658187866211, "rewards/rejected": -12.467094421386719, "step": 9170 }, { "epoch": 1.43, "learning_rate": 7.420951294154262e-06, "logits/chosen": -2.418818712234497, "logits/rejected": -2.7384772300720215, "logps/chosen": -115.42695617675781, "logps/rejected": -316.7061767578125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.1250224113464355, "rewards/margins": 6.0467987060546875, "rewards/rejected": -9.171821594238281, "step": 9171 }, { "epoch": 1.43, "learning_rate": 7.420217853623114e-06, "logits/chosen": -2.7912161350250244, "logits/rejected": -2.7335708141326904, "logps/chosen": -334.27532958984375, "logps/rejected": -352.35260009765625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.107215166091919, "rewards/margins": 8.98814868927002, "rewards/rejected": -10.09536361694336, "step": 9172 }, { "epoch": 1.43, "learning_rate": 7.419484413091966e-06, "logits/chosen": -1.8430347442626953, "logits/rejected": -2.8082783222198486, "logps/chosen": -39.96155548095703, "logps/rejected": -206.36898803710938, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -1.8845765590667725, "rewards/margins": 4.044775009155273, "rewards/rejected": -5.929351806640625, "step": 9173 }, { "epoch": 1.43, "learning_rate": 7.418750972560818e-06, "logits/chosen": -1.5012178421020508, "logits/rejected": -2.0628509521484375, "logps/chosen": -309.67205810546875, "logps/rejected": -304.80767822265625, "loss": 2.2805, "rewards/accuracies": 0.5, "rewards/chosen": -5.6031036376953125, "rewards/margins": 1.7001445293426514, "rewards/rejected": -7.303248405456543, "step": 9174 }, { "epoch": 1.43, "learning_rate": 7.4180175320296695e-06, "logits/chosen": -2.8266608715057373, "logits/rejected": -2.922459602355957, "logps/chosen": -356.80230712890625, "logps/rejected": -422.1832275390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.3842215538024902, "rewards/margins": 8.619331359863281, "rewards/rejected": -12.00355339050293, "step": 9175 }, { "epoch": 1.43, "learning_rate": 7.417284091498521e-06, "logits/chosen": -2.7516629695892334, "logits/rejected": -2.9010989665985107, "logps/chosen": -73.20890808105469, "logps/rejected": -285.7293701171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.829240322113037, "rewards/margins": 8.40697193145752, "rewards/rejected": -10.236211776733398, "step": 9176 }, { "epoch": 1.43, "learning_rate": 7.416550650967373e-06, "logits/chosen": -2.894045829772949, "logits/rejected": -2.6227543354034424, "logps/chosen": -133.91253662109375, "logps/rejected": -225.4251251220703, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.894247055053711, "rewards/margins": 6.406522274017334, "rewards/rejected": -8.300769805908203, "step": 9177 }, { "epoch": 1.43, "learning_rate": 7.415817210436227e-06, "logits/chosen": -1.9287272691726685, "logits/rejected": -2.6419758796691895, "logps/chosen": -133.9559783935547, "logps/rejected": -278.4856262207031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.587033987045288, "rewards/margins": 8.153097152709961, "rewards/rejected": -9.740131378173828, "step": 9178 }, { "epoch": 1.43, "learning_rate": 7.415083769905079e-06, "logits/chosen": -1.6692787408828735, "logits/rejected": -2.7941033840179443, "logps/chosen": -59.212257385253906, "logps/rejected": -272.65191650390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.951270580291748, "rewards/margins": 9.24858283996582, "rewards/rejected": -10.199853897094727, "step": 9179 }, { "epoch": 1.43, "learning_rate": 7.4143503293739306e-06, "logits/chosen": -1.578879475593567, "logits/rejected": -2.7812516689300537, "logps/chosen": -252.13380432128906, "logps/rejected": -461.017333984375, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -2.1150269508361816, "rewards/margins": 6.970305442810059, "rewards/rejected": -9.085331916809082, "step": 9180 }, { "epoch": 1.43, "learning_rate": 7.4136168888427824e-06, "logits/chosen": -2.877878189086914, "logits/rejected": -3.1033072471618652, "logps/chosen": -193.266845703125, "logps/rejected": -315.4596862792969, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.56093168258667, "rewards/margins": 5.646625518798828, "rewards/rejected": -8.20755672454834, "step": 9181 }, { "epoch": 1.43, "learning_rate": 7.412883448311634e-06, "logits/chosen": -2.2670013904571533, "logits/rejected": -2.8399224281311035, "logps/chosen": -185.93934631347656, "logps/rejected": -388.57086181640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.2134246826171875, "rewards/margins": 8.02031421661377, "rewards/rejected": -11.233738899230957, "step": 9182 }, { "epoch": 1.43, "learning_rate": 7.412150007780486e-06, "logits/chosen": -1.883286952972412, "logits/rejected": -2.873434543609619, "logps/chosen": -137.78005981445312, "logps/rejected": -415.54241943359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.63342022895813, "rewards/margins": 10.056385040283203, "rewards/rejected": -12.689804077148438, "step": 9183 }, { "epoch": 1.43, "learning_rate": 7.411416567249338e-06, "logits/chosen": -2.962656021118164, "logits/rejected": -3.142880916595459, "logps/chosen": -39.22918701171875, "logps/rejected": -290.9556884765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.7017319202423096, "rewards/margins": 9.624320983886719, "rewards/rejected": -10.32605266571045, "step": 9184 }, { "epoch": 1.43, "learning_rate": 7.41068312671819e-06, "logits/chosen": -3.037879705429077, "logits/rejected": -3.1509294509887695, "logps/chosen": -429.33514404296875, "logps/rejected": -444.64569091796875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.0471696853637695, "rewards/margins": 9.060859680175781, "rewards/rejected": -11.108030319213867, "step": 9185 }, { "epoch": 1.43, "learning_rate": 7.409949686187043e-06, "logits/chosen": -2.782623052597046, "logits/rejected": -1.7702585458755493, "logps/chosen": -192.81063842773438, "logps/rejected": -195.95034790039062, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -1.5779138803482056, "rewards/margins": 6.797226428985596, "rewards/rejected": -8.375140190124512, "step": 9186 }, { "epoch": 1.43, "learning_rate": 7.409216245655895e-06, "logits/chosen": -2.2880449295043945, "logits/rejected": -2.3710575103759766, "logps/chosen": -119.04902648925781, "logps/rejected": -185.17578125, "loss": 0.8074, "rewards/accuracies": 0.5, "rewards/chosen": -3.5881567001342773, "rewards/margins": 4.8501691818237305, "rewards/rejected": -8.438325881958008, "step": 9187 }, { "epoch": 1.43, "learning_rate": 7.408482805124748e-06, "logits/chosen": -2.8030154705047607, "logits/rejected": -2.5282232761383057, "logps/chosen": -644.8157958984375, "logps/rejected": -408.85858154296875, "loss": 0.1161, "rewards/accuracies": 1.0, "rewards/chosen": -3.946347236633301, "rewards/margins": 4.441381454467773, "rewards/rejected": -8.387728691101074, "step": 9188 }, { "epoch": 1.43, "learning_rate": 7.4077493645936e-06, "logits/chosen": -2.8721258640289307, "logits/rejected": -2.765594720840454, "logps/chosen": -122.51815032958984, "logps/rejected": -74.550537109375, "loss": 1.225, "rewards/accuracies": 0.5, "rewards/chosen": -4.241678237915039, "rewards/margins": -0.737869143486023, "rewards/rejected": -3.5038089752197266, "step": 9189 }, { "epoch": 1.43, "learning_rate": 7.407015924062452e-06, "logits/chosen": -2.805588960647583, "logits/rejected": -2.335033416748047, "logps/chosen": -513.583740234375, "logps/rejected": -397.2793273925781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.9010682106018066, "rewards/margins": 8.763391494750977, "rewards/rejected": -12.664459228515625, "step": 9190 }, { "epoch": 1.43, "learning_rate": 7.406282483531304e-06, "logits/chosen": -1.6916438341140747, "logits/rejected": -3.024559497833252, "logps/chosen": -134.47390747070312, "logps/rejected": -323.9928894042969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8326454162597656, "rewards/margins": 8.435349464416504, "rewards/rejected": -9.26799488067627, "step": 9191 }, { "epoch": 1.43, "learning_rate": 7.405549043000156e-06, "logits/chosen": -2.426424026489258, "logits/rejected": -1.4566463232040405, "logps/chosen": -236.2483367919922, "logps/rejected": -90.92636108398438, "loss": 3.8224, "rewards/accuracies": 0.0, "rewards/chosen": -5.927990913391113, "rewards/margins": -3.7809877395629883, "rewards/rejected": -2.147002696990967, "step": 9192 }, { "epoch": 1.43, "learning_rate": 7.4048156024690075e-06, "logits/chosen": -2.371777296066284, "logits/rejected": -2.964362382888794, "logps/chosen": -42.64440155029297, "logps/rejected": -263.443359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.5085834264755249, "rewards/margins": 7.921682357788086, "rewards/rejected": -8.430265426635742, "step": 9193 }, { "epoch": 1.43, "learning_rate": 7.404082161937859e-06, "logits/chosen": -2.3287506103515625, "logits/rejected": -3.026357412338257, "logps/chosen": -69.38763427734375, "logps/rejected": -242.00814819335938, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.558672308921814, "rewards/margins": 8.24555492401123, "rewards/rejected": -9.804227828979492, "step": 9194 }, { "epoch": 1.43, "learning_rate": 7.403348721406711e-06, "logits/chosen": -2.561831474304199, "logits/rejected": -2.926008939743042, "logps/chosen": -554.751953125, "logps/rejected": -450.02593994140625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.585034370422363, "rewards/margins": 6.067766189575195, "rewards/rejected": -10.652800559997559, "step": 9195 }, { "epoch": 1.43, "learning_rate": 7.402615280875565e-06, "logits/chosen": -2.4950690269470215, "logits/rejected": -2.8673064708709717, "logps/chosen": -33.54667663574219, "logps/rejected": -210.529052734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.6751368045806885, "rewards/margins": 8.321134567260742, "rewards/rejected": -8.996271133422852, "step": 9196 }, { "epoch": 1.43, "learning_rate": 7.401881840344417e-06, "logits/chosen": -2.8333444595336914, "logits/rejected": -1.3798906803131104, "logps/chosen": -334.9472961425781, "logps/rejected": -234.8443145751953, "loss": 2.4612, "rewards/accuracies": 0.0, "rewards/chosen": -7.636307716369629, "rewards/margins": -2.1489884853363037, "rewards/rejected": -5.487318992614746, "step": 9197 }, { "epoch": 1.43, "learning_rate": 7.401148399813269e-06, "logits/chosen": -2.2775676250457764, "logits/rejected": -2.799718141555786, "logps/chosen": -32.3021240234375, "logps/rejected": -272.25152587890625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.0467880964279175, "rewards/margins": 7.681744575500488, "rewards/rejected": -8.728532791137695, "step": 9198 }, { "epoch": 1.43, "learning_rate": 7.4004149592821205e-06, "logits/chosen": -2.6326680183410645, "logits/rejected": -2.9376003742218018, "logps/chosen": -259.32867431640625, "logps/rejected": -353.7257385253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0845947265625, "rewards/margins": 9.930946350097656, "rewards/rejected": -11.015541076660156, "step": 9199 }, { "epoch": 1.43, "learning_rate": 7.399681518750972e-06, "logits/chosen": -2.1376404762268066, "logits/rejected": -2.675393581390381, "logps/chosen": -111.37124633789062, "logps/rejected": -294.5689697265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -1.9988620281219482, "rewards/margins": 6.558180332183838, "rewards/rejected": -8.557042121887207, "step": 9200 }, { "epoch": 1.43, "learning_rate": 7.398948078219824e-06, "logits/chosen": -3.120328187942505, "logits/rejected": -2.6461427211761475, "logps/chosen": -211.76268005371094, "logps/rejected": -281.85211181640625, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -2.4694912433624268, "rewards/margins": 5.9462103843688965, "rewards/rejected": -8.415700912475586, "step": 9201 }, { "epoch": 1.43, "learning_rate": 7.398214637688676e-06, "logits/chosen": -2.5671496391296387, "logits/rejected": -3.2137625217437744, "logps/chosen": -88.75676727294922, "logps/rejected": -299.943359375, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -2.826591730117798, "rewards/margins": 4.867060661315918, "rewards/rejected": -7.693652153015137, "step": 9202 }, { "epoch": 1.43, "learning_rate": 7.397481197157529e-06, "logits/chosen": -2.896731376647949, "logits/rejected": -2.6038739681243896, "logps/chosen": -261.9904479980469, "logps/rejected": -213.34555053710938, "loss": 0.2778, "rewards/accuracies": 1.0, "rewards/chosen": -4.05043888092041, "rewards/margins": 1.2376118898391724, "rewards/rejected": -5.288050651550293, "step": 9203 }, { "epoch": 1.43, "learning_rate": 7.396747756626381e-06, "logits/chosen": -2.7035787105560303, "logits/rejected": -3.099174976348877, "logps/chosen": -401.3485107421875, "logps/rejected": -423.4662170410156, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5557117462158203, "rewards/margins": 7.334710597991943, "rewards/rejected": -8.890422821044922, "step": 9204 }, { "epoch": 1.43, "learning_rate": 7.396014316095234e-06, "logits/chosen": -2.9430527687072754, "logits/rejected": -2.9995646476745605, "logps/chosen": -69.11608123779297, "logps/rejected": -161.63018798828125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -1.4344725608825684, "rewards/margins": 5.592510223388672, "rewards/rejected": -7.026982307434082, "step": 9205 }, { "epoch": 1.43, "learning_rate": 7.395280875564086e-06, "logits/chosen": -1.6284971237182617, "logits/rejected": -2.351426362991333, "logps/chosen": -543.1370849609375, "logps/rejected": -551.9471435546875, "loss": 0.0788, "rewards/accuracies": 1.0, "rewards/chosen": -5.236783027648926, "rewards/margins": 5.475221157073975, "rewards/rejected": -10.712003707885742, "step": 9206 }, { "epoch": 1.43, "learning_rate": 7.394547435032938e-06, "logits/chosen": -2.8041775226593018, "logits/rejected": -2.816556930541992, "logps/chosen": -189.98324584960938, "logps/rejected": -261.7505798339844, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.4235353469848633, "rewards/margins": 5.636273384094238, "rewards/rejected": -8.059808731079102, "step": 9207 }, { "epoch": 1.43, "learning_rate": 7.39381399450179e-06, "logits/chosen": -2.900108575820923, "logits/rejected": -2.819019079208374, "logps/chosen": -679.6178588867188, "logps/rejected": -647.1114501953125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -4.900434970855713, "rewards/margins": 4.892759799957275, "rewards/rejected": -9.793194770812988, "step": 9208 }, { "epoch": 1.43, "learning_rate": 7.393080553970642e-06, "logits/chosen": -2.663034677505493, "logits/rejected": -2.828066110610962, "logps/chosen": -693.817626953125, "logps/rejected": -604.6135864257812, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.3459488153457642, "rewards/margins": 8.283462524414062, "rewards/rejected": -9.629411697387695, "step": 9209 }, { "epoch": 1.43, "learning_rate": 7.392347113439494e-06, "logits/chosen": -1.1288325786590576, "logits/rejected": -2.4210143089294434, "logps/chosen": -164.50375366210938, "logps/rejected": -509.1827392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.515010356903076, "rewards/margins": 10.899486541748047, "rewards/rejected": -14.414497375488281, "step": 9210 }, { "epoch": 1.43, "learning_rate": 7.3916136729083456e-06, "logits/chosen": -2.7832283973693848, "logits/rejected": -2.4072046279907227, "logps/chosen": -517.909912109375, "logps/rejected": -520.92919921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.3015918731689453, "rewards/margins": 9.00546646118164, "rewards/rejected": -11.307058334350586, "step": 9211 }, { "epoch": 1.43, "learning_rate": 7.3908802323771974e-06, "logits/chosen": -2.4385952949523926, "logits/rejected": -1.6814898252487183, "logps/chosen": -182.76351928710938, "logps/rejected": -219.8096923828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.789366722106934, "rewards/margins": 7.1920928955078125, "rewards/rejected": -11.981459617614746, "step": 9212 }, { "epoch": 1.43, "learning_rate": 7.390146791846049e-06, "logits/chosen": -2.4549989700317383, "logits/rejected": -1.972182035446167, "logps/chosen": -465.2303466796875, "logps/rejected": -543.8221435546875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2779178619384766, "rewards/margins": 8.236659049987793, "rewards/rejected": -11.51457691192627, "step": 9213 }, { "epoch": 1.43, "learning_rate": 7.389413351314903e-06, "logits/chosen": -2.9485716819763184, "logits/rejected": -1.4405895471572876, "logps/chosen": -394.970947265625, "logps/rejected": -231.41839599609375, "loss": 2.4784, "rewards/accuracies": 0.5, "rewards/chosen": -6.108107089996338, "rewards/margins": -1.0163546800613403, "rewards/rejected": -5.091752052307129, "step": 9214 }, { "epoch": 1.43, "learning_rate": 7.388679910783755e-06, "logits/chosen": -2.7683968544006348, "logits/rejected": -2.7481391429901123, "logps/chosen": -229.27899169921875, "logps/rejected": -240.62850952148438, "loss": 0.1209, "rewards/accuracies": 1.0, "rewards/chosen": -2.8704514503479004, "rewards/margins": 4.989569664001465, "rewards/rejected": -7.860021114349365, "step": 9215 }, { "epoch": 1.43, "learning_rate": 7.387946470252607e-06, "logits/chosen": -2.839089870452881, "logits/rejected": -2.542074680328369, "logps/chosen": -376.72576904296875, "logps/rejected": -571.769287109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.572813034057617, "rewards/margins": 8.48158073425293, "rewards/rejected": -11.054393768310547, "step": 9216 }, { "epoch": 1.43, "learning_rate": 7.3872130297214585e-06, "logits/chosen": -2.1257598400115967, "logits/rejected": -2.6952545642852783, "logps/chosen": -181.88705444335938, "logps/rejected": -396.490478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4351097345352173, "rewards/margins": 11.44578742980957, "rewards/rejected": -12.880897521972656, "step": 9217 }, { "epoch": 1.43, "learning_rate": 7.38647958919031e-06, "logits/chosen": -1.9703351259231567, "logits/rejected": -3.082919120788574, "logps/chosen": -135.85504150390625, "logps/rejected": -503.46746826171875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.84967041015625, "rewards/margins": 6.856549263000488, "rewards/rejected": -9.706219673156738, "step": 9218 }, { "epoch": 1.43, "learning_rate": 7.385746148659162e-06, "logits/chosen": -2.721768856048584, "logits/rejected": -2.9474430084228516, "logps/chosen": -133.0486297607422, "logps/rejected": -230.2060546875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.2999004125595093, "rewards/margins": 5.389588356018066, "rewards/rejected": -5.689488410949707, "step": 9219 }, { "epoch": 1.43, "learning_rate": 7.385012708128015e-06, "logits/chosen": -1.8427090644836426, "logits/rejected": -3.053475856781006, "logps/chosen": -43.50974655151367, "logps/rejected": -249.5643310546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.9571166038513184, "rewards/margins": 6.553879737854004, "rewards/rejected": -7.5109968185424805, "step": 9220 }, { "epoch": 1.43, "learning_rate": 7.384279267596867e-06, "logits/chosen": -2.6126978397369385, "logits/rejected": -2.9017083644866943, "logps/chosen": -333.99456787109375, "logps/rejected": -278.952392578125, "loss": 1.2527, "rewards/accuracies": 0.5, "rewards/chosen": -4.4718732833862305, "rewards/margins": 3.2314505577087402, "rewards/rejected": -7.703324317932129, "step": 9221 }, { "epoch": 1.43, "learning_rate": 7.383545827065719e-06, "logits/chosen": -2.1766529083251953, "logits/rejected": -2.749019145965576, "logps/chosen": -260.2360534667969, "logps/rejected": -514.8319091796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.022517681121826, "rewards/margins": 8.882255554199219, "rewards/rejected": -12.904772758483887, "step": 9222 }, { "epoch": 1.43, "learning_rate": 7.382812386534572e-06, "logits/chosen": -2.951953887939453, "logits/rejected": -2.673001289367676, "logps/chosen": -636.6221313476562, "logps/rejected": -562.5675659179688, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.433997392654419, "rewards/margins": 6.9174299240112305, "rewards/rejected": -10.35142707824707, "step": 9223 }, { "epoch": 1.43, "learning_rate": 7.382078946003424e-06, "logits/chosen": -2.5843706130981445, "logits/rejected": -2.939650774002075, "logps/chosen": -115.32307434082031, "logps/rejected": -281.20947265625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.2056093215942383, "rewards/margins": 6.515501022338867, "rewards/rejected": -8.721110343933105, "step": 9224 }, { "epoch": 1.43, "learning_rate": 7.381345505472276e-06, "logits/chosen": -2.856706380844116, "logits/rejected": -3.03397798538208, "logps/chosen": -603.5428466796875, "logps/rejected": -498.406982421875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -3.240410566329956, "rewards/margins": 5.5008463859558105, "rewards/rejected": -8.741256713867188, "step": 9225 }, { "epoch": 1.43, "learning_rate": 7.380612064941128e-06, "logits/chosen": -2.551194667816162, "logits/rejected": -2.6220290660858154, "logps/chosen": -325.3346862792969, "logps/rejected": -300.25830078125, "loss": 0.4823, "rewards/accuracies": 0.5, "rewards/chosen": -5.073404312133789, "rewards/margins": 1.8406596183776855, "rewards/rejected": -6.914063453674316, "step": 9226 }, { "epoch": 1.43, "learning_rate": 7.37987862440998e-06, "logits/chosen": -2.903909683227539, "logits/rejected": -2.940959930419922, "logps/chosen": -176.03622436523438, "logps/rejected": -296.0090026855469, "loss": 0.9343, "rewards/accuracies": 0.5, "rewards/chosen": -5.038506507873535, "rewards/margins": 4.371534824371338, "rewards/rejected": -9.410041809082031, "step": 9227 }, { "epoch": 1.44, "learning_rate": 7.379145183878832e-06, "logits/chosen": -1.2921524047851562, "logits/rejected": -2.8609752655029297, "logps/chosen": -129.4488525390625, "logps/rejected": -516.2501220703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.9798920154571533, "rewards/margins": 7.721927642822266, "rewards/rejected": -9.70181941986084, "step": 9228 }, { "epoch": 1.44, "learning_rate": 7.378411743347684e-06, "logits/chosen": -2.8522450923919678, "logits/rejected": -1.695229411125183, "logps/chosen": -221.88015747070312, "logps/rejected": -127.79035186767578, "loss": 0.8399, "rewards/accuracies": 0.5, "rewards/chosen": -3.222522258758545, "rewards/margins": 3.2921721935272217, "rewards/rejected": -6.514694690704346, "step": 9229 }, { "epoch": 1.44, "learning_rate": 7.3776783028165355e-06, "logits/chosen": -3.0782084465026855, "logits/rejected": -2.6509273052215576, "logps/chosen": -152.99169921875, "logps/rejected": -171.00784301757812, "loss": 4.1894, "rewards/accuracies": 0.5, "rewards/chosen": -4.900291919708252, "rewards/margins": -0.6131420135498047, "rewards/rejected": -4.287149906158447, "step": 9230 }, { "epoch": 1.44, "learning_rate": 7.376944862285389e-06, "logits/chosen": -2.6195743083953857, "logits/rejected": -2.9477744102478027, "logps/chosen": -54.55870819091797, "logps/rejected": -209.43614196777344, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -3.2158920764923096, "rewards/margins": 7.003968238830566, "rewards/rejected": -10.219860076904297, "step": 9231 }, { "epoch": 1.44, "learning_rate": 7.376211421754241e-06, "logits/chosen": -3.0814967155456543, "logits/rejected": -2.007861852645874, "logps/chosen": -221.62242126464844, "logps/rejected": -352.08642578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.2914271354675293, "rewards/margins": 8.895317077636719, "rewards/rejected": -9.18674373626709, "step": 9232 }, { "epoch": 1.44, "learning_rate": 7.375477981223093e-06, "logits/chosen": -1.5384023189544678, "logits/rejected": -2.762651205062866, "logps/chosen": -89.79032897949219, "logps/rejected": -290.82647705078125, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -3.524311065673828, "rewards/margins": 4.51808500289917, "rewards/rejected": -8.04239559173584, "step": 9233 }, { "epoch": 1.44, "learning_rate": 7.374744540691945e-06, "logits/chosen": -2.4744436740875244, "logits/rejected": -2.5888404846191406, "logps/chosen": -164.35043334960938, "logps/rejected": -150.60671997070312, "loss": 1.0761, "rewards/accuracies": 0.5, "rewards/chosen": -2.4040260314941406, "rewards/margins": 2.3470511436462402, "rewards/rejected": -4.751077175140381, "step": 9234 }, { "epoch": 1.44, "learning_rate": 7.3740111001607966e-06, "logits/chosen": -2.1410529613494873, "logits/rejected": -2.5327181816101074, "logps/chosen": -101.23274993896484, "logps/rejected": -322.853515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.8818752765655518, "rewards/margins": 8.005687713623047, "rewards/rejected": -8.887563705444336, "step": 9235 }, { "epoch": 1.44, "learning_rate": 7.3732776596296485e-06, "logits/chosen": -2.719949245452881, "logits/rejected": -2.64643931388855, "logps/chosen": -149.5972900390625, "logps/rejected": -240.77305603027344, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -2.550718307495117, "rewards/margins": 6.214484214782715, "rewards/rejected": -8.765203475952148, "step": 9236 }, { "epoch": 1.44, "learning_rate": 7.372544219098501e-06, "logits/chosen": -2.0245370864868164, "logits/rejected": -2.359966278076172, "logps/chosen": -394.7117004394531, "logps/rejected": -435.784423828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5740699768066406, "rewards/margins": 11.143448829650879, "rewards/rejected": -12.71751880645752, "step": 9237 }, { "epoch": 1.44, "learning_rate": 7.371810778567353e-06, "logits/chosen": -2.489513397216797, "logits/rejected": -2.8024771213531494, "logps/chosen": -101.41088104248047, "logps/rejected": -289.5718078613281, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.6265455484390259, "rewards/margins": 7.017058849334717, "rewards/rejected": -8.643604278564453, "step": 9238 }, { "epoch": 1.44, "learning_rate": 7.371077338036205e-06, "logits/chosen": -2.706777334213257, "logits/rejected": -1.6766623258590698, "logps/chosen": -465.49835205078125, "logps/rejected": -385.6959228515625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -2.2209420204162598, "rewards/margins": 7.778537750244141, "rewards/rejected": -9.999479293823242, "step": 9239 }, { "epoch": 1.44, "learning_rate": 7.3703438975050585e-06, "logits/chosen": -2.7668204307556152, "logits/rejected": -2.6815693378448486, "logps/chosen": -168.53652954101562, "logps/rejected": -346.3239440917969, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -1.7992713451385498, "rewards/margins": 6.897931098937988, "rewards/rejected": -8.697202682495117, "step": 9240 }, { "epoch": 1.44, "learning_rate": 7.36961045697391e-06, "logits/chosen": -2.7786478996276855, "logits/rejected": -2.6368510723114014, "logps/chosen": -110.31245422363281, "logps/rejected": -182.06842041015625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -0.9777923822402954, "rewards/margins": 7.033348560333252, "rewards/rejected": -8.011140823364258, "step": 9241 }, { "epoch": 1.44, "learning_rate": 7.368877016442762e-06, "logits/chosen": -3.0226008892059326, "logits/rejected": -2.5565474033355713, "logps/chosen": -406.64874267578125, "logps/rejected": -288.2590026855469, "loss": 1.8662, "rewards/accuracies": 0.5, "rewards/chosen": -2.9035279750823975, "rewards/margins": 5.280723571777344, "rewards/rejected": -8.18425178527832, "step": 9242 }, { "epoch": 1.44, "learning_rate": 7.368143575911614e-06, "logits/chosen": -2.6424827575683594, "logits/rejected": -2.8838577270507812, "logps/chosen": -83.96760559082031, "logps/rejected": -177.91543579101562, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -1.1431901454925537, "rewards/margins": 5.43971061706543, "rewards/rejected": -6.5829010009765625, "step": 9243 }, { "epoch": 1.44, "learning_rate": 7.367410135380466e-06, "logits/chosen": -2.657663583755493, "logits/rejected": -2.9120993614196777, "logps/chosen": -85.33929443359375, "logps/rejected": -256.0522155761719, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.3151121139526367, "rewards/margins": 8.496977806091309, "rewards/rejected": -11.812089920043945, "step": 9244 }, { "epoch": 1.44, "learning_rate": 7.366676694849318e-06, "logits/chosen": -2.836142063140869, "logits/rejected": -2.473311185836792, "logps/chosen": -187.79864501953125, "logps/rejected": -296.5382385253906, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -3.2340846061706543, "rewards/margins": 4.59312105178833, "rewards/rejected": -7.827205657958984, "step": 9245 }, { "epoch": 1.44, "learning_rate": 7.36594325431817e-06, "logits/chosen": -1.7904614210128784, "logits/rejected": -2.2406115531921387, "logps/chosen": -387.9359130859375, "logps/rejected": -389.43115234375, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": -4.284698009490967, "rewards/margins": 6.52724552154541, "rewards/rejected": -10.811943054199219, "step": 9246 }, { "epoch": 1.44, "learning_rate": 7.365209813787022e-06, "logits/chosen": -1.755322813987732, "logits/rejected": -2.45517897605896, "logps/chosen": -339.4959716796875, "logps/rejected": -489.57232666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5329596996307373, "rewards/margins": 10.770374298095703, "rewards/rejected": -14.30333423614502, "step": 9247 }, { "epoch": 1.44, "learning_rate": 7.3644763732558735e-06, "logits/chosen": -2.6550183296203613, "logits/rejected": -2.777899742126465, "logps/chosen": -71.71305847167969, "logps/rejected": -258.0919189453125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.8459675312042236, "rewards/margins": 7.616368293762207, "rewards/rejected": -9.462335586547852, "step": 9248 }, { "epoch": 1.44, "learning_rate": 7.363742932724727e-06, "logits/chosen": -0.7951825261116028, "logits/rejected": -1.3996976613998413, "logps/chosen": -134.19052124023438, "logps/rejected": -251.8866424560547, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.3975286483764648, "rewards/margins": 7.167268753051758, "rewards/rejected": -8.564797401428223, "step": 9249 }, { "epoch": 1.44, "learning_rate": 7.363009492193579e-06, "logits/chosen": -2.2271552085876465, "logits/rejected": -2.7882473468780518, "logps/chosen": -537.6934814453125, "logps/rejected": -576.0235595703125, "loss": 0.0216, "rewards/accuracies": 1.0, "rewards/chosen": -2.1759591102600098, "rewards/margins": 4.174718379974365, "rewards/rejected": -6.350677490234375, "step": 9250 }, { "epoch": 1.44, "learning_rate": 7.362276051662431e-06, "logits/chosen": -1.5938138961791992, "logits/rejected": -3.040686845779419, "logps/chosen": -50.9259033203125, "logps/rejected": -390.5794677734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.4292032718658447, "rewards/margins": 7.364019393920898, "rewards/rejected": -8.793222427368164, "step": 9251 }, { "epoch": 1.44, "learning_rate": 7.361542611131283e-06, "logits/chosen": -2.2856101989746094, "logits/rejected": -2.9720234870910645, "logps/chosen": -96.55628204345703, "logps/rejected": -390.76568603515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.256218671798706, "rewards/margins": 8.117964744567871, "rewards/rejected": -9.374183654785156, "step": 9252 }, { "epoch": 1.44, "learning_rate": 7.360809170600135e-06, "logits/chosen": -2.38674259185791, "logits/rejected": -3.0616815090179443, "logps/chosen": -192.143798828125, "logps/rejected": -520.6619873046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.6632471084594727, "rewards/margins": 7.133453369140625, "rewards/rejected": -10.796699523925781, "step": 9253 }, { "epoch": 1.44, "learning_rate": 7.360075730068987e-06, "logits/chosen": -3.290776491165161, "logits/rejected": -3.2164146900177, "logps/chosen": -151.64845275878906, "logps/rejected": -182.98855590820312, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.8053150177001953, "rewards/margins": 5.853184700012207, "rewards/rejected": -8.658499717712402, "step": 9254 }, { "epoch": 1.44, "learning_rate": 7.359342289537839e-06, "logits/chosen": -2.5203869342803955, "logits/rejected": -3.3665528297424316, "logps/chosen": -124.1654052734375, "logps/rejected": -316.52490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7087749242782593, "rewards/margins": 10.706863403320312, "rewards/rejected": -11.41563892364502, "step": 9255 }, { "epoch": 1.44, "learning_rate": 7.358608849006691e-06, "logits/chosen": -3.048211097717285, "logits/rejected": -3.0142533779144287, "logps/chosen": -541.1812744140625, "logps/rejected": -503.4803161621094, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -1.2786712646484375, "rewards/margins": 7.2165961265563965, "rewards/rejected": -8.495267868041992, "step": 9256 }, { "epoch": 1.44, "learning_rate": 7.357875408475543e-06, "logits/chosen": -2.744466781616211, "logits/rejected": -2.9814631938934326, "logps/chosen": -546.2330322265625, "logps/rejected": -505.17327880859375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.6858139038085938, "rewards/margins": 6.124695301055908, "rewards/rejected": -9.810508728027344, "step": 9257 }, { "epoch": 1.44, "learning_rate": 7.3571419679443965e-06, "logits/chosen": -2.4751975536346436, "logits/rejected": -2.681650161743164, "logps/chosen": -76.23095703125, "logps/rejected": -306.9268798828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.5215752124786377, "rewards/margins": 7.75261116027832, "rewards/rejected": -10.274186134338379, "step": 9258 }, { "epoch": 1.44, "learning_rate": 7.3564085274132484e-06, "logits/chosen": -2.9353528022766113, "logits/rejected": -2.793515920639038, "logps/chosen": -579.72021484375, "logps/rejected": -558.8983764648438, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -4.56477165222168, "rewards/margins": 5.462035179138184, "rewards/rejected": -10.026806831359863, "step": 9259 }, { "epoch": 1.44, "learning_rate": 7.3556750868821e-06, "logits/chosen": -2.904712200164795, "logits/rejected": -3.0403902530670166, "logps/chosen": -167.0692138671875, "logps/rejected": -304.33349609375, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -3.535052537918091, "rewards/margins": 4.857137680053711, "rewards/rejected": -8.392189979553223, "step": 9260 }, { "epoch": 1.44, "learning_rate": 7.354941646350952e-06, "logits/chosen": -2.9649364948272705, "logits/rejected": -2.2819316387176514, "logps/chosen": -346.2849426269531, "logps/rejected": -344.8363037109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.638047695159912, "rewards/margins": 9.399800300598145, "rewards/rejected": -12.037847518920898, "step": 9261 }, { "epoch": 1.44, "learning_rate": 7.354208205819804e-06, "logits/chosen": -2.71079683303833, "logits/rejected": -2.5767149925231934, "logps/chosen": -285.2435607910156, "logps/rejected": -339.057373046875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.2435531616210938, "rewards/margins": 6.094748497009277, "rewards/rejected": -9.338301658630371, "step": 9262 }, { "epoch": 1.44, "learning_rate": 7.353474765288656e-06, "logits/chosen": -2.3755428791046143, "logits/rejected": -2.9496333599090576, "logps/chosen": -87.19801330566406, "logps/rejected": -325.971923828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.327548027038574, "rewards/margins": 7.50992488861084, "rewards/rejected": -9.837472915649414, "step": 9263 }, { "epoch": 1.44, "learning_rate": 7.352741324757508e-06, "logits/chosen": -2.1567230224609375, "logits/rejected": -2.949695110321045, "logps/chosen": -175.929931640625, "logps/rejected": -402.9373779296875, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/chosen": -2.1551148891448975, "rewards/margins": 6.8159661293029785, "rewards/rejected": -8.971080780029297, "step": 9264 }, { "epoch": 1.44, "learning_rate": 7.35200788422636e-06, "logits/chosen": -2.5908584594726562, "logits/rejected": -1.5787606239318848, "logps/chosen": -109.55952453613281, "logps/rejected": -127.27776336669922, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -0.7294275760650635, "rewards/margins": 6.713907241821289, "rewards/rejected": -7.443334579467773, "step": 9265 }, { "epoch": 1.44, "learning_rate": 7.3512744436952116e-06, "logits/chosen": -2.6405930519104004, "logits/rejected": -2.872685194015503, "logps/chosen": -699.7235107421875, "logps/rejected": -687.6640014648438, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.923419237136841, "rewards/margins": 7.071290493011475, "rewards/rejected": -9.994709968566895, "step": 9266 }, { "epoch": 1.44, "learning_rate": 7.350541003164065e-06, "logits/chosen": -2.9250521659851074, "logits/rejected": -3.350227117538452, "logps/chosen": -26.334758758544922, "logps/rejected": -176.8768310546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.2368744611740112, "rewards/margins": 7.405904769897461, "rewards/rejected": -8.642779350280762, "step": 9267 }, { "epoch": 1.44, "learning_rate": 7.349807562632917e-06, "logits/chosen": -2.127851724624634, "logits/rejected": -3.1759655475616455, "logps/chosen": -282.33819580078125, "logps/rejected": -567.97412109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.7620391845703125, "rewards/margins": 9.004680633544922, "rewards/rejected": -12.766719818115234, "step": 9268 }, { "epoch": 1.44, "learning_rate": 7.349074122101769e-06, "logits/chosen": -2.835843086242676, "logits/rejected": -2.830664873123169, "logps/chosen": -111.29988098144531, "logps/rejected": -299.9325866699219, "loss": 1.4516, "rewards/accuracies": 0.5, "rewards/chosen": -3.3381731510162354, "rewards/margins": 3.0833539962768555, "rewards/rejected": -6.42152738571167, "step": 9269 }, { "epoch": 1.44, "learning_rate": 7.348340681570621e-06, "logits/chosen": -2.820478916168213, "logits/rejected": -2.6183836460113525, "logps/chosen": -370.8741455078125, "logps/rejected": -391.9351806640625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.119215965270996, "rewards/margins": 8.453178405761719, "rewards/rejected": -9.572393417358398, "step": 9270 }, { "epoch": 1.44, "learning_rate": 7.3476072410394735e-06, "logits/chosen": -2.676175832748413, "logits/rejected": -2.5666403770446777, "logps/chosen": -429.2369079589844, "logps/rejected": -402.690673828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.242470741271973, "rewards/margins": 8.172523498535156, "rewards/rejected": -12.414995193481445, "step": 9271 }, { "epoch": 1.44, "learning_rate": 7.346873800508325e-06, "logits/chosen": -2.7850277423858643, "logits/rejected": -2.952670097351074, "logps/chosen": -707.2401123046875, "logps/rejected": -670.6333618164062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.580065965652466, "rewards/margins": 9.716293334960938, "rewards/rejected": -12.29636001586914, "step": 9272 }, { "epoch": 1.44, "learning_rate": 7.346140359977177e-06, "logits/chosen": -2.5038559436798096, "logits/rejected": -2.9599506855010986, "logps/chosen": -105.02103424072266, "logps/rejected": -319.103515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.866987705230713, "rewards/margins": 9.182050704956055, "rewards/rejected": -11.04903793334961, "step": 9273 }, { "epoch": 1.44, "learning_rate": 7.345406919446029e-06, "logits/chosen": -2.967453956604004, "logits/rejected": -2.677901029586792, "logps/chosen": -287.8582763671875, "logps/rejected": -292.80499267578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.3208603858947754, "rewards/margins": 6.747461318969727, "rewards/rejected": -10.068321228027344, "step": 9274 }, { "epoch": 1.44, "learning_rate": 7.344673478914881e-06, "logits/chosen": -2.368345022201538, "logits/rejected": -3.144526243209839, "logps/chosen": -232.1652374267578, "logps/rejected": -542.1497192382812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.781229496002197, "rewards/margins": 9.543365478515625, "rewards/rejected": -14.324594497680664, "step": 9275 }, { "epoch": 1.44, "learning_rate": 7.343940038383735e-06, "logits/chosen": -1.893290638923645, "logits/rejected": -2.202354669570923, "logps/chosen": -340.5388488769531, "logps/rejected": -322.0011291503906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.4013229310512543, "rewards/margins": 7.403855323791504, "rewards/rejected": -7.805178165435791, "step": 9276 }, { "epoch": 1.44, "learning_rate": 7.3432065978525865e-06, "logits/chosen": -2.8732612133026123, "logits/rejected": -2.0391290187835693, "logps/chosen": -305.19842529296875, "logps/rejected": -262.2288513183594, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -1.8786125183105469, "rewards/margins": 7.0801849365234375, "rewards/rejected": -8.958797454833984, "step": 9277 }, { "epoch": 1.44, "learning_rate": 7.342473157321438e-06, "logits/chosen": -2.7433550357818604, "logits/rejected": -3.271350383758545, "logps/chosen": -36.07427978515625, "logps/rejected": -250.45278930664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2603600025177, "rewards/margins": 10.651265144348145, "rewards/rejected": -12.911624908447266, "step": 9278 }, { "epoch": 1.44, "learning_rate": 7.34173971679029e-06, "logits/chosen": -2.683560848236084, "logits/rejected": -3.107313394546509, "logps/chosen": -185.8489227294922, "logps/rejected": -423.5982971191406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2371628284454346, "rewards/margins": 10.795063018798828, "rewards/rejected": -13.032225608825684, "step": 9279 }, { "epoch": 1.44, "learning_rate": 7.341006276259142e-06, "logits/chosen": -2.936659574508667, "logits/rejected": -2.877406120300293, "logps/chosen": -279.20721435546875, "logps/rejected": -493.03802490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.057455539703369, "rewards/margins": 9.874554634094238, "rewards/rejected": -11.932010650634766, "step": 9280 }, { "epoch": 1.44, "learning_rate": 7.340272835727994e-06, "logits/chosen": -2.779100179672241, "logits/rejected": -1.6950771808624268, "logps/chosen": -262.489501953125, "logps/rejected": -108.12544250488281, "loss": 1.2877, "rewards/accuracies": 0.5, "rewards/chosen": -3.231754779815674, "rewards/margins": 0.7613673210144043, "rewards/rejected": -3.993122100830078, "step": 9281 }, { "epoch": 1.44, "learning_rate": 7.339539395196846e-06, "logits/chosen": -2.787454843521118, "logits/rejected": -2.004805326461792, "logps/chosen": -753.8280639648438, "logps/rejected": -451.00872802734375, "loss": 0.7451, "rewards/accuracies": 0.5, "rewards/chosen": -4.7576141357421875, "rewards/margins": 3.901923894882202, "rewards/rejected": -8.659538269042969, "step": 9282 }, { "epoch": 1.44, "learning_rate": 7.338805954665698e-06, "logits/chosen": -2.0555672645568848, "logits/rejected": -3.027550458908081, "logps/chosen": -74.70940399169922, "logps/rejected": -271.6053466796875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.819776177406311, "rewards/margins": 6.7140960693359375, "rewards/rejected": -7.533872604370117, "step": 9283 }, { "epoch": 1.44, "learning_rate": 7.33807251413455e-06, "logits/chosen": -2.3988914489746094, "logits/rejected": -3.035433053970337, "logps/chosen": -237.673095703125, "logps/rejected": -467.7907409667969, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -3.840559959411621, "rewards/margins": 4.373605728149414, "rewards/rejected": -8.214165687561035, "step": 9284 }, { "epoch": 1.44, "learning_rate": 7.337339073603403e-06, "logits/chosen": -2.2711846828460693, "logits/rejected": -2.638368606567383, "logps/chosen": -115.13501739501953, "logps/rejected": -360.7996826171875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -2.6467065811157227, "rewards/margins": 4.213932037353516, "rewards/rejected": -6.860638618469238, "step": 9285 }, { "epoch": 1.44, "learning_rate": 7.336605633072255e-06, "logits/chosen": -1.327028512954712, "logits/rejected": -2.882002592086792, "logps/chosen": -148.526611328125, "logps/rejected": -483.529296875, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": -2.4137775897979736, "rewards/margins": 2.8700008392333984, "rewards/rejected": -5.283778190612793, "step": 9286 }, { "epoch": 1.44, "learning_rate": 7.335872192541107e-06, "logits/chosen": -2.492037773132324, "logits/rejected": -2.7887117862701416, "logps/chosen": -65.21363830566406, "logps/rejected": -291.0360412597656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7847301959991455, "rewards/margins": 9.24288558959961, "rewards/rejected": -11.027615547180176, "step": 9287 }, { "epoch": 1.44, "learning_rate": 7.335138752009959e-06, "logits/chosen": -2.506981611251831, "logits/rejected": -2.919262647628784, "logps/chosen": -588.978515625, "logps/rejected": -575.1907958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5766968727111816, "rewards/margins": 12.700796127319336, "rewards/rejected": -14.277493476867676, "step": 9288 }, { "epoch": 1.44, "learning_rate": 7.3344053114788116e-06, "logits/chosen": -2.811950445175171, "logits/rejected": -3.0522303581237793, "logps/chosen": -74.04740142822266, "logps/rejected": -193.51742553710938, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.6556800603866577, "rewards/margins": 6.539346694946289, "rewards/rejected": -8.195026397705078, "step": 9289 }, { "epoch": 1.44, "learning_rate": 7.3336718709476634e-06, "logits/chosen": -2.6400306224823, "logits/rejected": -3.2266292572021484, "logps/chosen": -64.19599151611328, "logps/rejected": -436.699951171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.3076956272125244, "rewards/margins": 8.754615783691406, "rewards/rejected": -12.062311172485352, "step": 9290 }, { "epoch": 1.44, "learning_rate": 7.332938430416515e-06, "logits/chosen": -2.3468189239501953, "logits/rejected": -3.110599994659424, "logps/chosen": -155.00357055664062, "logps/rejected": -341.1803283691406, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.044346332550049, "rewards/margins": 7.632635116577148, "rewards/rejected": -9.676980972290039, "step": 9291 }, { "epoch": 1.45, "learning_rate": 7.332204989885367e-06, "logits/chosen": -2.341273784637451, "logits/rejected": -2.699838161468506, "logps/chosen": -338.59783935546875, "logps/rejected": -299.7369079589844, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -3.0972061157226562, "rewards/margins": 4.160859107971191, "rewards/rejected": -7.258065223693848, "step": 9292 }, { "epoch": 1.45, "learning_rate": 7.331471549354219e-06, "logits/chosen": -2.0787346363067627, "logits/rejected": -2.5856518745422363, "logps/chosen": -395.57049560546875, "logps/rejected": -608.479736328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.41602325439453125, "rewards/margins": 11.230287551879883, "rewards/rejected": -11.646310806274414, "step": 9293 }, { "epoch": 1.45, "learning_rate": 7.330738108823073e-06, "logits/chosen": -2.716909646987915, "logits/rejected": -2.775923490524292, "logps/chosen": -84.23590087890625, "logps/rejected": -138.27947998046875, "loss": 0.1163, "rewards/accuracies": 1.0, "rewards/chosen": -4.057100296020508, "rewards/margins": 2.424577236175537, "rewards/rejected": -6.481677055358887, "step": 9294 }, { "epoch": 1.45, "learning_rate": 7.3300046682919245e-06, "logits/chosen": -1.5066907405853271, "logits/rejected": -2.5454914569854736, "logps/chosen": -172.1885223388672, "logps/rejected": -480.1864013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3173271417617798, "rewards/margins": 10.472748756408691, "rewards/rejected": -11.790075302124023, "step": 9295 }, { "epoch": 1.45, "learning_rate": 7.329271227760776e-06, "logits/chosen": -2.344064950942993, "logits/rejected": -2.8048646450042725, "logps/chosen": -46.82733917236328, "logps/rejected": -134.11851501464844, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/chosen": -2.895359754562378, "rewards/margins": 3.0732500553131104, "rewards/rejected": -5.968609809875488, "step": 9296 }, { "epoch": 1.45, "learning_rate": 7.328537787229628e-06, "logits/chosen": -2.813110589981079, "logits/rejected": -3.202010154724121, "logps/chosen": -287.7005615234375, "logps/rejected": -300.8192138671875, "loss": 0.4975, "rewards/accuracies": 0.5, "rewards/chosen": -4.0141448974609375, "rewards/margins": 2.850836992263794, "rewards/rejected": -6.864981651306152, "step": 9297 }, { "epoch": 1.45, "learning_rate": 7.32780434669848e-06, "logits/chosen": -1.9260224103927612, "logits/rejected": -2.7560620307922363, "logps/chosen": -172.102294921875, "logps/rejected": -353.27508544921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.6081131100654602, "rewards/margins": 8.721773147583008, "rewards/rejected": -9.329885482788086, "step": 9298 }, { "epoch": 1.45, "learning_rate": 7.327070906167332e-06, "logits/chosen": -1.6527318954467773, "logits/rejected": -2.7093358039855957, "logps/chosen": -40.900325775146484, "logps/rejected": -405.5335693359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8556629419326782, "rewards/margins": 10.873522758483887, "rewards/rejected": -11.729185104370117, "step": 9299 }, { "epoch": 1.45, "learning_rate": 7.326337465636184e-06, "logits/chosen": -2.6147267818450928, "logits/rejected": -2.7386367321014404, "logps/chosen": -167.42529296875, "logps/rejected": -308.84033203125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.8822929859161377, "rewards/margins": 6.411310195922852, "rewards/rejected": -7.293603420257568, "step": 9300 }, { "epoch": 1.45, "learning_rate": 7.325604025105036e-06, "logits/chosen": -2.0293455123901367, "logits/rejected": -2.5090715885162354, "logps/chosen": -437.7235107421875, "logps/rejected": -432.5017395019531, "loss": 0.0808, "rewards/accuracies": 1.0, "rewards/chosen": -3.621157169342041, "rewards/margins": 4.887029647827148, "rewards/rejected": -8.508187294006348, "step": 9301 }, { "epoch": 1.45, "learning_rate": 7.324870584573888e-06, "logits/chosen": -2.6878814697265625, "logits/rejected": -2.949680805206299, "logps/chosen": -113.92403411865234, "logps/rejected": -276.47705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5176721811294556, "rewards/margins": 9.466963768005371, "rewards/rejected": -10.984636306762695, "step": 9302 }, { "epoch": 1.45, "learning_rate": 7.324137144042741e-06, "logits/chosen": -2.1300950050354004, "logits/rejected": -2.827484130859375, "logps/chosen": -138.87734985351562, "logps/rejected": -292.8271484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.747405767440796, "rewards/margins": 8.082019805908203, "rewards/rejected": -10.829425811767578, "step": 9303 }, { "epoch": 1.45, "learning_rate": 7.323403703511593e-06, "logits/chosen": -2.608313798904419, "logits/rejected": -2.662764549255371, "logps/chosen": -78.73599243164062, "logps/rejected": -135.24119567871094, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": -2.896023750305176, "rewards/margins": 5.498297691345215, "rewards/rejected": -8.39432144165039, "step": 9304 }, { "epoch": 1.45, "learning_rate": 7.322670262980445e-06, "logits/chosen": -2.798795700073242, "logits/rejected": -2.401135206222534, "logps/chosen": -210.04861450195312, "logps/rejected": -264.0714111328125, "loss": 1.613, "rewards/accuracies": 0.5, "rewards/chosen": -2.9178154468536377, "rewards/margins": 1.9696578979492188, "rewards/rejected": -4.8874735832214355, "step": 9305 }, { "epoch": 1.45, "learning_rate": 7.321936822449298e-06, "logits/chosen": -1.0826514959335327, "logits/rejected": -2.629908800125122, "logps/chosen": -120.20172882080078, "logps/rejected": -270.0662841796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.2827179431915283, "rewards/margins": 6.622312068939209, "rewards/rejected": -8.905030250549316, "step": 9306 }, { "epoch": 1.45, "learning_rate": 7.32120338191815e-06, "logits/chosen": -2.0418074131011963, "logits/rejected": -2.6335160732269287, "logps/chosen": -145.80296325683594, "logps/rejected": -387.2388916015625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -2.1097731590270996, "rewards/margins": 7.482388496398926, "rewards/rejected": -9.592161178588867, "step": 9307 }, { "epoch": 1.45, "learning_rate": 7.3204699413870015e-06, "logits/chosen": -1.7436898946762085, "logits/rejected": -3.0867748260498047, "logps/chosen": -459.27423095703125, "logps/rejected": -874.295166015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5470643043518066, "rewards/margins": 8.875468254089355, "rewards/rejected": -11.42253303527832, "step": 9308 }, { "epoch": 1.45, "learning_rate": 7.319736500855853e-06, "logits/chosen": -3.261120557785034, "logits/rejected": -2.8363959789276123, "logps/chosen": -646.93603515625, "logps/rejected": -566.00927734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.908175468444824, "rewards/margins": 7.963159561157227, "rewards/rejected": -12.87133502960205, "step": 9309 }, { "epoch": 1.45, "learning_rate": 7.319003060324705e-06, "logits/chosen": -2.5204594135284424, "logits/rejected": -3.1712498664855957, "logps/chosen": -92.85505676269531, "logps/rejected": -260.0855712890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.4557647705078125, "rewards/margins": 7.801840305328369, "rewards/rejected": -10.257604598999023, "step": 9310 }, { "epoch": 1.45, "learning_rate": 7.318269619793557e-06, "logits/chosen": -2.962233543395996, "logits/rejected": -2.8793773651123047, "logps/chosen": -273.0046691894531, "logps/rejected": -382.5450134277344, "loss": 1.3155, "rewards/accuracies": 0.5, "rewards/chosen": -3.10331654548645, "rewards/margins": 3.299952507019043, "rewards/rejected": -6.403269290924072, "step": 9311 }, { "epoch": 1.45, "learning_rate": 7.317536179262411e-06, "logits/chosen": -2.783238410949707, "logits/rejected": -3.128509998321533, "logps/chosen": -125.2747802734375, "logps/rejected": -221.91876220703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.5566205978393555, "rewards/margins": 7.146688938140869, "rewards/rejected": -9.703310012817383, "step": 9312 }, { "epoch": 1.45, "learning_rate": 7.3168027387312626e-06, "logits/chosen": -2.576892852783203, "logits/rejected": -3.0833709239959717, "logps/chosen": -116.2899169921875, "logps/rejected": -285.9759216308594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.719696521759033, "rewards/margins": 6.490100860595703, "rewards/rejected": -10.209797859191895, "step": 9313 }, { "epoch": 1.45, "learning_rate": 7.3160692982001144e-06, "logits/chosen": -2.7544004917144775, "logits/rejected": -3.002439498901367, "logps/chosen": -349.2802429199219, "logps/rejected": -452.0589904785156, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9039244651794434, "rewards/margins": 8.231182098388672, "rewards/rejected": -11.135106086730957, "step": 9314 }, { "epoch": 1.45, "learning_rate": 7.315335857668966e-06, "logits/chosen": -2.7186942100524902, "logits/rejected": -2.15191388130188, "logps/chosen": -283.5868835449219, "logps/rejected": -186.11178588867188, "loss": 0.9691, "rewards/accuracies": 0.5, "rewards/chosen": -5.061025619506836, "rewards/margins": -0.009109139442443848, "rewards/rejected": -5.051916599273682, "step": 9315 }, { "epoch": 1.45, "learning_rate": 7.314602417137818e-06, "logits/chosen": -2.8399453163146973, "logits/rejected": -2.636817216873169, "logps/chosen": -250.37814331054688, "logps/rejected": -406.22320556640625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.192873477935791, "rewards/margins": 8.149833679199219, "rewards/rejected": -10.342706680297852, "step": 9316 }, { "epoch": 1.45, "learning_rate": 7.31386897660667e-06, "logits/chosen": -2.299239158630371, "logits/rejected": -2.7258827686309814, "logps/chosen": -48.87762451171875, "logps/rejected": -336.43511962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.2681770324707031, "rewards/margins": 9.0298490524292, "rewards/rejected": -9.298026084899902, "step": 9317 }, { "epoch": 1.45, "learning_rate": 7.313135536075522e-06, "logits/chosen": -2.4000935554504395, "logits/rejected": -2.930100440979004, "logps/chosen": -262.4554748535156, "logps/rejected": -376.2108154296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.7123026847839355, "rewards/margins": 10.52841567993164, "rewards/rejected": -12.240717887878418, "step": 9318 }, { "epoch": 1.45, "learning_rate": 7.312402095544374e-06, "logits/chosen": -2.4275851249694824, "logits/rejected": -2.857046604156494, "logps/chosen": -715.3297119140625, "logps/rejected": -633.3208618164062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.493783473968506, "rewards/margins": 8.12516975402832, "rewards/rejected": -13.618953704833984, "step": 9319 }, { "epoch": 1.45, "learning_rate": 7.311668655013226e-06, "logits/chosen": -2.674286127090454, "logits/rejected": -2.7337915897369385, "logps/chosen": -407.6386413574219, "logps/rejected": -455.8004150390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.5721442699432373, "rewards/margins": 8.200177192687988, "rewards/rejected": -9.772321701049805, "step": 9320 }, { "epoch": 1.45, "learning_rate": 7.310935214482079e-06, "logits/chosen": -2.851588726043701, "logits/rejected": -2.4434001445770264, "logps/chosen": -169.8878173828125, "logps/rejected": -136.15182495117188, "loss": 3.6809, "rewards/accuracies": 0.5, "rewards/chosen": -5.199347019195557, "rewards/margins": -1.006589651107788, "rewards/rejected": -4.192757606506348, "step": 9321 }, { "epoch": 1.45, "learning_rate": 7.310201773950931e-06, "logits/chosen": -2.5967371463775635, "logits/rejected": -2.1953303813934326, "logps/chosen": -200.87628173828125, "logps/rejected": -233.7498779296875, "loss": 2.2502, "rewards/accuracies": 0.5, "rewards/chosen": -5.900111675262451, "rewards/margins": 1.3232648372650146, "rewards/rejected": -7.223376274108887, "step": 9322 }, { "epoch": 1.45, "learning_rate": 7.309468333419784e-06, "logits/chosen": -2.7318601608276367, "logits/rejected": -2.8518645763397217, "logps/chosen": -99.56568908691406, "logps/rejected": -259.989501953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.0319104194641113, "rewards/margins": 7.590910911560059, "rewards/rejected": -9.622820854187012, "step": 9323 }, { "epoch": 1.45, "learning_rate": 7.308734892888636e-06, "logits/chosen": -1.6611418724060059, "logits/rejected": -3.031282901763916, "logps/chosen": -133.92340087890625, "logps/rejected": -421.18341064453125, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -3.6986520290374756, "rewards/margins": 5.525448799133301, "rewards/rejected": -9.224101066589355, "step": 9324 }, { "epoch": 1.45, "learning_rate": 7.308001452357488e-06, "logits/chosen": -2.0696651935577393, "logits/rejected": -3.0516152381896973, "logps/chosen": -139.3955841064453, "logps/rejected": -494.47076416015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9579944610595703, "rewards/margins": 10.183883666992188, "rewards/rejected": -12.141878128051758, "step": 9325 }, { "epoch": 1.45, "learning_rate": 7.3072680118263395e-06, "logits/chosen": -2.3723044395446777, "logits/rejected": -3.2980165481567383, "logps/chosen": -102.68577575683594, "logps/rejected": -424.4381103515625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.0173470973968506, "rewards/margins": 6.954545021057129, "rewards/rejected": -9.971891403198242, "step": 9326 }, { "epoch": 1.45, "learning_rate": 7.306534571295191e-06, "logits/chosen": -2.962451219558716, "logits/rejected": -2.430581569671631, "logps/chosen": -287.3388977050781, "logps/rejected": -126.50458526611328, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -1.067205786705017, "rewards/margins": 4.644847869873047, "rewards/rejected": -5.712054252624512, "step": 9327 }, { "epoch": 1.45, "learning_rate": 7.305801130764043e-06, "logits/chosen": -2.878326416015625, "logits/rejected": -2.9232707023620605, "logps/chosen": -216.94668579101562, "logps/rejected": -308.43536376953125, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": -1.7341713905334473, "rewards/margins": 4.87510871887207, "rewards/rejected": -6.609280586242676, "step": 9328 }, { "epoch": 1.45, "learning_rate": 7.305067690232897e-06, "logits/chosen": -1.935621738433838, "logits/rejected": -2.7877495288848877, "logps/chosen": -313.4154052734375, "logps/rejected": -500.89892578125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -5.303696155548096, "rewards/margins": 4.9710540771484375, "rewards/rejected": -10.274749755859375, "step": 9329 }, { "epoch": 1.45, "learning_rate": 7.304334249701749e-06, "logits/chosen": -1.6821210384368896, "logits/rejected": -2.3609745502471924, "logps/chosen": -190.23696899414062, "logps/rejected": -385.6524658203125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.1453986167907715, "rewards/margins": 7.034435749053955, "rewards/rejected": -10.179834365844727, "step": 9330 }, { "epoch": 1.45, "learning_rate": 7.303600809170601e-06, "logits/chosen": -0.4093370735645294, "logits/rejected": -1.1379481554031372, "logps/chosen": -111.70775604248047, "logps/rejected": -446.1300048828125, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": -1.1816192865371704, "rewards/margins": 23.44712257385254, "rewards/rejected": -24.628742218017578, "step": 9331 }, { "epoch": 1.45, "learning_rate": 7.3028673686394525e-06, "logits/chosen": -1.2697975635528564, "logits/rejected": -2.887530565261841, "logps/chosen": -90.31991577148438, "logps/rejected": -389.943115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.668403685092926, "rewards/margins": 9.812749862670898, "rewards/rejected": -10.48115348815918, "step": 9332 }, { "epoch": 1.45, "learning_rate": 7.302133928108304e-06, "logits/chosen": -2.9932894706726074, "logits/rejected": -3.1572253704071045, "logps/chosen": -768.5006103515625, "logps/rejected": -582.9635009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.641290307044983, "rewards/margins": 10.341329574584961, "rewards/rejected": -11.982620239257812, "step": 9333 }, { "epoch": 1.45, "learning_rate": 7.301400487577156e-06, "logits/chosen": -2.739661931991577, "logits/rejected": -2.7784194946289062, "logps/chosen": -51.3340950012207, "logps/rejected": -132.10018920898438, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -1.7077213525772095, "rewards/margins": 5.675619125366211, "rewards/rejected": -7.383339881896973, "step": 9334 }, { "epoch": 1.45, "learning_rate": 7.300667047046008e-06, "logits/chosen": -1.969876766204834, "logits/rejected": -2.83217716217041, "logps/chosen": -118.8435287475586, "logps/rejected": -297.47674560546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -0.8786221146583557, "rewards/margins": 6.645456314086914, "rewards/rejected": -7.524078369140625, "step": 9335 }, { "epoch": 1.45, "learning_rate": 7.29993360651486e-06, "logits/chosen": -2.7143099308013916, "logits/rejected": -2.897326707839966, "logps/chosen": -252.01805114746094, "logps/rejected": -368.3709411621094, "loss": 0.132, "rewards/accuracies": 1.0, "rewards/chosen": -3.196084499359131, "rewards/margins": 3.1005733013153076, "rewards/rejected": -6.296657562255859, "step": 9336 }, { "epoch": 1.45, "learning_rate": 7.299200165983712e-06, "logits/chosen": -2.0317270755767822, "logits/rejected": -2.696445941925049, "logps/chosen": -158.0255126953125, "logps/rejected": -260.8612365722656, "loss": 0.1346, "rewards/accuracies": 1.0, "rewards/chosen": -3.616407871246338, "rewards/margins": 4.0720930099487305, "rewards/rejected": -7.688500881195068, "step": 9337 }, { "epoch": 1.45, "learning_rate": 7.2984667254525654e-06, "logits/chosen": -2.7332763671875, "logits/rejected": -1.5427314043045044, "logps/chosen": -413.71539306640625, "logps/rejected": -284.82061767578125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.7393600940704346, "rewards/margins": 8.840585708618164, "rewards/rejected": -10.57994556427002, "step": 9338 }, { "epoch": 1.45, "learning_rate": 7.297733284921417e-06, "logits/chosen": -2.873114824295044, "logits/rejected": -2.608421802520752, "logps/chosen": -367.7386779785156, "logps/rejected": -366.3921813964844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.1584607362747192, "rewards/margins": 6.23105525970459, "rewards/rejected": -7.3895158767700195, "step": 9339 }, { "epoch": 1.45, "learning_rate": 7.29699984439027e-06, "logits/chosen": -2.2114062309265137, "logits/rejected": -2.8171417713165283, "logps/chosen": -74.26644897460938, "logps/rejected": -316.05548095703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.5765466690063477, "rewards/margins": 7.0208892822265625, "rewards/rejected": -9.597434997558594, "step": 9340 }, { "epoch": 1.45, "learning_rate": 7.296266403859122e-06, "logits/chosen": -1.9049416780471802, "logits/rejected": -2.6953351497650146, "logps/chosen": -87.14939880371094, "logps/rejected": -296.855712890625, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -3.6535940170288086, "rewards/margins": 5.386561393737793, "rewards/rejected": -9.040155410766602, "step": 9341 }, { "epoch": 1.45, "learning_rate": 7.295532963327974e-06, "logits/chosen": -2.8792917728424072, "logits/rejected": -2.920137882232666, "logps/chosen": -690.447265625, "logps/rejected": -580.5767822265625, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/chosen": -4.79315185546875, "rewards/margins": 5.162930488586426, "rewards/rejected": -9.956082344055176, "step": 9342 }, { "epoch": 1.45, "learning_rate": 7.294799522796826e-06, "logits/chosen": -2.243739604949951, "logits/rejected": -3.033177614212036, "logps/chosen": -32.82500457763672, "logps/rejected": -319.7695617675781, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -0.6553274393081665, "rewards/margins": 7.706188678741455, "rewards/rejected": -8.361515998840332, "step": 9343 }, { "epoch": 1.45, "learning_rate": 7.2940660822656776e-06, "logits/chosen": -2.690434455871582, "logits/rejected": -3.0220983028411865, "logps/chosen": -140.38885498046875, "logps/rejected": -252.67919921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.7189116477966309, "rewards/margins": 7.1154632568359375, "rewards/rejected": -8.834375381469727, "step": 9344 }, { "epoch": 1.45, "learning_rate": 7.2933326417345294e-06, "logits/chosen": -2.849865674972534, "logits/rejected": -2.2469234466552734, "logps/chosen": -362.0401306152344, "logps/rejected": -294.7467956542969, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.1844868659973145, "rewards/margins": 7.907238483428955, "rewards/rejected": -10.09172534942627, "step": 9345 }, { "epoch": 1.45, "learning_rate": 7.292599201203381e-06, "logits/chosen": -2.6302993297576904, "logits/rejected": -2.747868061065674, "logps/chosen": -217.82815551757812, "logps/rejected": -260.851318359375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.3403477668762207, "rewards/margins": 5.56088924407959, "rewards/rejected": -8.901237487792969, "step": 9346 }, { "epoch": 1.45, "learning_rate": 7.291865760672235e-06, "logits/chosen": -2.8819875717163086, "logits/rejected": -3.094054937362671, "logps/chosen": -277.4540710449219, "logps/rejected": -430.43890380859375, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": -1.2173928022384644, "rewards/margins": 4.114348411560059, "rewards/rejected": -5.3317413330078125, "step": 9347 }, { "epoch": 1.45, "learning_rate": 7.291132320141087e-06, "logits/chosen": -2.512078046798706, "logits/rejected": -2.7021679878234863, "logps/chosen": -231.62574768066406, "logps/rejected": -358.7225341796875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -1.7890121936798096, "rewards/margins": 4.6160993576049805, "rewards/rejected": -6.405111789703369, "step": 9348 }, { "epoch": 1.45, "learning_rate": 7.290398879609939e-06, "logits/chosen": -2.422065258026123, "logits/rejected": -2.7835097312927246, "logps/chosen": -82.50691223144531, "logps/rejected": -225.30734252929688, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -2.4441580772399902, "rewards/margins": 5.020709991455078, "rewards/rejected": -7.464868545532227, "step": 9349 }, { "epoch": 1.45, "learning_rate": 7.2896654390787905e-06, "logits/chosen": -2.952375650405884, "logits/rejected": -2.7007267475128174, "logps/chosen": -634.0676879882812, "logps/rejected": -481.8616943359375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.7480247020721436, "rewards/margins": 7.908829689025879, "rewards/rejected": -9.656854629516602, "step": 9350 }, { "epoch": 1.45, "learning_rate": 7.288931998547642e-06, "logits/chosen": -2.0812108516693115, "logits/rejected": -2.3743226528167725, "logps/chosen": -285.3822021484375, "logps/rejected": -519.290771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7176376581192017, "rewards/margins": 12.89358139038086, "rewards/rejected": -13.611217498779297, "step": 9351 }, { "epoch": 1.45, "learning_rate": 7.288198558016494e-06, "logits/chosen": -2.5234835147857666, "logits/rejected": -2.9117331504821777, "logps/chosen": -48.83433532714844, "logps/rejected": -299.1224670410156, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.4182567596435547, "rewards/margins": 7.116904258728027, "rewards/rejected": -8.535161018371582, "step": 9352 }, { "epoch": 1.45, "learning_rate": 7.287465117485346e-06, "logits/chosen": -2.950605869293213, "logits/rejected": -2.1988096237182617, "logps/chosen": -378.0469665527344, "logps/rejected": -292.4855041503906, "loss": 2.713, "rewards/accuracies": 0.5, "rewards/chosen": -6.7201995849609375, "rewards/margins": 1.4223368167877197, "rewards/rejected": -8.142536163330078, "step": 9353 }, { "epoch": 1.45, "learning_rate": 7.286731676954198e-06, "logits/chosen": -1.23910391330719, "logits/rejected": -2.6183300018310547, "logps/chosen": -62.85680389404297, "logps/rejected": -335.84716796875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.276607632637024, "rewards/margins": 8.334163665771484, "rewards/rejected": -9.610770225524902, "step": 9354 }, { "epoch": 1.45, "learning_rate": 7.285998236423051e-06, "logits/chosen": -2.8865556716918945, "logits/rejected": -2.968921661376953, "logps/chosen": -290.79534912109375, "logps/rejected": -336.8314208984375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -2.7913098335266113, "rewards/margins": 6.004726409912109, "rewards/rejected": -8.796035766601562, "step": 9355 }, { "epoch": 1.46, "learning_rate": 7.2852647958919035e-06, "logits/chosen": -2.949692964553833, "logits/rejected": -2.860901355743408, "logps/chosen": -300.84796142578125, "logps/rejected": -420.9539794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.036770820617676, "rewards/margins": 9.317377090454102, "rewards/rejected": -11.354147911071777, "step": 9356 }, { "epoch": 1.46, "learning_rate": 7.284531355360756e-06, "logits/chosen": -2.6208322048187256, "logits/rejected": -2.1875412464141846, "logps/chosen": -557.4537353515625, "logps/rejected": -574.3903198242188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.8469626903533936, "rewards/margins": 8.978191375732422, "rewards/rejected": -11.825154304504395, "step": 9357 }, { "epoch": 1.46, "learning_rate": 7.283797914829608e-06, "logits/chosen": -2.355624198913574, "logits/rejected": -2.7823805809020996, "logps/chosen": -162.17330932617188, "logps/rejected": -437.802734375, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -4.046093940734863, "rewards/margins": 7.314971923828125, "rewards/rejected": -11.361065864562988, "step": 9358 }, { "epoch": 1.46, "learning_rate": 7.28306447429846e-06, "logits/chosen": -2.360102415084839, "logits/rejected": -2.965117931365967, "logps/chosen": -236.2698974609375, "logps/rejected": -476.14349365234375, "loss": 0.0835, "rewards/accuracies": 1.0, "rewards/chosen": -2.849031448364258, "rewards/margins": 5.929545879364014, "rewards/rejected": -8.778576850891113, "step": 9359 }, { "epoch": 1.46, "learning_rate": 7.282331033767312e-06, "logits/chosen": -1.8076088428497314, "logits/rejected": -2.075782299041748, "logps/chosen": -148.4608154296875, "logps/rejected": -177.91070556640625, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -1.136110782623291, "rewards/margins": 5.497220039367676, "rewards/rejected": -6.633330821990967, "step": 9360 }, { "epoch": 1.46, "learning_rate": 7.281597593236164e-06, "logits/chosen": -2.7380869388580322, "logits/rejected": -2.9603633880615234, "logps/chosen": -581.4992065429688, "logps/rejected": -599.110595703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4121201038360596, "rewards/margins": 10.591917037963867, "rewards/rejected": -14.004037857055664, "step": 9361 }, { "epoch": 1.46, "learning_rate": 7.280864152705016e-06, "logits/chosen": -2.5496020317077637, "logits/rejected": -3.081061363220215, "logps/chosen": -157.14547729492188, "logps/rejected": -197.681884765625, "loss": 2.2483, "rewards/accuracies": 0.5, "rewards/chosen": -4.389925003051758, "rewards/margins": 1.999227523803711, "rewards/rejected": -6.389152526855469, "step": 9362 }, { "epoch": 1.46, "learning_rate": 7.2801307121738675e-06, "logits/chosen": -1.6897772550582886, "logits/rejected": -2.6646199226379395, "logps/chosen": -80.6973876953125, "logps/rejected": -422.42718505859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.336333274841309, "rewards/margins": 7.581523895263672, "rewards/rejected": -11.917858123779297, "step": 9363 }, { "epoch": 1.46, "learning_rate": 7.279397271642719e-06, "logits/chosen": -2.3025295734405518, "logits/rejected": -2.850449323654175, "logps/chosen": -204.224609375, "logps/rejected": -442.79962158203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.9404137134552, "rewards/margins": 7.560973167419434, "rewards/rejected": -11.501386642456055, "step": 9364 }, { "epoch": 1.46, "learning_rate": 7.278663831111573e-06, "logits/chosen": -2.8850016593933105, "logits/rejected": -2.8533987998962402, "logps/chosen": -77.4666748046875, "logps/rejected": -197.3616180419922, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.0381510257720947, "rewards/margins": 6.249433994293213, "rewards/rejected": -7.287585258483887, "step": 9365 }, { "epoch": 1.46, "learning_rate": 7.277930390580425e-06, "logits/chosen": -2.8391599655151367, "logits/rejected": -1.8253504037857056, "logps/chosen": -233.12945556640625, "logps/rejected": -55.88054656982422, "loss": 3.9077, "rewards/accuracies": 0.0, "rewards/chosen": -6.061745643615723, "rewards/margins": -3.8861091136932373, "rewards/rejected": -2.1756365299224854, "step": 9366 }, { "epoch": 1.46, "learning_rate": 7.277196950049277e-06, "logits/chosen": -2.6016643047332764, "logits/rejected": -2.8185689449310303, "logps/chosen": -95.20255279541016, "logps/rejected": -291.1097412109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6605517864227295, "rewards/margins": 7.891948223114014, "rewards/rejected": -9.552499771118164, "step": 9367 }, { "epoch": 1.46, "learning_rate": 7.2764635095181286e-06, "logits/chosen": -2.2276411056518555, "logits/rejected": -2.815288782119751, "logps/chosen": -177.228271484375, "logps/rejected": -484.6845397949219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.987607002258301, "rewards/margins": 9.128633499145508, "rewards/rejected": -12.116239547729492, "step": 9368 }, { "epoch": 1.46, "learning_rate": 7.2757300689869804e-06, "logits/chosen": -2.85316801071167, "logits/rejected": -2.911797285079956, "logps/chosen": -254.99615478515625, "logps/rejected": -327.53826904296875, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -3.8049495220184326, "rewards/margins": 5.10122013092041, "rewards/rejected": -8.906169891357422, "step": 9369 }, { "epoch": 1.46, "learning_rate": 7.274996628455832e-06, "logits/chosen": -1.9887547492980957, "logits/rejected": -2.904038190841675, "logps/chosen": -129.75875854492188, "logps/rejected": -213.6718292236328, "loss": 0.1728, "rewards/accuracies": 1.0, "rewards/chosen": -3.364032745361328, "rewards/margins": 3.909924268722534, "rewards/rejected": -7.273957252502441, "step": 9370 }, { "epoch": 1.46, "learning_rate": 7.274263187924684e-06, "logits/chosen": -1.8054076433181763, "logits/rejected": -1.610987663269043, "logps/chosen": -804.7088623046875, "logps/rejected": -482.92822265625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.7572948932647705, "rewards/margins": 6.001192569732666, "rewards/rejected": -8.758487701416016, "step": 9371 }, { "epoch": 1.46, "learning_rate": 7.273529747393537e-06, "logits/chosen": -2.2184042930603027, "logits/rejected": -2.867290735244751, "logps/chosen": -313.1147766113281, "logps/rejected": -449.0646057128906, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -3.761277914047241, "rewards/margins": 5.330224990844727, "rewards/rejected": -9.091503143310547, "step": 9372 }, { "epoch": 1.46, "learning_rate": 7.272796306862389e-06, "logits/chosen": -2.727822780609131, "logits/rejected": -3.2521309852600098, "logps/chosen": -475.328125, "logps/rejected": -539.33154296875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.588286280632019, "rewards/margins": 9.161436080932617, "rewards/rejected": -10.749722480773926, "step": 9373 }, { "epoch": 1.46, "learning_rate": 7.272062866331242e-06, "logits/chosen": -2.3147101402282715, "logits/rejected": -2.7959706783294678, "logps/chosen": -113.94830322265625, "logps/rejected": -287.275634765625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.0847235918045044, "rewards/margins": 8.034443855285645, "rewards/rejected": -9.11916732788086, "step": 9374 }, { "epoch": 1.46, "learning_rate": 7.271329425800094e-06, "logits/chosen": -2.442718744277954, "logits/rejected": -2.6145551204681396, "logps/chosen": -343.6864013671875, "logps/rejected": -426.3004455566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.568657875061035, "rewards/margins": 14.269918441772461, "rewards/rejected": -17.838577270507812, "step": 9375 }, { "epoch": 1.46, "learning_rate": 7.270595985268946e-06, "logits/chosen": -2.5352697372436523, "logits/rejected": -2.394721031188965, "logps/chosen": -410.7530517578125, "logps/rejected": -482.3314208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8983049392700195, "rewards/margins": 9.3819580078125, "rewards/rejected": -13.28026294708252, "step": 9376 }, { "epoch": 1.46, "learning_rate": 7.269862544737798e-06, "logits/chosen": -2.601372718811035, "logits/rejected": -2.4398584365844727, "logps/chosen": -219.12753295898438, "logps/rejected": -309.7151184082031, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.2436728477478027, "rewards/margins": 8.338052749633789, "rewards/rejected": -11.581725120544434, "step": 9377 }, { "epoch": 1.46, "learning_rate": 7.26912910420665e-06, "logits/chosen": -1.2551213502883911, "logits/rejected": -2.9094998836517334, "logps/chosen": -100.07463073730469, "logps/rejected": -315.1307067871094, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.7090795040130615, "rewards/margins": 7.488101005554199, "rewards/rejected": -10.19718074798584, "step": 9378 }, { "epoch": 1.46, "learning_rate": 7.268395663675502e-06, "logits/chosen": -2.944345235824585, "logits/rejected": -2.825151205062866, "logps/chosen": -194.35128784179688, "logps/rejected": -171.40689086914062, "loss": 1.2384, "rewards/accuracies": 0.5, "rewards/chosen": -2.9658753871917725, "rewards/margins": 2.3305788040161133, "rewards/rejected": -5.296454429626465, "step": 9379 }, { "epoch": 1.46, "learning_rate": 7.267662223144354e-06, "logits/chosen": -2.974092483520508, "logits/rejected": -3.043808937072754, "logps/chosen": -286.5622863769531, "logps/rejected": -313.98565673828125, "loss": 3.1947, "rewards/accuracies": 0.5, "rewards/chosen": -4.650525093078613, "rewards/margins": 0.07581639289855957, "rewards/rejected": -4.726341247558594, "step": 9380 }, { "epoch": 1.46, "learning_rate": 7.2669287826132055e-06, "logits/chosen": -3.0381879806518555, "logits/rejected": -3.018641710281372, "logps/chosen": -477.442626953125, "logps/rejected": -332.5086975097656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.547499179840088, "rewards/margins": 6.827970027923584, "rewards/rejected": -10.375469207763672, "step": 9381 }, { "epoch": 1.46, "learning_rate": 7.266195342082057e-06, "logits/chosen": -2.6881563663482666, "logits/rejected": -2.031715154647827, "logps/chosen": -254.30023193359375, "logps/rejected": -270.080322265625, "loss": 2.9255, "rewards/accuracies": 0.5, "rewards/chosen": -8.27168083190918, "rewards/margins": -2.3021364212036133, "rewards/rejected": -5.969543933868408, "step": 9382 }, { "epoch": 1.46, "learning_rate": 7.265461901550911e-06, "logits/chosen": -3.0045409202575684, "logits/rejected": -2.2889442443847656, "logps/chosen": -511.4240417480469, "logps/rejected": -414.88739013671875, "loss": 0.1176, "rewards/accuracies": 1.0, "rewards/chosen": -4.099418640136719, "rewards/margins": 2.180345058441162, "rewards/rejected": -6.279764175415039, "step": 9383 }, { "epoch": 1.46, "learning_rate": 7.264728461019763e-06, "logits/chosen": -1.6013115644454956, "logits/rejected": -2.733476400375366, "logps/chosen": -182.84634399414062, "logps/rejected": -297.55902099609375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.858558177947998, "rewards/margins": 5.545670986175537, "rewards/rejected": -8.404229164123535, "step": 9384 }, { "epoch": 1.46, "learning_rate": 7.263995020488615e-06, "logits/chosen": -1.0892512798309326, "logits/rejected": -2.0633294582366943, "logps/chosen": -169.7197723388672, "logps/rejected": -447.82275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.286207914352417, "rewards/margins": 10.327812194824219, "rewards/rejected": -11.614019393920898, "step": 9385 }, { "epoch": 1.46, "learning_rate": 7.263261579957467e-06, "logits/chosen": -1.267553448677063, "logits/rejected": -1.425615668296814, "logps/chosen": -360.9637756347656, "logps/rejected": -286.1036682128906, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -1.102294921875, "rewards/margins": 6.354719638824463, "rewards/rejected": -7.457015037536621, "step": 9386 }, { "epoch": 1.46, "learning_rate": 7.2625281394263185e-06, "logits/chosen": -1.9405642747879028, "logits/rejected": -2.7219960689544678, "logps/chosen": -88.55181884765625, "logps/rejected": -257.4084167480469, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -0.8048616647720337, "rewards/margins": 7.035567760467529, "rewards/rejected": -7.840429306030273, "step": 9387 }, { "epoch": 1.46, "learning_rate": 7.26179469889517e-06, "logits/chosen": -1.8975539207458496, "logits/rejected": -2.6951918601989746, "logps/chosen": -354.4245910644531, "logps/rejected": -545.9058837890625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.6010661125183105, "rewards/margins": 8.475191116333008, "rewards/rejected": -13.076257705688477, "step": 9388 }, { "epoch": 1.46, "learning_rate": 7.261061258364023e-06, "logits/chosen": -2.5157575607299805, "logits/rejected": -2.6575255393981934, "logps/chosen": -582.97314453125, "logps/rejected": -437.3810119628906, "loss": 1.911, "rewards/accuracies": 0.5, "rewards/chosen": -6.498952865600586, "rewards/margins": 2.6887145042419434, "rewards/rejected": -9.187666893005371, "step": 9389 }, { "epoch": 1.46, "learning_rate": 7.260327817832875e-06, "logits/chosen": -1.4845280647277832, "logits/rejected": -2.690472364425659, "logps/chosen": -221.2646484375, "logps/rejected": -458.3197937011719, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -3.0639710426330566, "rewards/margins": 6.550288200378418, "rewards/rejected": -9.614258766174316, "step": 9390 }, { "epoch": 1.46, "learning_rate": 7.259594377301727e-06, "logits/chosen": -1.7532882690429688, "logits/rejected": -2.7727224826812744, "logps/chosen": -51.080753326416016, "logps/rejected": -339.4837646484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.5766525268554688, "rewards/margins": 8.251859664916992, "rewards/rejected": -10.828512191772461, "step": 9391 }, { "epoch": 1.46, "learning_rate": 7.2588609367705804e-06, "logits/chosen": -2.546262502670288, "logits/rejected": -2.7983107566833496, "logps/chosen": -135.29373168945312, "logps/rejected": -358.56036376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.371678590774536, "rewards/margins": 9.296098709106445, "rewards/rejected": -11.667776107788086, "step": 9392 }, { "epoch": 1.46, "learning_rate": 7.258127496239432e-06, "logits/chosen": -2.4601216316223145, "logits/rejected": -2.871513843536377, "logps/chosen": -280.9899597167969, "logps/rejected": -335.900390625, "loss": 0.5703, "rewards/accuracies": 0.5, "rewards/chosen": -2.519239902496338, "rewards/margins": 5.637202262878418, "rewards/rejected": -8.156442642211914, "step": 9393 }, { "epoch": 1.46, "learning_rate": 7.257394055708284e-06, "logits/chosen": -2.886323928833008, "logits/rejected": -2.281799793243408, "logps/chosen": -243.68582153320312, "logps/rejected": -311.9316101074219, "loss": 0.5283, "rewards/accuracies": 0.5, "rewards/chosen": -5.170917510986328, "rewards/margins": 6.033207893371582, "rewards/rejected": -11.204126358032227, "step": 9394 }, { "epoch": 1.46, "learning_rate": 7.256660615177136e-06, "logits/chosen": -1.420479416847229, "logits/rejected": -2.7014272212982178, "logps/chosen": -286.9767150878906, "logps/rejected": -652.1343994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7334747314453125, "rewards/margins": 14.641343116760254, "rewards/rejected": -15.374818801879883, "step": 9395 }, { "epoch": 1.46, "learning_rate": 7.255927174645988e-06, "logits/chosen": -2.6944003105163574, "logits/rejected": -3.0616636276245117, "logps/chosen": -146.62911987304688, "logps/rejected": -179.43931579589844, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.6745381355285645, "rewards/margins": 5.061572551727295, "rewards/rejected": -6.736110687255859, "step": 9396 }, { "epoch": 1.46, "learning_rate": 7.25519373411484e-06, "logits/chosen": -2.738410234451294, "logits/rejected": -1.7975342273712158, "logps/chosen": -274.7095947265625, "logps/rejected": -292.7823791503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9590072631835938, "rewards/margins": 8.959081649780273, "rewards/rejected": -11.918088912963867, "step": 9397 }, { "epoch": 1.46, "learning_rate": 7.254460293583692e-06, "logits/chosen": -2.881831407546997, "logits/rejected": -2.383693218231201, "logps/chosen": -569.5089111328125, "logps/rejected": -446.0218200683594, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -4.674988746643066, "rewards/margins": 7.363080978393555, "rewards/rejected": -12.038068771362305, "step": 9398 }, { "epoch": 1.46, "learning_rate": 7.2537268530525436e-06, "logits/chosen": -3.06270694732666, "logits/rejected": -2.633798360824585, "logps/chosen": -244.5448455810547, "logps/rejected": -181.57968139648438, "loss": 0.3563, "rewards/accuracies": 0.5, "rewards/chosen": -2.3273117542266846, "rewards/margins": 2.0639877319335938, "rewards/rejected": -4.391299247741699, "step": 9399 }, { "epoch": 1.46, "learning_rate": 7.2529934125213955e-06, "logits/chosen": -2.610926628112793, "logits/rejected": -2.793095588684082, "logps/chosen": -650.4290771484375, "logps/rejected": -532.5401611328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.465864658355713, "rewards/margins": 8.780982971191406, "rewards/rejected": -14.246847152709961, "step": 9400 }, { "epoch": 1.46, "learning_rate": 7.252259971990249e-06, "logits/chosen": -2.5241124629974365, "logits/rejected": -2.897392511367798, "logps/chosen": -333.2869567871094, "logps/rejected": -484.03564453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 2.321511745452881, "rewards/margins": 10.815811157226562, "rewards/rejected": -8.49429988861084, "step": 9401 }, { "epoch": 1.46, "learning_rate": 7.251526531459101e-06, "logits/chosen": -2.5724947452545166, "logits/rejected": -2.9746811389923096, "logps/chosen": -89.31686401367188, "logps/rejected": -245.02178955078125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.093838691711426, "rewards/margins": 7.293574810028076, "rewards/rejected": -9.387413024902344, "step": 9402 }, { "epoch": 1.46, "learning_rate": 7.250793090927953e-06, "logits/chosen": -2.9379947185516357, "logits/rejected": -2.516693115234375, "logps/chosen": -542.5499877929688, "logps/rejected": -455.64007568359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2218475341796875, "rewards/margins": 9.00600814819336, "rewards/rejected": -11.227855682373047, "step": 9403 }, { "epoch": 1.46, "learning_rate": 7.250059650396805e-06, "logits/chosen": -2.949141025543213, "logits/rejected": -2.560239315032959, "logps/chosen": -275.84344482421875, "logps/rejected": -150.59698486328125, "loss": 1.7122, "rewards/accuracies": 0.0, "rewards/chosen": -5.737122535705566, "rewards/margins": -1.5109772682189941, "rewards/rejected": -4.226145267486572, "step": 9404 }, { "epoch": 1.46, "learning_rate": 7.2493262098656565e-06, "logits/chosen": -3.1175529956817627, "logits/rejected": -3.0046558380126953, "logps/chosen": -116.12769317626953, "logps/rejected": -225.44589233398438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2962818145751953, "rewards/margins": 9.388382911682129, "rewards/rejected": -9.092101097106934, "step": 9405 }, { "epoch": 1.46, "learning_rate": 7.248592769334509e-06, "logits/chosen": -2.096278190612793, "logits/rejected": -2.8701744079589844, "logps/chosen": -79.02454376220703, "logps/rejected": -218.96156311035156, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.9122052192687988, "rewards/margins": 6.52134895324707, "rewards/rejected": -8.433553695678711, "step": 9406 }, { "epoch": 1.46, "learning_rate": 7.247859328803361e-06, "logits/chosen": -2.952937364578247, "logits/rejected": -2.49161434173584, "logps/chosen": -198.72079467773438, "logps/rejected": -316.5753173828125, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": -3.4111742973327637, "rewards/margins": 3.0689024925231934, "rewards/rejected": -6.480076789855957, "step": 9407 }, { "epoch": 1.46, "learning_rate": 7.247125888272213e-06, "logits/chosen": -2.6191608905792236, "logits/rejected": -1.4958330392837524, "logps/chosen": -113.59346771240234, "logps/rejected": -109.37712860107422, "loss": 0.3402, "rewards/accuracies": 1.0, "rewards/chosen": -3.5496742725372314, "rewards/margins": 2.4432291984558105, "rewards/rejected": -5.992903709411621, "step": 9408 }, { "epoch": 1.46, "learning_rate": 7.246392447741065e-06, "logits/chosen": -2.7380154132843018, "logits/rejected": -1.776143193244934, "logps/chosen": -374.4649658203125, "logps/rejected": -317.64501953125, "loss": 3.228, "rewards/accuracies": 0.0, "rewards/chosen": -5.821334362030029, "rewards/margins": -3.1590116024017334, "rewards/rejected": -2.662322759628296, "step": 9409 }, { "epoch": 1.46, "learning_rate": 7.2456590072099185e-06, "logits/chosen": -2.833615303039551, "logits/rejected": -2.7068774700164795, "logps/chosen": -205.92721557617188, "logps/rejected": -227.4534454345703, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": 0.08050307631492615, "rewards/margins": 8.66033935546875, "rewards/rejected": -8.579835891723633, "step": 9410 }, { "epoch": 1.46, "learning_rate": 7.24492556667877e-06, "logits/chosen": -3.045938491821289, "logits/rejected": -2.947256088256836, "logps/chosen": -293.6458740234375, "logps/rejected": -341.0487060546875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -2.4517104625701904, "rewards/margins": 6.03517484664917, "rewards/rejected": -8.486885070800781, "step": 9411 }, { "epoch": 1.46, "learning_rate": 7.244192126147622e-06, "logits/chosen": -1.602264404296875, "logits/rejected": -2.668301582336426, "logps/chosen": -348.8337707519531, "logps/rejected": -382.2971496582031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 0.2809875011444092, "rewards/margins": 9.08939266204834, "rewards/rejected": -8.808404922485352, "step": 9412 }, { "epoch": 1.46, "learning_rate": 7.243458685616474e-06, "logits/chosen": -2.196713447570801, "logits/rejected": -2.0185821056365967, "logps/chosen": -207.05291748046875, "logps/rejected": -282.7211608886719, "loss": 0.4533, "rewards/accuracies": 0.5, "rewards/chosen": -3.1213974952697754, "rewards/margins": 4.188492774963379, "rewards/rejected": -7.309890270233154, "step": 9413 }, { "epoch": 1.46, "learning_rate": 7.242725245085326e-06, "logits/chosen": -1.2694429159164429, "logits/rejected": -2.817640781402588, "logps/chosen": -151.25933837890625, "logps/rejected": -415.9085388183594, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": -2.3084397315979004, "rewards/margins": 4.957311153411865, "rewards/rejected": -7.265750885009766, "step": 9414 }, { "epoch": 1.46, "learning_rate": 7.241991804554178e-06, "logits/chosen": -1.8430063724517822, "logits/rejected": -2.7449259757995605, "logps/chosen": -106.9016342163086, "logps/rejected": -398.32598876953125, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -2.8857192993164062, "rewards/margins": 6.290962219238281, "rewards/rejected": -9.176681518554688, "step": 9415 }, { "epoch": 1.46, "learning_rate": 7.24125836402303e-06, "logits/chosen": -2.757747173309326, "logits/rejected": -3.076716184616089, "logps/chosen": -251.6653594970703, "logps/rejected": -347.1387634277344, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -3.6804230213165283, "rewards/margins": 5.164353370666504, "rewards/rejected": -8.844776153564453, "step": 9416 }, { "epoch": 1.46, "learning_rate": 7.240524923491882e-06, "logits/chosen": -2.689730167388916, "logits/rejected": -1.2422244548797607, "logps/chosen": -314.9642639160156, "logps/rejected": -320.330810546875, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": -3.746225357055664, "rewards/margins": 3.4513416290283203, "rewards/rejected": -7.197566986083984, "step": 9417 }, { "epoch": 1.46, "learning_rate": 7.239791482960735e-06, "logits/chosen": -2.635603666305542, "logits/rejected": -2.9535529613494873, "logps/chosen": -164.68553161621094, "logps/rejected": -322.7891845703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0922670364379883, "rewards/margins": 8.587837219238281, "rewards/rejected": -10.68010425567627, "step": 9418 }, { "epoch": 1.46, "learning_rate": 7.239058042429587e-06, "logits/chosen": -0.9685574173927307, "logits/rejected": -2.4276692867279053, "logps/chosen": -153.46194458007812, "logps/rejected": -508.6548156738281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8908510208129883, "rewards/margins": 8.902688980102539, "rewards/rejected": -12.793540000915527, "step": 9419 }, { "epoch": 1.47, "learning_rate": 7.238324601898439e-06, "logits/chosen": -2.668001890182495, "logits/rejected": -1.480757713317871, "logps/chosen": -537.1494750976562, "logps/rejected": -319.16015625, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -3.791935920715332, "rewards/margins": 3.737720489501953, "rewards/rejected": -7.529656410217285, "step": 9420 }, { "epoch": 1.47, "learning_rate": 7.237591161367291e-06, "logits/chosen": -2.053823709487915, "logits/rejected": -3.010493278503418, "logps/chosen": -70.7468490600586, "logps/rejected": -383.13372802734375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -1.362278938293457, "rewards/margins": 6.965169429779053, "rewards/rejected": -8.327448844909668, "step": 9421 }, { "epoch": 1.47, "learning_rate": 7.236857720836143e-06, "logits/chosen": -2.3419101238250732, "logits/rejected": -2.7048611640930176, "logps/chosen": -196.72731018066406, "logps/rejected": -337.9574279785156, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.3962299823760986, "rewards/margins": 6.742581844329834, "rewards/rejected": -8.138811111450195, "step": 9422 }, { "epoch": 1.47, "learning_rate": 7.2361242803049954e-06, "logits/chosen": -2.17560076713562, "logits/rejected": -2.8478784561157227, "logps/chosen": -80.48039245605469, "logps/rejected": -288.5061340332031, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.574228286743164, "rewards/margins": 6.354089260101318, "rewards/rejected": -8.92831802368164, "step": 9423 }, { "epoch": 1.47, "learning_rate": 7.235390839773847e-06, "logits/chosen": -1.9736565351486206, "logits/rejected": -2.5108482837677, "logps/chosen": -50.504539489746094, "logps/rejected": -310.19805908203125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -1.9613068103790283, "rewards/margins": 4.7623162269592285, "rewards/rejected": -6.723623275756836, "step": 9424 }, { "epoch": 1.47, "learning_rate": 7.234657399242699e-06, "logits/chosen": -1.858526349067688, "logits/rejected": -2.822099208831787, "logps/chosen": -72.52484893798828, "logps/rejected": -218.68966674804688, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": -3.6990933418273926, "rewards/margins": 2.629769802093506, "rewards/rejected": -6.328863143920898, "step": 9425 }, { "epoch": 1.47, "learning_rate": 7.233923958711551e-06, "logits/chosen": -2.8483171463012695, "logits/rejected": -2.8615145683288574, "logps/chosen": -497.97406005859375, "logps/rejected": -526.9573974609375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.44976544380188, "rewards/margins": 7.301049709320068, "rewards/rejected": -9.750815391540527, "step": 9426 }, { "epoch": 1.47, "learning_rate": 7.233190518180405e-06, "logits/chosen": -2.7645246982574463, "logits/rejected": -2.878685474395752, "logps/chosen": -92.82435607910156, "logps/rejected": -226.314453125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.4099111557006836, "rewards/margins": 4.58785343170166, "rewards/rejected": -6.997764587402344, "step": 9427 }, { "epoch": 1.47, "learning_rate": 7.2324570776492565e-06, "logits/chosen": -2.0087571144104004, "logits/rejected": -2.8725204467773438, "logps/chosen": -454.62481689453125, "logps/rejected": -488.50146484375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -5.289187431335449, "rewards/margins": 5.761784553527832, "rewards/rejected": -11.050971984863281, "step": 9428 }, { "epoch": 1.47, "learning_rate": 7.231723637118108e-06, "logits/chosen": -2.6476831436157227, "logits/rejected": -2.1675353050231934, "logps/chosen": -130.74826049804688, "logps/rejected": -158.75, "loss": 0.6228, "rewards/accuracies": 0.5, "rewards/chosen": -4.830353736877441, "rewards/margins": 0.7178514003753662, "rewards/rejected": -5.5482048988342285, "step": 9429 }, { "epoch": 1.47, "learning_rate": 7.23099019658696e-06, "logits/chosen": -0.9147146940231323, "logits/rejected": -2.7797958850860596, "logps/chosen": -94.31732940673828, "logps/rejected": -603.8433837890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.076061248779297, "rewards/margins": 12.014314651489258, "rewards/rejected": -14.090375900268555, "step": 9430 }, { "epoch": 1.47, "learning_rate": 7.230256756055812e-06, "logits/chosen": -2.8915367126464844, "logits/rejected": -2.952897787094116, "logps/chosen": -333.47100830078125, "logps/rejected": -372.6395568847656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.5510685443878174, "rewards/margins": 6.923848628997803, "rewards/rejected": -9.4749174118042, "step": 9431 }, { "epoch": 1.47, "learning_rate": 7.229523315524664e-06, "logits/chosen": -2.4134182929992676, "logits/rejected": -2.5525712966918945, "logps/chosen": -220.19921875, "logps/rejected": -237.89437866210938, "loss": 0.3989, "rewards/accuracies": 0.5, "rewards/chosen": -1.8919044733047485, "rewards/margins": 3.503481864929199, "rewards/rejected": -5.395386695861816, "step": 9432 }, { "epoch": 1.47, "learning_rate": 7.228789874993516e-06, "logits/chosen": -2.9719178676605225, "logits/rejected": -2.6852612495422363, "logps/chosen": -101.5315170288086, "logps/rejected": -133.40359497070312, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.8469587564468384, "rewards/margins": 6.185977935791016, "rewards/rejected": -7.032937049865723, "step": 9433 }, { "epoch": 1.47, "learning_rate": 7.228056434462368e-06, "logits/chosen": -1.8622592687606812, "logits/rejected": -2.50526762008667, "logps/chosen": -103.83235931396484, "logps/rejected": -291.5589904785156, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.9663302898406982, "rewards/margins": 8.523430824279785, "rewards/rejected": -10.489761352539062, "step": 9434 }, { "epoch": 1.47, "learning_rate": 7.22732299393122e-06, "logits/chosen": -2.342426300048828, "logits/rejected": -3.1079962253570557, "logps/chosen": -183.8209228515625, "logps/rejected": -322.1701965332031, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.0147926807403564, "rewards/margins": 7.637749195098877, "rewards/rejected": -8.652542114257812, "step": 9435 }, { "epoch": 1.47, "learning_rate": 7.226589553400073e-06, "logits/chosen": -2.9679782390594482, "logits/rejected": -2.1859335899353027, "logps/chosen": -1103.78662109375, "logps/rejected": -579.1913452148438, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -6.318316459655762, "rewards/margins": 4.932965278625488, "rewards/rejected": -11.25128173828125, "step": 9436 }, { "epoch": 1.47, "learning_rate": 7.225856112868925e-06, "logits/chosen": -2.748499631881714, "logits/rejected": -2.8751018047332764, "logps/chosen": -86.33747100830078, "logps/rejected": -192.00526428222656, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -3.9762983322143555, "rewards/margins": 5.891840934753418, "rewards/rejected": -9.868139266967773, "step": 9437 }, { "epoch": 1.47, "learning_rate": 7.225122672337777e-06, "logits/chosen": -2.773942470550537, "logits/rejected": -2.705594062805176, "logps/chosen": -200.93377685546875, "logps/rejected": -238.8448944091797, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -0.938524603843689, "rewards/margins": 5.7286376953125, "rewards/rejected": -6.6671624183654785, "step": 9438 }, { "epoch": 1.47, "learning_rate": 7.224389231806629e-06, "logits/chosen": -1.2299236059188843, "logits/rejected": -1.9603413343429565, "logps/chosen": -291.3283996582031, "logps/rejected": -541.2410888671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.8916397094726562, "rewards/margins": 8.44268798828125, "rewards/rejected": -11.334327697753906, "step": 9439 }, { "epoch": 1.47, "learning_rate": 7.223655791275482e-06, "logits/chosen": -2.764188289642334, "logits/rejected": -2.027372121810913, "logps/chosen": -167.377197265625, "logps/rejected": -167.59405517578125, "loss": 0.1913, "rewards/accuracies": 1.0, "rewards/chosen": -1.3056480884552002, "rewards/margins": 3.817645311355591, "rewards/rejected": -5.123293399810791, "step": 9440 }, { "epoch": 1.47, "learning_rate": 7.2229223507443335e-06, "logits/chosen": -2.5890555381774902, "logits/rejected": -2.7800073623657227, "logps/chosen": -184.2890167236328, "logps/rejected": -372.66717529296875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.661635160446167, "rewards/margins": 7.703326225280762, "rewards/rejected": -10.364961624145508, "step": 9441 }, { "epoch": 1.47, "learning_rate": 7.222188910213185e-06, "logits/chosen": -2.7490391731262207, "logits/rejected": -2.741628646850586, "logps/chosen": -194.8590087890625, "logps/rejected": -237.48806762695312, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.4556937217712402, "rewards/margins": 6.469114303588867, "rewards/rejected": -8.924808502197266, "step": 9442 }, { "epoch": 1.47, "learning_rate": 7.221455469682037e-06, "logits/chosen": -2.145021915435791, "logits/rejected": -2.7455482482910156, "logps/chosen": -156.10475158691406, "logps/rejected": -281.80670166015625, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -1.8089150190353394, "rewards/margins": 5.968440532684326, "rewards/rejected": -7.777355194091797, "step": 9443 }, { "epoch": 1.47, "learning_rate": 7.220722029150889e-06, "logits/chosen": -1.9284032583236694, "logits/rejected": -2.8205783367156982, "logps/chosen": -226.43948364257812, "logps/rejected": -438.4062805175781, "loss": 0.7619, "rewards/accuracies": 0.5, "rewards/chosen": -5.3975348472595215, "rewards/margins": 5.228518009185791, "rewards/rejected": -10.626052856445312, "step": 9444 }, { "epoch": 1.47, "learning_rate": 7.219988588619743e-06, "logits/chosen": -2.0210459232330322, "logits/rejected": -2.832859516143799, "logps/chosen": -268.4103698730469, "logps/rejected": -506.804443359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.204427242279053, "rewards/margins": 8.324201583862305, "rewards/rejected": -12.528629302978516, "step": 9445 }, { "epoch": 1.47, "learning_rate": 7.2192551480885946e-06, "logits/chosen": -2.0145978927612305, "logits/rejected": -2.7806508541107178, "logps/chosen": -120.23297119140625, "logps/rejected": -281.9754638671875, "loss": 0.1456, "rewards/accuracies": 1.0, "rewards/chosen": -3.3378231525421143, "rewards/margins": 4.918126106262207, "rewards/rejected": -8.255949020385742, "step": 9446 }, { "epoch": 1.47, "learning_rate": 7.2185217075574464e-06, "logits/chosen": -1.7257790565490723, "logits/rejected": -2.451298475265503, "logps/chosen": -234.81082153320312, "logps/rejected": -266.8606262207031, "loss": 1.2375, "rewards/accuracies": 0.5, "rewards/chosen": -6.112005233764648, "rewards/margins": 3.3972253799438477, "rewards/rejected": -9.509230613708496, "step": 9447 }, { "epoch": 1.47, "learning_rate": 7.217788267026298e-06, "logits/chosen": -1.5001170635223389, "logits/rejected": -2.6481363773345947, "logps/chosen": -73.71328735351562, "logps/rejected": -200.4367218017578, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.9362587928771973, "rewards/margins": 6.532711029052734, "rewards/rejected": -9.468969345092773, "step": 9448 }, { "epoch": 1.47, "learning_rate": 7.21705482649515e-06, "logits/chosen": -2.701347827911377, "logits/rejected": -1.2420583963394165, "logps/chosen": -214.54087829589844, "logps/rejected": -143.2021484375, "loss": 0.1217, "rewards/accuracies": 1.0, "rewards/chosen": -2.6138923168182373, "rewards/margins": 2.967167377471924, "rewards/rejected": -5.581059455871582, "step": 9449 }, { "epoch": 1.47, "learning_rate": 7.216321385964002e-06, "logits/chosen": -1.9534854888916016, "logits/rejected": -2.462679147720337, "logps/chosen": -185.35040283203125, "logps/rejected": -396.5942687988281, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.8737807273864746, "rewards/margins": 8.864716529846191, "rewards/rejected": -11.738496780395508, "step": 9450 }, { "epoch": 1.47, "learning_rate": 7.215587945432854e-06, "logits/chosen": -2.0426976680755615, "logits/rejected": -2.7371954917907715, "logps/chosen": -188.02415466308594, "logps/rejected": -369.4048156738281, "loss": 0.2227, "rewards/accuracies": 1.0, "rewards/chosen": -3.359973430633545, "rewards/margins": 4.813016414642334, "rewards/rejected": -8.172989845275879, "step": 9451 }, { "epoch": 1.47, "learning_rate": 7.214854504901706e-06, "logits/chosen": -2.668429136276245, "logits/rejected": -1.9089064598083496, "logps/chosen": -148.3768310546875, "logps/rejected": -213.9531707763672, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": -2.477309465408325, "rewards/margins": 5.176061153411865, "rewards/rejected": -7.653370380401611, "step": 9452 }, { "epoch": 1.47, "learning_rate": 7.214121064370558e-06, "logits/chosen": -2.576718807220459, "logits/rejected": -2.895742416381836, "logps/chosen": -205.70582580566406, "logps/rejected": -388.39971923828125, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -4.531816005706787, "rewards/margins": 5.468642234802246, "rewards/rejected": -10.000458717346191, "step": 9453 }, { "epoch": 1.47, "learning_rate": 7.213387623839411e-06, "logits/chosen": -3.122150182723999, "logits/rejected": -2.932343006134033, "logps/chosen": -853.6381225585938, "logps/rejected": -833.239013671875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -5.959836006164551, "rewards/margins": 6.787694931030273, "rewards/rejected": -12.74753189086914, "step": 9454 }, { "epoch": 1.47, "learning_rate": 7.212654183308263e-06, "logits/chosen": -0.9567790031433105, "logits/rejected": -2.1329734325408936, "logps/chosen": -143.57778930664062, "logps/rejected": -439.5518798828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4188930988311768, "rewards/margins": 10.231461524963379, "rewards/rejected": -11.650354385375977, "step": 9455 }, { "epoch": 1.47, "learning_rate": 7.211920742777115e-06, "logits/chosen": -2.173945188522339, "logits/rejected": -2.7938032150268555, "logps/chosen": -129.16004943847656, "logps/rejected": -365.01031494140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.9878391027450562, "rewards/margins": 7.508617401123047, "rewards/rejected": -9.49645709991455, "step": 9456 }, { "epoch": 1.47, "learning_rate": 7.211187302245968e-06, "logits/chosen": -1.813853144645691, "logits/rejected": -2.5003952980041504, "logps/chosen": -141.8352508544922, "logps/rejected": -267.6081848144531, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.285083770751953, "rewards/margins": 8.186700820922852, "rewards/rejected": -10.471784591674805, "step": 9457 }, { "epoch": 1.47, "learning_rate": 7.21045386171482e-06, "logits/chosen": -2.6335296630859375, "logits/rejected": -1.5834802389144897, "logps/chosen": -377.0629577636719, "logps/rejected": -311.01690673828125, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -1.6301765441894531, "rewards/margins": 3.400724411010742, "rewards/rejected": -5.030900955200195, "step": 9458 }, { "epoch": 1.47, "learning_rate": 7.2097204211836715e-06, "logits/chosen": -2.8547186851501465, "logits/rejected": -2.4726808071136475, "logps/chosen": -243.44505310058594, "logps/rejected": -122.02816009521484, "loss": 0.7308, "rewards/accuracies": 0.5, "rewards/chosen": -3.597285509109497, "rewards/margins": 1.2900913953781128, "rewards/rejected": -4.88737678527832, "step": 9459 }, { "epoch": 1.47, "learning_rate": 7.208986980652523e-06, "logits/chosen": -1.5948643684387207, "logits/rejected": -2.20721173286438, "logps/chosen": -114.12776947021484, "logps/rejected": -393.18743896484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.4502196311950684, "rewards/margins": 6.608652114868164, "rewards/rejected": -9.05887222290039, "step": 9460 }, { "epoch": 1.47, "learning_rate": 7.208253540121375e-06, "logits/chosen": -2.1968445777893066, "logits/rejected": -2.5978996753692627, "logps/chosen": -285.86688232421875, "logps/rejected": -403.290771484375, "loss": 0.1446, "rewards/accuracies": 1.0, "rewards/chosen": -2.6436002254486084, "rewards/margins": 3.934018135070801, "rewards/rejected": -6.577618598937988, "step": 9461 }, { "epoch": 1.47, "learning_rate": 7.207520099590227e-06, "logits/chosen": -2.2313308715820312, "logits/rejected": -2.8108863830566406, "logps/chosen": -108.63890075683594, "logps/rejected": -271.8892517089844, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -3.7152457237243652, "rewards/margins": 4.843441963195801, "rewards/rejected": -8.558687210083008, "step": 9462 }, { "epoch": 1.47, "learning_rate": 7.206786659059081e-06, "logits/chosen": -2.6202175617218018, "logits/rejected": -2.5540103912353516, "logps/chosen": -697.3280029296875, "logps/rejected": -520.8919677734375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -4.257465362548828, "rewards/margins": 8.689704895019531, "rewards/rejected": -12.94717025756836, "step": 9463 }, { "epoch": 1.47, "learning_rate": 7.206053218527933e-06, "logits/chosen": -1.667942762374878, "logits/rejected": -3.090250015258789, "logps/chosen": -180.7051544189453, "logps/rejected": -693.1572875976562, "loss": 0.1408, "rewards/accuracies": 1.0, "rewards/chosen": -4.89005708694458, "rewards/margins": 3.845115900039673, "rewards/rejected": -8.735173225402832, "step": 9464 }, { "epoch": 1.47, "learning_rate": 7.2053197779967845e-06, "logits/chosen": -3.2103676795959473, "logits/rejected": -3.3434927463531494, "logps/chosen": -101.86753845214844, "logps/rejected": -224.81419372558594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.389869451522827, "rewards/margins": 7.980759620666504, "rewards/rejected": -10.37062931060791, "step": 9465 }, { "epoch": 1.47, "learning_rate": 7.204586337465636e-06, "logits/chosen": -2.883100748062134, "logits/rejected": -2.640648365020752, "logps/chosen": -112.20468139648438, "logps/rejected": -101.0863037109375, "loss": 2.3687, "rewards/accuracies": 0.5, "rewards/chosen": -5.472886562347412, "rewards/margins": 0.2463235855102539, "rewards/rejected": -5.719210147857666, "step": 9466 }, { "epoch": 1.47, "learning_rate": 7.203852896934488e-06, "logits/chosen": -2.6211395263671875, "logits/rejected": -2.6833536624908447, "logps/chosen": -273.3333740234375, "logps/rejected": -384.10528564453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.325645446777344, "rewards/margins": 8.680699348449707, "rewards/rejected": -13.00634479522705, "step": 9467 }, { "epoch": 1.47, "learning_rate": 7.20311945640334e-06, "logits/chosen": -2.778292655944824, "logits/rejected": -3.050008773803711, "logps/chosen": -783.202880859375, "logps/rejected": -678.7281494140625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -5.3984527587890625, "rewards/margins": 5.549692153930664, "rewards/rejected": -10.948144912719727, "step": 9468 }, { "epoch": 1.47, "learning_rate": 7.202386015872192e-06, "logits/chosen": -2.7718396186828613, "logits/rejected": -2.8835597038269043, "logps/chosen": -66.69214630126953, "logps/rejected": -278.90399169921875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.5931382179260254, "rewards/margins": 8.461381912231445, "rewards/rejected": -11.054520606994629, "step": 9469 }, { "epoch": 1.47, "learning_rate": 7.201652575341044e-06, "logits/chosen": -1.7197327613830566, "logits/rejected": -2.818639039993286, "logps/chosen": -65.81513977050781, "logps/rejected": -281.95849609375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -2.290900468826294, "rewards/margins": 4.771039009094238, "rewards/rejected": -7.061939239501953, "step": 9470 }, { "epoch": 1.47, "learning_rate": 7.200919134809896e-06, "logits/chosen": -1.4430146217346191, "logits/rejected": -2.0685088634490967, "logps/chosen": -140.176513671875, "logps/rejected": -312.7044372558594, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -3.1388463973999023, "rewards/margins": 7.430159568786621, "rewards/rejected": -10.569005966186523, "step": 9471 }, { "epoch": 1.47, "learning_rate": 7.200185694278749e-06, "logits/chosen": -2.8369245529174805, "logits/rejected": -3.0811972618103027, "logps/chosen": -282.5389709472656, "logps/rejected": -349.55035400390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4728851318359375, "rewards/margins": 7.6709699630737305, "rewards/rejected": -9.143856048583984, "step": 9472 }, { "epoch": 1.47, "learning_rate": 7.199452253747601e-06, "logits/chosen": -2.2939202785491943, "logits/rejected": -3.134139060974121, "logps/chosen": -146.5890655517578, "logps/rejected": -422.7568664550781, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.81414532661438, "rewards/margins": 8.342341423034668, "rewards/rejected": -11.156486511230469, "step": 9473 }, { "epoch": 1.47, "learning_rate": 7.198718813216454e-06, "logits/chosen": -2.4504730701446533, "logits/rejected": -2.983837366104126, "logps/chosen": -191.9825439453125, "logps/rejected": -376.45098876953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9668350219726562, "rewards/margins": 8.11470890045166, "rewards/rejected": -10.081544876098633, "step": 9474 }, { "epoch": 1.47, "learning_rate": 7.197985372685306e-06, "logits/chosen": -2.1176888942718506, "logits/rejected": -2.857609510421753, "logps/chosen": -123.50572967529297, "logps/rejected": -434.1076354980469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7214202880859375, "rewards/margins": 9.703739166259766, "rewards/rejected": -12.425159454345703, "step": 9475 }, { "epoch": 1.47, "learning_rate": 7.197251932154158e-06, "logits/chosen": -2.257159948348999, "logits/rejected": -2.7456302642822266, "logps/chosen": -63.81079864501953, "logps/rejected": -232.22767639160156, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": -2.4990673065185547, "rewards/margins": 6.650217056274414, "rewards/rejected": -9.149284362792969, "step": 9476 }, { "epoch": 1.47, "learning_rate": 7.1965184916230096e-06, "logits/chosen": -1.9235100746154785, "logits/rejected": -2.9841833114624023, "logps/chosen": -93.2757339477539, "logps/rejected": -420.57733154296875, "loss": 1.5347, "rewards/accuracies": 0.5, "rewards/chosen": -5.615711212158203, "rewards/margins": 1.138689637184143, "rewards/rejected": -6.754400730133057, "step": 9477 }, { "epoch": 1.47, "learning_rate": 7.1957850510918614e-06, "logits/chosen": -2.957223892211914, "logits/rejected": -2.9923255443573, "logps/chosen": -170.36288452148438, "logps/rejected": -148.0667266845703, "loss": 0.6547, "rewards/accuracies": 0.5, "rewards/chosen": -2.9402785301208496, "rewards/margins": 3.9553089141845703, "rewards/rejected": -6.89558744430542, "step": 9478 }, { "epoch": 1.47, "learning_rate": 7.195051610560713e-06, "logits/chosen": -2.6826937198638916, "logits/rejected": -2.066545248031616, "logps/chosen": -503.5054931640625, "logps/rejected": -472.90155029296875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.2352535724639893, "rewards/margins": 6.1574177742004395, "rewards/rejected": -8.392671585083008, "step": 9479 }, { "epoch": 1.47, "learning_rate": 7.194318170029565e-06, "logits/chosen": -3.0823233127593994, "logits/rejected": -2.4943718910217285, "logps/chosen": -213.36038208007812, "logps/rejected": -163.73101806640625, "loss": 0.0798, "rewards/accuracies": 1.0, "rewards/chosen": 0.9901562929153442, "rewards/margins": 7.699231147766113, "rewards/rejected": -6.709074974060059, "step": 9480 }, { "epoch": 1.47, "learning_rate": 7.193584729498419e-06, "logits/chosen": -2.612837553024292, "logits/rejected": -2.8418729305267334, "logps/chosen": -294.6624755859375, "logps/rejected": -523.2861938476562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.963717460632324, "rewards/margins": 8.510416030883789, "rewards/rejected": -11.474133491516113, "step": 9481 }, { "epoch": 1.47, "learning_rate": 7.192851288967271e-06, "logits/chosen": -2.2471346855163574, "logits/rejected": -2.769052028656006, "logps/chosen": -138.69427490234375, "logps/rejected": -367.46392822265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.6255416870117188, "rewards/margins": 8.459821701049805, "rewards/rejected": -11.085363388061523, "step": 9482 }, { "epoch": 1.47, "learning_rate": 7.1921178484361225e-06, "logits/chosen": -2.9459311962127686, "logits/rejected": -1.504059910774231, "logps/chosen": -430.8421325683594, "logps/rejected": -187.74685668945312, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.8488006591796875, "rewards/margins": 8.769184112548828, "rewards/rejected": -11.617984771728516, "step": 9483 }, { "epoch": 1.47, "learning_rate": 7.191384407904974e-06, "logits/chosen": -2.051431894302368, "logits/rejected": -2.7221035957336426, "logps/chosen": -350.35882568359375, "logps/rejected": -564.89306640625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -4.37101936340332, "rewards/margins": 10.625463485717773, "rewards/rejected": -14.996482849121094, "step": 9484 }, { "epoch": 1.48, "learning_rate": 7.190650967373826e-06, "logits/chosen": -2.0266385078430176, "logits/rejected": -2.8530023097991943, "logps/chosen": -53.27635955810547, "logps/rejected": -215.92103576660156, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -1.8820645809173584, "rewards/margins": 8.865320205688477, "rewards/rejected": -10.747385025024414, "step": 9485 }, { "epoch": 1.48, "learning_rate": 7.189917526842678e-06, "logits/chosen": -2.406796455383301, "logits/rejected": -2.958549976348877, "logps/chosen": -160.604248046875, "logps/rejected": -500.990478515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.552690505981445, "rewards/margins": 8.987943649291992, "rewards/rejected": -13.540634155273438, "step": 9486 }, { "epoch": 1.48, "learning_rate": 7.18918408631153e-06, "logits/chosen": -1.963806390762329, "logits/rejected": -2.864698648452759, "logps/chosen": -124.46641540527344, "logps/rejected": -264.9306640625, "loss": 0.1105, "rewards/accuracies": 1.0, "rewards/chosen": -6.658253192901611, "rewards/margins": 2.4706506729125977, "rewards/rejected": -9.128904342651367, "step": 9487 }, { "epoch": 1.48, "learning_rate": 7.188450645780382e-06, "logits/chosen": -1.9124244451522827, "logits/rejected": -2.4513633251190186, "logps/chosen": -237.30303955078125, "logps/rejected": -336.7066650390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.9081254005432129, "rewards/margins": 9.958637237548828, "rewards/rejected": -10.8667631149292, "step": 9488 }, { "epoch": 1.48, "learning_rate": 7.187717205249235e-06, "logits/chosen": -1.7216064929962158, "logits/rejected": -2.7624974250793457, "logps/chosen": -363.9599304199219, "logps/rejected": -623.7326049804688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.004890441894531, "rewards/margins": 10.30184268951416, "rewards/rejected": -15.306734085083008, "step": 9489 }, { "epoch": 1.48, "learning_rate": 7.186983764718087e-06, "logits/chosen": -2.6022820472717285, "logits/rejected": -2.9612059593200684, "logps/chosen": -706.6287231445312, "logps/rejected": -640.9546508789062, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.1433229446411133, "rewards/margins": 7.345431804656982, "rewards/rejected": -10.488754272460938, "step": 9490 }, { "epoch": 1.48, "learning_rate": 7.18625032418694e-06, "logits/chosen": -2.5265378952026367, "logits/rejected": -2.781613826751709, "logps/chosen": -299.49713134765625, "logps/rejected": -404.95855712890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.268641710281372, "rewards/margins": 8.533401489257812, "rewards/rejected": -11.802042961120605, "step": 9491 }, { "epoch": 1.48, "learning_rate": 7.185516883655792e-06, "logits/chosen": -2.4592976570129395, "logits/rejected": -2.199842929840088, "logps/chosen": -584.2060546875, "logps/rejected": -657.3846435546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.186108589172363, "rewards/margins": 8.72035026550293, "rewards/rejected": -12.90645980834961, "step": 9492 }, { "epoch": 1.48, "learning_rate": 7.184783443124644e-06, "logits/chosen": -2.762380599975586, "logits/rejected": -2.6400630474090576, "logps/chosen": -166.45498657226562, "logps/rejected": -164.660888671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.916417360305786, "rewards/margins": 5.976779937744141, "rewards/rejected": -8.893198013305664, "step": 9493 }, { "epoch": 1.48, "learning_rate": 7.184050002593496e-06, "logits/chosen": -2.7232611179351807, "logits/rejected": -3.0390725135803223, "logps/chosen": -260.5584716796875, "logps/rejected": -481.7959899902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7201015949249268, "rewards/margins": 11.674667358398438, "rewards/rejected": -13.394768714904785, "step": 9494 }, { "epoch": 1.48, "learning_rate": 7.183316562062348e-06, "logits/chosen": -3.145819664001465, "logits/rejected": -2.805305004119873, "logps/chosen": -607.7696533203125, "logps/rejected": -534.9866943359375, "loss": 0.0761, "rewards/accuracies": 1.0, "rewards/chosen": -6.075112819671631, "rewards/margins": 4.838830947875977, "rewards/rejected": -10.91394329071045, "step": 9495 }, { "epoch": 1.48, "learning_rate": 7.1825831215311995e-06, "logits/chosen": -2.962712526321411, "logits/rejected": -2.1921563148498535, "logps/chosen": -333.17913818359375, "logps/rejected": -241.66563415527344, "loss": 1.797, "rewards/accuracies": 0.5, "rewards/chosen": -5.812464714050293, "rewards/margins": 1.8526886701583862, "rewards/rejected": -7.665153503417969, "step": 9496 }, { "epoch": 1.48, "learning_rate": 7.181849681000051e-06, "logits/chosen": -2.7499523162841797, "logits/rejected": -2.103651762008667, "logps/chosen": -359.22894287109375, "logps/rejected": -268.79412841796875, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": -2.9788429737091064, "rewards/margins": 3.0961694717407227, "rewards/rejected": -6.07501220703125, "step": 9497 }, { "epoch": 1.48, "learning_rate": 7.181116240468903e-06, "logits/chosen": -1.5551074743270874, "logits/rejected": -3.0158562660217285, "logps/chosen": -82.43074035644531, "logps/rejected": -402.8162841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9232497215270996, "rewards/margins": 10.443333625793457, "rewards/rejected": -13.366582870483398, "step": 9498 }, { "epoch": 1.48, "learning_rate": 7.180382799937757e-06, "logits/chosen": -2.263073444366455, "logits/rejected": -2.6855921745300293, "logps/chosen": -156.62794494628906, "logps/rejected": -169.76327514648438, "loss": 2.0601, "rewards/accuracies": 0.5, "rewards/chosen": -5.708860397338867, "rewards/margins": 2.1595675945281982, "rewards/rejected": -7.8684282302856445, "step": 9499 }, { "epoch": 1.48, "learning_rate": 7.179649359406609e-06, "logits/chosen": -1.2106177806854248, "logits/rejected": -2.9409022331237793, "logps/chosen": -203.44830322265625, "logps/rejected": -853.0064086914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3282647132873535, "rewards/margins": 14.342794418334961, "rewards/rejected": -16.671058654785156, "step": 9500 }, { "epoch": 1.48, "learning_rate": 7.1789159188754606e-06, "logits/chosen": -1.6846709251403809, "logits/rejected": -2.7172586917877197, "logps/chosen": -190.82017517089844, "logps/rejected": -285.4436340332031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8594131469726562, "rewards/margins": 8.416934967041016, "rewards/rejected": -10.276348114013672, "step": 9501 }, { "epoch": 1.48, "learning_rate": 7.1781824783443124e-06, "logits/chosen": -2.7837681770324707, "logits/rejected": -2.5037899017333984, "logps/chosen": -264.85113525390625, "logps/rejected": -372.7335205078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.3064918518066406, "rewards/margins": 7.859714508056641, "rewards/rejected": -11.166206359863281, "step": 9502 }, { "epoch": 1.48, "learning_rate": 7.177449037813164e-06, "logits/chosen": -2.7754065990448, "logits/rejected": -2.4110593795776367, "logps/chosen": -424.0545959472656, "logps/rejected": -301.34063720703125, "loss": 0.1169, "rewards/accuracies": 1.0, "rewards/chosen": -4.687333583831787, "rewards/margins": 4.068548679351807, "rewards/rejected": -8.755882263183594, "step": 9503 }, { "epoch": 1.48, "learning_rate": 7.176715597282016e-06, "logits/chosen": -2.6226236820220947, "logits/rejected": -3.1580309867858887, "logps/chosen": -253.18081665039062, "logps/rejected": -403.91131591796875, "loss": 0.2716, "rewards/accuracies": 1.0, "rewards/chosen": -4.589413642883301, "rewards/margins": 5.5530805587768555, "rewards/rejected": -10.142494201660156, "step": 9504 }, { "epoch": 1.48, "learning_rate": 7.175982156750868e-06, "logits/chosen": -3.081923007965088, "logits/rejected": -2.491879940032959, "logps/chosen": -227.6416778564453, "logps/rejected": -138.89303588867188, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -2.0285685062408447, "rewards/margins": 4.917233467102051, "rewards/rejected": -6.945801734924316, "step": 9505 }, { "epoch": 1.48, "learning_rate": 7.175248716219721e-06, "logits/chosen": -2.902194023132324, "logits/rejected": -2.485907554626465, "logps/chosen": -328.0314636230469, "logps/rejected": -299.2645263671875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.453578472137451, "rewards/margins": 7.882601737976074, "rewards/rejected": -12.336179733276367, "step": 9506 }, { "epoch": 1.48, "learning_rate": 7.174515275688573e-06, "logits/chosen": -2.747669219970703, "logits/rejected": -2.2161483764648438, "logps/chosen": -254.86094665527344, "logps/rejected": -221.17494201660156, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -3.6484267711639404, "rewards/margins": 4.845451354980469, "rewards/rejected": -8.493878364562988, "step": 9507 }, { "epoch": 1.48, "learning_rate": 7.173781835157426e-06, "logits/chosen": -2.7465426921844482, "logits/rejected": -2.8091578483581543, "logps/chosen": -241.49688720703125, "logps/rejected": -456.6767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4774186611175537, "rewards/margins": 10.869024276733398, "rewards/rejected": -14.346442222595215, "step": 9508 }, { "epoch": 1.48, "learning_rate": 7.173048394626278e-06, "logits/chosen": -3.0219810009002686, "logits/rejected": -3.146773338317871, "logps/chosen": -68.51394653320312, "logps/rejected": -169.13568115234375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -2.527998208999634, "rewards/margins": 5.447338581085205, "rewards/rejected": -7.975337028503418, "step": 9509 }, { "epoch": 1.48, "learning_rate": 7.17231495409513e-06, "logits/chosen": -2.8054518699645996, "logits/rejected": -2.551842212677002, "logps/chosen": -487.69171142578125, "logps/rejected": -642.1813354492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6475892066955566, "rewards/margins": 12.171680450439453, "rewards/rejected": -15.819269180297852, "step": 9510 }, { "epoch": 1.48, "learning_rate": 7.171581513563982e-06, "logits/chosen": -1.2259478569030762, "logits/rejected": -2.255244255065918, "logps/chosen": -379.2601013183594, "logps/rejected": -419.0254821777344, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.024825096130371, "rewards/margins": 6.2083635330200195, "rewards/rejected": -10.23318862915039, "step": 9511 }, { "epoch": 1.48, "learning_rate": 7.170848073032834e-06, "logits/chosen": -1.6772948503494263, "logits/rejected": -3.0348589420318604, "logps/chosen": -67.34656524658203, "logps/rejected": -358.81060791015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.6506195068359375, "rewards/margins": 7.9651055335998535, "rewards/rejected": -10.61572551727295, "step": 9512 }, { "epoch": 1.48, "learning_rate": 7.170114632501686e-06, "logits/chosen": -0.8777065873146057, "logits/rejected": -2.7815051078796387, "logps/chosen": -128.59173583984375, "logps/rejected": -549.7809448242188, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -4.145568370819092, "rewards/margins": 6.99050235748291, "rewards/rejected": -11.136070251464844, "step": 9513 }, { "epoch": 1.48, "learning_rate": 7.1693811919705375e-06, "logits/chosen": -1.912358045578003, "logits/rejected": -3.1329731941223145, "logps/chosen": -158.28688049316406, "logps/rejected": -373.23590087890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.2571232318878174, "rewards/margins": 6.405013561248779, "rewards/rejected": -9.662137031555176, "step": 9514 }, { "epoch": 1.48, "learning_rate": 7.168647751439389e-06, "logits/chosen": -2.856576919555664, "logits/rejected": -1.7734858989715576, "logps/chosen": -341.4375, "logps/rejected": -258.2060546875, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -1.630929708480835, "rewards/margins": 4.392172813415527, "rewards/rejected": -6.023102760314941, "step": 9515 }, { "epoch": 1.48, "learning_rate": 7.167914310908243e-06, "logits/chosen": -2.9897327423095703, "logits/rejected": -2.474379539489746, "logps/chosen": -306.7090148925781, "logps/rejected": -328.26910400390625, "loss": 0.8543, "rewards/accuracies": 0.5, "rewards/chosen": -4.904367923736572, "rewards/margins": 2.2896645069122314, "rewards/rejected": -7.194032669067383, "step": 9516 }, { "epoch": 1.48, "learning_rate": 7.167180870377095e-06, "logits/chosen": -2.8094468116760254, "logits/rejected": -2.665311098098755, "logps/chosen": -467.9635314941406, "logps/rejected": -609.9285278320312, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -4.276498794555664, "rewards/margins": 5.896980285644531, "rewards/rejected": -10.173479080200195, "step": 9517 }, { "epoch": 1.48, "learning_rate": 7.166447429845947e-06, "logits/chosen": -2.8047165870666504, "logits/rejected": -2.0384562015533447, "logps/chosen": -411.6917724609375, "logps/rejected": -430.3578796386719, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -1.7263092994689941, "rewards/margins": 10.175332069396973, "rewards/rejected": -11.901641845703125, "step": 9518 }, { "epoch": 1.48, "learning_rate": 7.165713989314799e-06, "logits/chosen": -0.9636125564575195, "logits/rejected": -1.743959665298462, "logps/chosen": -42.71836471557617, "logps/rejected": -329.508544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2949258089065552, "rewards/margins": 10.360240936279297, "rewards/rejected": -11.655167579650879, "step": 9519 }, { "epoch": 1.48, "learning_rate": 7.1649805487836505e-06, "logits/chosen": -2.9318525791168213, "logits/rejected": -3.0168540477752686, "logps/chosen": -97.49232482910156, "logps/rejected": -204.19607543945312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.215898513793945, "rewards/margins": 6.595149517059326, "rewards/rejected": -10.81104850769043, "step": 9520 }, { "epoch": 1.48, "learning_rate": 7.164247108252502e-06, "logits/chosen": -2.780376434326172, "logits/rejected": -2.3624789714813232, "logps/chosen": -161.5391387939453, "logps/rejected": -208.78512573242188, "loss": 0.6495, "rewards/accuracies": 0.5, "rewards/chosen": -4.495488166809082, "rewards/margins": 3.756817102432251, "rewards/rejected": -8.252305030822754, "step": 9521 }, { "epoch": 1.48, "learning_rate": 7.163513667721354e-06, "logits/chosen": -2.192781925201416, "logits/rejected": -2.4823522567749023, "logps/chosen": -199.90286254882812, "logps/rejected": -371.1739196777344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.5673110485076904, "rewards/margins": 9.50459098815918, "rewards/rejected": -11.071903228759766, "step": 9522 }, { "epoch": 1.48, "learning_rate": 7.162780227190207e-06, "logits/chosen": -2.7043919563293457, "logits/rejected": -2.280531406402588, "logps/chosen": -761.5445556640625, "logps/rejected": -693.33349609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.160728454589844, "rewards/margins": 8.001363754272461, "rewards/rejected": -12.162093162536621, "step": 9523 }, { "epoch": 1.48, "learning_rate": 7.162046786659059e-06, "logits/chosen": -2.9138736724853516, "logits/rejected": -2.7476556301116943, "logps/chosen": -152.8594970703125, "logps/rejected": -208.6876220703125, "loss": 2.5893, "rewards/accuracies": 0.5, "rewards/chosen": -4.959620475769043, "rewards/margins": 1.965022325515747, "rewards/rejected": -6.924643039703369, "step": 9524 }, { "epoch": 1.48, "learning_rate": 7.1613133461279124e-06, "logits/chosen": -2.515360116958618, "logits/rejected": -2.9088141918182373, "logps/chosen": -154.58053588867188, "logps/rejected": -292.72308349609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1794861555099487, "rewards/margins": 10.203357696533203, "rewards/rejected": -11.382843971252441, "step": 9525 }, { "epoch": 1.48, "learning_rate": 7.160579905596764e-06, "logits/chosen": -2.5805728435516357, "logits/rejected": -2.8272147178649902, "logps/chosen": -725.3814086914062, "logps/rejected": -669.405029296875, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -3.3546996116638184, "rewards/margins": 6.894318580627441, "rewards/rejected": -10.249018669128418, "step": 9526 }, { "epoch": 1.48, "learning_rate": 7.159846465065616e-06, "logits/chosen": -2.6274495124816895, "logits/rejected": -1.1021969318389893, "logps/chosen": -421.3635559082031, "logps/rejected": -289.417724609375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.3406829833984375, "rewards/margins": 7.044783115386963, "rewards/rejected": -10.385465621948242, "step": 9527 }, { "epoch": 1.48, "learning_rate": 7.159113024534468e-06, "logits/chosen": -2.569812774658203, "logits/rejected": -2.6602401733398438, "logps/chosen": -411.83392333984375, "logps/rejected": -540.3868408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8450608253479004, "rewards/margins": 10.241165161132812, "rewards/rejected": -13.086225509643555, "step": 9528 }, { "epoch": 1.48, "learning_rate": 7.15837958400332e-06, "logits/chosen": -2.17146897315979, "logits/rejected": -3.1419596672058105, "logps/chosen": -94.30815124511719, "logps/rejected": -275.453369140625, "loss": 1.1493, "rewards/accuracies": 0.5, "rewards/chosen": -4.580836296081543, "rewards/margins": 4.852328300476074, "rewards/rejected": -9.433164596557617, "step": 9529 }, { "epoch": 1.48, "learning_rate": 7.157646143472172e-06, "logits/chosen": -1.6699182987213135, "logits/rejected": -2.65798282623291, "logps/chosen": -162.06820678710938, "logps/rejected": -618.6357421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.0547561645507812, "rewards/margins": 8.99096965789795, "rewards/rejected": -12.045726776123047, "step": 9530 }, { "epoch": 1.48, "learning_rate": 7.156912702941024e-06, "logits/chosen": -2.0284597873687744, "logits/rejected": -2.795461893081665, "logps/chosen": -111.76092529296875, "logps/rejected": -285.15264892578125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -3.964818000793457, "rewards/margins": 6.212082862854004, "rewards/rejected": -10.176900863647461, "step": 9531 }, { "epoch": 1.48, "learning_rate": 7.1561792624098756e-06, "logits/chosen": -2.597151517868042, "logits/rejected": -3.003034830093384, "logps/chosen": -270.5221862792969, "logps/rejected": -340.6690979003906, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.2762343883514404, "rewards/margins": 7.755682945251465, "rewards/rejected": -10.031917572021484, "step": 9532 }, { "epoch": 1.48, "learning_rate": 7.1554458218787275e-06, "logits/chosen": -1.1193573474884033, "logits/rejected": -2.6330549716949463, "logps/chosen": -82.21158599853516, "logps/rejected": -204.25015258789062, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -2.3984971046447754, "rewards/margins": 3.5421714782714844, "rewards/rejected": -5.94066858291626, "step": 9533 }, { "epoch": 1.48, "learning_rate": 7.154712381347581e-06, "logits/chosen": -2.9577536582946777, "logits/rejected": -2.8704140186309814, "logps/chosen": -184.02517700195312, "logps/rejected": -293.43280029296875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.815500259399414, "rewards/margins": 6.295560836791992, "rewards/rejected": -10.111061096191406, "step": 9534 }, { "epoch": 1.48, "learning_rate": 7.153978940816433e-06, "logits/chosen": -2.806488037109375, "logits/rejected": -2.6902530193328857, "logps/chosen": -292.4647216796875, "logps/rejected": -331.6448974609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.143206834793091, "rewards/margins": 7.977514266967773, "rewards/rejected": -11.120721817016602, "step": 9535 }, { "epoch": 1.48, "learning_rate": 7.153245500285285e-06, "logits/chosen": -1.7223997116088867, "logits/rejected": -2.866335868835449, "logps/chosen": -166.38446044921875, "logps/rejected": -468.8131103515625, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -2.7266464233398438, "rewards/margins": 4.988656520843506, "rewards/rejected": -7.71530294418335, "step": 9536 }, { "epoch": 1.48, "learning_rate": 7.152512059754137e-06, "logits/chosen": -2.2817482948303223, "logits/rejected": -3.018007278442383, "logps/chosen": -240.29800415039062, "logps/rejected": -489.65264892578125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -5.482220649719238, "rewards/margins": 6.006135940551758, "rewards/rejected": -11.488356590270996, "step": 9537 }, { "epoch": 1.48, "learning_rate": 7.1517786192229885e-06, "logits/chosen": -2.602271556854248, "logits/rejected": -2.843970537185669, "logps/chosen": -53.53533935546875, "logps/rejected": -236.31080627441406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5244107246398926, "rewards/margins": 9.017751693725586, "rewards/rejected": -10.542162895202637, "step": 9538 }, { "epoch": 1.48, "learning_rate": 7.15104517869184e-06, "logits/chosen": -2.7195911407470703, "logits/rejected": -2.951589584350586, "logps/chosen": -82.4061279296875, "logps/rejected": -289.3577575683594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.7770233154296875, "rewards/margins": 7.65855598449707, "rewards/rejected": -11.435579299926758, "step": 9539 }, { "epoch": 1.48, "learning_rate": 7.150311738160693e-06, "logits/chosen": -2.080148696899414, "logits/rejected": -2.7835586071014404, "logps/chosen": -212.20260620117188, "logps/rejected": -426.5914306640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2034127712249756, "rewards/margins": 9.335119247436523, "rewards/rejected": -11.538532257080078, "step": 9540 }, { "epoch": 1.48, "learning_rate": 7.149578297629545e-06, "logits/chosen": -2.755260944366455, "logits/rejected": -2.9681155681610107, "logps/chosen": -102.21431732177734, "logps/rejected": -341.5876770019531, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.89255952835083, "rewards/margins": 8.639411926269531, "rewards/rejected": -11.53197193145752, "step": 9541 }, { "epoch": 1.48, "learning_rate": 7.148844857098397e-06, "logits/chosen": -2.9457039833068848, "logits/rejected": -2.98913836479187, "logps/chosen": -150.0369873046875, "logps/rejected": -167.86383056640625, "loss": 2.4367, "rewards/accuracies": 0.5, "rewards/chosen": -4.203963279724121, "rewards/margins": 0.6807091236114502, "rewards/rejected": -4.88467264175415, "step": 9542 }, { "epoch": 1.48, "learning_rate": 7.1481114165672505e-06, "logits/chosen": -2.748011589050293, "logits/rejected": -2.703131914138794, "logps/chosen": -153.73451232910156, "logps/rejected": -327.326416015625, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/chosen": -4.430654525756836, "rewards/margins": 4.088438987731934, "rewards/rejected": -8.51909351348877, "step": 9543 }, { "epoch": 1.48, "learning_rate": 7.147377976036102e-06, "logits/chosen": -1.3311693668365479, "logits/rejected": -2.7511777877807617, "logps/chosen": -31.539175033569336, "logps/rejected": -286.723876953125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -1.817521095275879, "rewards/margins": 6.937407970428467, "rewards/rejected": -8.754928588867188, "step": 9544 }, { "epoch": 1.48, "learning_rate": 7.146644535504954e-06, "logits/chosen": -0.9424399733543396, "logits/rejected": -2.7504076957702637, "logps/chosen": -109.38211059570312, "logps/rejected": -394.3949890136719, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/chosen": -3.8789870738983154, "rewards/margins": 5.848568916320801, "rewards/rejected": -9.727556228637695, "step": 9545 }, { "epoch": 1.48, "learning_rate": 7.145911094973806e-06, "logits/chosen": -0.9090428948402405, "logits/rejected": -1.1587198972702026, "logps/chosen": -120.61711120605469, "logps/rejected": -236.80531311035156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.861739158630371, "rewards/margins": 7.680001258850098, "rewards/rejected": -9.541740417480469, "step": 9546 }, { "epoch": 1.48, "learning_rate": 7.145177654442658e-06, "logits/chosen": -1.599995493888855, "logits/rejected": -2.8894176483154297, "logps/chosen": -125.39051818847656, "logps/rejected": -444.1285095214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6731233596801758, "rewards/margins": 11.865300178527832, "rewards/rejected": -13.538423538208008, "step": 9547 }, { "epoch": 1.48, "learning_rate": 7.14444421391151e-06, "logits/chosen": -2.661447286605835, "logits/rejected": -2.1354095935821533, "logps/chosen": -326.118896484375, "logps/rejected": -244.54718017578125, "loss": 1.3177, "rewards/accuracies": 0.5, "rewards/chosen": -5.943048477172852, "rewards/margins": 5.900163650512695, "rewards/rejected": -11.843212127685547, "step": 9548 }, { "epoch": 1.49, "learning_rate": 7.143710773380362e-06, "logits/chosen": -2.315918207168579, "logits/rejected": -2.5134501457214355, "logps/chosen": -240.399658203125, "logps/rejected": -455.05743408203125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -3.997811794281006, "rewards/margins": 7.297969818115234, "rewards/rejected": -11.295781135559082, "step": 9549 }, { "epoch": 1.49, "learning_rate": 7.142977332849214e-06, "logits/chosen": -1.0407789945602417, "logits/rejected": -2.5489284992218018, "logps/chosen": -146.94776916503906, "logps/rejected": -486.3738098144531, "loss": 0.3711, "rewards/accuracies": 0.5, "rewards/chosen": -5.72940731048584, "rewards/margins": 5.311703205108643, "rewards/rejected": -11.04111099243164, "step": 9550 }, { "epoch": 1.49, "learning_rate": 7.1422438923180655e-06, "logits/chosen": -2.819552421569824, "logits/rejected": -1.7247203588485718, "logps/chosen": -335.2032775878906, "logps/rejected": -240.19500732421875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -4.962772369384766, "rewards/margins": 6.405285835266113, "rewards/rejected": -11.368059158325195, "step": 9551 }, { "epoch": 1.49, "learning_rate": 7.141510451786919e-06, "logits/chosen": -2.3691534996032715, "logits/rejected": -3.1074752807617188, "logps/chosen": -87.32644653320312, "logps/rejected": -457.7174987792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9614508152008057, "rewards/margins": 12.84550952911377, "rewards/rejected": -14.806960105895996, "step": 9552 }, { "epoch": 1.49, "learning_rate": 7.140777011255771e-06, "logits/chosen": -2.7977147102355957, "logits/rejected": -3.007658004760742, "logps/chosen": -172.4673309326172, "logps/rejected": -259.9305419921875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.8870930671691895, "rewards/margins": 5.653843879699707, "rewards/rejected": -10.540937423706055, "step": 9553 }, { "epoch": 1.49, "learning_rate": 7.140043570724623e-06, "logits/chosen": -2.536799669265747, "logits/rejected": -2.8028604984283447, "logps/chosen": -386.16461181640625, "logps/rejected": -390.417724609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.909115791320801, "rewards/margins": 7.774580955505371, "rewards/rejected": -12.683696746826172, "step": 9554 }, { "epoch": 1.49, "learning_rate": 7.139310130193475e-06, "logits/chosen": -3.2824244499206543, "logits/rejected": -2.8783035278320312, "logps/chosen": -275.9234924316406, "logps/rejected": -172.08987426757812, "loss": 1.9476, "rewards/accuracies": 0.5, "rewards/chosen": -3.530461311340332, "rewards/margins": 4.274618148803711, "rewards/rejected": -7.805078983306885, "step": 9555 }, { "epoch": 1.49, "learning_rate": 7.138576689662327e-06, "logits/chosen": -3.169300079345703, "logits/rejected": -3.202969551086426, "logps/chosen": -90.67322540283203, "logps/rejected": -197.78614807128906, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.019545555114746, "rewards/margins": 6.051173210144043, "rewards/rejected": -8.070718765258789, "step": 9556 }, { "epoch": 1.49, "learning_rate": 7.137843249131179e-06, "logits/chosen": -2.635936737060547, "logits/rejected": -1.831933856010437, "logps/chosen": -201.95376586914062, "logps/rejected": -115.45288848876953, "loss": 2.5385, "rewards/accuracies": 0.0, "rewards/chosen": -5.584146976470947, "rewards/margins": -2.4441614151000977, "rewards/rejected": -3.1399855613708496, "step": 9557 }, { "epoch": 1.49, "learning_rate": 7.137109808600031e-06, "logits/chosen": -1.0670950412750244, "logits/rejected": -2.5245659351348877, "logps/chosen": -121.08857727050781, "logps/rejected": -332.7149658203125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -4.545199394226074, "rewards/margins": 6.474730968475342, "rewards/rejected": -11.019929885864258, "step": 9558 }, { "epoch": 1.49, "learning_rate": 7.136376368068883e-06, "logits/chosen": -3.1136343479156494, "logits/rejected": -2.836571216583252, "logps/chosen": -408.2084655761719, "logps/rejected": -298.11956787109375, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -1.2625900506973267, "rewards/margins": 6.384988784790039, "rewards/rejected": -7.647578716278076, "step": 9559 }, { "epoch": 1.49, "learning_rate": 7.135642927537735e-06, "logits/chosen": -2.8435275554656982, "logits/rejected": -3.1420986652374268, "logps/chosen": -197.39389038085938, "logps/rejected": -242.65719604492188, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -1.8782310485839844, "rewards/margins": 4.483554840087891, "rewards/rejected": -6.361785888671875, "step": 9560 }, { "epoch": 1.49, "learning_rate": 7.1349094870065885e-06, "logits/chosen": -1.9893226623535156, "logits/rejected": -2.869093179702759, "logps/chosen": -72.86421203613281, "logps/rejected": -440.49761962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0600337982177734, "rewards/margins": 11.654012680053711, "rewards/rejected": -13.714046478271484, "step": 9561 }, { "epoch": 1.49, "learning_rate": 7.13417604647544e-06, "logits/chosen": -2.7506628036499023, "logits/rejected": -2.2591044902801514, "logps/chosen": -363.5234375, "logps/rejected": -287.41119384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.813147068023682, "rewards/margins": 10.574068069458008, "rewards/rejected": -15.387215614318848, "step": 9562 }, { "epoch": 1.49, "learning_rate": 7.133442605944292e-06, "logits/chosen": -1.3395839929580688, "logits/rejected": -2.6897058486938477, "logps/chosen": -126.60281372070312, "logps/rejected": -543.4989624023438, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.0382041931152344, "rewards/margins": 8.636113166809082, "rewards/rejected": -11.674317359924316, "step": 9563 }, { "epoch": 1.49, "learning_rate": 7.132709165413144e-06, "logits/chosen": -1.5873830318450928, "logits/rejected": -2.6374850273132324, "logps/chosen": -198.19387817382812, "logps/rejected": -359.32415771484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.026639461517334, "rewards/margins": 8.617029190063477, "rewards/rejected": -11.643669128417969, "step": 9564 }, { "epoch": 1.49, "learning_rate": 7.131975724881996e-06, "logits/chosen": -1.8420463800430298, "logits/rejected": -2.8321309089660645, "logps/chosen": -113.20687866210938, "logps/rejected": -365.31817626953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.9816999435424805, "rewards/margins": 9.045724868774414, "rewards/rejected": -12.027424812316895, "step": 9565 }, { "epoch": 1.49, "learning_rate": 7.131242284350848e-06, "logits/chosen": -2.6230547428131104, "logits/rejected": -3.149902820587158, "logps/chosen": -189.2261505126953, "logps/rejected": -437.14996337890625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -1.4712013006210327, "rewards/margins": 5.933919906616211, "rewards/rejected": -7.405120849609375, "step": 9566 }, { "epoch": 1.49, "learning_rate": 7.1305088438197e-06, "logits/chosen": -2.733361005783081, "logits/rejected": -2.577893018722534, "logps/chosen": -175.49105834960938, "logps/rejected": -184.76541137695312, "loss": 0.2147, "rewards/accuracies": 1.0, "rewards/chosen": -6.031380653381348, "rewards/margins": 2.9845070838928223, "rewards/rejected": -9.015887260437012, "step": 9567 }, { "epoch": 1.49, "learning_rate": 7.129775403288552e-06, "logits/chosen": -2.7807650566101074, "logits/rejected": -2.4369192123413086, "logps/chosen": -239.90103149414062, "logps/rejected": -206.44314575195312, "loss": 0.4899, "rewards/accuracies": 0.5, "rewards/chosen": -3.9547042846679688, "rewards/margins": 4.300573348999023, "rewards/rejected": -8.255277633666992, "step": 9568 }, { "epoch": 1.49, "learning_rate": 7.1290419627574035e-06, "logits/chosen": -1.6914020776748657, "logits/rejected": -2.9320790767669678, "logps/chosen": -116.94476318359375, "logps/rejected": -471.23602294921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.015460252761841, "rewards/margins": 10.300439834594727, "rewards/rejected": -13.315899848937988, "step": 9569 }, { "epoch": 1.49, "learning_rate": 7.128308522226257e-06, "logits/chosen": -2.9468915462493896, "logits/rejected": -2.556274652481079, "logps/chosen": -273.8790283203125, "logps/rejected": -178.7195281982422, "loss": 0.7291, "rewards/accuracies": 0.5, "rewards/chosen": -5.411828994750977, "rewards/margins": 2.011162042617798, "rewards/rejected": -7.422990798950195, "step": 9570 }, { "epoch": 1.49, "learning_rate": 7.127575081695109e-06, "logits/chosen": -2.8602452278137207, "logits/rejected": -1.632893681526184, "logps/chosen": -260.42755126953125, "logps/rejected": -302.3279113769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6289169192314148, "rewards/margins": 12.504003524780273, "rewards/rejected": -13.13292121887207, "step": 9571 }, { "epoch": 1.49, "learning_rate": 7.126841641163961e-06, "logits/chosen": -2.9575717449188232, "logits/rejected": -2.0508487224578857, "logps/chosen": -586.8250732421875, "logps/rejected": -361.2188720703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.9799420833587646, "rewards/margins": 8.32904052734375, "rewards/rejected": -11.308982849121094, "step": 9572 }, { "epoch": 1.49, "learning_rate": 7.126108200632813e-06, "logits/chosen": -1.8502432107925415, "logits/rejected": -1.3336012363433838, "logps/chosen": -487.04547119140625, "logps/rejected": -485.35089111328125, "loss": 0.7602, "rewards/accuracies": 0.5, "rewards/chosen": -4.380558967590332, "rewards/margins": 3.4814133644104004, "rewards/rejected": -7.861971855163574, "step": 9573 }, { "epoch": 1.49, "learning_rate": 7.1253747601016655e-06, "logits/chosen": -1.4180594682693481, "logits/rejected": -2.5693204402923584, "logps/chosen": -177.44479370117188, "logps/rejected": -305.6436462402344, "loss": 0.1257, "rewards/accuracies": 1.0, "rewards/chosen": -3.9838874340057373, "rewards/margins": 6.968850135803223, "rewards/rejected": -10.952737808227539, "step": 9574 }, { "epoch": 1.49, "learning_rate": 7.124641319570517e-06, "logits/chosen": -2.8682565689086914, "logits/rejected": -2.083672046661377, "logps/chosen": -487.04840087890625, "logps/rejected": -410.75640869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3792436122894287, "rewards/margins": 10.849204063415527, "rewards/rejected": -13.228447914123535, "step": 9575 }, { "epoch": 1.49, "learning_rate": 7.123907879039369e-06, "logits/chosen": -2.604627847671509, "logits/rejected": -2.699563980102539, "logps/chosen": -172.0180206298828, "logps/rejected": -114.08535766601562, "loss": 1.3517, "rewards/accuracies": 0.0, "rewards/chosen": -6.612698554992676, "rewards/margins": -1.0446977615356445, "rewards/rejected": -5.568000793457031, "step": 9576 }, { "epoch": 1.49, "learning_rate": 7.123174438508221e-06, "logits/chosen": -2.5279009342193604, "logits/rejected": -1.8868085145950317, "logps/chosen": -192.6800537109375, "logps/rejected": -165.2392578125, "loss": 0.7047, "rewards/accuracies": 0.5, "rewards/chosen": -3.5136852264404297, "rewards/margins": 2.5080533027648926, "rewards/rejected": -6.021738529205322, "step": 9577 }, { "epoch": 1.49, "learning_rate": 7.122440997977073e-06, "logits/chosen": -2.634532928466797, "logits/rejected": -3.2941837310791016, "logps/chosen": -448.5589904785156, "logps/rejected": -530.7744140625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.435089111328125, "rewards/margins": 7.447405815124512, "rewards/rejected": -10.882495880126953, "step": 9578 }, { "epoch": 1.49, "learning_rate": 7.1217075574459266e-06, "logits/chosen": -2.7540283203125, "logits/rejected": -1.4355446100234985, "logps/chosen": -864.946533203125, "logps/rejected": -351.39617919921875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -3.7323617935180664, "rewards/margins": 6.814501762390137, "rewards/rejected": -10.546863555908203, "step": 9579 }, { "epoch": 1.49, "learning_rate": 7.1209741169147784e-06, "logits/chosen": -2.02797532081604, "logits/rejected": -3.0210418701171875, "logps/chosen": -291.118896484375, "logps/rejected": -474.635986328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.8696272373199463, "rewards/margins": 8.0846586227417, "rewards/rejected": -11.954285621643066, "step": 9580 }, { "epoch": 1.49, "learning_rate": 7.12024067638363e-06, "logits/chosen": -2.453517198562622, "logits/rejected": -2.7072372436523438, "logps/chosen": -669.025146484375, "logps/rejected": -621.64306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6985414028167725, "rewards/margins": 14.073383331298828, "rewards/rejected": -16.771923065185547, "step": 9581 }, { "epoch": 1.49, "learning_rate": 7.119507235852482e-06, "logits/chosen": -2.6051478385925293, "logits/rejected": -3.0492804050445557, "logps/chosen": -80.38033294677734, "logps/rejected": -310.64459228515625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -3.5385665893554688, "rewards/margins": 6.714859485626221, "rewards/rejected": -10.253426551818848, "step": 9582 }, { "epoch": 1.49, "learning_rate": 7.118773795321334e-06, "logits/chosen": -1.9368784427642822, "logits/rejected": -2.9919257164001465, "logps/chosen": -281.0922546386719, "logps/rejected": -433.8757019042969, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -1.932621717453003, "rewards/margins": 5.537829399108887, "rewards/rejected": -7.470451354980469, "step": 9583 }, { "epoch": 1.49, "learning_rate": 7.118040354790186e-06, "logits/chosen": -2.993942975997925, "logits/rejected": -2.237119197845459, "logps/chosen": -277.7287292480469, "logps/rejected": -249.0114288330078, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.5825408697128296, "rewards/margins": 7.073631286621094, "rewards/rejected": -7.656172275543213, "step": 9584 }, { "epoch": 1.49, "learning_rate": 7.117306914259038e-06, "logits/chosen": -2.9814045429229736, "logits/rejected": -2.4294934272766113, "logps/chosen": -199.13177490234375, "logps/rejected": -221.52862548828125, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -3.175252914428711, "rewards/margins": 5.730050086975098, "rewards/rejected": -8.905303001403809, "step": 9585 }, { "epoch": 1.49, "learning_rate": 7.11657347372789e-06, "logits/chosen": -2.069950819015503, "logits/rejected": -3.054046154022217, "logps/chosen": -279.9350280761719, "logps/rejected": -339.91424560546875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.104311466217041, "rewards/margins": 5.03199577331543, "rewards/rejected": -8.136306762695312, "step": 9586 }, { "epoch": 1.49, "learning_rate": 7.115840033196742e-06, "logits/chosen": -2.7564783096313477, "logits/rejected": -3.04191517829895, "logps/chosen": -105.52815246582031, "logps/rejected": -372.58026123046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.5626213550567627, "rewards/margins": 8.86053466796875, "rewards/rejected": -11.42315673828125, "step": 9587 }, { "epoch": 1.49, "learning_rate": 7.115106592665595e-06, "logits/chosen": -2.181570529937744, "logits/rejected": -3.0162899494171143, "logps/chosen": -131.02670288085938, "logps/rejected": -412.8187255859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.426759719848633, "rewards/margins": 8.8594388961792, "rewards/rejected": -11.286198616027832, "step": 9588 }, { "epoch": 1.49, "learning_rate": 7.114373152134447e-06, "logits/chosen": -2.8769686222076416, "logits/rejected": -2.999833822250366, "logps/chosen": -225.69802856445312, "logps/rejected": -498.28338623046875, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -1.4325613975524902, "rewards/margins": 7.131760597229004, "rewards/rejected": -8.564321517944336, "step": 9589 }, { "epoch": 1.49, "learning_rate": 7.113639711603299e-06, "logits/chosen": -2.846022367477417, "logits/rejected": -2.429318904876709, "logps/chosen": -404.8394470214844, "logps/rejected": -312.673095703125, "loss": 0.1468, "rewards/accuracies": 1.0, "rewards/chosen": -2.5782158374786377, "rewards/margins": 3.9440159797668457, "rewards/rejected": -6.5222320556640625, "step": 9590 }, { "epoch": 1.49, "learning_rate": 7.112906271072152e-06, "logits/chosen": -2.680140495300293, "logits/rejected": -2.361659049987793, "logps/chosen": -216.8035125732422, "logps/rejected": -281.61370849609375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.805100917816162, "rewards/margins": 6.494339942932129, "rewards/rejected": -11.299440383911133, "step": 9591 }, { "epoch": 1.49, "learning_rate": 7.1121728305410035e-06, "logits/chosen": -3.0447678565979004, "logits/rejected": -3.0308918952941895, "logps/chosen": -375.1134948730469, "logps/rejected": -318.6015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.876556873321533, "rewards/margins": 6.793137073516846, "rewards/rejected": -9.669693946838379, "step": 9592 }, { "epoch": 1.49, "learning_rate": 7.111439390009855e-06, "logits/chosen": -3.1236608028411865, "logits/rejected": -2.81425404548645, "logps/chosen": -497.3033447265625, "logps/rejected": -305.1407775878906, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.4400429725646973, "rewards/margins": 8.582015991210938, "rewards/rejected": -12.022058486938477, "step": 9593 }, { "epoch": 1.49, "learning_rate": 7.110705949478707e-06, "logits/chosen": -2.67376708984375, "logits/rejected": -2.19050669670105, "logps/chosen": -310.34417724609375, "logps/rejected": -358.2041931152344, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -2.7812130451202393, "rewards/margins": 9.08148193359375, "rewards/rejected": -11.862695693969727, "step": 9594 }, { "epoch": 1.49, "learning_rate": 7.109972508947559e-06, "logits/chosen": -2.4331421852111816, "logits/rejected": -1.3934823274612427, "logps/chosen": -458.6577453613281, "logps/rejected": -369.10052490234375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.975480556488037, "rewards/margins": 7.601050853729248, "rewards/rejected": -9.576531410217285, "step": 9595 }, { "epoch": 1.49, "learning_rate": 7.109239068416411e-06, "logits/chosen": -2.5438151359558105, "logits/rejected": -2.749122142791748, "logps/chosen": -143.0910186767578, "logps/rejected": -289.68646240234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.2078592777252197, "rewards/margins": 8.003401756286621, "rewards/rejected": -10.211260795593262, "step": 9596 }, { "epoch": 1.49, "learning_rate": 7.108505627885265e-06, "logits/chosen": -2.4041659832000732, "logits/rejected": -2.80763578414917, "logps/chosen": -301.7386474609375, "logps/rejected": -276.8904113769531, "loss": 1.385, "rewards/accuracies": 0.5, "rewards/chosen": -5.01101016998291, "rewards/margins": 0.7632442712783813, "rewards/rejected": -5.774254322052002, "step": 9597 }, { "epoch": 1.49, "learning_rate": 7.1077721873541165e-06, "logits/chosen": -2.6960582733154297, "logits/rejected": -2.0485172271728516, "logps/chosen": -295.7542419433594, "logps/rejected": -320.5457763671875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.904910087585449, "rewards/margins": 10.379378318786621, "rewards/rejected": -15.28428840637207, "step": 9598 }, { "epoch": 1.49, "learning_rate": 7.107038746822968e-06, "logits/chosen": -1.0701793432235718, "logits/rejected": -2.902010917663574, "logps/chosen": -89.54891967773438, "logps/rejected": -538.4384155273438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.0054264068603516, "rewards/margins": 11.003616333007812, "rewards/rejected": -13.009042739868164, "step": 9599 }, { "epoch": 1.49, "learning_rate": 7.10630530629182e-06, "logits/chosen": -1.347851037979126, "logits/rejected": -2.7136406898498535, "logps/chosen": -122.3101806640625, "logps/rejected": -418.8396911621094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9143524169921875, "rewards/margins": 9.267179489135742, "rewards/rejected": -12.18153190612793, "step": 9600 }, { "epoch": 1.49, "learning_rate": 7.105571865760672e-06, "logits/chosen": -1.6069674491882324, "logits/rejected": -2.1118993759155273, "logps/chosen": -42.78327178955078, "logps/rejected": -313.2535705566406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -1.7724260091781616, "rewards/margins": 7.353097915649414, "rewards/rejected": -9.125523567199707, "step": 9601 }, { "epoch": 1.49, "learning_rate": 7.104838425229524e-06, "logits/chosen": -2.8294966220855713, "logits/rejected": -3.3062469959259033, "logps/chosen": -40.06538772583008, "logps/rejected": -221.4684295654297, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5308282375335693, "rewards/margins": 7.301859378814697, "rewards/rejected": -9.832687377929688, "step": 9602 }, { "epoch": 1.49, "learning_rate": 7.104104984698376e-06, "logits/chosen": -0.9806801676750183, "logits/rejected": -2.13590931892395, "logps/chosen": -199.62466430664062, "logps/rejected": -481.80877685546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4449892044067383, "rewards/margins": 10.254646301269531, "rewards/rejected": -13.69963550567627, "step": 9603 }, { "epoch": 1.49, "learning_rate": 7.103371544167228e-06, "logits/chosen": -3.07908034324646, "logits/rejected": -2.0924558639526367, "logps/chosen": -421.02716064453125, "logps/rejected": -314.08392333984375, "loss": 2.8227, "rewards/accuracies": 0.5, "rewards/chosen": -5.456615447998047, "rewards/margins": 2.4032998085021973, "rewards/rejected": -7.859915256500244, "step": 9604 }, { "epoch": 1.49, "learning_rate": 7.102638103636081e-06, "logits/chosen": -2.569969654083252, "logits/rejected": -2.8546509742736816, "logps/chosen": -347.188232421875, "logps/rejected": -523.2921752929688, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -4.624730110168457, "rewards/margins": 5.0651750564575195, "rewards/rejected": -9.689905166625977, "step": 9605 }, { "epoch": 1.49, "learning_rate": 7.101904663104933e-06, "logits/chosen": -2.6750717163085938, "logits/rejected": -2.599010944366455, "logps/chosen": -752.8449096679688, "logps/rejected": -756.387451171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.707536220550537, "rewards/margins": 8.763908386230469, "rewards/rejected": -12.471445083618164, "step": 9606 }, { "epoch": 1.49, "learning_rate": 7.101171222573785e-06, "logits/chosen": -2.7485764026641846, "logits/rejected": -1.3337504863739014, "logps/chosen": -808.8873901367188, "logps/rejected": -680.1932373046875, "loss": 1.0525, "rewards/accuracies": 0.5, "rewards/chosen": -8.606403350830078, "rewards/margins": 0.6950538158416748, "rewards/rejected": -9.301456451416016, "step": 9607 }, { "epoch": 1.49, "learning_rate": 7.100437782042638e-06, "logits/chosen": -2.126997232437134, "logits/rejected": -2.8124992847442627, "logps/chosen": -90.53279113769531, "logps/rejected": -351.89569091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6065411567687988, "rewards/margins": 10.395766258239746, "rewards/rejected": -12.002307891845703, "step": 9608 }, { "epoch": 1.49, "learning_rate": 7.09970434151149e-06, "logits/chosen": -3.076340675354004, "logits/rejected": -2.8576548099517822, "logps/chosen": -716.0636596679688, "logps/rejected": -399.62652587890625, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -4.419371128082275, "rewards/margins": 3.9950692653656006, "rewards/rejected": -8.414440155029297, "step": 9609 }, { "epoch": 1.49, "learning_rate": 7.0989709009803416e-06, "logits/chosen": -2.5588266849517822, "logits/rejected": -2.8262693881988525, "logps/chosen": -122.44578552246094, "logps/rejected": -159.7199249267578, "loss": 0.9952, "rewards/accuracies": 0.5, "rewards/chosen": -6.350242614746094, "rewards/margins": 4.087450981140137, "rewards/rejected": -10.43769359588623, "step": 9610 }, { "epoch": 1.49, "learning_rate": 7.0982374604491934e-06, "logits/chosen": -2.5977320671081543, "logits/rejected": -2.726490020751953, "logps/chosen": -216.7548828125, "logps/rejected": -216.38568115234375, "loss": 2.0851, "rewards/accuracies": 0.5, "rewards/chosen": -4.76503849029541, "rewards/margins": 0.857050895690918, "rewards/rejected": -5.622089385986328, "step": 9611 }, { "epoch": 1.49, "learning_rate": 7.097504019918045e-06, "logits/chosen": -2.52123761177063, "logits/rejected": -3.0456628799438477, "logps/chosen": -105.36190795898438, "logps/rejected": -274.1009826660156, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.612060070037842, "rewards/margins": 7.758427143096924, "rewards/rejected": -12.370487213134766, "step": 9612 }, { "epoch": 1.5, "learning_rate": 7.096770579386897e-06, "logits/chosen": -0.9954944252967834, "logits/rejected": -2.8524436950683594, "logps/chosen": -85.04243469238281, "logps/rejected": -454.47613525390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.5825634002685547, "rewards/margins": 9.822524070739746, "rewards/rejected": -11.4050874710083, "step": 9613 }, { "epoch": 1.5, "learning_rate": 7.096037138855751e-06, "logits/chosen": -1.891750454902649, "logits/rejected": -3.254053831100464, "logps/chosen": -110.50950622558594, "logps/rejected": -359.92633056640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.515897750854492, "rewards/margins": 6.478672027587891, "rewards/rejected": -9.994569778442383, "step": 9614 }, { "epoch": 1.5, "learning_rate": 7.095303698324603e-06, "logits/chosen": -2.3947343826293945, "logits/rejected": -2.834193468093872, "logps/chosen": -54.311744689941406, "logps/rejected": -163.1706085205078, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -2.614522695541382, "rewards/margins": 4.136532306671143, "rewards/rejected": -6.751054763793945, "step": 9615 }, { "epoch": 1.5, "learning_rate": 7.0945702577934545e-06, "logits/chosen": -2.6820995807647705, "logits/rejected": -3.2224013805389404, "logps/chosen": -725.7206420898438, "logps/rejected": -918.3375854492188, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -4.354719161987305, "rewards/margins": 7.020195484161377, "rewards/rejected": -11.374914169311523, "step": 9616 }, { "epoch": 1.5, "learning_rate": 7.093836817262306e-06, "logits/chosen": -2.639427900314331, "logits/rejected": -2.497298240661621, "logps/chosen": -202.16217041015625, "logps/rejected": -359.7020263671875, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/chosen": -1.6236250400543213, "rewards/margins": 6.22587776184082, "rewards/rejected": -7.8495025634765625, "step": 9617 }, { "epoch": 1.5, "learning_rate": 7.093103376731158e-06, "logits/chosen": -0.9209548830986023, "logits/rejected": -2.3514223098754883, "logps/chosen": -137.86569213867188, "logps/rejected": -384.8153076171875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -4.652946949005127, "rewards/margins": 7.714441776275635, "rewards/rejected": -12.367388725280762, "step": 9618 }, { "epoch": 1.5, "learning_rate": 7.09236993620001e-06, "logits/chosen": -2.8437976837158203, "logits/rejected": -2.656877279281616, "logps/chosen": -186.4123992919922, "logps/rejected": -288.0806884765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.866599440574646, "rewards/margins": 8.76753044128418, "rewards/rejected": -10.634129524230957, "step": 9619 }, { "epoch": 1.5, "learning_rate": 7.091636495668862e-06, "logits/chosen": -2.8999195098876953, "logits/rejected": -1.673244595527649, "logps/chosen": -337.5900573730469, "logps/rejected": -152.72787475585938, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": 0.33246302604675293, "rewards/margins": 7.962342739105225, "rewards/rejected": -7.629879474639893, "step": 9620 }, { "epoch": 1.5, "learning_rate": 7.090903055137714e-06, "logits/chosen": -1.8899600505828857, "logits/rejected": -2.567129373550415, "logps/chosen": -298.86981201171875, "logps/rejected": -495.987548828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.545772075653076, "rewards/margins": 9.609962463378906, "rewards/rejected": -13.15573501586914, "step": 9621 }, { "epoch": 1.5, "learning_rate": 7.090169614606566e-06, "logits/chosen": -2.897250175476074, "logits/rejected": -3.0193467140197754, "logps/chosen": -187.6917724609375, "logps/rejected": -263.49835205078125, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -3.646374225616455, "rewards/margins": 4.27694845199585, "rewards/rejected": -7.923322677612305, "step": 9622 }, { "epoch": 1.5, "learning_rate": 7.089436174075419e-06, "logits/chosen": -2.687425136566162, "logits/rejected": -3.0330519676208496, "logps/chosen": -423.1436767578125, "logps/rejected": -496.83355712890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.5478541851043701, "rewards/margins": 6.720794200897217, "rewards/rejected": -8.268648147583008, "step": 9623 }, { "epoch": 1.5, "learning_rate": 7.088702733544271e-06, "logits/chosen": -2.0373783111572266, "logits/rejected": -2.7651100158691406, "logps/chosen": -211.0550994873047, "logps/rejected": -325.173095703125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.6010453701019287, "rewards/margins": 7.6809468269348145, "rewards/rejected": -10.281991958618164, "step": 9624 }, { "epoch": 1.5, "learning_rate": 7.087969293013124e-06, "logits/chosen": -0.8876209259033203, "logits/rejected": -2.731858253479004, "logps/chosen": -69.5903549194336, "logps/rejected": -363.25128173828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.547004222869873, "rewards/margins": 8.205367088317871, "rewards/rejected": -10.752370834350586, "step": 9625 }, { "epoch": 1.5, "learning_rate": 7.087235852481976e-06, "logits/chosen": -2.275576591491699, "logits/rejected": -2.880582809448242, "logps/chosen": -101.24873352050781, "logps/rejected": -312.86419677734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.590655326843262, "rewards/margins": 8.855274200439453, "rewards/rejected": -13.445928573608398, "step": 9626 }, { "epoch": 1.5, "learning_rate": 7.086502411950828e-06, "logits/chosen": -1.572148323059082, "logits/rejected": -2.5067505836486816, "logps/chosen": -97.42667388916016, "logps/rejected": -300.93133544921875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -3.9410810470581055, "rewards/margins": 6.698019027709961, "rewards/rejected": -10.639100074768066, "step": 9627 }, { "epoch": 1.5, "learning_rate": 7.08576897141968e-06, "logits/chosen": -1.362797498703003, "logits/rejected": -1.6303837299346924, "logps/chosen": -302.10009765625, "logps/rejected": -202.11500549316406, "loss": 0.2697, "rewards/accuracies": 1.0, "rewards/chosen": -3.1375792026519775, "rewards/margins": 4.189610958099365, "rewards/rejected": -7.327190399169922, "step": 9628 }, { "epoch": 1.5, "learning_rate": 7.0850355308885315e-06, "logits/chosen": -1.9670625925064087, "logits/rejected": -2.9503939151763916, "logps/chosen": -220.85867309570312, "logps/rejected": -416.4840087890625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.8279426097869873, "rewards/margins": 5.358191013336182, "rewards/rejected": -7.18613338470459, "step": 9629 }, { "epoch": 1.5, "learning_rate": 7.084302090357383e-06, "logits/chosen": -2.6261115074157715, "logits/rejected": -2.9757988452911377, "logps/chosen": -66.54931640625, "logps/rejected": -194.21177673339844, "loss": 0.0875, "rewards/accuracies": 1.0, "rewards/chosen": -4.420294284820557, "rewards/margins": 4.683071136474609, "rewards/rejected": -9.103364944458008, "step": 9630 }, { "epoch": 1.5, "learning_rate": 7.083568649826235e-06, "logits/chosen": -2.4180123805999756, "logits/rejected": -2.868993043899536, "logps/chosen": -253.88812255859375, "logps/rejected": -460.41668701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.523013114929199, "rewards/margins": 10.703374862670898, "rewards/rejected": -13.226387023925781, "step": 9631 }, { "epoch": 1.5, "learning_rate": 7.082835209295089e-06, "logits/chosen": -1.5371068716049194, "logits/rejected": -2.6078274250030518, "logps/chosen": -346.41400146484375, "logps/rejected": -409.6768798828125, "loss": 1.5438, "rewards/accuracies": 0.5, "rewards/chosen": -6.9265851974487305, "rewards/margins": 1.92238450050354, "rewards/rejected": -8.848970413208008, "step": 9632 }, { "epoch": 1.5, "learning_rate": 7.082101768763941e-06, "logits/chosen": -2.4017772674560547, "logits/rejected": -3.206800699234009, "logps/chosen": -48.865577697753906, "logps/rejected": -304.9176940917969, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.3696720600128174, "rewards/margins": 7.804459571838379, "rewards/rejected": -11.174131393432617, "step": 9633 }, { "epoch": 1.5, "learning_rate": 7.0813683282327926e-06, "logits/chosen": -2.9888551235198975, "logits/rejected": -3.134101152420044, "logps/chosen": -655.1153564453125, "logps/rejected": -713.3992919921875, "loss": 0.4424, "rewards/accuracies": 1.0, "rewards/chosen": -7.235846042633057, "rewards/margins": 0.6133055686950684, "rewards/rejected": -7.849151611328125, "step": 9634 }, { "epoch": 1.5, "learning_rate": 7.0806348877016444e-06, "logits/chosen": -1.7794818878173828, "logits/rejected": -2.870321750640869, "logps/chosen": -171.9684600830078, "logps/rejected": -391.12091064453125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.185693740844727, "rewards/margins": 6.063597202301025, "rewards/rejected": -10.249290466308594, "step": 9635 }, { "epoch": 1.5, "learning_rate": 7.079901447170496e-06, "logits/chosen": -2.2192747592926025, "logits/rejected": -2.719346046447754, "logps/chosen": -494.61773681640625, "logps/rejected": -479.08343505859375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.8131024837493896, "rewards/margins": 6.578211784362793, "rewards/rejected": -9.391313552856445, "step": 9636 }, { "epoch": 1.5, "learning_rate": 7.079168006639348e-06, "logits/chosen": -1.4925709962844849, "logits/rejected": -2.8557138442993164, "logps/chosen": -237.26263427734375, "logps/rejected": -449.48382568359375, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -2.7115230560302734, "rewards/margins": 6.141901016235352, "rewards/rejected": -8.853424072265625, "step": 9637 }, { "epoch": 1.5, "learning_rate": 7.0784345661082e-06, "logits/chosen": -1.9750109910964966, "logits/rejected": -2.861621141433716, "logps/chosen": -273.82421875, "logps/rejected": -579.9310302734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.5884971618652344, "rewards/margins": 8.964664459228516, "rewards/rejected": -12.55316162109375, "step": 9638 }, { "epoch": 1.5, "learning_rate": 7.077701125577052e-06, "logits/chosen": -2.6974895000457764, "logits/rejected": -2.996042490005493, "logps/chosen": -112.15141296386719, "logps/rejected": -254.81031799316406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.572762966156006, "rewards/margins": 8.306729316711426, "rewards/rejected": -10.879491806030273, "step": 9639 }, { "epoch": 1.5, "learning_rate": 7.076967685045904e-06, "logits/chosen": -2.825258731842041, "logits/rejected": -2.0293092727661133, "logps/chosen": -205.4962158203125, "logps/rejected": -276.57049560546875, "loss": 0.133, "rewards/accuracies": 1.0, "rewards/chosen": -2.585219144821167, "rewards/margins": 6.536266803741455, "rewards/rejected": -9.121485710144043, "step": 9640 }, { "epoch": 1.5, "learning_rate": 7.076234244514757e-06, "logits/chosen": -2.7984821796417236, "logits/rejected": -2.582240343093872, "logps/chosen": -127.01140594482422, "logps/rejected": -197.08355712890625, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -0.8846549987792969, "rewards/margins": 3.290886640548706, "rewards/rejected": -4.175541877746582, "step": 9641 }, { "epoch": 1.5, "learning_rate": 7.07550080398361e-06, "logits/chosen": -1.4922605752944946, "logits/rejected": -2.692060947418213, "logps/chosen": -102.40711212158203, "logps/rejected": -237.19497680664062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.7869811058044434, "rewards/margins": 7.983604431152344, "rewards/rejected": -10.770586013793945, "step": 9642 }, { "epoch": 1.5, "learning_rate": 7.074767363452462e-06, "logits/chosen": -2.5862410068511963, "logits/rejected": -3.2520034313201904, "logps/chosen": -785.4578857421875, "logps/rejected": -749.8588256835938, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.95330810546875, "rewards/margins": 7.311338424682617, "rewards/rejected": -12.264646530151367, "step": 9643 }, { "epoch": 1.5, "learning_rate": 7.074033922921314e-06, "logits/chosen": -3.0746524333953857, "logits/rejected": -2.953556537628174, "logps/chosen": -127.19349670410156, "logps/rejected": -178.53482055664062, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -5.318779468536377, "rewards/margins": 4.721537113189697, "rewards/rejected": -10.040316581726074, "step": 9644 }, { "epoch": 1.5, "learning_rate": 7.073300482390166e-06, "logits/chosen": -1.6641429662704468, "logits/rejected": -2.588751792907715, "logps/chosen": -299.44036865234375, "logps/rejected": -385.6741943359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.864973545074463, "rewards/margins": 9.651986122131348, "rewards/rejected": -12.516960144042969, "step": 9645 }, { "epoch": 1.5, "learning_rate": 7.072567041859018e-06, "logits/chosen": -2.3723249435424805, "logits/rejected": -2.381789207458496, "logps/chosen": -454.27020263671875, "logps/rejected": -384.41888427734375, "loss": 0.7602, "rewards/accuracies": 0.5, "rewards/chosen": -4.514422416687012, "rewards/margins": 5.178171634674072, "rewards/rejected": -9.692594528198242, "step": 9646 }, { "epoch": 1.5, "learning_rate": 7.0718336013278695e-06, "logits/chosen": -1.85958993434906, "logits/rejected": -2.588876485824585, "logps/chosen": -436.0294494628906, "logps/rejected": -324.2884216308594, "loss": 1.0551, "rewards/accuracies": 0.5, "rewards/chosen": -5.387875556945801, "rewards/margins": 2.464146614074707, "rewards/rejected": -7.852022171020508, "step": 9647 }, { "epoch": 1.5, "learning_rate": 7.071100160796721e-06, "logits/chosen": -2.7799713611602783, "logits/rejected": -2.704660415649414, "logps/chosen": -472.97076416015625, "logps/rejected": -476.1896057128906, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.7835686206817627, "rewards/margins": 8.478256225585938, "rewards/rejected": -11.261825561523438, "step": 9648 }, { "epoch": 1.5, "learning_rate": 7.070366720265573e-06, "logits/chosen": -2.7369794845581055, "logits/rejected": -1.7876836061477661, "logps/chosen": -644.4212646484375, "logps/rejected": -489.1654968261719, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -3.426938056945801, "rewards/margins": 7.323147296905518, "rewards/rejected": -10.750085830688477, "step": 9649 }, { "epoch": 1.5, "learning_rate": 7.069633279734426e-06, "logits/chosen": -2.7144577503204346, "logits/rejected": -2.981494665145874, "logps/chosen": -350.93316650390625, "logps/rejected": -457.8875732421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.305580139160156, "rewards/margins": 9.17515754699707, "rewards/rejected": -13.480737686157227, "step": 9650 }, { "epoch": 1.5, "learning_rate": 7.068899839203278e-06, "logits/chosen": -2.5390851497650146, "logits/rejected": -2.9661331176757812, "logps/chosen": -43.147159576416016, "logps/rejected": -330.2366638183594, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -2.6999754905700684, "rewards/margins": 5.898167610168457, "rewards/rejected": -8.598143577575684, "step": 9651 }, { "epoch": 1.5, "learning_rate": 7.068166398672131e-06, "logits/chosen": -2.9884796142578125, "logits/rejected": -3.108255386352539, "logps/chosen": -316.34228515625, "logps/rejected": -414.8060302734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.21634984016418457, "rewards/margins": 7.494836330413818, "rewards/rejected": -7.711186408996582, "step": 9652 }, { "epoch": 1.5, "learning_rate": 7.0674329581409825e-06, "logits/chosen": -2.897451162338257, "logits/rejected": -2.071377754211426, "logps/chosen": -480.30987548828125, "logps/rejected": -427.9688720703125, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": -2.6045150756835938, "rewards/margins": 4.4606828689575195, "rewards/rejected": -7.065197944641113, "step": 9653 }, { "epoch": 1.5, "learning_rate": 7.066699517609834e-06, "logits/chosen": -2.494757652282715, "logits/rejected": -2.7633938789367676, "logps/chosen": -137.56707763671875, "logps/rejected": -343.2786865234375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.238495826721191, "rewards/margins": 6.972265243530273, "rewards/rejected": -11.210760116577148, "step": 9654 }, { "epoch": 1.5, "learning_rate": 7.065966077078686e-06, "logits/chosen": -2.6553430557250977, "logits/rejected": -2.748758554458618, "logps/chosen": -133.37664794921875, "logps/rejected": -180.74867248535156, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -3.417459011077881, "rewards/margins": 5.306272983551025, "rewards/rejected": -8.723731994628906, "step": 9655 }, { "epoch": 1.5, "learning_rate": 7.065232636547538e-06, "logits/chosen": -2.8533432483673096, "logits/rejected": -2.9428627490997314, "logps/chosen": -244.59828186035156, "logps/rejected": -275.42401123046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.8139519691467285, "rewards/margins": 7.6680498123168945, "rewards/rejected": -10.482002258300781, "step": 9656 }, { "epoch": 1.5, "learning_rate": 7.064499196016391e-06, "logits/chosen": -2.821211576461792, "logits/rejected": -1.8861452341079712, "logps/chosen": -168.73561096191406, "logps/rejected": -203.45443725585938, "loss": 0.8633, "rewards/accuracies": 0.5, "rewards/chosen": -3.979600667953491, "rewards/margins": 3.7724101543426514, "rewards/rejected": -7.752010345458984, "step": 9657 }, { "epoch": 1.5, "learning_rate": 7.063765755485243e-06, "logits/chosen": -1.6776548624038696, "logits/rejected": -2.331406354904175, "logps/chosen": -172.50473022460938, "logps/rejected": -387.0369567871094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.274888515472412, "rewards/margins": 9.200275421142578, "rewards/rejected": -13.475164413452148, "step": 9658 }, { "epoch": 1.5, "learning_rate": 7.0630323149540955e-06, "logits/chosen": -2.7137815952301025, "logits/rejected": -2.212676525115967, "logps/chosen": -270.11859130859375, "logps/rejected": -320.2415466308594, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.197168827056885, "rewards/margins": 8.579885482788086, "rewards/rejected": -12.777053833007812, "step": 9659 }, { "epoch": 1.5, "learning_rate": 7.062298874422947e-06, "logits/chosen": -2.069823980331421, "logits/rejected": -2.622905969619751, "logps/chosen": -152.688720703125, "logps/rejected": -376.95892333984375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.097247123718262, "rewards/margins": 7.381627082824707, "rewards/rejected": -11.478874206542969, "step": 9660 }, { "epoch": 1.5, "learning_rate": 7.0615654338918e-06, "logits/chosen": -2.1156468391418457, "logits/rejected": -1.942965030670166, "logps/chosen": -271.0046691894531, "logps/rejected": -377.22247314453125, "loss": 0.0417, "rewards/accuracies": 1.0, "rewards/chosen": -5.052587509155273, "rewards/margins": 6.370410442352295, "rewards/rejected": -11.42299747467041, "step": 9661 }, { "epoch": 1.5, "learning_rate": 7.060831993360652e-06, "logits/chosen": -2.092301368713379, "logits/rejected": -2.590959310531616, "logps/chosen": -118.49140930175781, "logps/rejected": -240.07789611816406, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -4.72478723526001, "rewards/margins": 4.606758117675781, "rewards/rejected": -9.331544876098633, "step": 9662 }, { "epoch": 1.5, "learning_rate": 7.060098552829504e-06, "logits/chosen": -2.8703653812408447, "logits/rejected": -2.5282270908355713, "logps/chosen": -374.50592041015625, "logps/rejected": -352.7506103515625, "loss": 0.2638, "rewards/accuracies": 1.0, "rewards/chosen": -5.464247226715088, "rewards/margins": 1.997239589691162, "rewards/rejected": -7.46148681640625, "step": 9663 }, { "epoch": 1.5, "learning_rate": 7.059365112298356e-06, "logits/chosen": -2.5803029537200928, "logits/rejected": -2.788792610168457, "logps/chosen": -178.63784790039062, "logps/rejected": -340.416259765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3994522094726562, "rewards/margins": 10.107719421386719, "rewards/rejected": -11.507171630859375, "step": 9664 }, { "epoch": 1.5, "learning_rate": 7.0586316717672076e-06, "logits/chosen": -1.2183849811553955, "logits/rejected": -2.7264244556427, "logps/chosen": -244.7957763671875, "logps/rejected": -533.3239135742188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.020346164703369, "rewards/margins": 9.854166984558105, "rewards/rejected": -14.874513626098633, "step": 9665 }, { "epoch": 1.5, "learning_rate": 7.05789823123606e-06, "logits/chosen": -2.2649502754211426, "logits/rejected": -2.9603028297424316, "logps/chosen": -358.56427001953125, "logps/rejected": -514.18603515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.218942165374756, "rewards/margins": 8.763077735900879, "rewards/rejected": -11.982020378112793, "step": 9666 }, { "epoch": 1.5, "learning_rate": 7.057164790704912e-06, "logits/chosen": -2.4383625984191895, "logits/rejected": -2.9003429412841797, "logps/chosen": -515.5338745117188, "logps/rejected": -600.0631103515625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.9105224609375, "rewards/margins": 6.71820068359375, "rewards/rejected": -9.62872314453125, "step": 9667 }, { "epoch": 1.5, "learning_rate": 7.056431350173764e-06, "logits/chosen": -2.766874313354492, "logits/rejected": -3.352564573287964, "logps/chosen": -80.98338317871094, "logps/rejected": -339.01934814453125, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -3.4039340019226074, "rewards/margins": 5.056132793426514, "rewards/rejected": -8.460066795349121, "step": 9668 }, { "epoch": 1.5, "learning_rate": 7.055697909642616e-06, "logits/chosen": -2.6192972660064697, "logits/rejected": -0.9408676624298096, "logps/chosen": -264.1185607910156, "logps/rejected": -109.54116821289062, "loss": 0.0754, "rewards/accuracies": 1.0, "rewards/chosen": -2.7762603759765625, "rewards/margins": 2.551596164703369, "rewards/rejected": -5.327856540679932, "step": 9669 }, { "epoch": 1.5, "learning_rate": 7.054964469111469e-06, "logits/chosen": -1.512382984161377, "logits/rejected": -2.856903553009033, "logps/chosen": -140.92759704589844, "logps/rejected": -315.6789245605469, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -3.5352706909179688, "rewards/margins": 6.027325630187988, "rewards/rejected": -9.562596321105957, "step": 9670 }, { "epoch": 1.5, "learning_rate": 7.0542310285803205e-06, "logits/chosen": -2.3844032287597656, "logits/rejected": -3.1652636528015137, "logps/chosen": -68.45684051513672, "logps/rejected": -476.4787902832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.494462013244629, "rewards/margins": 10.863943099975586, "rewards/rejected": -13.358404159545898, "step": 9671 }, { "epoch": 1.5, "learning_rate": 7.053497588049172e-06, "logits/chosen": -2.1776556968688965, "logits/rejected": -2.671985626220703, "logps/chosen": -127.42430114746094, "logps/rejected": -202.30345153808594, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -4.101156711578369, "rewards/margins": 4.348211288452148, "rewards/rejected": -8.44936752319336, "step": 9672 }, { "epoch": 1.5, "learning_rate": 7.052764147518024e-06, "logits/chosen": -1.2761281728744507, "logits/rejected": -2.6473662853240967, "logps/chosen": -66.07331848144531, "logps/rejected": -253.71575927734375, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": -4.200279712677002, "rewards/margins": 6.502588272094727, "rewards/rejected": -10.70286750793457, "step": 9673 }, { "epoch": 1.5, "learning_rate": 7.052030706986877e-06, "logits/chosen": -2.8055241107940674, "logits/rejected": -3.0127720832824707, "logps/chosen": -226.36090087890625, "logps/rejected": -445.6531066894531, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.146124839782715, "rewards/margins": 7.417325019836426, "rewards/rejected": -13.56344985961914, "step": 9674 }, { "epoch": 1.5, "learning_rate": 7.051297266455729e-06, "logits/chosen": -2.926090955734253, "logits/rejected": -3.2525529861450195, "logps/chosen": -140.79116821289062, "logps/rejected": -258.081298828125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.200460433959961, "rewards/margins": 6.342403411865234, "rewards/rejected": -11.542863845825195, "step": 9675 }, { "epoch": 1.5, "learning_rate": 7.050563825924582e-06, "logits/chosen": -1.6297484636306763, "logits/rejected": -2.6982595920562744, "logps/chosen": -147.339111328125, "logps/rejected": -307.9254150390625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.9470508098602295, "rewards/margins": 7.488144874572754, "rewards/rejected": -9.435195922851562, "step": 9676 }, { "epoch": 1.5, "learning_rate": 7.0498303853934335e-06, "logits/chosen": -2.216928720474243, "logits/rejected": -2.9721689224243164, "logps/chosen": -109.09246826171875, "logps/rejected": -426.91064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.422542095184326, "rewards/margins": 11.225764274597168, "rewards/rejected": -15.648305892944336, "step": 9677 }, { "epoch": 1.51, "learning_rate": 7.049096944862285e-06, "logits/chosen": -1.3093881607055664, "logits/rejected": -2.270026683807373, "logps/chosen": -169.91473388671875, "logps/rejected": -344.24212646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3516650199890137, "rewards/margins": 9.091236114501953, "rewards/rejected": -12.442901611328125, "step": 9678 }, { "epoch": 1.51, "learning_rate": 7.048363504331138e-06, "logits/chosen": -2.407580614089966, "logits/rejected": -2.8422317504882812, "logps/chosen": -372.5406494140625, "logps/rejected": -425.7513732910156, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -3.8832993507385254, "rewards/margins": 7.644217014312744, "rewards/rejected": -11.527515411376953, "step": 9679 }, { "epoch": 1.51, "learning_rate": 7.04763006379999e-06, "logits/chosen": -1.8547672033309937, "logits/rejected": -2.8627350330352783, "logps/chosen": -197.34083557128906, "logps/rejected": -392.5590515136719, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.3421127796173096, "rewards/margins": 6.55067777633667, "rewards/rejected": -8.892789840698242, "step": 9680 }, { "epoch": 1.51, "learning_rate": 7.046896623268842e-06, "logits/chosen": -3.3911681175231934, "logits/rejected": -3.2150192260742188, "logps/chosen": -174.51065063476562, "logps/rejected": -163.30938720703125, "loss": 0.4821, "rewards/accuracies": 0.5, "rewards/chosen": -4.614121437072754, "rewards/margins": 1.9467442035675049, "rewards/rejected": -6.56086540222168, "step": 9681 }, { "epoch": 1.51, "learning_rate": 7.046163182737694e-06, "logits/chosen": -2.580031633377075, "logits/rejected": -2.8816561698913574, "logps/chosen": -185.11212158203125, "logps/rejected": -329.96624755859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.361217498779297, "rewards/margins": 6.87673282623291, "rewards/rejected": -9.237950325012207, "step": 9682 }, { "epoch": 1.51, "learning_rate": 7.0454297422065465e-06, "logits/chosen": -1.9877352714538574, "logits/rejected": -3.1076865196228027, "logps/chosen": -340.0987243652344, "logps/rejected": -547.4517822265625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -2.3713326454162598, "rewards/margins": 6.5751471519470215, "rewards/rejected": -8.946479797363281, "step": 9683 }, { "epoch": 1.51, "learning_rate": 7.044696301675398e-06, "logits/chosen": -2.32334041595459, "logits/rejected": -2.9536616802215576, "logps/chosen": -90.76065826416016, "logps/rejected": -413.42547607421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.585452079772949, "rewards/margins": 12.031578063964844, "rewards/rejected": -14.617029190063477, "step": 9684 }, { "epoch": 1.51, "learning_rate": 7.04396286114425e-06, "logits/chosen": -2.0727357864379883, "logits/rejected": -2.6838366985321045, "logps/chosen": -68.86698913574219, "logps/rejected": -200.61488342285156, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.1261277198791504, "rewards/margins": 7.061940670013428, "rewards/rejected": -10.188068389892578, "step": 9685 }, { "epoch": 1.51, "learning_rate": 7.043229420613102e-06, "logits/chosen": -3.1209959983825684, "logits/rejected": -3.337465763092041, "logps/chosen": -273.3734130859375, "logps/rejected": -223.11782836914062, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": -3.977114200592041, "rewards/margins": 2.9500808715820312, "rewards/rejected": -6.927195072174072, "step": 9686 }, { "epoch": 1.51, "learning_rate": 7.042495980081954e-06, "logits/chosen": -2.6830990314483643, "logits/rejected": -3.08296275138855, "logps/chosen": -86.04312896728516, "logps/rejected": -249.0818634033203, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -3.659170627593994, "rewards/margins": 3.781959056854248, "rewards/rejected": -7.441129684448242, "step": 9687 }, { "epoch": 1.51, "learning_rate": 7.041762539550807e-06, "logits/chosen": -2.236903667449951, "logits/rejected": -2.79841685295105, "logps/chosen": -263.5615234375, "logps/rejected": -491.56573486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.373291492462158, "rewards/margins": 9.830917358398438, "rewards/rejected": -14.204208374023438, "step": 9688 }, { "epoch": 1.51, "learning_rate": 7.041029099019659e-06, "logits/chosen": -1.8822121620178223, "logits/rejected": -2.4309451580047607, "logps/chosen": -231.28076171875, "logps/rejected": -243.739013671875, "loss": 1.6177, "rewards/accuracies": 0.5, "rewards/chosen": -6.279892921447754, "rewards/margins": 2.501573085784912, "rewards/rejected": -8.781465530395508, "step": 9689 }, { "epoch": 1.51, "learning_rate": 7.0402956584885105e-06, "logits/chosen": -2.05759859085083, "logits/rejected": -2.92596173286438, "logps/chosen": -197.91812133789062, "logps/rejected": -372.9254150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.494952201843262, "rewards/margins": 10.780656814575195, "rewards/rejected": -15.275609970092773, "step": 9690 }, { "epoch": 1.51, "learning_rate": 7.039562217957362e-06, "logits/chosen": -2.4570720195770264, "logits/rejected": -2.999849796295166, "logps/chosen": -67.04412078857422, "logps/rejected": -310.38916015625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.591189384460449, "rewards/margins": 6.734938144683838, "rewards/rejected": -10.326128005981445, "step": 9691 }, { "epoch": 1.51, "learning_rate": 7.038828777426215e-06, "logits/chosen": -2.073168992996216, "logits/rejected": -2.855177164077759, "logps/chosen": -55.75466537475586, "logps/rejected": -209.71429443359375, "loss": 0.5322, "rewards/accuracies": 0.5, "rewards/chosen": -4.075697898864746, "rewards/margins": 3.6254148483276367, "rewards/rejected": -7.701112747192383, "step": 9692 }, { "epoch": 1.51, "learning_rate": 7.038095336895068e-06, "logits/chosen": -3.1079823970794678, "logits/rejected": -2.4207332134246826, "logps/chosen": -512.2332153320312, "logps/rejected": -451.4931335449219, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.082156181335449, "rewards/margins": 7.719598770141602, "rewards/rejected": -12.80175495147705, "step": 9693 }, { "epoch": 1.51, "learning_rate": 7.03736189636392e-06, "logits/chosen": -1.6038620471954346, "logits/rejected": -2.7943289279937744, "logps/chosen": -231.03543090820312, "logps/rejected": -334.726806640625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.4708268642425537, "rewards/margins": 7.39654541015625, "rewards/rejected": -9.867372512817383, "step": 9694 }, { "epoch": 1.51, "learning_rate": 7.0366284558327715e-06, "logits/chosen": -2.131821870803833, "logits/rejected": -3.0096263885498047, "logps/chosen": -193.8362274169922, "logps/rejected": -406.5810241699219, "loss": 0.4736, "rewards/accuracies": 0.5, "rewards/chosen": -5.566527366638184, "rewards/margins": 2.9260971546173096, "rewards/rejected": -8.492624282836914, "step": 9695 }, { "epoch": 1.51, "learning_rate": 7.0358950153016234e-06, "logits/chosen": -2.5304317474365234, "logits/rejected": -2.7629282474517822, "logps/chosen": -789.8853759765625, "logps/rejected": -1048.4124755859375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.527679443359375, "rewards/margins": 8.71385383605957, "rewards/rejected": -12.241533279418945, "step": 9696 }, { "epoch": 1.51, "learning_rate": 7.035161574770476e-06, "logits/chosen": -1.499136209487915, "logits/rejected": -2.9384212493896484, "logps/chosen": -99.63668823242188, "logps/rejected": -251.37026977539062, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -3.204960584640503, "rewards/margins": 5.278719902038574, "rewards/rejected": -8.483680725097656, "step": 9697 }, { "epoch": 1.51, "learning_rate": 7.034428134239328e-06, "logits/chosen": -2.9891676902770996, "logits/rejected": -2.8337981700897217, "logps/chosen": -350.8519592285156, "logps/rejected": -547.7846069335938, "loss": 0.1215, "rewards/accuracies": 1.0, "rewards/chosen": -3.487501859664917, "rewards/margins": 4.6232590675354, "rewards/rejected": -8.110760688781738, "step": 9698 }, { "epoch": 1.51, "learning_rate": 7.03369469370818e-06, "logits/chosen": -2.518355131149292, "logits/rejected": -2.792161226272583, "logps/chosen": -145.30953979492188, "logps/rejected": -199.80430603027344, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -3.1487183570861816, "rewards/margins": 7.108679294586182, "rewards/rejected": -10.257397651672363, "step": 9699 }, { "epoch": 1.51, "learning_rate": 7.032961253177032e-06, "logits/chosen": -2.8025615215301514, "logits/rejected": -3.089742660522461, "logps/chosen": -94.24609375, "logps/rejected": -276.2470397949219, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.467309951782227, "rewards/margins": 9.036256790161133, "rewards/rejected": -13.50356674194336, "step": 9700 }, { "epoch": 1.51, "learning_rate": 7.0322278126458845e-06, "logits/chosen": -1.5895899534225464, "logits/rejected": -2.5493319034576416, "logps/chosen": -122.23515319824219, "logps/rejected": -409.8719482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6397910714149475, "rewards/margins": 13.761672019958496, "rewards/rejected": -14.40146255493164, "step": 9701 }, { "epoch": 1.51, "learning_rate": 7.031494372114736e-06, "logits/chosen": -2.9446561336517334, "logits/rejected": -3.0409743785858154, "logps/chosen": -162.89306640625, "logps/rejected": -329.62762451171875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.598043203353882, "rewards/margins": 7.108193397521973, "rewards/rejected": -9.706236839294434, "step": 9702 }, { "epoch": 1.51, "learning_rate": 7.030760931583588e-06, "logits/chosen": -2.8705339431762695, "logits/rejected": -3.105764150619507, "logps/chosen": -86.96680450439453, "logps/rejected": -240.6834716796875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -3.7738873958587646, "rewards/margins": 5.370120525360107, "rewards/rejected": -9.144007682800293, "step": 9703 }, { "epoch": 1.51, "learning_rate": 7.03002749105244e-06, "logits/chosen": -1.4333921670913696, "logits/rejected": -2.6245932579040527, "logps/chosen": -67.62298583984375, "logps/rejected": -289.06292724609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.218494176864624, "rewards/margins": 9.025724411010742, "rewards/rejected": -10.244218826293945, "step": 9704 }, { "epoch": 1.51, "learning_rate": 7.029294050521292e-06, "logits/chosen": -2.826066493988037, "logits/rejected": -2.916557788848877, "logps/chosen": -259.3036804199219, "logps/rejected": -213.8410186767578, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -1.5372062921524048, "rewards/margins": 4.556227684020996, "rewards/rejected": -6.093433856964111, "step": 9705 }, { "epoch": 1.51, "learning_rate": 7.028560609990145e-06, "logits/chosen": -2.356675863265991, "logits/rejected": -2.5239059925079346, "logps/chosen": -105.17229461669922, "logps/rejected": -277.34912109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2236266136169434, "rewards/margins": 10.992647171020508, "rewards/rejected": -13.21627426147461, "step": 9706 }, { "epoch": 1.51, "learning_rate": 7.027827169458997e-06, "logits/chosen": -2.603013753890991, "logits/rejected": -3.106189012527466, "logps/chosen": -106.0716781616211, "logps/rejected": -313.4296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.081124305725098, "rewards/margins": 8.079488754272461, "rewards/rejected": -13.160614013671875, "step": 9707 }, { "epoch": 1.51, "learning_rate": 7.0270937289278485e-06, "logits/chosen": -2.0411763191223145, "logits/rejected": -2.740485668182373, "logps/chosen": -59.201717376708984, "logps/rejected": -185.49118041992188, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -3.414599895477295, "rewards/margins": 5.778244972229004, "rewards/rejected": -9.19284439086914, "step": 9708 }, { "epoch": 1.51, "learning_rate": 7.026360288396701e-06, "logits/chosen": -2.6284968852996826, "logits/rejected": -1.9955246448516846, "logps/chosen": -243.0087127685547, "logps/rejected": -267.5649108886719, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": -3.816795825958252, "rewards/margins": 3.4709088802337646, "rewards/rejected": -7.2877044677734375, "step": 9709 }, { "epoch": 1.51, "learning_rate": 7.025626847865554e-06, "logits/chosen": -2.946716070175171, "logits/rejected": -2.459308385848999, "logps/chosen": -878.4027099609375, "logps/rejected": -345.6466369628906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.249798774719238, "rewards/margins": 6.863759994506836, "rewards/rejected": -13.113557815551758, "step": 9710 }, { "epoch": 1.51, "learning_rate": 7.024893407334406e-06, "logits/chosen": -2.8466997146606445, "logits/rejected": -3.312119245529175, "logps/chosen": -34.11656951904297, "logps/rejected": -193.61122131347656, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.34912109375, "rewards/margins": 6.50677490234375, "rewards/rejected": -7.85589599609375, "step": 9711 }, { "epoch": 1.51, "learning_rate": 7.024159966803258e-06, "logits/chosen": -2.4045770168304443, "logits/rejected": -2.8784339427948, "logps/chosen": -209.21315002441406, "logps/rejected": -319.579833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.5999975204467773, "rewards/margins": 14.529232025146484, "rewards/rejected": -13.92923355102539, "step": 9712 }, { "epoch": 1.51, "learning_rate": 7.02342652627211e-06, "logits/chosen": -3.047312021255493, "logits/rejected": -2.84829044342041, "logps/chosen": -702.1427001953125, "logps/rejected": -543.7442016601562, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -5.54168701171875, "rewards/margins": 4.560603618621826, "rewards/rejected": -10.102291107177734, "step": 9713 }, { "epoch": 1.51, "learning_rate": 7.0226930857409615e-06, "logits/chosen": -2.5807900428771973, "logits/rejected": -2.933546304702759, "logps/chosen": -55.326927185058594, "logps/rejected": -214.5546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1995642185211182, "rewards/margins": 9.303792953491211, "rewards/rejected": -10.503357887268066, "step": 9714 }, { "epoch": 1.51, "learning_rate": 7.021959645209814e-06, "logits/chosen": -1.6775803565979004, "logits/rejected": -3.1768531799316406, "logps/chosen": -64.91007995605469, "logps/rejected": -412.2464904785156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.677245855331421, "rewards/margins": 9.396425247192383, "rewards/rejected": -11.073671340942383, "step": 9715 }, { "epoch": 1.51, "learning_rate": 7.021226204678666e-06, "logits/chosen": -2.6928954124450684, "logits/rejected": -2.635334014892578, "logps/chosen": -792.0598754882812, "logps/rejected": -670.603759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1678578853607178, "rewards/margins": 11.133459091186523, "rewards/rejected": -13.30131721496582, "step": 9716 }, { "epoch": 1.51, "learning_rate": 7.020492764147518e-06, "logits/chosen": -2.2449090480804443, "logits/rejected": -2.9571783542633057, "logps/chosen": -242.46829223632812, "logps/rejected": -492.3878173828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.957904815673828, "rewards/margins": 12.140774726867676, "rewards/rejected": -16.09868049621582, "step": 9717 }, { "epoch": 1.51, "learning_rate": 7.01975932361637e-06, "logits/chosen": -2.6994478702545166, "logits/rejected": -1.115918517112732, "logps/chosen": -513.3917236328125, "logps/rejected": -183.6576385498047, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -3.775723934173584, "rewards/margins": 4.9687018394470215, "rewards/rejected": -8.744425773620605, "step": 9718 }, { "epoch": 1.51, "learning_rate": 7.0190258830852226e-06, "logits/chosen": -3.0567803382873535, "logits/rejected": -2.819903612136841, "logps/chosen": -129.19615173339844, "logps/rejected": -176.08712768554688, "loss": 1.1798, "rewards/accuracies": 0.5, "rewards/chosen": -3.82763409614563, "rewards/margins": 3.5537848472595215, "rewards/rejected": -7.3814191818237305, "step": 9719 }, { "epoch": 1.51, "learning_rate": 7.0182924425540744e-06, "logits/chosen": -2.3976290225982666, "logits/rejected": -2.9720795154571533, "logps/chosen": -100.49267578125, "logps/rejected": -287.03057861328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.082498550415039, "rewards/margins": 7.561450004577637, "rewards/rejected": -10.64394760131836, "step": 9720 }, { "epoch": 1.51, "learning_rate": 7.017559002022926e-06, "logits/chosen": -2.541266679763794, "logits/rejected": -3.030153274536133, "logps/chosen": -260.9656982421875, "logps/rejected": -667.318115234375, "loss": 0.0881, "rewards/accuracies": 1.0, "rewards/chosen": -7.062624454498291, "rewards/margins": 2.726928234100342, "rewards/rejected": -9.789552688598633, "step": 9721 }, { "epoch": 1.51, "learning_rate": 7.016825561491778e-06, "logits/chosen": -1.4427874088287354, "logits/rejected": -2.6321160793304443, "logps/chosen": -252.79287719726562, "logps/rejected": -660.0364990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.777779459953308, "rewards/margins": 10.596597671508789, "rewards/rejected": -12.37437629699707, "step": 9722 }, { "epoch": 1.51, "learning_rate": 7.016092120960631e-06, "logits/chosen": -2.9286863803863525, "logits/rejected": -2.94183349609375, "logps/chosen": -298.58740234375, "logps/rejected": -398.2786560058594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2083001136779785, "rewards/margins": 9.274131774902344, "rewards/rejected": -12.482431411743164, "step": 9723 }, { "epoch": 1.51, "learning_rate": 7.015358680429483e-06, "logits/chosen": -2.8242945671081543, "logits/rejected": -2.8910748958587646, "logps/chosen": -178.7117919921875, "logps/rejected": -175.94122314453125, "loss": 0.1693, "rewards/accuracies": 1.0, "rewards/chosen": -3.6737194061279297, "rewards/margins": 2.9692022800445557, "rewards/rejected": -6.642921447753906, "step": 9724 }, { "epoch": 1.51, "learning_rate": 7.014625239898335e-06, "logits/chosen": -1.4601527452468872, "logits/rejected": -2.8164520263671875, "logps/chosen": -135.78170776367188, "logps/rejected": -409.99224853515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.7851004600524902, "rewards/margins": 8.433826446533203, "rewards/rejected": -12.218927383422852, "step": 9725 }, { "epoch": 1.51, "learning_rate": 7.013891799367187e-06, "logits/chosen": -2.880849599838257, "logits/rejected": -2.761843681335449, "logps/chosen": -153.77178955078125, "logps/rejected": -137.02255249023438, "loss": 1.2535, "rewards/accuracies": 0.5, "rewards/chosen": -5.9004974365234375, "rewards/margins": 3.8042049407958984, "rewards/rejected": -9.704702377319336, "step": 9726 }, { "epoch": 1.51, "learning_rate": 7.013158358836039e-06, "logits/chosen": -2.421135425567627, "logits/rejected": -2.8264002799987793, "logps/chosen": -251.97052001953125, "logps/rejected": -230.29806518554688, "loss": 0.0873, "rewards/accuracies": 1.0, "rewards/chosen": -6.157689571380615, "rewards/margins": 2.557910442352295, "rewards/rejected": -8.71560001373291, "step": 9727 }, { "epoch": 1.51, "learning_rate": 7.012424918304892e-06, "logits/chosen": -2.0532400608062744, "logits/rejected": -2.816309690475464, "logps/chosen": -239.3719482421875, "logps/rejected": -557.8517456054688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.010916233062744, "rewards/margins": 10.101058959960938, "rewards/rejected": -14.11197566986084, "step": 9728 }, { "epoch": 1.51, "learning_rate": 7.011691477773744e-06, "logits/chosen": -3.0113673210144043, "logits/rejected": -1.4121618270874023, "logps/chosen": -527.0619506835938, "logps/rejected": -304.51519775390625, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -2.238656759262085, "rewards/margins": 4.815252780914307, "rewards/rejected": -7.0539093017578125, "step": 9729 }, { "epoch": 1.51, "learning_rate": 7.010958037242596e-06, "logits/chosen": -2.811174154281616, "logits/rejected": -1.7965197563171387, "logps/chosen": -227.20765686035156, "logps/rejected": -357.9894714355469, "loss": 0.5583, "rewards/accuracies": 0.5, "rewards/chosen": -4.573028087615967, "rewards/margins": 5.765169143676758, "rewards/rejected": -10.338197708129883, "step": 9730 }, { "epoch": 1.51, "learning_rate": 7.010224596711448e-06, "logits/chosen": -2.271366834640503, "logits/rejected": -2.546347141265869, "logps/chosen": -251.348388671875, "logps/rejected": -518.9964599609375, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -2.7252259254455566, "rewards/margins": 6.908871650695801, "rewards/rejected": -9.634098052978516, "step": 9731 }, { "epoch": 1.51, "learning_rate": 7.0094911561803e-06, "logits/chosen": -3.120388984680176, "logits/rejected": -2.9028637409210205, "logps/chosen": -199.33148193359375, "logps/rejected": -166.80355834960938, "loss": 1.22, "rewards/accuracies": 0.5, "rewards/chosen": -3.9114022254943848, "rewards/margins": 1.7320587635040283, "rewards/rejected": -5.643461227416992, "step": 9732 }, { "epoch": 1.51, "learning_rate": 7.008757715649152e-06, "logits/chosen": -3.187326669692993, "logits/rejected": -3.3210363388061523, "logps/chosen": -84.64225006103516, "logps/rejected": -188.48880004882812, "loss": 0.3887, "rewards/accuracies": 0.5, "rewards/chosen": -3.7217397689819336, "rewards/margins": 4.970288276672363, "rewards/rejected": -8.692028045654297, "step": 9733 }, { "epoch": 1.51, "learning_rate": 7.008024275118004e-06, "logits/chosen": -1.9373767375946045, "logits/rejected": -2.648529529571533, "logps/chosen": -367.9486999511719, "logps/rejected": -468.23236083984375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.1806554794311523, "rewards/margins": 7.833491325378418, "rewards/rejected": -11.01414680480957, "step": 9734 }, { "epoch": 1.51, "learning_rate": 7.007290834586856e-06, "logits/chosen": -2.4299938678741455, "logits/rejected": -2.9680163860321045, "logps/chosen": -123.26341247558594, "logps/rejected": -364.4242858886719, "loss": 1.3998, "rewards/accuracies": 0.5, "rewards/chosen": -5.063043117523193, "rewards/margins": 3.265342950820923, "rewards/rejected": -8.328386306762695, "step": 9735 }, { "epoch": 1.51, "learning_rate": 7.006557394055708e-06, "logits/chosen": -3.2212045192718506, "logits/rejected": -3.306838035583496, "logps/chosen": -41.769676208496094, "logps/rejected": -158.53067016601562, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.5856380462646484, "rewards/margins": 7.594029903411865, "rewards/rejected": -10.179668426513672, "step": 9736 }, { "epoch": 1.51, "learning_rate": 7.005823953524561e-06, "logits/chosen": -1.8935835361480713, "logits/rejected": -2.7796170711517334, "logps/chosen": -226.31442260742188, "logps/rejected": -341.48101806640625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.8190574645996094, "rewards/margins": 6.740436553955078, "rewards/rejected": -10.559494018554688, "step": 9737 }, { "epoch": 1.51, "learning_rate": 7.0050905129934125e-06, "logits/chosen": -2.441615581512451, "logits/rejected": -2.8351428508758545, "logps/chosen": -370.49139404296875, "logps/rejected": -757.7333984375, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -3.7742247581481934, "rewards/margins": 7.056128025054932, "rewards/rejected": -10.830352783203125, "step": 9738 }, { "epoch": 1.51, "learning_rate": 7.004357072462264e-06, "logits/chosen": -2.5854690074920654, "logits/rejected": -1.9402012825012207, "logps/chosen": -283.0333557128906, "logps/rejected": -212.43948364257812, "loss": 0.4081, "rewards/accuracies": 0.5, "rewards/chosen": -8.21467399597168, "rewards/margins": 1.8201494216918945, "rewards/rejected": -10.034822463989258, "step": 9739 }, { "epoch": 1.51, "learning_rate": 7.003623631931116e-06, "logits/chosen": -1.9843109846115112, "logits/rejected": -2.5518314838409424, "logps/chosen": -223.3403778076172, "logps/rejected": -382.57379150390625, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -5.930328369140625, "rewards/margins": 5.322300910949707, "rewards/rejected": -11.252630233764648, "step": 9740 }, { "epoch": 1.51, "learning_rate": 7.002890191399969e-06, "logits/chosen": -2.6910500526428223, "logits/rejected": -2.769183397293091, "logps/chosen": -179.15721130371094, "logps/rejected": -324.7969665527344, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -5.284481048583984, "rewards/margins": 5.551791191101074, "rewards/rejected": -10.836272239685059, "step": 9741 }, { "epoch": 1.52, "learning_rate": 7.002156750868821e-06, "logits/chosen": -2.88346004486084, "logits/rejected": -2.946415901184082, "logps/chosen": -84.86453247070312, "logps/rejected": -249.46176147460938, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.066038131713867, "rewards/margins": 6.221573829650879, "rewards/rejected": -9.287611961364746, "step": 9742 }, { "epoch": 1.52, "learning_rate": 7.0014233103376736e-06, "logits/chosen": -1.8676949739456177, "logits/rejected": -2.7709250450134277, "logps/chosen": -226.56854248046875, "logps/rejected": -549.5216674804688, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -5.462522983551025, "rewards/margins": 8.413609504699707, "rewards/rejected": -13.876132011413574, "step": 9743 }, { "epoch": 1.52, "learning_rate": 7.0006898698065254e-06, "logits/chosen": -1.1296286582946777, "logits/rejected": -2.6178138256073, "logps/chosen": -129.12747192382812, "logps/rejected": -525.6251220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.683562755584717, "rewards/margins": 11.838958740234375, "rewards/rejected": -14.52252197265625, "step": 9744 }, { "epoch": 1.52, "learning_rate": 6.999956429275377e-06, "logits/chosen": -2.960181713104248, "logits/rejected": -1.8165451288223267, "logps/chosen": -241.94619750976562, "logps/rejected": -134.2045135498047, "loss": 1.534, "rewards/accuracies": 0.5, "rewards/chosen": -5.10479211807251, "rewards/margins": 2.414039134979248, "rewards/rejected": -7.518831253051758, "step": 9745 }, { "epoch": 1.52, "learning_rate": 6.99922298874423e-06, "logits/chosen": -0.7876537442207336, "logits/rejected": -2.7589313983917236, "logps/chosen": -86.08663940429688, "logps/rejected": -383.25054931640625, "loss": 0.3771, "rewards/accuracies": 0.5, "rewards/chosen": -6.25772762298584, "rewards/margins": 2.505781888961792, "rewards/rejected": -8.763509750366211, "step": 9746 }, { "epoch": 1.52, "learning_rate": 6.998489548213082e-06, "logits/chosen": -2.742790460586548, "logits/rejected": -2.9860129356384277, "logps/chosen": -285.0328674316406, "logps/rejected": -327.30828857421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.4373092651367188, "rewards/margins": 6.700920104980469, "rewards/rejected": -9.138229370117188, "step": 9747 }, { "epoch": 1.52, "learning_rate": 6.997756107681934e-06, "logits/chosen": -2.675241231918335, "logits/rejected": -3.0371220111846924, "logps/chosen": -84.55787658691406, "logps/rejected": -243.58883666992188, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": -2.6992430686950684, "rewards/margins": 5.73351526260376, "rewards/rejected": -8.432758331298828, "step": 9748 }, { "epoch": 1.52, "learning_rate": 6.997022667150786e-06, "logits/chosen": -2.0097203254699707, "logits/rejected": -2.423316240310669, "logps/chosen": -140.14215087890625, "logps/rejected": -346.8272399902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.72326922416687, "rewards/margins": 11.559164047241211, "rewards/rejected": -15.28243350982666, "step": 9749 }, { "epoch": 1.52, "learning_rate": 6.996289226619638e-06, "logits/chosen": -2.107497453689575, "logits/rejected": -2.897477388381958, "logps/chosen": -102.2320556640625, "logps/rejected": -237.12095642089844, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -3.6473822593688965, "rewards/margins": 5.5244669914245605, "rewards/rejected": -9.171849250793457, "step": 9750 }, { "epoch": 1.52, "learning_rate": 6.99555578608849e-06, "logits/chosen": -0.9765166640281677, "logits/rejected": -1.666185975074768, "logps/chosen": -151.23211669921875, "logps/rejected": -540.0647583007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.44010591506958, "rewards/margins": 12.816534042358398, "rewards/rejected": -16.25663948059082, "step": 9751 }, { "epoch": 1.52, "learning_rate": 6.994822345557342e-06, "logits/chosen": -2.625910758972168, "logits/rejected": -3.0489702224731445, "logps/chosen": -187.58364868164062, "logps/rejected": -379.9948425292969, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.098382949829102, "rewards/margins": 10.969429016113281, "rewards/rejected": -16.067811965942383, "step": 9752 }, { "epoch": 1.52, "learning_rate": 6.994088905026194e-06, "logits/chosen": -1.97950279712677, "logits/rejected": -2.9446563720703125, "logps/chosen": -288.8774108886719, "logps/rejected": -431.8237609863281, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.152289390563965, "rewards/margins": 6.680922508239746, "rewards/rejected": -10.833211898803711, "step": 9753 }, { "epoch": 1.52, "learning_rate": 6.993355464495046e-06, "logits/chosen": -3.0260725021362305, "logits/rejected": -2.5331039428710938, "logps/chosen": -303.27532958984375, "logps/rejected": -148.70285034179688, "loss": 1.8942, "rewards/accuracies": 0.5, "rewards/chosen": -6.244903564453125, "rewards/margins": 2.148057699203491, "rewards/rejected": -8.392961502075195, "step": 9754 }, { "epoch": 1.52, "learning_rate": 6.992622023963899e-06, "logits/chosen": -2.7625675201416016, "logits/rejected": -2.045363426208496, "logps/chosen": -787.390625, "logps/rejected": -613.4915771484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.3108458518981934, "rewards/margins": 8.393986701965332, "rewards/rejected": -11.704832077026367, "step": 9755 }, { "epoch": 1.52, "learning_rate": 6.9918885834327505e-06, "logits/chosen": -3.1323297023773193, "logits/rejected": -3.371973752975464, "logps/chosen": -317.5198974609375, "logps/rejected": -367.9678039550781, "loss": 3.4096, "rewards/accuracies": 0.5, "rewards/chosen": -7.112587928771973, "rewards/margins": 1.3280625343322754, "rewards/rejected": -8.44064998626709, "step": 9756 }, { "epoch": 1.52, "learning_rate": 6.991155142901602e-06, "logits/chosen": -2.4748032093048096, "logits/rejected": -3.0761823654174805, "logps/chosen": -203.1911163330078, "logps/rejected": -309.11553955078125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -3.0792691707611084, "rewards/margins": 5.44251012802124, "rewards/rejected": -8.52177906036377, "step": 9757 }, { "epoch": 1.52, "learning_rate": 6.990421702370454e-06, "logits/chosen": -2.801224708557129, "logits/rejected": -3.174010753631592, "logps/chosen": -84.82964324951172, "logps/rejected": -338.1134948730469, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.5027825832366943, "rewards/margins": 8.951608657836914, "rewards/rejected": -12.454391479492188, "step": 9758 }, { "epoch": 1.52, "learning_rate": 6.989688261839307e-06, "logits/chosen": -2.078625440597534, "logits/rejected": -2.6539371013641357, "logps/chosen": -62.51568603515625, "logps/rejected": -303.2863464355469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.747511863708496, "rewards/margins": 8.16263484954834, "rewards/rejected": -10.910146713256836, "step": 9759 }, { "epoch": 1.52, "learning_rate": 6.98895482130816e-06, "logits/chosen": -2.723742723464966, "logits/rejected": -3.0621755123138428, "logps/chosen": -788.2434692382812, "logps/rejected": -741.572998046875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -5.088708400726318, "rewards/margins": 8.734588623046875, "rewards/rejected": -13.823297500610352, "step": 9760 }, { "epoch": 1.52, "learning_rate": 6.988221380777012e-06, "logits/chosen": -1.8089624643325806, "logits/rejected": -2.9628264904022217, "logps/chosen": -327.24127197265625, "logps/rejected": -606.2230834960938, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -2.0711426734924316, "rewards/margins": 5.374931335449219, "rewards/rejected": -7.44607400894165, "step": 9761 }, { "epoch": 1.52, "learning_rate": 6.9874879402458635e-06, "logits/chosen": -1.4248628616333008, "logits/rejected": -2.6878066062927246, "logps/chosen": -86.89629364013672, "logps/rejected": -277.4183654785156, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.790594577789307, "rewards/margins": 7.399418830871582, "rewards/rejected": -13.190013885498047, "step": 9762 }, { "epoch": 1.52, "learning_rate": 6.986754499714715e-06, "logits/chosen": -2.657020092010498, "logits/rejected": -2.7187318801879883, "logps/chosen": -83.95622253417969, "logps/rejected": -124.32365417480469, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -3.046548366546631, "rewards/margins": 4.028223991394043, "rewards/rejected": -7.074772357940674, "step": 9763 }, { "epoch": 1.52, "learning_rate": 6.986021059183568e-06, "logits/chosen": -2.315690755844116, "logits/rejected": -2.883502721786499, "logps/chosen": -152.55615234375, "logps/rejected": -289.276123046875, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/chosen": -2.3238649368286133, "rewards/margins": 5.808243751525879, "rewards/rejected": -8.132108688354492, "step": 9764 }, { "epoch": 1.52, "learning_rate": 6.98528761865242e-06, "logits/chosen": -2.920762777328491, "logits/rejected": -2.2015717029571533, "logps/chosen": -510.409423828125, "logps/rejected": -423.0704040527344, "loss": 0.1853, "rewards/accuracies": 1.0, "rewards/chosen": -4.282803535461426, "rewards/margins": 5.0966596603393555, "rewards/rejected": -9.379463195800781, "step": 9765 }, { "epoch": 1.52, "learning_rate": 6.984554178121272e-06, "logits/chosen": -2.909219741821289, "logits/rejected": -2.9123449325561523, "logps/chosen": -137.643310546875, "logps/rejected": -268.6597900390625, "loss": 0.7133, "rewards/accuracies": 0.5, "rewards/chosen": -6.410087585449219, "rewards/margins": 2.4547696113586426, "rewards/rejected": -8.864856719970703, "step": 9766 }, { "epoch": 1.52, "learning_rate": 6.983820737590124e-06, "logits/chosen": -2.6250159740448, "logits/rejected": -2.855480432510376, "logps/chosen": -255.4908447265625, "logps/rejected": -405.72064208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9736942648887634, "rewards/margins": 11.147769927978516, "rewards/rejected": -12.121464729309082, "step": 9767 }, { "epoch": 1.52, "learning_rate": 6.9830872970589764e-06, "logits/chosen": -2.4502110481262207, "logits/rejected": -3.087256908416748, "logps/chosen": -111.25875854492188, "logps/rejected": -524.1114501953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.087338924407959, "rewards/margins": 10.107749938964844, "rewards/rejected": -14.195089340209961, "step": 9768 }, { "epoch": 1.52, "learning_rate": 6.982353856527828e-06, "logits/chosen": -2.8028125762939453, "logits/rejected": -2.2974774837493896, "logps/chosen": -136.0919647216797, "logps/rejected": -267.8810729980469, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -0.18073120713233948, "rewards/margins": 9.001255989074707, "rewards/rejected": -9.181986808776855, "step": 9769 }, { "epoch": 1.52, "learning_rate": 6.98162041599668e-06, "logits/chosen": -2.5549607276916504, "logits/rejected": -2.7598936557769775, "logps/chosen": -460.1578369140625, "logps/rejected": -424.0390930175781, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.715983867645264, "rewards/margins": 9.205181121826172, "rewards/rejected": -13.921164512634277, "step": 9770 }, { "epoch": 1.52, "learning_rate": 6.980886975465532e-06, "logits/chosen": -1.4334797859191895, "logits/rejected": -2.6110610961914062, "logps/chosen": -181.41445922851562, "logps/rejected": -387.3493347167969, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -4.074423789978027, "rewards/margins": 6.3828444480896, "rewards/rejected": -10.457267761230469, "step": 9771 }, { "epoch": 1.52, "learning_rate": 6.980153534934385e-06, "logits/chosen": -1.7889727354049683, "logits/rejected": -2.574666976928711, "logps/chosen": -77.77593994140625, "logps/rejected": -185.7753448486328, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.4686341285705566, "rewards/margins": 5.947144508361816, "rewards/rejected": -8.415779113769531, "step": 9772 }, { "epoch": 1.52, "learning_rate": 6.979420094403237e-06, "logits/chosen": -2.5214133262634277, "logits/rejected": -2.9712531566619873, "logps/chosen": -140.76617431640625, "logps/rejected": -301.33447265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.6195049285888672, "rewards/margins": 7.994037628173828, "rewards/rejected": -9.613542556762695, "step": 9773 }, { "epoch": 1.52, "learning_rate": 6.9786866538720886e-06, "logits/chosen": -1.9933042526245117, "logits/rejected": -2.8272223472595215, "logps/chosen": -130.21669006347656, "logps/rejected": -351.280029296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.6357946395874023, "rewards/margins": 9.15368366241455, "rewards/rejected": -11.789478302001953, "step": 9774 }, { "epoch": 1.52, "learning_rate": 6.9779532133409404e-06, "logits/chosen": -1.196205496788025, "logits/rejected": -2.7206339836120605, "logps/chosen": -87.7552719116211, "logps/rejected": -424.3293151855469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.7198567390441895, "rewards/margins": 9.232917785644531, "rewards/rejected": -11.952774047851562, "step": 9775 }, { "epoch": 1.52, "learning_rate": 6.977219772809793e-06, "logits/chosen": -2.3362135887145996, "logits/rejected": -2.746244430541992, "logps/chosen": -155.63790893554688, "logps/rejected": -428.963623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.088672637939453, "rewards/margins": 12.909574508666992, "rewards/rejected": -14.998247146606445, "step": 9776 }, { "epoch": 1.52, "learning_rate": 6.976486332278646e-06, "logits/chosen": -2.157294750213623, "logits/rejected": -2.854964017868042, "logps/chosen": -52.05860137939453, "logps/rejected": -298.3658752441406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.186128854751587, "rewards/margins": 8.532808303833008, "rewards/rejected": -10.718937873840332, "step": 9777 }, { "epoch": 1.52, "learning_rate": 6.975752891747498e-06, "logits/chosen": -2.571916341781616, "logits/rejected": -1.4925175905227661, "logps/chosen": -154.21197509765625, "logps/rejected": -118.71553802490234, "loss": 0.6734, "rewards/accuracies": 0.5, "rewards/chosen": -5.722732067108154, "rewards/margins": 1.763704538345337, "rewards/rejected": -7.48643684387207, "step": 9778 }, { "epoch": 1.52, "learning_rate": 6.97501945121635e-06, "logits/chosen": -2.595750093460083, "logits/rejected": -2.166745901107788, "logps/chosen": -145.81121826171875, "logps/rejected": -280.5271301269531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4681551456451416, "rewards/margins": 8.230016708374023, "rewards/rejected": -9.698172569274902, "step": 9779 }, { "epoch": 1.52, "learning_rate": 6.9742860106852015e-06, "logits/chosen": -2.542407751083374, "logits/rejected": -2.832442283630371, "logps/chosen": -83.66134643554688, "logps/rejected": -418.2308349609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.368064522743225, "rewards/margins": 8.977837562561035, "rewards/rejected": -10.345901489257812, "step": 9780 }, { "epoch": 1.52, "learning_rate": 6.973552570154054e-06, "logits/chosen": -2.232940912246704, "logits/rejected": -2.8463146686553955, "logps/chosen": -109.333984375, "logps/rejected": -407.67840576171875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -2.9641876220703125, "rewards/margins": 7.523345470428467, "rewards/rejected": -10.487533569335938, "step": 9781 }, { "epoch": 1.52, "learning_rate": 6.972819129622906e-06, "logits/chosen": -2.910752534866333, "logits/rejected": -2.9319777488708496, "logps/chosen": -209.76695251464844, "logps/rejected": -321.1624755859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9996666312217712, "rewards/margins": 8.558579444885254, "rewards/rejected": -9.558246612548828, "step": 9782 }, { "epoch": 1.52, "learning_rate": 6.972085689091758e-06, "logits/chosen": -2.6846351623535156, "logits/rejected": -2.4943511486053467, "logps/chosen": -398.25762939453125, "logps/rejected": -289.1475524902344, "loss": 0.1566, "rewards/accuracies": 1.0, "rewards/chosen": -3.537583351135254, "rewards/margins": 7.190604209899902, "rewards/rejected": -10.728187561035156, "step": 9783 }, { "epoch": 1.52, "learning_rate": 6.97135224856061e-06, "logits/chosen": -2.921353340148926, "logits/rejected": -2.7475149631500244, "logps/chosen": -154.83883666992188, "logps/rejected": -218.09765625, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -2.490659713745117, "rewards/margins": 6.161153793334961, "rewards/rejected": -8.651813507080078, "step": 9784 }, { "epoch": 1.52, "learning_rate": 6.970618808029462e-06, "logits/chosen": -2.115452766418457, "logits/rejected": -2.9272854328155518, "logps/chosen": -140.031494140625, "logps/rejected": -221.48928833007812, "loss": 1.5622, "rewards/accuracies": 0.5, "rewards/chosen": -4.668461322784424, "rewards/margins": 0.6071532964706421, "rewards/rejected": -5.2756147384643555, "step": 9785 }, { "epoch": 1.52, "learning_rate": 6.9698853674983145e-06, "logits/chosen": -2.1839404106140137, "logits/rejected": -2.331528425216675, "logps/chosen": -96.14759826660156, "logps/rejected": -428.70428466796875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.631577968597412, "rewards/margins": 7.676881790161133, "rewards/rejected": -11.308460235595703, "step": 9786 }, { "epoch": 1.52, "learning_rate": 6.969151926967166e-06, "logits/chosen": -1.6859471797943115, "logits/rejected": -3.2003190517425537, "logps/chosen": -92.80937194824219, "logps/rejected": -338.1053771972656, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -4.527305603027344, "rewards/margins": 6.558157920837402, "rewards/rejected": -11.085463523864746, "step": 9787 }, { "epoch": 1.52, "learning_rate": 6.968418486436018e-06, "logits/chosen": -1.2870186567306519, "logits/rejected": -1.8647854328155518, "logps/chosen": -250.58631896972656, "logps/rejected": -394.87847900390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.7014737129211426, "rewards/margins": 9.053445816040039, "rewards/rejected": -11.754919052124023, "step": 9788 }, { "epoch": 1.52, "learning_rate": 6.96768504590487e-06, "logits/chosen": -2.9352877140045166, "logits/rejected": -0.9304344058036804, "logps/chosen": -767.1871337890625, "logps/rejected": -268.1155090332031, "loss": 3.2574, "rewards/accuracies": 0.5, "rewards/chosen": -7.828464031219482, "rewards/margins": -0.0575709342956543, "rewards/rejected": -7.770893096923828, "step": 9789 }, { "epoch": 1.52, "learning_rate": 6.966951605373723e-06, "logits/chosen": -3.0802347660064697, "logits/rejected": -2.240901470184326, "logps/chosen": -369.86785888671875, "logps/rejected": -222.17947387695312, "loss": 1.4261, "rewards/accuracies": 0.5, "rewards/chosen": -4.921339511871338, "rewards/margins": 0.32037413120269775, "rewards/rejected": -5.241713523864746, "step": 9790 }, { "epoch": 1.52, "learning_rate": 6.966218164842575e-06, "logits/chosen": -3.0524299144744873, "logits/rejected": -2.871473789215088, "logps/chosen": -1063.622314453125, "logps/rejected": -642.967041015625, "loss": 0.4909, "rewards/accuracies": 0.5, "rewards/chosen": -4.908438205718994, "rewards/margins": 2.350533962249756, "rewards/rejected": -7.25897216796875, "step": 9791 }, { "epoch": 1.52, "learning_rate": 6.965484724311427e-06, "logits/chosen": -1.9261101484298706, "logits/rejected": -2.802698850631714, "logps/chosen": -231.22837829589844, "logps/rejected": -366.43231201171875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.4261245727539062, "rewards/margins": 8.379415512084961, "rewards/rejected": -11.805540084838867, "step": 9792 }, { "epoch": 1.52, "learning_rate": 6.964751283780279e-06, "logits/chosen": -3.0611300468444824, "logits/rejected": -2.95507550239563, "logps/chosen": -229.9306640625, "logps/rejected": -335.4756164550781, "loss": 0.1494, "rewards/accuracies": 1.0, "rewards/chosen": -5.179605484008789, "rewards/margins": 6.5379133224487305, "rewards/rejected": -11.71751880645752, "step": 9793 }, { "epoch": 1.52, "learning_rate": 6.964017843249131e-06, "logits/chosen": -1.247269630432129, "logits/rejected": -2.926819086074829, "logps/chosen": -91.79063415527344, "logps/rejected": -525.024169921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.8392499089241028, "rewards/margins": 9.872517585754395, "rewards/rejected": -10.711767196655273, "step": 9794 }, { "epoch": 1.52, "learning_rate": 6.963284402717984e-06, "logits/chosen": -2.5536959171295166, "logits/rejected": -2.976457118988037, "logps/chosen": -375.9875183105469, "logps/rejected": -414.7003479003906, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -3.3455963134765625, "rewards/margins": 5.199586868286133, "rewards/rejected": -8.545183181762695, "step": 9795 }, { "epoch": 1.52, "learning_rate": 6.962550962186836e-06, "logits/chosen": -2.6659088134765625, "logits/rejected": -1.6095798015594482, "logps/chosen": -199.36105346679688, "logps/rejected": -343.70404052734375, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -1.8500173091888428, "rewards/margins": 6.422186374664307, "rewards/rejected": -8.27220344543457, "step": 9796 }, { "epoch": 1.52, "learning_rate": 6.961817521655688e-06, "logits/chosen": -2.440481662750244, "logits/rejected": -2.8607678413391113, "logps/chosen": -131.55142211914062, "logps/rejected": -424.880126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.360663414001465, "rewards/margins": 9.746538162231445, "rewards/rejected": -14.10720157623291, "step": 9797 }, { "epoch": 1.52, "learning_rate": 6.9610840811245396e-06, "logits/chosen": -2.789161205291748, "logits/rejected": -2.2830426692962646, "logps/chosen": -225.30569458007812, "logps/rejected": -339.2405090332031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.005549669265747, "rewards/margins": 10.369296073913574, "rewards/rejected": -12.374845504760742, "step": 9798 }, { "epoch": 1.52, "learning_rate": 6.960350640593392e-06, "logits/chosen": -1.0479341745376587, "logits/rejected": -2.248478412628174, "logps/chosen": -124.9978256225586, "logps/rejected": -480.0024108886719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.335543394088745, "rewards/margins": 10.250865936279297, "rewards/rejected": -12.586408615112305, "step": 9799 }, { "epoch": 1.52, "learning_rate": 6.959617200062244e-06, "logits/chosen": -2.730630874633789, "logits/rejected": -3.3646323680877686, "logps/chosen": -70.48196411132812, "logps/rejected": -264.8517761230469, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -1.6965445280075073, "rewards/margins": 5.72061824798584, "rewards/rejected": -7.417162895202637, "step": 9800 }, { "epoch": 1.52, "learning_rate": 6.958883759531096e-06, "logits/chosen": -2.5415115356445312, "logits/rejected": -2.8367502689361572, "logps/chosen": -397.88226318359375, "logps/rejected": -599.659423828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9506893157958984, "rewards/margins": 10.250713348388672, "rewards/rejected": -13.20140266418457, "step": 9801 }, { "epoch": 1.52, "learning_rate": 6.958150318999948e-06, "logits/chosen": -1.98150634765625, "logits/rejected": -2.464266300201416, "logps/chosen": -185.44393920898438, "logps/rejected": -259.77862548828125, "loss": 0.0931, "rewards/accuracies": 1.0, "rewards/chosen": -2.420891761779785, "rewards/margins": 3.674072265625, "rewards/rejected": -6.094964027404785, "step": 9802 }, { "epoch": 1.52, "learning_rate": 6.9574168784688e-06, "logits/chosen": -2.7115817070007324, "logits/rejected": -2.8495774269104004, "logps/chosen": -228.68463134765625, "logps/rejected": -197.51583862304688, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.4167022705078125, "rewards/margins": 5.470183372497559, "rewards/rejected": -9.886884689331055, "step": 9803 }, { "epoch": 1.52, "learning_rate": 6.9566834379376525e-06, "logits/chosen": -1.7925130128860474, "logits/rejected": -2.5686981678009033, "logps/chosen": -134.63821411132812, "logps/rejected": -239.53013610839844, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.212599277496338, "rewards/margins": 8.215677261352539, "rewards/rejected": -10.428277015686035, "step": 9804 }, { "epoch": 1.52, "learning_rate": 6.955949997406504e-06, "logits/chosen": -2.170233726501465, "logits/rejected": -3.0383358001708984, "logps/chosen": -607.6341552734375, "logps/rejected": -604.8045043945312, "loss": 0.0418, "rewards/accuracies": 1.0, "rewards/chosen": -4.26524543762207, "rewards/margins": 5.4098358154296875, "rewards/rejected": -9.675081253051758, "step": 9805 }, { "epoch": 1.53, "learning_rate": 6.955216556875356e-06, "logits/chosen": -1.6949552297592163, "logits/rejected": -2.784165143966675, "logps/chosen": -147.37417602539062, "logps/rejected": -401.02740478515625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.9800915718078613, "rewards/margins": 7.443291187286377, "rewards/rejected": -10.423382759094238, "step": 9806 }, { "epoch": 1.53, "learning_rate": 6.954483116344208e-06, "logits/chosen": -1.7142441272735596, "logits/rejected": -2.7679007053375244, "logps/chosen": -134.22344970703125, "logps/rejected": -434.5906982421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.203769207000732, "rewards/margins": 8.193998336791992, "rewards/rejected": -12.397767066955566, "step": 9807 }, { "epoch": 1.53, "learning_rate": 6.953749675813061e-06, "logits/chosen": -2.8904504776000977, "logits/rejected": -2.7648239135742188, "logps/chosen": -299.5479431152344, "logps/rejected": -164.369873046875, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -2.9190821647644043, "rewards/margins": 5.452122688293457, "rewards/rejected": -8.371204376220703, "step": 9808 }, { "epoch": 1.53, "learning_rate": 6.953016235281913e-06, "logits/chosen": -2.7707204818725586, "logits/rejected": -2.583714723587036, "logps/chosen": -208.98570251464844, "logps/rejected": -259.5354919433594, "loss": 0.7251, "rewards/accuracies": 0.5, "rewards/chosen": -3.1189675331115723, "rewards/margins": 5.40899133682251, "rewards/rejected": -8.527958869934082, "step": 9809 }, { "epoch": 1.53, "learning_rate": 6.9522827947507655e-06, "logits/chosen": -2.106382131576538, "logits/rejected": -2.7832210063934326, "logps/chosen": -298.35906982421875, "logps/rejected": -386.1282958984375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -1.4837334156036377, "rewards/margins": 6.16536808013916, "rewards/rejected": -7.649101257324219, "step": 9810 }, { "epoch": 1.53, "learning_rate": 6.951549354219617e-06, "logits/chosen": -2.739856719970703, "logits/rejected": -1.2651005983352661, "logps/chosen": -257.810546875, "logps/rejected": -126.98816680908203, "loss": 1.8364, "rewards/accuracies": 0.5, "rewards/chosen": -5.679224491119385, "rewards/margins": 2.222487688064575, "rewards/rejected": -7.901712417602539, "step": 9811 }, { "epoch": 1.53, "learning_rate": 6.950815913688469e-06, "logits/chosen": -2.618043899536133, "logits/rejected": -2.780054807662964, "logps/chosen": -132.9925079345703, "logps/rejected": -287.6853942871094, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -1.6129783391952515, "rewards/margins": 6.546396732330322, "rewards/rejected": -8.159375190734863, "step": 9812 }, { "epoch": 1.53, "learning_rate": 6.950082473157322e-06, "logits/chosen": -2.5893025398254395, "logits/rejected": -2.9981496334075928, "logps/chosen": -358.7580261230469, "logps/rejected": -428.6371154785156, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -4.804912090301514, "rewards/margins": 4.788234233856201, "rewards/rejected": -9.593146324157715, "step": 9813 }, { "epoch": 1.53, "learning_rate": 6.949349032626174e-06, "logits/chosen": -2.707982063293457, "logits/rejected": -2.5096633434295654, "logps/chosen": -237.2070770263672, "logps/rejected": -212.99539184570312, "loss": 1.1987, "rewards/accuracies": 0.5, "rewards/chosen": -6.254151344299316, "rewards/margins": 1.0747408866882324, "rewards/rejected": -7.328891754150391, "step": 9814 }, { "epoch": 1.53, "learning_rate": 6.948615592095026e-06, "logits/chosen": -2.7052955627441406, "logits/rejected": -2.5035929679870605, "logps/chosen": -179.74703979492188, "logps/rejected": -273.89154052734375, "loss": 2.3426, "rewards/accuracies": 0.5, "rewards/chosen": -5.829289436340332, "rewards/margins": 3.245159149169922, "rewards/rejected": -9.074448585510254, "step": 9815 }, { "epoch": 1.53, "learning_rate": 6.947882151563878e-06, "logits/chosen": -1.4433475732803345, "logits/rejected": -2.796997308731079, "logps/chosen": -138.43463134765625, "logps/rejected": -661.2730712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.502364158630371, "rewards/margins": 9.440439224243164, "rewards/rejected": -13.942804336547852, "step": 9816 }, { "epoch": 1.53, "learning_rate": 6.94714871103273e-06, "logits/chosen": -2.1430728435516357, "logits/rejected": -2.677305221557617, "logps/chosen": -228.92527770996094, "logps/rejected": -371.3340759277344, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -3.191040515899658, "rewards/margins": 4.806791305541992, "rewards/rejected": -7.99783182144165, "step": 9817 }, { "epoch": 1.53, "learning_rate": 6.946415270501582e-06, "logits/chosen": -1.2454376220703125, "logits/rejected": -3.1009905338287354, "logps/chosen": -103.885498046875, "logps/rejected": -430.8917236328125, "loss": 0.0873, "rewards/accuracies": 1.0, "rewards/chosen": -4.774177551269531, "rewards/margins": 7.588860511779785, "rewards/rejected": -12.363037109375, "step": 9818 }, { "epoch": 1.53, "learning_rate": 6.945681829970434e-06, "logits/chosen": -2.272878408432007, "logits/rejected": -3.0280375480651855, "logps/chosen": -145.25782775878906, "logps/rejected": -538.6229248046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8279223442077637, "rewards/margins": 9.11834716796875, "rewards/rejected": -12.946269989013672, "step": 9819 }, { "epoch": 1.53, "learning_rate": 6.944948389439286e-06, "logits/chosen": -2.8263542652130127, "logits/rejected": -2.33662486076355, "logps/chosen": -294.1341857910156, "logps/rejected": -316.6482849121094, "loss": 0.646, "rewards/accuracies": 0.5, "rewards/chosen": -3.381978750228882, "rewards/margins": 3.0966358184814453, "rewards/rejected": -6.478614807128906, "step": 9820 }, { "epoch": 1.53, "learning_rate": 6.944214948908139e-06, "logits/chosen": -2.671515464782715, "logits/rejected": -3.0348501205444336, "logps/chosen": -87.047607421875, "logps/rejected": -232.36434936523438, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -2.252302646636963, "rewards/margins": 6.616265773773193, "rewards/rejected": -8.868568420410156, "step": 9821 }, { "epoch": 1.53, "learning_rate": 6.943481508376991e-06, "logits/chosen": -2.9082913398742676, "logits/rejected": -2.989055871963501, "logps/chosen": -393.6836242675781, "logps/rejected": -434.39959716796875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -4.303946495056152, "rewards/margins": 4.773038864135742, "rewards/rejected": -9.076985359191895, "step": 9822 }, { "epoch": 1.53, "learning_rate": 6.9427480678458425e-06, "logits/chosen": -2.8899614810943604, "logits/rejected": -1.994658350944519, "logps/chosen": -565.0186767578125, "logps/rejected": -427.72314453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.0804200172424316, "rewards/margins": 8.483673095703125, "rewards/rejected": -11.564092636108398, "step": 9823 }, { "epoch": 1.53, "learning_rate": 6.942014627314694e-06, "logits/chosen": -2.627636194229126, "logits/rejected": -2.7404255867004395, "logps/chosen": -315.4107666015625, "logps/rejected": -445.2065734863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6006221771240234, "rewards/margins": 10.538573265075684, "rewards/rejected": -11.139195442199707, "step": 9824 }, { "epoch": 1.53, "learning_rate": 6.941281186783546e-06, "logits/chosen": -1.5539896488189697, "logits/rejected": -2.7087948322296143, "logps/chosen": -144.09092712402344, "logps/rejected": -369.85491943359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.2069435119628906, "rewards/margins": 8.716913223266602, "rewards/rejected": -10.923856735229492, "step": 9825 }, { "epoch": 1.53, "learning_rate": 6.940547746252399e-06, "logits/chosen": -2.9254000186920166, "logits/rejected": -2.4607837200164795, "logps/chosen": -129.74990844726562, "logps/rejected": -202.40513610839844, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -1.3905853033065796, "rewards/margins": 7.822242736816406, "rewards/rejected": -9.212827682495117, "step": 9826 }, { "epoch": 1.53, "learning_rate": 6.939814305721252e-06, "logits/chosen": -2.9852771759033203, "logits/rejected": -2.7875537872314453, "logps/chosen": -643.7565307617188, "logps/rejected": -631.8841552734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.833047389984131, "rewards/margins": 8.483154296875, "rewards/rejected": -11.316202163696289, "step": 9827 }, { "epoch": 1.53, "learning_rate": 6.9390808651901035e-06, "logits/chosen": -2.166630506515503, "logits/rejected": -2.6355392932891846, "logps/chosen": -291.8595275878906, "logps/rejected": -435.83673095703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.528933525085449, "rewards/margins": 7.61001443862915, "rewards/rejected": -10.138948440551758, "step": 9828 }, { "epoch": 1.53, "learning_rate": 6.938347424658955e-06, "logits/chosen": -2.7628183364868164, "logits/rejected": -3.32255482673645, "logps/chosen": -391.3323669433594, "logps/rejected": -522.3035888671875, "loss": 0.2631, "rewards/accuracies": 1.0, "rewards/chosen": -6.0015411376953125, "rewards/margins": 4.314398288726807, "rewards/rejected": -10.315938949584961, "step": 9829 }, { "epoch": 1.53, "learning_rate": 6.937613984127808e-06, "logits/chosen": -2.5382864475250244, "logits/rejected": -2.310861110687256, "logps/chosen": -462.85540771484375, "logps/rejected": -472.6651916503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1969590187072754, "rewards/margins": 9.709088325500488, "rewards/rejected": -12.906047821044922, "step": 9830 }, { "epoch": 1.53, "learning_rate": 6.93688054359666e-06, "logits/chosen": -3.0221104621887207, "logits/rejected": -2.9476184844970703, "logps/chosen": -177.91534423828125, "logps/rejected": -225.13427734375, "loss": 0.2455, "rewards/accuracies": 1.0, "rewards/chosen": -3.281742811203003, "rewards/margins": 1.8254472017288208, "rewards/rejected": -5.107190132141113, "step": 9831 }, { "epoch": 1.53, "learning_rate": 6.936147103065512e-06, "logits/chosen": -2.703960418701172, "logits/rejected": -2.872224807739258, "logps/chosen": -212.159912109375, "logps/rejected": -359.73492431640625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.4921524226665497, "rewards/margins": 6.98444938659668, "rewards/rejected": -7.476601600646973, "step": 9832 }, { "epoch": 1.53, "learning_rate": 6.935413662534364e-06, "logits/chosen": -2.714080810546875, "logits/rejected": -2.691892147064209, "logps/chosen": -253.1707000732422, "logps/rejected": -183.31085205078125, "loss": 0.785, "rewards/accuracies": 0.5, "rewards/chosen": -2.5881621837615967, "rewards/margins": 0.5224097967147827, "rewards/rejected": -3.110572099685669, "step": 9833 }, { "epoch": 1.53, "learning_rate": 6.934680222003216e-06, "logits/chosen": -2.5097086429595947, "logits/rejected": -3.2277019023895264, "logps/chosen": -243.29592895507812, "logps/rejected": -353.1723327636719, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -4.418577671051025, "rewards/margins": 4.207183837890625, "rewards/rejected": -8.625761985778809, "step": 9834 }, { "epoch": 1.53, "learning_rate": 6.933946781472068e-06, "logits/chosen": -2.3912417888641357, "logits/rejected": -2.9907021522521973, "logps/chosen": -102.27251434326172, "logps/rejected": -279.83880615234375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.085200309753418, "rewards/margins": 8.307939529418945, "rewards/rejected": -12.393139839172363, "step": 9835 }, { "epoch": 1.53, "learning_rate": 6.93321334094092e-06, "logits/chosen": -2.7967708110809326, "logits/rejected": -2.742995023727417, "logps/chosen": -436.91766357421875, "logps/rejected": -539.7200927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0738518238067627, "rewards/margins": 11.140462875366211, "rewards/rejected": -12.214314460754395, "step": 9836 }, { "epoch": 1.53, "learning_rate": 6.932479900409772e-06, "logits/chosen": -1.959784746170044, "logits/rejected": -2.7191786766052246, "logps/chosen": -205.88998413085938, "logps/rejected": -566.5438232421875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -5.532655715942383, "rewards/margins": 7.984755516052246, "rewards/rejected": -13.517410278320312, "step": 9837 }, { "epoch": 1.53, "learning_rate": 6.931746459878624e-06, "logits/chosen": -2.528028964996338, "logits/rejected": -2.948221445083618, "logps/chosen": -373.6831970214844, "logps/rejected": -395.9890441894531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.388066291809082, "rewards/margins": 8.520195007324219, "rewards/rejected": -12.9082612991333, "step": 9838 }, { "epoch": 1.53, "learning_rate": 6.931013019347477e-06, "logits/chosen": -1.0017606019973755, "logits/rejected": -2.374417304992676, "logps/chosen": -135.4613037109375, "logps/rejected": -420.12274169921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.245957612991333, "rewards/margins": 7.279109477996826, "rewards/rejected": -9.525067329406738, "step": 9839 }, { "epoch": 1.53, "learning_rate": 6.930279578816329e-06, "logits/chosen": -2.446833848953247, "logits/rejected": -2.759938955307007, "logps/chosen": -94.18212890625, "logps/rejected": -198.05587768554688, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -3.7545523643493652, "rewards/margins": 3.9606618881225586, "rewards/rejected": -7.715214729309082, "step": 9840 }, { "epoch": 1.53, "learning_rate": 6.9295461382851805e-06, "logits/chosen": -2.7605154514312744, "logits/rejected": -3.002488613128662, "logps/chosen": -110.68968200683594, "logps/rejected": -167.0465545654297, "loss": 0.5694, "rewards/accuracies": 0.5, "rewards/chosen": -2.1873936653137207, "rewards/margins": 3.8099536895751953, "rewards/rejected": -5.997347354888916, "step": 9841 }, { "epoch": 1.53, "learning_rate": 6.928812697754032e-06, "logits/chosen": -2.5114822387695312, "logits/rejected": -1.781234860420227, "logps/chosen": -173.147705078125, "logps/rejected": -295.842041015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3931015133857727, "rewards/margins": 10.95954418182373, "rewards/rejected": -11.352645874023438, "step": 9842 }, { "epoch": 1.53, "learning_rate": 6.928079257222885e-06, "logits/chosen": -2.2173359394073486, "logits/rejected": -3.067746877670288, "logps/chosen": -195.19325256347656, "logps/rejected": -438.79669189453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.871892213821411, "rewards/margins": 9.371828079223633, "rewards/rejected": -12.243720054626465, "step": 9843 }, { "epoch": 1.53, "learning_rate": 6.927345816691738e-06, "logits/chosen": -1.1528820991516113, "logits/rejected": -2.9084088802337646, "logps/chosen": -55.072967529296875, "logps/rejected": -418.08355712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1985859870910645, "rewards/margins": 11.36164379119873, "rewards/rejected": -13.560230255126953, "step": 9844 }, { "epoch": 1.53, "learning_rate": 6.92661237616059e-06, "logits/chosen": -1.7540034055709839, "logits/rejected": -2.9308102130889893, "logps/chosen": -148.47494506835938, "logps/rejected": -456.4712829589844, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.8633575439453125, "rewards/margins": 8.264800071716309, "rewards/rejected": -12.128157615661621, "step": 9845 }, { "epoch": 1.53, "learning_rate": 6.925878935629442e-06, "logits/chosen": -3.133026361465454, "logits/rejected": -3.0373659133911133, "logps/chosen": -306.8276672363281, "logps/rejected": -230.03741455078125, "loss": 1.3772, "rewards/accuracies": 0.5, "rewards/chosen": -5.2935028076171875, "rewards/margins": 2.188530921936035, "rewards/rejected": -7.482033729553223, "step": 9846 }, { "epoch": 1.53, "learning_rate": 6.9251454950982935e-06, "logits/chosen": -3.051547050476074, "logits/rejected": -2.355774164199829, "logps/chosen": -694.81201171875, "logps/rejected": -367.87701416015625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.1845946311950684, "rewards/margins": 6.712291717529297, "rewards/rejected": -8.896885871887207, "step": 9847 }, { "epoch": 1.53, "learning_rate": 6.924412054567146e-06, "logits/chosen": -2.647634744644165, "logits/rejected": -2.567011594772339, "logps/chosen": -158.78244018554688, "logps/rejected": -224.09494018554688, "loss": 0.1656, "rewards/accuracies": 1.0, "rewards/chosen": -3.3091156482696533, "rewards/margins": 2.7074790000915527, "rewards/rejected": -6.016594886779785, "step": 9848 }, { "epoch": 1.53, "learning_rate": 6.923678614035998e-06, "logits/chosen": -2.004493236541748, "logits/rejected": -2.6592507362365723, "logps/chosen": -347.82421875, "logps/rejected": -400.6946716308594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3353219032287598, "rewards/margins": 8.179901123046875, "rewards/rejected": -9.515222549438477, "step": 9849 }, { "epoch": 1.53, "learning_rate": 6.92294517350485e-06, "logits/chosen": -2.964836835861206, "logits/rejected": -2.2688040733337402, "logps/chosen": -264.5716552734375, "logps/rejected": -252.17251586914062, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7681456804275513, "rewards/margins": 8.775979042053223, "rewards/rejected": -10.544124603271484, "step": 9850 }, { "epoch": 1.53, "learning_rate": 6.922211732973702e-06, "logits/chosen": -1.8672775030136108, "logits/rejected": -1.4478647708892822, "logps/chosen": -235.9481201171875, "logps/rejected": -267.71795654296875, "loss": 0.9842, "rewards/accuracies": 0.5, "rewards/chosen": -5.1676859855651855, "rewards/margins": 1.2082304954528809, "rewards/rejected": -6.375916481018066, "step": 9851 }, { "epoch": 1.53, "learning_rate": 6.921478292442554e-06, "logits/chosen": -2.664919376373291, "logits/rejected": -2.37998628616333, "logps/chosen": -480.76556396484375, "logps/rejected": -460.7960205078125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.01824951171875, "rewards/margins": 7.596794605255127, "rewards/rejected": -9.615043640136719, "step": 9852 }, { "epoch": 1.53, "learning_rate": 6.9207448519114064e-06, "logits/chosen": -2.186152696609497, "logits/rejected": -2.734302520751953, "logps/chosen": -135.68606567382812, "logps/rejected": -212.84591674804688, "loss": 0.1065, "rewards/accuracies": 1.0, "rewards/chosen": -3.9263226985931396, "rewards/margins": 3.9508962631225586, "rewards/rejected": -7.877219200134277, "step": 9853 }, { "epoch": 1.53, "learning_rate": 6.920011411380258e-06, "logits/chosen": -2.603895664215088, "logits/rejected": -1.8603136539459229, "logps/chosen": -523.9620361328125, "logps/rejected": -391.9288330078125, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -5.112041473388672, "rewards/margins": 4.411665916442871, "rewards/rejected": -9.52370834350586, "step": 9854 }, { "epoch": 1.53, "learning_rate": 6.91927797084911e-06, "logits/chosen": -2.997138023376465, "logits/rejected": -2.921130657196045, "logps/chosen": -233.74241638183594, "logps/rejected": -247.50807189941406, "loss": 0.1467, "rewards/accuracies": 1.0, "rewards/chosen": -2.9652817249298096, "rewards/margins": 3.3515102863311768, "rewards/rejected": -6.316792011260986, "step": 9855 }, { "epoch": 1.53, "learning_rate": 6.918544530317962e-06, "logits/chosen": -1.9309669733047485, "logits/rejected": -3.0195603370666504, "logps/chosen": -237.1668701171875, "logps/rejected": -454.9256591796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.349575996398926, "rewards/margins": 8.783884048461914, "rewards/rejected": -11.133459091186523, "step": 9856 }, { "epoch": 1.53, "learning_rate": 6.917811089786815e-06, "logits/chosen": -1.215470314025879, "logits/rejected": -2.6323204040527344, "logps/chosen": -167.01805114746094, "logps/rejected": -409.7665100097656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.63668966293335, "rewards/margins": 9.116543769836426, "rewards/rejected": -13.753232955932617, "step": 9857 }, { "epoch": 1.53, "learning_rate": 6.917077649255667e-06, "logits/chosen": -2.8049471378326416, "logits/rejected": -3.3152554035186768, "logps/chosen": -53.43107604980469, "logps/rejected": -423.84552001953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4200019836425781, "rewards/margins": 9.586190223693848, "rewards/rejected": -11.006192207336426, "step": 9858 }, { "epoch": 1.53, "learning_rate": 6.9163442087245185e-06, "logits/chosen": -1.6855496168136597, "logits/rejected": -2.7071094512939453, "logps/chosen": -46.14158630371094, "logps/rejected": -336.3191833496094, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.8862524032592773, "rewards/margins": 7.765034198760986, "rewards/rejected": -10.651287078857422, "step": 9859 }, { "epoch": 1.53, "learning_rate": 6.915610768193371e-06, "logits/chosen": -3.0745623111724854, "logits/rejected": -2.340794563293457, "logps/chosen": -238.5066680908203, "logps/rejected": -167.4415740966797, "loss": 1.9838, "rewards/accuracies": 0.5, "rewards/chosen": -4.583649635314941, "rewards/margins": 1.5818142890930176, "rewards/rejected": -6.165463924407959, "step": 9860 }, { "epoch": 1.53, "learning_rate": 6.914877327662224e-06, "logits/chosen": -2.7436118125915527, "logits/rejected": -1.826504111289978, "logps/chosen": -554.0025024414062, "logps/rejected": -463.92681884765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5213489532470703, "rewards/margins": 9.948017120361328, "rewards/rejected": -12.469366073608398, "step": 9861 }, { "epoch": 1.53, "learning_rate": 6.914143887131076e-06, "logits/chosen": -2.3317062854766846, "logits/rejected": -2.771949291229248, "logps/chosen": -112.84127807617188, "logps/rejected": -246.2147979736328, "loss": 0.1958, "rewards/accuracies": 1.0, "rewards/chosen": -4.64654541015625, "rewards/margins": 1.7107770442962646, "rewards/rejected": -6.3573222160339355, "step": 9862 }, { "epoch": 1.53, "learning_rate": 6.913410446599928e-06, "logits/chosen": -2.84529185295105, "logits/rejected": -0.6442053318023682, "logps/chosen": -418.1299133300781, "logps/rejected": -202.91998291015625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -3.0473475456237793, "rewards/margins": 6.283125877380371, "rewards/rejected": -9.330472946166992, "step": 9863 }, { "epoch": 1.53, "learning_rate": 6.91267700606878e-06, "logits/chosen": -2.3411192893981934, "logits/rejected": -2.5797007083892822, "logps/chosen": -145.0377655029297, "logps/rejected": -417.08502197265625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.3079686164855957, "rewards/margins": 9.122233390808105, "rewards/rejected": -11.430201530456543, "step": 9864 }, { "epoch": 1.53, "learning_rate": 6.9119435655376315e-06, "logits/chosen": -2.6452722549438477, "logits/rejected": -3.25506329536438, "logps/chosen": -372.54461669921875, "logps/rejected": -531.406982421875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -3.6851935386657715, "rewards/margins": 6.673386096954346, "rewards/rejected": -10.358579635620117, "step": 9865 }, { "epoch": 1.53, "learning_rate": 6.911210125006484e-06, "logits/chosen": -3.0418102741241455, "logits/rejected": -2.493581533432007, "logps/chosen": -149.68145751953125, "logps/rejected": -69.55030822753906, "loss": 3.0145, "rewards/accuracies": 0.0, "rewards/chosen": -8.142013549804688, "rewards/margins": -2.949134111404419, "rewards/rejected": -5.192879676818848, "step": 9866 }, { "epoch": 1.53, "learning_rate": 6.910476684475336e-06, "logits/chosen": -1.2478513717651367, "logits/rejected": -2.7953848838806152, "logps/chosen": -105.51060485839844, "logps/rejected": -426.14410400390625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -4.655834197998047, "rewards/margins": 7.633299827575684, "rewards/rejected": -12.289134979248047, "step": 9867 }, { "epoch": 1.53, "learning_rate": 6.909743243944188e-06, "logits/chosen": -2.597057819366455, "logits/rejected": -2.9029488563537598, "logps/chosen": -52.574684143066406, "logps/rejected": -175.2580108642578, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -2.5788118839263916, "rewards/margins": 5.686029434204102, "rewards/rejected": -8.264841079711914, "step": 9868 }, { "epoch": 1.53, "learning_rate": 6.90900980341304e-06, "logits/chosen": -2.819371461868286, "logits/rejected": -2.6374928951263428, "logps/chosen": -161.189208984375, "logps/rejected": -189.65396118164062, "loss": 0.4165, "rewards/accuracies": 0.5, "rewards/chosen": -5.076686382293701, "rewards/margins": 1.3201185464859009, "rewards/rejected": -6.3968048095703125, "step": 9869 }, { "epoch": 1.53, "learning_rate": 6.908276362881893e-06, "logits/chosen": -1.8998932838439941, "logits/rejected": -1.9811393022537231, "logps/chosen": -224.36964416503906, "logps/rejected": -235.10540771484375, "loss": 0.0858, "rewards/accuracies": 1.0, "rewards/chosen": -3.1095635890960693, "rewards/margins": 3.6789915561676025, "rewards/rejected": -6.788555145263672, "step": 9870 }, { "epoch": 1.54, "learning_rate": 6.9075429223507445e-06, "logits/chosen": -2.0943236351013184, "logits/rejected": -2.8151369094848633, "logps/chosen": -141.98524475097656, "logps/rejected": -394.88006591796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.639863967895508, "rewards/margins": 9.340021133422852, "rewards/rejected": -13.97988510131836, "step": 9871 }, { "epoch": 1.54, "learning_rate": 6.906809481819596e-06, "logits/chosen": -2.880505084991455, "logits/rejected": -2.166461944580078, "logps/chosen": -298.9376525878906, "logps/rejected": -318.07293701171875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -5.68618106842041, "rewards/margins": 5.562142372131348, "rewards/rejected": -11.248323440551758, "step": 9872 }, { "epoch": 1.54, "learning_rate": 6.906076041288448e-06, "logits/chosen": -2.2220191955566406, "logits/rejected": -2.973344087600708, "logps/chosen": -144.01437377929688, "logps/rejected": -312.60760498046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1714813709259033, "rewards/margins": 9.197524070739746, "rewards/rejected": -11.36900520324707, "step": 9873 }, { "epoch": 1.54, "learning_rate": 6.9053426007573e-06, "logits/chosen": -2.647250175476074, "logits/rejected": -2.7490901947021484, "logps/chosen": -323.9781799316406, "logps/rejected": -441.2945556640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.6415748596191406, "rewards/margins": 8.958395957946777, "rewards/rejected": -12.599971771240234, "step": 9874 }, { "epoch": 1.54, "learning_rate": 6.904609160226153e-06, "logits/chosen": -2.41493821144104, "logits/rejected": -3.060901403427124, "logps/chosen": -223.75454711914062, "logps/rejected": -509.691162109375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -5.4879608154296875, "rewards/margins": 4.878167152404785, "rewards/rejected": -10.366128921508789, "step": 9875 }, { "epoch": 1.54, "learning_rate": 6.903875719695005e-06, "logits/chosen": -2.22489070892334, "logits/rejected": -2.5179309844970703, "logps/chosen": -295.42205810546875, "logps/rejected": -268.7828063964844, "loss": 0.4723, "rewards/accuracies": 0.5, "rewards/chosen": -4.2166643142700195, "rewards/margins": 2.2819299697875977, "rewards/rejected": -6.498594284057617, "step": 9876 }, { "epoch": 1.54, "learning_rate": 6.9031422791638574e-06, "logits/chosen": -0.8498849868774414, "logits/rejected": -2.2256388664245605, "logps/chosen": -225.4673614501953, "logps/rejected": -477.60333251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5461461544036865, "rewards/margins": 10.78873062133789, "rewards/rejected": -13.33487606048584, "step": 9877 }, { "epoch": 1.54, "learning_rate": 6.902408838632709e-06, "logits/chosen": -2.717414617538452, "logits/rejected": -1.9825011491775513, "logps/chosen": -415.62994384765625, "logps/rejected": -417.4962158203125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -2.9924278259277344, "rewards/margins": 5.476189136505127, "rewards/rejected": -8.46861743927002, "step": 9878 }, { "epoch": 1.54, "learning_rate": 6.901675398101562e-06, "logits/chosen": -2.6523263454437256, "logits/rejected": -2.4637575149536133, "logps/chosen": -562.7628784179688, "logps/rejected": -484.1499328613281, "loss": 0.2127, "rewards/accuracies": 1.0, "rewards/chosen": -4.072218418121338, "rewards/margins": 6.390227317810059, "rewards/rejected": -10.462446212768555, "step": 9879 }, { "epoch": 1.54, "learning_rate": 6.900941957570414e-06, "logits/chosen": -2.628216028213501, "logits/rejected": -1.3128752708435059, "logps/chosen": -214.01983642578125, "logps/rejected": -132.47140502929688, "loss": 0.5169, "rewards/accuracies": 0.5, "rewards/chosen": -6.925780296325684, "rewards/margins": 1.7590789794921875, "rewards/rejected": -8.684859275817871, "step": 9880 }, { "epoch": 1.54, "learning_rate": 6.900208517039266e-06, "logits/chosen": -2.9849448204040527, "logits/rejected": -3.1538987159729004, "logps/chosen": -428.6382751464844, "logps/rejected": -399.1192321777344, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -3.7493391036987305, "rewards/margins": 5.348814010620117, "rewards/rejected": -9.098153114318848, "step": 9881 }, { "epoch": 1.54, "learning_rate": 6.899475076508118e-06, "logits/chosen": -2.5546960830688477, "logits/rejected": -2.926861524581909, "logps/chosen": -180.45657348632812, "logps/rejected": -260.25335693359375, "loss": 0.2561, "rewards/accuracies": 1.0, "rewards/chosen": -4.665545463562012, "rewards/margins": 2.2590854167938232, "rewards/rejected": -6.924631118774414, "step": 9882 }, { "epoch": 1.54, "learning_rate": 6.8987416359769696e-06, "logits/chosen": -2.063523530960083, "logits/rejected": -2.8330729007720947, "logps/chosen": -305.6627197265625, "logps/rejected": -626.7425537109375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.075056552886963, "rewards/margins": 10.771453857421875, "rewards/rejected": -14.84650993347168, "step": 9883 }, { "epoch": 1.54, "learning_rate": 6.898008195445822e-06, "logits/chosen": -2.90988826751709, "logits/rejected": -2.1115994453430176, "logps/chosen": -476.9872131347656, "logps/rejected": -428.6144714355469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.062131643295288, "rewards/margins": 8.319825172424316, "rewards/rejected": -10.381956100463867, "step": 9884 }, { "epoch": 1.54, "learning_rate": 6.897274754914674e-06, "logits/chosen": -2.814162254333496, "logits/rejected": -2.9583992958068848, "logps/chosen": -359.61083984375, "logps/rejected": -399.81732177734375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.85252046585083, "rewards/margins": 7.658174514770508, "rewards/rejected": -11.510695457458496, "step": 9885 }, { "epoch": 1.54, "learning_rate": 6.896541314383526e-06, "logits/chosen": -2.7897632122039795, "logits/rejected": -2.2243144512176514, "logps/chosen": -688.8746337890625, "logps/rejected": -549.928955078125, "loss": 0.2549, "rewards/accuracies": 1.0, "rewards/chosen": -4.115105628967285, "rewards/margins": 3.1274025440216064, "rewards/rejected": -7.2425079345703125, "step": 9886 }, { "epoch": 1.54, "learning_rate": 6.895807873852378e-06, "logits/chosen": -1.3150124549865723, "logits/rejected": -2.6413440704345703, "logps/chosen": -231.2230224609375, "logps/rejected": -421.8648376464844, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -5.12993860244751, "rewards/margins": 5.85412073135376, "rewards/rejected": -10.98405933380127, "step": 9887 }, { "epoch": 1.54, "learning_rate": 6.895074433321231e-06, "logits/chosen": -1.6472407579421997, "logits/rejected": -2.8261470794677734, "logps/chosen": -208.92367553710938, "logps/rejected": -539.3917236328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.034172058105469, "rewards/margins": 8.137939453125, "rewards/rejected": -12.172111511230469, "step": 9888 }, { "epoch": 1.54, "learning_rate": 6.8943409927900825e-06, "logits/chosen": -2.784069776535034, "logits/rejected": -3.059450149536133, "logps/chosen": -131.80645751953125, "logps/rejected": -229.77102661132812, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.1006317138671875, "rewards/margins": 5.756737232208252, "rewards/rejected": -8.857368469238281, "step": 9889 }, { "epoch": 1.54, "learning_rate": 6.893607552258934e-06, "logits/chosen": -2.391007661819458, "logits/rejected": -2.8033010959625244, "logps/chosen": -282.90362548828125, "logps/rejected": -366.216064453125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -4.963665962219238, "rewards/margins": 4.706151485443115, "rewards/rejected": -9.669816970825195, "step": 9890 }, { "epoch": 1.54, "learning_rate": 6.892874111727786e-06, "logits/chosen": -2.7625057697296143, "logits/rejected": -2.1533565521240234, "logps/chosen": -250.0216064453125, "logps/rejected": -256.2203674316406, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -3.419790267944336, "rewards/margins": 7.592695236206055, "rewards/rejected": -11.01248550415039, "step": 9891 }, { "epoch": 1.54, "learning_rate": 6.892140671196638e-06, "logits/chosen": -3.223461627960205, "logits/rejected": -2.4578499794006348, "logps/chosen": -195.96253967285156, "logps/rejected": -266.83837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6610289812088013, "rewards/margins": 11.277896881103516, "rewards/rejected": -10.616868019104004, "step": 9892 }, { "epoch": 1.54, "learning_rate": 6.891407230665491e-06, "logits/chosen": -2.878385066986084, "logits/rejected": -2.814565896987915, "logps/chosen": -210.9891357421875, "logps/rejected": -308.80072021484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.3287956714630127, "rewards/margins": 10.548564910888672, "rewards/rejected": -12.877361297607422, "step": 9893 }, { "epoch": 1.54, "learning_rate": 6.890673790134344e-06, "logits/chosen": -2.9390816688537598, "logits/rejected": -2.965094566345215, "logps/chosen": -281.18292236328125, "logps/rejected": -363.3543395996094, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -2.173832416534424, "rewards/margins": 3.872241258621216, "rewards/rejected": -6.046073913574219, "step": 9894 }, { "epoch": 1.54, "learning_rate": 6.8899403496031955e-06, "logits/chosen": -2.686941385269165, "logits/rejected": -3.176694393157959, "logps/chosen": -74.24955749511719, "logps/rejected": -215.77162170410156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.305398941040039, "rewards/margins": 7.124692916870117, "rewards/rejected": -10.430091857910156, "step": 9895 }, { "epoch": 1.54, "learning_rate": 6.889206909072047e-06, "logits/chosen": -2.381593704223633, "logits/rejected": -2.830202579498291, "logps/chosen": -718.8176879882812, "logps/rejected": -647.3812255859375, "loss": 0.4874, "rewards/accuracies": 0.5, "rewards/chosen": -6.020743370056152, "rewards/margins": 4.139801025390625, "rewards/rejected": -10.160544395446777, "step": 9896 }, { "epoch": 1.54, "learning_rate": 6.8884734685409e-06, "logits/chosen": -2.7248992919921875, "logits/rejected": -2.969475269317627, "logps/chosen": -202.2495880126953, "logps/rejected": -158.79879760742188, "loss": 0.0836, "rewards/accuracies": 1.0, "rewards/chosen": -2.6644644737243652, "rewards/margins": 4.068511486053467, "rewards/rejected": -6.732975959777832, "step": 9897 }, { "epoch": 1.54, "learning_rate": 6.887740028009752e-06, "logits/chosen": -2.0017244815826416, "logits/rejected": -2.856145143508911, "logps/chosen": -164.72381591796875, "logps/rejected": -394.22418212890625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -4.623472213745117, "rewards/margins": 6.23478889465332, "rewards/rejected": -10.858261108398438, "step": 9898 }, { "epoch": 1.54, "learning_rate": 6.887006587478604e-06, "logits/chosen": -2.7202022075653076, "logits/rejected": -2.9679977893829346, "logps/chosen": -246.89596557617188, "logps/rejected": -430.2935791015625, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -5.1758928298950195, "rewards/margins": 7.924823760986328, "rewards/rejected": -13.100717544555664, "step": 9899 }, { "epoch": 1.54, "learning_rate": 6.886273146947456e-06, "logits/chosen": -2.7528750896453857, "logits/rejected": -2.744718551635742, "logps/chosen": -175.18032836914062, "logps/rejected": -221.30447387695312, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -1.488195776939392, "rewards/margins": 6.774668216705322, "rewards/rejected": -8.262864112854004, "step": 9900 }, { "epoch": 1.54, "learning_rate": 6.885539706416308e-06, "logits/chosen": -2.718569040298462, "logits/rejected": -2.7810072898864746, "logps/chosen": -104.83275604248047, "logps/rejected": -249.47006225585938, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.3360424041748047, "rewards/margins": 7.736883163452148, "rewards/rejected": -10.072925567626953, "step": 9901 }, { "epoch": 1.54, "learning_rate": 6.88480626588516e-06, "logits/chosen": -2.613220453262329, "logits/rejected": -2.9745841026306152, "logps/chosen": -205.21713256835938, "logps/rejected": -293.667724609375, "loss": 0.1601, "rewards/accuracies": 1.0, "rewards/chosen": -3.776012420654297, "rewards/margins": 5.119758605957031, "rewards/rejected": -8.895771026611328, "step": 9902 }, { "epoch": 1.54, "learning_rate": 6.884072825354012e-06, "logits/chosen": -2.69513201713562, "logits/rejected": -3.1025986671447754, "logps/chosen": -94.99378967285156, "logps/rejected": -254.858154296875, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -3.9592671394348145, "rewards/margins": 4.186184883117676, "rewards/rejected": -8.145451545715332, "step": 9903 }, { "epoch": 1.54, "learning_rate": 6.883339384822864e-06, "logits/chosen": -2.813952684402466, "logits/rejected": -2.348188877105713, "logps/chosen": -210.04776000976562, "logps/rejected": -284.1822509765625, "loss": 0.0486, "rewards/accuracies": 1.0, "rewards/chosen": -3.186962604522705, "rewards/margins": 5.332828521728516, "rewards/rejected": -8.519791603088379, "step": 9904 }, { "epoch": 1.54, "learning_rate": 6.882605944291716e-06, "logits/chosen": -2.62732195854187, "logits/rejected": -1.6838181018829346, "logps/chosen": -183.0670166015625, "logps/rejected": -164.96328735351562, "loss": 0.6786, "rewards/accuracies": 0.5, "rewards/chosen": -5.864926338195801, "rewards/margins": 1.050904631614685, "rewards/rejected": -6.915831089019775, "step": 9905 }, { "epoch": 1.54, "learning_rate": 6.881872503760569e-06, "logits/chosen": -0.904387354850769, "logits/rejected": -1.9517827033996582, "logps/chosen": -124.44985961914062, "logps/rejected": -522.6060791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.416219711303711, "rewards/margins": 11.33995246887207, "rewards/rejected": -13.756172180175781, "step": 9906 }, { "epoch": 1.54, "learning_rate": 6.8811390632294206e-06, "logits/chosen": -2.0989508628845215, "logits/rejected": -2.9291880130767822, "logps/chosen": -236.47708129882812, "logps/rejected": -439.5611877441406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.775496006011963, "rewards/margins": 9.699121475219727, "rewards/rejected": -13.474617004394531, "step": 9907 }, { "epoch": 1.54, "learning_rate": 6.8804056226982724e-06, "logits/chosen": -3.0934693813323975, "logits/rejected": -2.126474142074585, "logps/chosen": -453.3883972167969, "logps/rejected": -308.5900573730469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0283600091934204, "rewards/margins": 9.978355407714844, "rewards/rejected": -11.006715774536133, "step": 9908 }, { "epoch": 1.54, "learning_rate": 6.879672182167124e-06, "logits/chosen": -2.1319618225097656, "logits/rejected": -2.5278613567352295, "logps/chosen": -176.35061645507812, "logps/rejected": -185.11325073242188, "loss": 2.4251, "rewards/accuracies": 0.5, "rewards/chosen": -5.579100131988525, "rewards/margins": 1.6639487743377686, "rewards/rejected": -7.243048667907715, "step": 9909 }, { "epoch": 1.54, "learning_rate": 6.878938741635977e-06, "logits/chosen": -2.778775215148926, "logits/rejected": -2.9245550632476807, "logps/chosen": -190.54354858398438, "logps/rejected": -317.295654296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.37353515625, "rewards/margins": 8.495651245117188, "rewards/rejected": -11.869186401367188, "step": 9910 }, { "epoch": 1.54, "learning_rate": 6.87820530110483e-06, "logits/chosen": -2.676816940307617, "logits/rejected": -1.7418781518936157, "logps/chosen": -165.82073974609375, "logps/rejected": -200.4325714111328, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -5.476579666137695, "rewards/margins": 3.483877182006836, "rewards/rejected": -8.960456848144531, "step": 9911 }, { "epoch": 1.54, "learning_rate": 6.877471860573682e-06, "logits/chosen": -2.5381572246551514, "logits/rejected": -2.95226788520813, "logps/chosen": -537.9796142578125, "logps/rejected": -503.6147766113281, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.901872158050537, "rewards/margins": 8.995859146118164, "rewards/rejected": -11.89773178100586, "step": 9912 }, { "epoch": 1.54, "learning_rate": 6.8767384200425335e-06, "logits/chosen": -1.6402335166931152, "logits/rejected": -2.7612857818603516, "logps/chosen": -251.8727569580078, "logps/rejected": -312.1094665527344, "loss": 0.2084, "rewards/accuracies": 1.0, "rewards/chosen": -4.4736328125, "rewards/margins": 5.256740570068359, "rewards/rejected": -9.73037338256836, "step": 9913 }, { "epoch": 1.54, "learning_rate": 6.876004979511385e-06, "logits/chosen": -1.7826502323150635, "logits/rejected": -2.8869595527648926, "logps/chosen": -102.58349609375, "logps/rejected": -388.6734619140625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.000032901763916, "rewards/margins": 9.10235595703125, "rewards/rejected": -12.102389335632324, "step": 9914 }, { "epoch": 1.54, "learning_rate": 6.875271538980238e-06, "logits/chosen": -2.952321767807007, "logits/rejected": -2.7494747638702393, "logps/chosen": -215.401611328125, "logps/rejected": -167.3148193359375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -3.841365337371826, "rewards/margins": 5.147486209869385, "rewards/rejected": -8.988851547241211, "step": 9915 }, { "epoch": 1.54, "learning_rate": 6.87453809844909e-06, "logits/chosen": -2.3098814487457275, "logits/rejected": -2.9542434215545654, "logps/chosen": -414.08892822265625, "logps/rejected": -368.916259765625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.209096431732178, "rewards/margins": 6.385455131530762, "rewards/rejected": -10.594551086425781, "step": 9916 }, { "epoch": 1.54, "learning_rate": 6.873804657917942e-06, "logits/chosen": -2.8470993041992188, "logits/rejected": -2.000291585922241, "logps/chosen": -576.2158203125, "logps/rejected": -387.7054748535156, "loss": 0.0516, "rewards/accuracies": 1.0, "rewards/chosen": -5.325095176696777, "rewards/margins": 6.020017623901367, "rewards/rejected": -11.345112800598145, "step": 9917 }, { "epoch": 1.54, "learning_rate": 6.873071217386794e-06, "logits/chosen": -0.6870459318161011, "logits/rejected": -2.6858437061309814, "logps/chosen": -52.34726333618164, "logps/rejected": -399.4682312011719, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.614753246307373, "rewards/margins": 5.826580047607422, "rewards/rejected": -9.441332817077637, "step": 9918 }, { "epoch": 1.54, "learning_rate": 6.8723377768556465e-06, "logits/chosen": -3.136622905731201, "logits/rejected": -3.046008586883545, "logps/chosen": -160.62481689453125, "logps/rejected": -209.99420166015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.7700376510620117, "rewards/margins": 7.9289937019348145, "rewards/rejected": -10.699031829833984, "step": 9919 }, { "epoch": 1.54, "learning_rate": 6.871604336324498e-06, "logits/chosen": -3.17383074760437, "logits/rejected": -3.156517505645752, "logps/chosen": -169.2158660888672, "logps/rejected": -276.00872802734375, "loss": 0.492, "rewards/accuracies": 0.5, "rewards/chosen": -4.1840996742248535, "rewards/margins": 4.124922752380371, "rewards/rejected": -8.309022903442383, "step": 9920 }, { "epoch": 1.54, "learning_rate": 6.87087089579335e-06, "logits/chosen": -2.6349129676818848, "logits/rejected": -1.550628900527954, "logps/chosen": -269.532958984375, "logps/rejected": -229.31094360351562, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.468104362487793, "rewards/margins": 6.524906635284424, "rewards/rejected": -11.993011474609375, "step": 9921 }, { "epoch": 1.54, "learning_rate": 6.870137455262202e-06, "logits/chosen": -3.164264440536499, "logits/rejected": -3.2030975818634033, "logps/chosen": -325.72662353515625, "logps/rejected": -380.7916564941406, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -2.490217447280884, "rewards/margins": 6.806637763977051, "rewards/rejected": -9.296854972839355, "step": 9922 }, { "epoch": 1.54, "learning_rate": 6.869404014731054e-06, "logits/chosen": -2.924361228942871, "logits/rejected": -2.9615769386291504, "logps/chosen": -133.4744415283203, "logps/rejected": -308.3806457519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.697486162185669, "rewards/margins": 10.706656455993652, "rewards/rejected": -14.404142379760742, "step": 9923 }, { "epoch": 1.54, "learning_rate": 6.868670574199907e-06, "logits/chosen": -2.612786293029785, "logits/rejected": -2.9868481159210205, "logps/chosen": -123.52986145019531, "logps/rejected": -285.3661804199219, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -6.553863525390625, "rewards/margins": 7.350888252258301, "rewards/rejected": -13.904752731323242, "step": 9924 }, { "epoch": 1.54, "learning_rate": 6.867937133668759e-06, "logits/chosen": -2.5326662063598633, "logits/rejected": -2.7920188903808594, "logps/chosen": -554.1876220703125, "logps/rejected": -655.8101806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2442002296447754, "rewards/margins": 11.152629852294922, "rewards/rejected": -14.396829605102539, "step": 9925 }, { "epoch": 1.54, "learning_rate": 6.8672036931376105e-06, "logits/chosen": -1.964177131652832, "logits/rejected": -2.7771785259246826, "logps/chosen": -204.78662109375, "logps/rejected": -358.4866638183594, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.5911264419555664, "rewards/margins": 9.149145126342773, "rewards/rejected": -11.740270614624023, "step": 9926 }, { "epoch": 1.54, "learning_rate": 6.866470252606463e-06, "logits/chosen": -2.363574743270874, "logits/rejected": -3.0119857788085938, "logps/chosen": -823.3090209960938, "logps/rejected": -630.2230224609375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.96467924118042, "rewards/margins": 6.367835521697998, "rewards/rejected": -10.332514762878418, "step": 9927 }, { "epoch": 1.54, "learning_rate": 6.865736812075316e-06, "logits/chosen": -2.6303465366363525, "logits/rejected": -2.9543097019195557, "logps/chosen": -515.7803344726562, "logps/rejected": -562.833251953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -3.7571961879730225, "rewards/margins": 6.643872261047363, "rewards/rejected": -10.401067733764648, "step": 9928 }, { "epoch": 1.54, "learning_rate": 6.865003371544168e-06, "logits/chosen": -0.9722583889961243, "logits/rejected": -2.489414930343628, "logps/chosen": -71.90255737304688, "logps/rejected": -313.98126220703125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.22450590133667, "rewards/margins": 6.765268802642822, "rewards/rejected": -11.989774703979492, "step": 9929 }, { "epoch": 1.54, "learning_rate": 6.86426993101302e-06, "logits/chosen": -2.6806700229644775, "logits/rejected": -2.049464225769043, "logps/chosen": -204.77078247070312, "logps/rejected": -351.1035461425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.810332775115967, "rewards/margins": 10.711812973022461, "rewards/rejected": -13.52214527130127, "step": 9930 }, { "epoch": 1.54, "learning_rate": 6.8635364904818716e-06, "logits/chosen": -1.5734062194824219, "logits/rejected": -2.660249948501587, "logps/chosen": -178.40225219726562, "logps/rejected": -505.41168212890625, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/chosen": -4.128798007965088, "rewards/margins": 8.125447273254395, "rewards/rejected": -12.254244804382324, "step": 9931 }, { "epoch": 1.54, "learning_rate": 6.8628030499507235e-06, "logits/chosen": -2.6710622310638428, "logits/rejected": -2.251816511154175, "logps/chosen": -231.30564880371094, "logps/rejected": -266.696044921875, "loss": 1.0239, "rewards/accuracies": 0.5, "rewards/chosen": -7.78237247467041, "rewards/margins": 2.0830018520355225, "rewards/rejected": -9.865374565124512, "step": 9932 }, { "epoch": 1.54, "learning_rate": 6.862069609419576e-06, "logits/chosen": -2.860682249069214, "logits/rejected": -1.761678695678711, "logps/chosen": -285.8452453613281, "logps/rejected": -92.26153564453125, "loss": 0.4929, "rewards/accuracies": 0.5, "rewards/chosen": 0.020627617835998535, "rewards/margins": 5.960063457489014, "rewards/rejected": -5.939435958862305, "step": 9933 }, { "epoch": 1.54, "learning_rate": 6.861336168888428e-06, "logits/chosen": -2.6893153190612793, "logits/rejected": -3.1851956844329834, "logps/chosen": -59.24178695678711, "logps/rejected": -241.1990966796875, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -3.1432952880859375, "rewards/margins": 3.623046875, "rewards/rejected": -6.7663421630859375, "step": 9934 }, { "epoch": 1.55, "learning_rate": 6.86060272835728e-06, "logits/chosen": -1.9145697355270386, "logits/rejected": -3.171022891998291, "logps/chosen": -279.49078369140625, "logps/rejected": -512.1453857421875, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -6.080132484436035, "rewards/margins": 3.4050376415252686, "rewards/rejected": -9.485170364379883, "step": 9935 }, { "epoch": 1.55, "learning_rate": 6.859869287826132e-06, "logits/chosen": -2.503002166748047, "logits/rejected": -2.8717970848083496, "logps/chosen": -92.9627456665039, "logps/rejected": -249.12509155273438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.1676483154296875, "rewards/margins": 8.359670639038086, "rewards/rejected": -12.527318954467773, "step": 9936 }, { "epoch": 1.55, "learning_rate": 6.8591358472949845e-06, "logits/chosen": -2.7947309017181396, "logits/rejected": -2.777177333831787, "logps/chosen": -289.653564453125, "logps/rejected": -349.2201843261719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2357451915740967, "rewards/margins": 9.471362113952637, "rewards/rejected": -11.707107543945312, "step": 9937 }, { "epoch": 1.55, "learning_rate": 6.858402406763836e-06, "logits/chosen": -2.171790599822998, "logits/rejected": -2.6928012371063232, "logps/chosen": -106.07955932617188, "logps/rejected": -265.75274658203125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.0259509086608887, "rewards/margins": 6.549393653869629, "rewards/rejected": -9.57534408569336, "step": 9938 }, { "epoch": 1.55, "learning_rate": 6.857668966232688e-06, "logits/chosen": -2.636472702026367, "logits/rejected": -3.2188034057617188, "logps/chosen": -114.22749328613281, "logps/rejected": -430.4388427734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.4405736923217773, "rewards/margins": 7.37158203125, "rewards/rejected": -9.812155723571777, "step": 9939 }, { "epoch": 1.55, "learning_rate": 6.85693552570154e-06, "logits/chosen": -2.682090997695923, "logits/rejected": -3.2264487743377686, "logps/chosen": -93.81151580810547, "logps/rejected": -380.64202880859375, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": -3.7711286544799805, "rewards/margins": 7.698395729064941, "rewards/rejected": -11.469524383544922, "step": 9940 }, { "epoch": 1.55, "learning_rate": 6.856202085170392e-06, "logits/chosen": -3.012712240219116, "logits/rejected": -3.295638084411621, "logps/chosen": -146.47802734375, "logps/rejected": -445.85687255859375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.113384962081909, "rewards/margins": 9.339789390563965, "rewards/rejected": -12.453174591064453, "step": 9941 }, { "epoch": 1.55, "learning_rate": 6.855468644639245e-06, "logits/chosen": -2.7980566024780273, "logits/rejected": -2.046762466430664, "logps/chosen": -216.52743530273438, "logps/rejected": -78.64166259765625, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -0.6493206024169922, "rewards/margins": 4.497785568237305, "rewards/rejected": -5.147106170654297, "step": 9942 }, { "epoch": 1.55, "learning_rate": 6.854735204108097e-06, "logits/chosen": -3.415191411972046, "logits/rejected": -3.249117612838745, "logps/chosen": -320.0428771972656, "logps/rejected": -338.27825927734375, "loss": 0.8997, "rewards/accuracies": 0.5, "rewards/chosen": -7.507901191711426, "rewards/margins": 1.324721336364746, "rewards/rejected": -8.832622528076172, "step": 9943 }, { "epoch": 1.55, "learning_rate": 6.854001763576949e-06, "logits/chosen": -2.9972221851348877, "logits/rejected": -2.269315719604492, "logps/chosen": -283.42266845703125, "logps/rejected": -376.95806884765625, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": -2.30560302734375, "rewards/margins": 5.33762264251709, "rewards/rejected": -7.643225193023682, "step": 9944 }, { "epoch": 1.55, "learning_rate": 6.853268323045801e-06, "logits/chosen": -1.784632682800293, "logits/rejected": -2.9619927406311035, "logps/chosen": -87.70343780517578, "logps/rejected": -277.29290771484375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.7273306846618652, "rewards/margins": 6.387032508850098, "rewards/rejected": -10.114362716674805, "step": 9945 }, { "epoch": 1.55, "learning_rate": 6.852534882514654e-06, "logits/chosen": -2.458944320678711, "logits/rejected": -3.022284507751465, "logps/chosen": -210.662109375, "logps/rejected": -314.973876953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.0386528968811035, "rewards/margins": 6.885947227478027, "rewards/rejected": -8.924600601196289, "step": 9946 }, { "epoch": 1.55, "learning_rate": 6.851801441983506e-06, "logits/chosen": -3.0215866565704346, "logits/rejected": -2.3453145027160645, "logps/chosen": -242.76632690429688, "logps/rejected": -178.12728881835938, "loss": 1.1498, "rewards/accuracies": 0.5, "rewards/chosen": -3.917166233062744, "rewards/margins": 2.070688247680664, "rewards/rejected": -5.987854480743408, "step": 9947 }, { "epoch": 1.55, "learning_rate": 6.851068001452358e-06, "logits/chosen": -2.6191563606262207, "logits/rejected": -3.0902585983276367, "logps/chosen": -103.35758972167969, "logps/rejected": -271.1700134277344, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -4.180629253387451, "rewards/margins": 5.608516693115234, "rewards/rejected": -9.789146423339844, "step": 9948 }, { "epoch": 1.55, "learning_rate": 6.85033456092121e-06, "logits/chosen": -2.421919107437134, "logits/rejected": -2.957698345184326, "logps/chosen": -194.34173583984375, "logps/rejected": -353.66510009765625, "loss": 0.07, "rewards/accuracies": 1.0, "rewards/chosen": -3.6889984607696533, "rewards/margins": 2.652143955230713, "rewards/rejected": -6.341142654418945, "step": 9949 }, { "epoch": 1.55, "learning_rate": 6.8496011203900615e-06, "logits/chosen": -1.5798652172088623, "logits/rejected": -3.150207281112671, "logps/chosen": -124.85469055175781, "logps/rejected": -318.7098388671875, "loss": 1.6647, "rewards/accuracies": 0.5, "rewards/chosen": -5.204463958740234, "rewards/margins": 2.2707226276397705, "rewards/rejected": -7.475186347961426, "step": 9950 }, { "epoch": 1.55, "learning_rate": 6.848867679858914e-06, "logits/chosen": -2.297607183456421, "logits/rejected": -2.971578598022461, "logps/chosen": -99.88986206054688, "logps/rejected": -412.09197998046875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.4563019275665283, "rewards/margins": 7.418245792388916, "rewards/rejected": -9.874547958374023, "step": 9951 }, { "epoch": 1.55, "learning_rate": 6.848134239327766e-06, "logits/chosen": -2.643941640853882, "logits/rejected": -2.8185479640960693, "logps/chosen": -162.99583435058594, "logps/rejected": -152.16204833984375, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -3.3975582122802734, "rewards/margins": 4.075446128845215, "rewards/rejected": -7.473004341125488, "step": 9952 }, { "epoch": 1.55, "learning_rate": 6.847400798796618e-06, "logits/chosen": -2.5244534015655518, "logits/rejected": -3.106403112411499, "logps/chosen": -185.82904052734375, "logps/rejected": -556.6895751953125, "loss": 1.1744, "rewards/accuracies": 0.5, "rewards/chosen": -5.074789524078369, "rewards/margins": 0.4973335266113281, "rewards/rejected": -5.572123050689697, "step": 9953 }, { "epoch": 1.55, "learning_rate": 6.84666735826547e-06, "logits/chosen": -2.1836745738983154, "logits/rejected": -2.57025408744812, "logps/chosen": -446.533203125, "logps/rejected": -579.96875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.259368896484375, "rewards/margins": 10.371038436889648, "rewards/rejected": -12.630406379699707, "step": 9954 }, { "epoch": 1.55, "learning_rate": 6.845933917734323e-06, "logits/chosen": -2.794776201248169, "logits/rejected": -2.806539297103882, "logps/chosen": -209.13241577148438, "logps/rejected": -280.64544677734375, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -3.444824695587158, "rewards/margins": 4.8909220695495605, "rewards/rejected": -8.335746765136719, "step": 9955 }, { "epoch": 1.55, "learning_rate": 6.8452004772031745e-06, "logits/chosen": -2.4592361450195312, "logits/rejected": -2.8658804893493652, "logps/chosen": -78.10663604736328, "logps/rejected": -368.7318115234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.8636605739593506, "rewards/margins": 10.657366752624512, "rewards/rejected": -12.521026611328125, "step": 9956 }, { "epoch": 1.55, "learning_rate": 6.844467036672026e-06, "logits/chosen": -2.791496515274048, "logits/rejected": -2.598989725112915, "logps/chosen": -321.8067626953125, "logps/rejected": -358.6206970214844, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -1.7680389881134033, "rewards/margins": 7.3248114585876465, "rewards/rejected": -9.092850685119629, "step": 9957 }, { "epoch": 1.55, "learning_rate": 6.843733596140878e-06, "logits/chosen": -2.7051656246185303, "logits/rejected": -2.885488271713257, "logps/chosen": -130.61868286132812, "logps/rejected": -313.17730712890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.729152679443359, "rewards/margins": 8.906232833862305, "rewards/rejected": -13.635385513305664, "step": 9958 }, { "epoch": 1.55, "learning_rate": 6.843000155609731e-06, "logits/chosen": -2.7951667308807373, "logits/rejected": -2.199112892150879, "logps/chosen": -365.3269348144531, "logps/rejected": -307.47320556640625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.6085586547851562, "rewards/margins": 6.091183662414551, "rewards/rejected": -8.699742317199707, "step": 9959 }, { "epoch": 1.55, "learning_rate": 6.842266715078583e-06, "logits/chosen": -2.2689883708953857, "logits/rejected": -3.1675541400909424, "logps/chosen": -241.14634704589844, "logps/rejected": -340.4699401855469, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -4.155508518218994, "rewards/margins": 5.6482744216918945, "rewards/rejected": -9.803783416748047, "step": 9960 }, { "epoch": 1.55, "learning_rate": 6.8415332745474355e-06, "logits/chosen": -1.543686866760254, "logits/rejected": -2.556166648864746, "logps/chosen": -146.8209228515625, "logps/rejected": -282.0518798828125, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -3.3945841789245605, "rewards/margins": 8.504117965698242, "rewards/rejected": -11.898702621459961, "step": 9961 }, { "epoch": 1.55, "learning_rate": 6.840799834016287e-06, "logits/chosen": -2.099431276321411, "logits/rejected": -2.668518543243408, "logps/chosen": -205.10580444335938, "logps/rejected": -221.28201293945312, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.723259925842285, "rewards/margins": 6.305150032043457, "rewards/rejected": -10.028409957885742, "step": 9962 }, { "epoch": 1.55, "learning_rate": 6.840066393485139e-06, "logits/chosen": -2.7954092025756836, "logits/rejected": -1.7154481410980225, "logps/chosen": -257.8885803222656, "logps/rejected": -156.3961639404297, "loss": 0.6083, "rewards/accuracies": 0.5, "rewards/chosen": -4.179192066192627, "rewards/margins": 1.5589940547943115, "rewards/rejected": -5.738186359405518, "step": 9963 }, { "epoch": 1.55, "learning_rate": 6.839332952953992e-06, "logits/chosen": -3.059354066848755, "logits/rejected": -2.5645318031311035, "logps/chosen": -244.736572265625, "logps/rejected": -199.06146240234375, "loss": 0.3391, "rewards/accuracies": 1.0, "rewards/chosen": -3.3485543727874756, "rewards/margins": 0.9077750444412231, "rewards/rejected": -4.256329536437988, "step": 9964 }, { "epoch": 1.55, "learning_rate": 6.838599512422844e-06, "logits/chosen": -2.2007391452789307, "logits/rejected": -2.9872734546661377, "logps/chosen": -453.61724853515625, "logps/rejected": -650.326171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.5549721717834473, "rewards/margins": 7.836623191833496, "rewards/rejected": -10.391595840454102, "step": 9965 }, { "epoch": 1.55, "learning_rate": 6.837866071891696e-06, "logits/chosen": -2.777888059616089, "logits/rejected": -2.679995536804199, "logps/chosen": -163.44125366210938, "logps/rejected": -394.47576904296875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -3.582833766937256, "rewards/margins": 8.534515380859375, "rewards/rejected": -12.117348670959473, "step": 9966 }, { "epoch": 1.55, "learning_rate": 6.837132631360548e-06, "logits/chosen": -2.790250301361084, "logits/rejected": -2.6287002563476562, "logps/chosen": -441.27325439453125, "logps/rejected": -474.27679443359375, "loss": 0.0929, "rewards/accuracies": 1.0, "rewards/chosen": -6.270915985107422, "rewards/margins": 3.993046998977661, "rewards/rejected": -10.263962745666504, "step": 9967 }, { "epoch": 1.55, "learning_rate": 6.8363991908294e-06, "logits/chosen": -1.7144107818603516, "logits/rejected": -2.7476329803466797, "logps/chosen": -232.80838012695312, "logps/rejected": -454.7873229980469, "loss": 0.0467, "rewards/accuracies": 1.0, "rewards/chosen": -2.956463098526001, "rewards/margins": 5.739933013916016, "rewards/rejected": -8.696395874023438, "step": 9968 }, { "epoch": 1.55, "learning_rate": 6.835665750298252e-06, "logits/chosen": -2.8010072708129883, "logits/rejected": -1.9249439239501953, "logps/chosen": -310.8426208496094, "logps/rejected": -141.79928588867188, "loss": 0.8133, "rewards/accuracies": 0.5, "rewards/chosen": -8.740754127502441, "rewards/margins": -0.13540077209472656, "rewards/rejected": -8.605353355407715, "step": 9969 }, { "epoch": 1.55, "learning_rate": 6.834932309767104e-06, "logits/chosen": -2.739229440689087, "logits/rejected": -2.9663374423980713, "logps/chosen": -91.18206024169922, "logps/rejected": -336.7930908203125, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": -2.8242475986480713, "rewards/margins": 4.063688278198242, "rewards/rejected": -6.887935638427734, "step": 9970 }, { "epoch": 1.55, "learning_rate": 6.834198869235956e-06, "logits/chosen": -2.721003532409668, "logits/rejected": -3.0745491981506348, "logps/chosen": -113.97261810302734, "logps/rejected": -362.3807373046875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.5888705253601074, "rewards/margins": 8.23267936706543, "rewards/rejected": -11.821550369262695, "step": 9971 }, { "epoch": 1.55, "learning_rate": 6.833465428704808e-06, "logits/chosen": -3.206975221633911, "logits/rejected": -2.841827630996704, "logps/chosen": -207.95556640625, "logps/rejected": -171.003173828125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.7762129306793213, "rewards/margins": 6.878175258636475, "rewards/rejected": -7.654388427734375, "step": 9972 }, { "epoch": 1.55, "learning_rate": 6.832731988173661e-06, "logits/chosen": -2.4668498039245605, "logits/rejected": -3.0646018981933594, "logps/chosen": -428.0522155761719, "logps/rejected": -541.742431640625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.4444732666015625, "rewards/margins": 6.82213830947876, "rewards/rejected": -10.266611099243164, "step": 9973 }, { "epoch": 1.55, "learning_rate": 6.8319985476425125e-06, "logits/chosen": -2.8047521114349365, "logits/rejected": -2.4766509532928467, "logps/chosen": -168.54489135742188, "logps/rejected": -213.24366760253906, "loss": 0.1405, "rewards/accuracies": 1.0, "rewards/chosen": -4.582852363586426, "rewards/margins": 2.973102569580078, "rewards/rejected": -7.555954933166504, "step": 9974 }, { "epoch": 1.55, "learning_rate": 6.831265107111364e-06, "logits/chosen": -2.312751531600952, "logits/rejected": -2.75494122505188, "logps/chosen": -125.46607971191406, "logps/rejected": -204.49830627441406, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.6558446884155273, "rewards/margins": 6.7837395668029785, "rewards/rejected": -9.439584732055664, "step": 9975 }, { "epoch": 1.55, "learning_rate": 6.830531666580216e-06, "logits/chosen": -2.848374128341675, "logits/rejected": -2.9663121700286865, "logps/chosen": -259.32183837890625, "logps/rejected": -266.97283935546875, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -3.4839258193969727, "rewards/margins": 6.039921283721924, "rewards/rejected": -9.523847579956055, "step": 9976 }, { "epoch": 1.55, "learning_rate": 6.829798226049069e-06, "logits/chosen": -2.906071901321411, "logits/rejected": -2.8427956104278564, "logps/chosen": -125.99256896972656, "logps/rejected": -127.4320068359375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9268608093261719, "rewards/margins": 5.638406276702881, "rewards/rejected": -7.565267086029053, "step": 9977 }, { "epoch": 1.55, "learning_rate": 6.829064785517922e-06, "logits/chosen": -0.91005939245224, "logits/rejected": -1.5109567642211914, "logps/chosen": -221.2738037109375, "logps/rejected": -326.4187927246094, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.158205509185791, "rewards/margins": 10.592489242553711, "rewards/rejected": -12.750694274902344, "step": 9978 }, { "epoch": 1.55, "learning_rate": 6.828331344986774e-06, "logits/chosen": -2.4354567527770996, "logits/rejected": -2.114323139190674, "logps/chosen": -225.29885864257812, "logps/rejected": -335.2875671386719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.0868401527404785, "rewards/margins": 8.544925689697266, "rewards/rejected": -11.631765365600586, "step": 9979 }, { "epoch": 1.55, "learning_rate": 6.8275979044556255e-06, "logits/chosen": -2.8410465717315674, "logits/rejected": -1.749687910079956, "logps/chosen": -372.6298828125, "logps/rejected": -248.3129425048828, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.0832009315490723, "rewards/margins": 6.770510196685791, "rewards/rejected": -9.853711128234863, "step": 9980 }, { "epoch": 1.55, "learning_rate": 6.826864463924477e-06, "logits/chosen": -2.966165781021118, "logits/rejected": -2.82669734954834, "logps/chosen": -1043.603515625, "logps/rejected": -743.3084716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3724822998046875, "rewards/margins": 9.606021881103516, "rewards/rejected": -12.978504180908203, "step": 9981 }, { "epoch": 1.55, "learning_rate": 6.82613102339333e-06, "logits/chosen": -2.702814817428589, "logits/rejected": -1.204102873802185, "logps/chosen": -307.4951171875, "logps/rejected": -179.8652801513672, "loss": 1.9293, "rewards/accuracies": 0.5, "rewards/chosen": -5.094733238220215, "rewards/margins": 0.4774906635284424, "rewards/rejected": -5.572223663330078, "step": 9982 }, { "epoch": 1.55, "learning_rate": 6.825397582862182e-06, "logits/chosen": -2.6863300800323486, "logits/rejected": -3.2572343349456787, "logps/chosen": -344.1371765136719, "logps/rejected": -476.68157958984375, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -5.453640460968018, "rewards/margins": 4.291014671325684, "rewards/rejected": -9.74465560913086, "step": 9983 }, { "epoch": 1.55, "learning_rate": 6.824664142331034e-06, "logits/chosen": -2.3364436626434326, "logits/rejected": -2.607773780822754, "logps/chosen": -196.8939208984375, "logps/rejected": -384.81646728515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.867672920227051, "rewards/margins": 7.589339256286621, "rewards/rejected": -11.457012176513672, "step": 9984 }, { "epoch": 1.55, "learning_rate": 6.823930701799886e-06, "logits/chosen": -3.1718380451202393, "logits/rejected": -2.8598263263702393, "logps/chosen": -99.00491333007812, "logps/rejected": -185.8458251953125, "loss": 0.4893, "rewards/accuracies": 0.5, "rewards/chosen": -3.0816617012023926, "rewards/margins": 5.05255651473999, "rewards/rejected": -8.134218215942383, "step": 9985 }, { "epoch": 1.55, "learning_rate": 6.8231972612687384e-06, "logits/chosen": -2.3538293838500977, "logits/rejected": -2.9214227199554443, "logps/chosen": -54.72425079345703, "logps/rejected": -260.2839660644531, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.4370203018188477, "rewards/margins": 5.870034694671631, "rewards/rejected": -9.30705451965332, "step": 9986 }, { "epoch": 1.55, "learning_rate": 6.82246382073759e-06, "logits/chosen": -2.6248786449432373, "logits/rejected": -3.00502610206604, "logps/chosen": -146.64962768554688, "logps/rejected": -391.3776550292969, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.432677745819092, "rewards/margins": 6.637036323547363, "rewards/rejected": -11.069713592529297, "step": 9987 }, { "epoch": 1.55, "learning_rate": 6.821730380206442e-06, "logits/chosen": -2.909642219543457, "logits/rejected": -2.644575357437134, "logps/chosen": -426.35955810546875, "logps/rejected": -364.0622253417969, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.2938263416290283, "rewards/margins": 8.179266929626465, "rewards/rejected": -11.47309398651123, "step": 9988 }, { "epoch": 1.55, "learning_rate": 6.820996939675294e-06, "logits/chosen": -2.990253210067749, "logits/rejected": -2.998366355895996, "logps/chosen": -176.5858154296875, "logps/rejected": -236.56732177734375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.2011945247650146, "rewards/margins": 5.819517135620117, "rewards/rejected": -8.020711898803711, "step": 9989 }, { "epoch": 1.55, "learning_rate": 6.820263499144146e-06, "logits/chosen": -2.6254544258117676, "logits/rejected": -3.261948823928833, "logps/chosen": -85.81805419921875, "logps/rejected": -322.1697692871094, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9541749358177185, "rewards/margins": 8.92392349243164, "rewards/rejected": -9.878098487854004, "step": 9990 }, { "epoch": 1.55, "learning_rate": 6.819530058612999e-06, "logits/chosen": -2.7726285457611084, "logits/rejected": -3.137620449066162, "logps/chosen": -126.42058563232422, "logps/rejected": -222.13424682617188, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -2.9905667304992676, "rewards/margins": 3.807525157928467, "rewards/rejected": -6.798091888427734, "step": 9991 }, { "epoch": 1.55, "learning_rate": 6.8187966180818505e-06, "logits/chosen": -3.2422232627868652, "logits/rejected": -3.2078893184661865, "logps/chosen": -135.47998046875, "logps/rejected": -125.77661895751953, "loss": 0.632, "rewards/accuracies": 0.5, "rewards/chosen": -3.2951669692993164, "rewards/margins": 1.948967456817627, "rewards/rejected": -5.244134426116943, "step": 9992 }, { "epoch": 1.55, "learning_rate": 6.8180631775507024e-06, "logits/chosen": -2.8866426944732666, "logits/rejected": -3.0636534690856934, "logps/chosen": -492.057373046875, "logps/rejected": -466.2882995605469, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -0.9548202753067017, "rewards/margins": 9.04688835144043, "rewards/rejected": -10.001708030700684, "step": 9993 }, { "epoch": 1.55, "learning_rate": 6.817329737019555e-06, "logits/chosen": -1.3878487348556519, "logits/rejected": -2.2810094356536865, "logps/chosen": -91.62794494628906, "logps/rejected": -276.1642761230469, "loss": 0.0608, "rewards/accuracies": 1.0, "rewards/chosen": -6.463632583618164, "rewards/margins": 6.230782508850098, "rewards/rejected": -12.694415092468262, "step": 9994 }, { "epoch": 1.55, "learning_rate": 6.816596296488408e-06, "logits/chosen": -1.536178708076477, "logits/rejected": -2.9055404663085938, "logps/chosen": -98.40657043457031, "logps/rejected": -398.5802917480469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8976593017578125, "rewards/margins": 8.837239265441895, "rewards/rejected": -12.734898567199707, "step": 9995 }, { "epoch": 1.55, "learning_rate": 6.81586285595726e-06, "logits/chosen": -2.702618360519409, "logits/rejected": -1.1764494180679321, "logps/chosen": -239.80001831054688, "logps/rejected": -278.68731689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0538036823272705, "rewards/margins": 11.54411792755127, "rewards/rejected": -13.597921371459961, "step": 9996 }, { "epoch": 1.55, "learning_rate": 6.815129415426112e-06, "logits/chosen": -2.8926732540130615, "logits/rejected": -2.889308452606201, "logps/chosen": -80.99861145019531, "logps/rejected": -336.669189453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.330124378204346, "rewards/margins": 11.1968412399292, "rewards/rejected": -15.526966094970703, "step": 9997 }, { "epoch": 1.55, "learning_rate": 6.8143959748949635e-06, "logits/chosen": -1.8659512996673584, "logits/rejected": -2.7672524452209473, "logps/chosen": -168.29922485351562, "logps/rejected": -291.21099853515625, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": -4.75647497177124, "rewards/margins": 4.28537654876709, "rewards/rejected": -9.041851043701172, "step": 9998 }, { "epoch": 1.56, "learning_rate": 6.813662534363815e-06, "logits/chosen": -2.75514817237854, "logits/rejected": -1.80658757686615, "logps/chosen": -377.46258544921875, "logps/rejected": -331.7532653808594, "loss": 0.3215, "rewards/accuracies": 1.0, "rewards/chosen": -5.625163555145264, "rewards/margins": 4.9236249923706055, "rewards/rejected": -10.548788070678711, "step": 9999 }, { "epoch": 1.56, "learning_rate": 6.812929093832668e-06, "logits/chosen": -3.207430362701416, "logits/rejected": -3.407712459564209, "logps/chosen": -56.568397521972656, "logps/rejected": -195.93870544433594, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.046405792236328, "rewards/margins": 6.918646812438965, "rewards/rejected": -8.965052604675293, "step": 10000 }, { "epoch": 1.56, "learning_rate": 6.81219565330152e-06, "logits/chosen": -1.1611052751541138, "logits/rejected": -2.5216665267944336, "logps/chosen": -123.3271713256836, "logps/rejected": -507.4130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4791653156280518, "rewards/margins": 12.18346881866455, "rewards/rejected": -13.662633895874023, "step": 10001 }, { "epoch": 1.56, "learning_rate": 6.811462212770372e-06, "logits/chosen": -3.0678491592407227, "logits/rejected": -3.055478572845459, "logps/chosen": -265.7159118652344, "logps/rejected": -256.9974365234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.114894151687622, "rewards/margins": 6.54508638381958, "rewards/rejected": -9.659980773925781, "step": 10002 }, { "epoch": 1.56, "learning_rate": 6.810728772239224e-06, "logits/chosen": -2.4984045028686523, "logits/rejected": -2.8953685760498047, "logps/chosen": -300.7672424316406, "logps/rejected": -414.96319580078125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -5.181894302368164, "rewards/margins": 5.719776630401611, "rewards/rejected": -10.901670455932617, "step": 10003 }, { "epoch": 1.56, "learning_rate": 6.8099953317080765e-06, "logits/chosen": -1.789459466934204, "logits/rejected": -2.9292726516723633, "logps/chosen": -146.83462524414062, "logps/rejected": -491.3532409667969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.174320697784424, "rewards/margins": 8.511397361755371, "rewards/rejected": -10.685718536376953, "step": 10004 }, { "epoch": 1.56, "learning_rate": 6.809261891176928e-06, "logits/chosen": -2.4288523197174072, "logits/rejected": -2.3436264991760254, "logps/chosen": -120.49610137939453, "logps/rejected": -280.84783935546875, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -4.095203876495361, "rewards/margins": 9.590818405151367, "rewards/rejected": -13.68602180480957, "step": 10005 }, { "epoch": 1.56, "learning_rate": 6.80852845064578e-06, "logits/chosen": -2.8364903926849365, "logits/rejected": -2.8951847553253174, "logps/chosen": -217.53761291503906, "logps/rejected": -217.5440673828125, "loss": 0.9594, "rewards/accuracies": 0.5, "rewards/chosen": -2.9757614135742188, "rewards/margins": 2.2612624168395996, "rewards/rejected": -5.237023830413818, "step": 10006 }, { "epoch": 1.56, "learning_rate": 6.807795010114632e-06, "logits/chosen": -3.002714157104492, "logits/rejected": -2.316437244415283, "logps/chosen": -132.85122680664062, "logps/rejected": -128.06170654296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.04134559631347656, "rewards/margins": 9.034135818481445, "rewards/rejected": -8.992790222167969, "step": 10007 }, { "epoch": 1.56, "learning_rate": 6.807061569583485e-06, "logits/chosen": -1.6756755113601685, "logits/rejected": -2.7952728271484375, "logps/chosen": -137.14892578125, "logps/rejected": -567.3741455078125, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -4.197614669799805, "rewards/margins": 7.035109519958496, "rewards/rejected": -11.2327241897583, "step": 10008 }, { "epoch": 1.56, "learning_rate": 6.806328129052337e-06, "logits/chosen": -2.6416733264923096, "logits/rejected": -2.345876455307007, "logps/chosen": -479.0093994140625, "logps/rejected": -486.1361083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1883701086044312, "rewards/margins": 11.800570487976074, "rewards/rejected": -12.988941192626953, "step": 10009 }, { "epoch": 1.56, "learning_rate": 6.805594688521189e-06, "logits/chosen": -2.5466701984405518, "logits/rejected": -2.3097925186157227, "logps/chosen": -160.51622009277344, "logps/rejected": -329.2442626953125, "loss": 0.4193, "rewards/accuracies": 0.5, "rewards/chosen": -4.720724582672119, "rewards/margins": 5.939983367919922, "rewards/rejected": -10.660707473754883, "step": 10010 }, { "epoch": 1.56, "learning_rate": 6.804861247990041e-06, "logits/chosen": -0.8183380365371704, "logits/rejected": -2.6265976428985596, "logps/chosen": -91.72969818115234, "logps/rejected": -316.6453857421875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.9673776626586914, "rewards/margins": 5.778130531311035, "rewards/rejected": -9.745508193969727, "step": 10011 }, { "epoch": 1.56, "learning_rate": 6.804127807458893e-06, "logits/chosen": -2.7551398277282715, "logits/rejected": -2.1871113777160645, "logps/chosen": -353.22637939453125, "logps/rejected": -270.75341796875, "loss": 2.3502, "rewards/accuracies": 0.5, "rewards/chosen": -4.215816974639893, "rewards/margins": 0.1554279327392578, "rewards/rejected": -4.37124490737915, "step": 10012 }, { "epoch": 1.56, "learning_rate": 6.803394366927746e-06, "logits/chosen": -2.3351263999938965, "logits/rejected": -2.7457361221313477, "logps/chosen": -238.5635986328125, "logps/rejected": -370.755615234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4405922889709473, "rewards/margins": 8.682727813720703, "rewards/rejected": -10.123319625854492, "step": 10013 }, { "epoch": 1.56, "learning_rate": 6.802660926396598e-06, "logits/chosen": -2.723904609680176, "logits/rejected": -1.708211898803711, "logps/chosen": -742.3024291992188, "logps/rejected": -491.3515930175781, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.4974212646484375, "rewards/margins": 6.395174026489258, "rewards/rejected": -9.892595291137695, "step": 10014 }, { "epoch": 1.56, "learning_rate": 6.80192748586545e-06, "logits/chosen": -1.8474806547164917, "logits/rejected": -2.840989112854004, "logps/chosen": -205.52883911132812, "logps/rejected": -345.13629150390625, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -4.119168281555176, "rewards/margins": 5.6141791343688965, "rewards/rejected": -9.733346939086914, "step": 10015 }, { "epoch": 1.56, "learning_rate": 6.8011940453343016e-06, "logits/chosen": -1.651296854019165, "logits/rejected": -3.01242995262146, "logps/chosen": -167.68238830566406, "logps/rejected": -470.1457824707031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.156940460205078, "rewards/margins": 13.416693687438965, "rewards/rejected": -15.573633193969727, "step": 10016 }, { "epoch": 1.56, "learning_rate": 6.800460604803154e-06, "logits/chosen": -2.8569390773773193, "logits/rejected": -1.3252928256988525, "logps/chosen": -223.46194458007812, "logps/rejected": -71.63722229003906, "loss": 1.5406, "rewards/accuracies": 0.5, "rewards/chosen": -6.337644577026367, "rewards/margins": -0.7392324209213257, "rewards/rejected": -5.598412036895752, "step": 10017 }, { "epoch": 1.56, "learning_rate": 6.799727164272006e-06, "logits/chosen": -1.794457197189331, "logits/rejected": -2.7494757175445557, "logps/chosen": -214.0251922607422, "logps/rejected": -359.40252685546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.015693187713623, "rewards/margins": 9.28141975402832, "rewards/rejected": -11.297113418579102, "step": 10018 }, { "epoch": 1.56, "learning_rate": 6.798993723740858e-06, "logits/chosen": -2.993610382080078, "logits/rejected": -2.8406503200531006, "logps/chosen": -273.2899475097656, "logps/rejected": -333.14117431640625, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.6578181982040405, "rewards/margins": 8.252677917480469, "rewards/rejected": -9.91049575805664, "step": 10019 }, { "epoch": 1.56, "learning_rate": 6.79826028320971e-06, "logits/chosen": -2.81396484375, "logits/rejected": -2.102163553237915, "logps/chosen": -195.50352478027344, "logps/rejected": -195.41354370117188, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -1.7886769771575928, "rewards/margins": 4.25981330871582, "rewards/rejected": -6.048490524291992, "step": 10020 }, { "epoch": 1.56, "learning_rate": 6.797526842678562e-06, "logits/chosen": -2.5071871280670166, "logits/rejected": -2.6145780086517334, "logps/chosen": -150.5380096435547, "logps/rejected": -320.494873046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.175417423248291, "rewards/margins": 8.861614227294922, "rewards/rejected": -12.037032127380371, "step": 10021 }, { "epoch": 1.56, "learning_rate": 6.7967934021474145e-06, "logits/chosen": -2.644758462905884, "logits/rejected": -2.40081524848938, "logps/chosen": -435.25299072265625, "logps/rejected": -404.9535827636719, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -3.332197666168213, "rewards/margins": 5.776394367218018, "rewards/rejected": -9.10859203338623, "step": 10022 }, { "epoch": 1.56, "learning_rate": 6.796059961616266e-06, "logits/chosen": -2.8758931159973145, "logits/rejected": -2.730133295059204, "logps/chosen": -471.8810119628906, "logps/rejected": -377.56500244140625, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -1.493935465812683, "rewards/margins": 5.003172874450684, "rewards/rejected": -6.497107982635498, "step": 10023 }, { "epoch": 1.56, "learning_rate": 6.795326521085118e-06, "logits/chosen": -1.8038206100463867, "logits/rejected": -3.0659093856811523, "logps/chosen": -103.744873046875, "logps/rejected": -556.3800659179688, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -4.525042533874512, "rewards/margins": 6.100252151489258, "rewards/rejected": -10.62529468536377, "step": 10024 }, { "epoch": 1.56, "learning_rate": 6.79459308055397e-06, "logits/chosen": -2.720871686935425, "logits/rejected": -2.9266886711120605, "logps/chosen": -216.15957641601562, "logps/rejected": -433.2152099609375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -3.551389694213867, "rewards/margins": 6.046178817749023, "rewards/rejected": -9.59756851196289, "step": 10025 }, { "epoch": 1.56, "learning_rate": 6.793859640022823e-06, "logits/chosen": -2.9375503063201904, "logits/rejected": -2.471369981765747, "logps/chosen": -352.7413330078125, "logps/rejected": -362.40301513671875, "loss": 1.1465, "rewards/accuracies": 0.5, "rewards/chosen": -7.267778396606445, "rewards/margins": -0.3722965717315674, "rewards/rejected": -6.895482063293457, "step": 10026 }, { "epoch": 1.56, "learning_rate": 6.793126199491675e-06, "logits/chosen": -2.862950086593628, "logits/rejected": -2.0703845024108887, "logps/chosen": -241.30123901367188, "logps/rejected": -267.1812744140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7825802564620972, "rewards/margins": 7.87106990814209, "rewards/rejected": -9.653650283813477, "step": 10027 }, { "epoch": 1.56, "learning_rate": 6.792392758960527e-06, "logits/chosen": -2.699258804321289, "logits/rejected": -1.9114123582839966, "logps/chosen": -507.5703430175781, "logps/rejected": -586.93798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.826517343521118, "rewards/margins": 12.099250793457031, "rewards/rejected": -14.925768852233887, "step": 10028 }, { "epoch": 1.56, "learning_rate": 6.791659318429379e-06, "logits/chosen": -2.7363758087158203, "logits/rejected": -2.6653707027435303, "logps/chosen": -141.91195678710938, "logps/rejected": -325.08709716796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.967733383178711, "rewards/margins": 8.142467498779297, "rewards/rejected": -12.110200881958008, "step": 10029 }, { "epoch": 1.56, "learning_rate": 6.790925877898231e-06, "logits/chosen": -1.7466726303100586, "logits/rejected": -2.631072521209717, "logps/chosen": -127.08223724365234, "logps/rejected": -376.680908203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8404651880264282, "rewards/margins": 8.493942260742188, "rewards/rejected": -9.334407806396484, "step": 10030 }, { "epoch": 1.56, "learning_rate": 6.790192437367084e-06, "logits/chosen": -3.230463981628418, "logits/rejected": -2.394693613052368, "logps/chosen": -689.603515625, "logps/rejected": -373.88726806640625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.062530994415283, "rewards/margins": 7.665107250213623, "rewards/rejected": -12.727638244628906, "step": 10031 }, { "epoch": 1.56, "learning_rate": 6.789458996835936e-06, "logits/chosen": -2.464550495147705, "logits/rejected": -3.0244977474212646, "logps/chosen": -143.81350708007812, "logps/rejected": -452.70074462890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.6408538818359375, "rewards/margins": 7.180008411407471, "rewards/rejected": -10.82086181640625, "step": 10032 }, { "epoch": 1.56, "learning_rate": 6.788725556304788e-06, "logits/chosen": -1.3614109754562378, "logits/rejected": -2.7008514404296875, "logps/chosen": -105.42988586425781, "logps/rejected": -418.40985107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4305816888809204, "rewards/margins": 11.633025169372559, "rewards/rejected": -13.063606262207031, "step": 10033 }, { "epoch": 1.56, "learning_rate": 6.78799211577364e-06, "logits/chosen": -2.47562575340271, "logits/rejected": -3.0748450756073, "logps/chosen": -122.95230865478516, "logps/rejected": -254.43890380859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.980782985687256, "rewards/margins": 7.578325271606445, "rewards/rejected": -11.55910873413086, "step": 10034 }, { "epoch": 1.56, "learning_rate": 6.787258675242492e-06, "logits/chosen": -2.1453447341918945, "logits/rejected": -2.861130475997925, "logps/chosen": -167.5233154296875, "logps/rejected": -671.288818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.888810634613037, "rewards/margins": 8.8524169921875, "rewards/rejected": -12.741228103637695, "step": 10035 }, { "epoch": 1.56, "learning_rate": 6.786525234711344e-06, "logits/chosen": -1.537062406539917, "logits/rejected": -2.384305953979492, "logps/chosen": -70.86482238769531, "logps/rejected": -391.8511047363281, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.264383316040039, "rewards/margins": 8.762261390686035, "rewards/rejected": -11.026644706726074, "step": 10036 }, { "epoch": 1.56, "learning_rate": 6.785791794180196e-06, "logits/chosen": -2.5359785556793213, "logits/rejected": -2.8997533321380615, "logps/chosen": -162.66513061523438, "logps/rejected": -410.50128173828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.652900218963623, "rewards/margins": 10.193286895751953, "rewards/rejected": -12.846187591552734, "step": 10037 }, { "epoch": 1.56, "learning_rate": 6.785058353649048e-06, "logits/chosen": -3.0201239585876465, "logits/rejected": -3.101365327835083, "logps/chosen": -448.6004943847656, "logps/rejected": -507.42218017578125, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -2.167544364929199, "rewards/margins": 5.961423397064209, "rewards/rejected": -8.12896728515625, "step": 10038 }, { "epoch": 1.56, "learning_rate": 6.7843249131179e-06, "logits/chosen": -2.7607052326202393, "logits/rejected": -1.6921690702438354, "logps/chosen": -428.899658203125, "logps/rejected": -1188.6170654296875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.072797775268555, "rewards/margins": 9.168350219726562, "rewards/rejected": -15.241147994995117, "step": 10039 }, { "epoch": 1.56, "learning_rate": 6.7835914725867526e-06, "logits/chosen": -1.3014485836029053, "logits/rejected": -2.9479010105133057, "logps/chosen": -131.8667755126953, "logps/rejected": -480.1630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.026949405670166, "rewards/margins": 12.129354476928711, "rewards/rejected": -15.156303405761719, "step": 10040 }, { "epoch": 1.56, "learning_rate": 6.7828580320556044e-06, "logits/chosen": -2.744257926940918, "logits/rejected": -3.183441400527954, "logps/chosen": -308.1745300292969, "logps/rejected": -504.0496826171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2059171199798584, "rewards/margins": 9.12733268737793, "rewards/rejected": -10.33324909210205, "step": 10041 }, { "epoch": 1.56, "learning_rate": 6.782124591524456e-06, "logits/chosen": -2.0097360610961914, "logits/rejected": -2.5570390224456787, "logps/chosen": -118.38438415527344, "logps/rejected": -233.31541442871094, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.452077865600586, "rewards/margins": 6.673489570617676, "rewards/rejected": -10.125567436218262, "step": 10042 }, { "epoch": 1.56, "learning_rate": 6.781391150993308e-06, "logits/chosen": -3.024183750152588, "logits/rejected": -3.259274482727051, "logps/chosen": -70.3014144897461, "logps/rejected": -104.17753601074219, "loss": 1.2069, "rewards/accuracies": 0.5, "rewards/chosen": -3.5286784172058105, "rewards/margins": 1.9066452980041504, "rewards/rejected": -5.435323715209961, "step": 10043 }, { "epoch": 1.56, "learning_rate": 6.780657710462161e-06, "logits/chosen": -3.009110927581787, "logits/rejected": -3.0010671615600586, "logps/chosen": -293.4789733886719, "logps/rejected": -368.8543701171875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -2.0347912311553955, "rewards/margins": 6.742319107055664, "rewards/rejected": -8.77711009979248, "step": 10044 }, { "epoch": 1.56, "learning_rate": 6.779924269931013e-06, "logits/chosen": -3.0886764526367188, "logits/rejected": -2.54840087890625, "logps/chosen": -295.27783203125, "logps/rejected": -279.826904296875, "loss": 0.3996, "rewards/accuracies": 0.5, "rewards/chosen": -3.832688331604004, "rewards/margins": 3.2802023887634277, "rewards/rejected": -7.112890720367432, "step": 10045 }, { "epoch": 1.56, "learning_rate": 6.7791908293998655e-06, "logits/chosen": -3.0746614933013916, "logits/rejected": -2.8062708377838135, "logps/chosen": -213.04074096679688, "logps/rejected": -126.71861267089844, "loss": 0.3402, "rewards/accuracies": 1.0, "rewards/chosen": -3.8050272464752197, "rewards/margins": 0.9735323786735535, "rewards/rejected": -4.778559684753418, "step": 10046 }, { "epoch": 1.56, "learning_rate": 6.778457388868717e-06, "logits/chosen": -2.12548565864563, "logits/rejected": -3.2318150997161865, "logps/chosen": -34.465370178222656, "logps/rejected": -355.03460693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.3335731625556946, "rewards/margins": 11.416744232177734, "rewards/rejected": -11.750317573547363, "step": 10047 }, { "epoch": 1.56, "learning_rate": 6.77772394833757e-06, "logits/chosen": -2.6101040840148926, "logits/rejected": -3.2228269577026367, "logps/chosen": -134.60267639160156, "logps/rejected": -504.45489501953125, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -6.11870002746582, "rewards/margins": 12.410666465759277, "rewards/rejected": -18.52936553955078, "step": 10048 }, { "epoch": 1.56, "learning_rate": 6.776990507806422e-06, "logits/chosen": -2.1253890991210938, "logits/rejected": -3.0483431816101074, "logps/chosen": -83.01261901855469, "logps/rejected": -355.69976806640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.3840959072113037, "rewards/margins": 12.313382148742676, "rewards/rejected": -14.697477340698242, "step": 10049 }, { "epoch": 1.56, "learning_rate": 6.776257067275274e-06, "logits/chosen": -1.7523244619369507, "logits/rejected": -3.271836519241333, "logps/chosen": -123.76669311523438, "logps/rejected": -434.97125244140625, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -2.808664321899414, "rewards/margins": 7.157297134399414, "rewards/rejected": -9.965961456298828, "step": 10050 }, { "epoch": 1.56, "learning_rate": 6.775523626744126e-06, "logits/chosen": -2.638132333755493, "logits/rejected": -3.019339084625244, "logps/chosen": -84.70722961425781, "logps/rejected": -319.25775146484375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -3.9115638732910156, "rewards/margins": 6.9914937019348145, "rewards/rejected": -10.903057098388672, "step": 10051 }, { "epoch": 1.56, "learning_rate": 6.774790186212978e-06, "logits/chosen": -1.9276463985443115, "logits/rejected": -2.7467782497406006, "logps/chosen": -117.73481750488281, "logps/rejected": -436.2795104980469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.679152727127075, "rewards/margins": 13.075669288635254, "rewards/rejected": -16.75482177734375, "step": 10052 }, { "epoch": 1.56, "learning_rate": 6.77405674568183e-06, "logits/chosen": -2.6232235431671143, "logits/rejected": -2.1792125701904297, "logps/chosen": -356.8837890625, "logps/rejected": -335.25390625, "loss": 1.1944, "rewards/accuracies": 0.5, "rewards/chosen": -6.887301445007324, "rewards/margins": 3.2403664588928223, "rewards/rejected": -10.127668380737305, "step": 10053 }, { "epoch": 1.56, "learning_rate": 6.773323305150682e-06, "logits/chosen": -1.783042073249817, "logits/rejected": -2.873224973678589, "logps/chosen": -105.97127532958984, "logps/rejected": -300.585693359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.2887649536132812, "rewards/margins": 7.140925884246826, "rewards/rejected": -10.429691314697266, "step": 10054 }, { "epoch": 1.56, "learning_rate": 6.772589864619534e-06, "logits/chosen": -1.2462925910949707, "logits/rejected": -2.610501289367676, "logps/chosen": -171.39224243164062, "logps/rejected": -462.90374755859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.4788389205932617, "rewards/margins": 9.333695411682129, "rewards/rejected": -11.81253433227539, "step": 10055 }, { "epoch": 1.56, "learning_rate": 6.771856424088386e-06, "logits/chosen": -2.923823595046997, "logits/rejected": -2.4237372875213623, "logps/chosen": -220.22232055664062, "logps/rejected": -195.91156005859375, "loss": 0.7166, "rewards/accuracies": 0.5, "rewards/chosen": -3.323744535446167, "rewards/margins": 2.134103775024414, "rewards/rejected": -5.45784854888916, "step": 10056 }, { "epoch": 1.56, "learning_rate": 6.771122983557239e-06, "logits/chosen": -2.9862523078918457, "logits/rejected": -1.824333667755127, "logps/chosen": -298.63629150390625, "logps/rejected": -102.0867691040039, "loss": 3.2329, "rewards/accuracies": 0.5, "rewards/chosen": -6.141798973083496, "rewards/margins": -2.348529100418091, "rewards/rejected": -3.7932701110839844, "step": 10057 }, { "epoch": 1.56, "learning_rate": 6.770389543026091e-06, "logits/chosen": -2.866738796234131, "logits/rejected": -2.4649181365966797, "logps/chosen": -176.05496215820312, "logps/rejected": -134.59405517578125, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": -4.489028453826904, "rewards/margins": 4.1367106437683105, "rewards/rejected": -8.625739097595215, "step": 10058 }, { "epoch": 1.56, "learning_rate": 6.7696561024949425e-06, "logits/chosen": -2.695089101791382, "logits/rejected": -2.8535828590393066, "logps/chosen": -166.6175537109375, "logps/rejected": -261.1070556640625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.8611254692077637, "rewards/margins": 6.364042282104492, "rewards/rejected": -9.225168228149414, "step": 10059 }, { "epoch": 1.56, "learning_rate": 6.768922661963794e-06, "logits/chosen": -2.5441529750823975, "logits/rejected": -2.9578685760498047, "logps/chosen": -221.56080627441406, "logps/rejected": -533.4557495117188, "loss": 0.7184, "rewards/accuracies": 0.5, "rewards/chosen": -7.432592868804932, "rewards/margins": 6.01828145980835, "rewards/rejected": -13.450874328613281, "step": 10060 }, { "epoch": 1.56, "learning_rate": 6.768189221432646e-06, "logits/chosen": -2.246781587600708, "logits/rejected": -2.8325047492980957, "logps/chosen": -280.3803405761719, "logps/rejected": -501.8471374511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.534178256988525, "rewards/margins": 10.096595764160156, "rewards/rejected": -14.630773544311523, "step": 10061 }, { "epoch": 1.56, "learning_rate": 6.767455780901499e-06, "logits/chosen": -2.7330846786499023, "logits/rejected": -2.2593071460723877, "logps/chosen": -311.31951904296875, "logps/rejected": -227.69772338867188, "loss": 1.763, "rewards/accuracies": 0.0, "rewards/chosen": -7.907617568969727, "rewards/margins": -1.5650651454925537, "rewards/rejected": -6.342552185058594, "step": 10062 }, { "epoch": 1.57, "learning_rate": 6.766722340370352e-06, "logits/chosen": -2.2879631519317627, "logits/rejected": -3.0804059505462646, "logps/chosen": -67.614501953125, "logps/rejected": -255.0386962890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.679567575454712, "rewards/margins": 9.923823356628418, "rewards/rejected": -12.603391647338867, "step": 10063 }, { "epoch": 1.57, "learning_rate": 6.7659888998392036e-06, "logits/chosen": -2.9495484828948975, "logits/rejected": -2.9883921146392822, "logps/chosen": -91.30770874023438, "logps/rejected": -139.64869689941406, "loss": 0.1716, "rewards/accuracies": 1.0, "rewards/chosen": -5.974571228027344, "rewards/margins": 3.346249580383301, "rewards/rejected": -9.320820808410645, "step": 10064 }, { "epoch": 1.57, "learning_rate": 6.7652554593080555e-06, "logits/chosen": -2.8858728408813477, "logits/rejected": -2.349029302597046, "logps/chosen": -389.37890625, "logps/rejected": -378.98760986328125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -5.894757270812988, "rewards/margins": 8.282136917114258, "rewards/rejected": -14.176895141601562, "step": 10065 }, { "epoch": 1.57, "learning_rate": 6.764522018776908e-06, "logits/chosen": -3.062814950942993, "logits/rejected": -2.811553478240967, "logps/chosen": -482.8427734375, "logps/rejected": -353.8860168457031, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/chosen": -0.1901611089706421, "rewards/margins": 4.601963996887207, "rewards/rejected": -4.7921247482299805, "step": 10066 }, { "epoch": 1.57, "learning_rate": 6.76378857824576e-06, "logits/chosen": -2.881049633026123, "logits/rejected": -2.5936734676361084, "logps/chosen": -156.83041381835938, "logps/rejected": -212.37686157226562, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.027643203735352, "rewards/margins": 7.254883766174316, "rewards/rejected": -11.282526969909668, "step": 10067 }, { "epoch": 1.57, "learning_rate": 6.763055137714612e-06, "logits/chosen": -2.1640312671661377, "logits/rejected": -2.96818470954895, "logps/chosen": -225.61907958984375, "logps/rejected": -404.471923828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": 0.19432830810546875, "rewards/margins": 11.27530288696289, "rewards/rejected": -11.080974578857422, "step": 10068 }, { "epoch": 1.57, "learning_rate": 6.762321697183464e-06, "logits/chosen": -2.6192004680633545, "logits/rejected": -2.587906837463379, "logps/chosen": -629.2257690429688, "logps/rejected": -514.028564453125, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -3.168747901916504, "rewards/margins": 5.907556533813477, "rewards/rejected": -9.076305389404297, "step": 10069 }, { "epoch": 1.57, "learning_rate": 6.761588256652316e-06, "logits/chosen": -2.5709409713745117, "logits/rejected": -1.4340773820877075, "logps/chosen": -296.6561584472656, "logps/rejected": -255.71800231933594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.09801328182220459, "rewards/margins": 9.843863487243652, "rewards/rejected": -9.941876411437988, "step": 10070 }, { "epoch": 1.57, "learning_rate": 6.760854816121168e-06, "logits/chosen": -2.769256353378296, "logits/rejected": -2.2781014442443848, "logps/chosen": -302.228759765625, "logps/rejected": -168.72689819335938, "loss": 1.9848, "rewards/accuracies": 0.5, "rewards/chosen": -5.919539928436279, "rewards/margins": 0.8385320901870728, "rewards/rejected": -6.758072376251221, "step": 10071 }, { "epoch": 1.57, "learning_rate": 6.76012137559002e-06, "logits/chosen": -2.871375322341919, "logits/rejected": -2.6404900550842285, "logps/chosen": -441.4418640136719, "logps/rejected": -461.2393493652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6072990894317627, "rewards/margins": 11.07987117767334, "rewards/rejected": -11.687170028686523, "step": 10072 }, { "epoch": 1.57, "learning_rate": 6.759387935058872e-06, "logits/chosen": -2.8408894538879395, "logits/rejected": -3.0743141174316406, "logps/chosen": -253.8440399169922, "logps/rejected": -333.4616394042969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.908437252044678, "rewards/margins": 8.995124816894531, "rewards/rejected": -13.90356159210205, "step": 10073 }, { "epoch": 1.57, "learning_rate": 6.758654494527724e-06, "logits/chosen": -2.320674180984497, "logits/rejected": -2.2730321884155273, "logps/chosen": -941.306884765625, "logps/rejected": -662.3572387695312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.080313205718994, "rewards/margins": 9.334173202514648, "rewards/rejected": -14.4144868850708, "step": 10074 }, { "epoch": 1.57, "learning_rate": 6.757921053996577e-06, "logits/chosen": -2.6973202228546143, "logits/rejected": -2.0348987579345703, "logps/chosen": -399.7432556152344, "logps/rejected": -369.7808837890625, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -6.341414451599121, "rewards/margins": 4.666008949279785, "rewards/rejected": -11.007423400878906, "step": 10075 }, { "epoch": 1.57, "learning_rate": 6.757187613465429e-06, "logits/chosen": -2.3930060863494873, "logits/rejected": -2.8195080757141113, "logps/chosen": -176.24508666992188, "logps/rejected": -335.1513671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.6922523975372314, "rewards/margins": 9.320302963256836, "rewards/rejected": -12.012556076049805, "step": 10076 }, { "epoch": 1.57, "learning_rate": 6.7564541729342805e-06, "logits/chosen": -2.8904001712799072, "logits/rejected": -2.9109046459198, "logps/chosen": -194.6884002685547, "logps/rejected": -160.32614135742188, "loss": 0.2336, "rewards/accuracies": 1.0, "rewards/chosen": -2.5833208560943604, "rewards/margins": 2.387277841567993, "rewards/rejected": -4.9705986976623535, "step": 10077 }, { "epoch": 1.57, "learning_rate": 6.755720732403132e-06, "logits/chosen": -2.417074203491211, "logits/rejected": -2.9675047397613525, "logps/chosen": -160.13478088378906, "logps/rejected": -358.5102233886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6460554599761963, "rewards/margins": 11.207859992980957, "rewards/rejected": -11.85391616821289, "step": 10078 }, { "epoch": 1.57, "learning_rate": 6.754987291871985e-06, "logits/chosen": -2.1809840202331543, "logits/rejected": -2.1257286071777344, "logps/chosen": -109.57731628417969, "logps/rejected": -261.79498291015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.839557647705078, "rewards/margins": 7.819980144500732, "rewards/rejected": -11.659538269042969, "step": 10079 }, { "epoch": 1.57, "learning_rate": 6.754253851340838e-06, "logits/chosen": -2.1433119773864746, "logits/rejected": -1.6215450763702393, "logps/chosen": -175.34149169921875, "logps/rejected": -387.4685363769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9993042945861816, "rewards/margins": 11.913795471191406, "rewards/rejected": -14.913100242614746, "step": 10080 }, { "epoch": 1.57, "learning_rate": 6.75352041080969e-06, "logits/chosen": -2.9159481525421143, "logits/rejected": -3.3972229957580566, "logps/chosen": -153.85008239746094, "logps/rejected": -442.928955078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.201443672180176, "rewards/margins": 6.459569454193115, "rewards/rejected": -9.661012649536133, "step": 10081 }, { "epoch": 1.57, "learning_rate": 6.752786970278542e-06, "logits/chosen": -2.6004798412323, "logits/rejected": -2.6079745292663574, "logps/chosen": -212.8616180419922, "logps/rejected": -414.79998779296875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.427508592605591, "rewards/margins": 9.762641906738281, "rewards/rejected": -12.190150260925293, "step": 10082 }, { "epoch": 1.57, "learning_rate": 6.7520535297473935e-06, "logits/chosen": -3.063080072402954, "logits/rejected": -3.1562538146972656, "logps/chosen": -185.10223388671875, "logps/rejected": -309.02825927734375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -3.7400054931640625, "rewards/margins": 6.428790092468262, "rewards/rejected": -10.168795585632324, "step": 10083 }, { "epoch": 1.57, "learning_rate": 6.751320089216246e-06, "logits/chosen": -0.8343552947044373, "logits/rejected": -2.8127193450927734, "logps/chosen": -190.510986328125, "logps/rejected": -589.739990234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.9045052528381348, "rewards/margins": 8.47205638885498, "rewards/rejected": -10.376561164855957, "step": 10084 }, { "epoch": 1.57, "learning_rate": 6.750586648685098e-06, "logits/chosen": -2.2243194580078125, "logits/rejected": -2.9572906494140625, "logps/chosen": -399.94903564453125, "logps/rejected": -441.99188232421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.309606552124023, "rewards/margins": 8.312651634216309, "rewards/rejected": -13.622258186340332, "step": 10085 }, { "epoch": 1.57, "learning_rate": 6.74985320815395e-06, "logits/chosen": -1.245592713356018, "logits/rejected": -3.0184566974639893, "logps/chosen": -145.47813415527344, "logps/rejected": -552.78173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3200652599334717, "rewards/margins": 10.411441802978516, "rewards/rejected": -13.73150634765625, "step": 10086 }, { "epoch": 1.57, "learning_rate": 6.749119767622802e-06, "logits/chosen": -1.609937310218811, "logits/rejected": -2.4421677589416504, "logps/chosen": -280.1844482421875, "logps/rejected": -420.4222412109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.5553786754608154, "rewards/margins": 8.730707168579102, "rewards/rejected": -12.286086082458496, "step": 10087 }, { "epoch": 1.57, "learning_rate": 6.748386327091654e-06, "logits/chosen": -3.0961735248565674, "logits/rejected": -1.9113061428070068, "logps/chosen": -308.86932373046875, "logps/rejected": -127.48124694824219, "loss": 3.8683, "rewards/accuracies": 0.5, "rewards/chosen": -6.909182548522949, "rewards/margins": -1.5809986591339111, "rewards/rejected": -5.328184127807617, "step": 10088 }, { "epoch": 1.57, "learning_rate": 6.7476528865605065e-06, "logits/chosen": -2.2640631198883057, "logits/rejected": -2.9706175327301025, "logps/chosen": -236.0902862548828, "logps/rejected": -384.53033447265625, "loss": 0.0877, "rewards/accuracies": 1.0, "rewards/chosen": -5.439253807067871, "rewards/margins": 4.573975563049316, "rewards/rejected": -10.013229370117188, "step": 10089 }, { "epoch": 1.57, "learning_rate": 6.746919446029358e-06, "logits/chosen": -2.111654281616211, "logits/rejected": -2.8735289573669434, "logps/chosen": -287.4327392578125, "logps/rejected": -444.61541748046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.817736864089966, "rewards/margins": 9.007600784301758, "rewards/rejected": -12.825337409973145, "step": 10090 }, { "epoch": 1.57, "learning_rate": 6.74618600549821e-06, "logits/chosen": -3.0306339263916016, "logits/rejected": -3.126648426055908, "logps/chosen": -178.03628540039062, "logps/rejected": -430.22088623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.982969760894775, "rewards/margins": 11.463912963867188, "rewards/rejected": -16.446882247924805, "step": 10091 }, { "epoch": 1.57, "learning_rate": 6.745452564967062e-06, "logits/chosen": -2.8091845512390137, "logits/rejected": -2.847454786300659, "logps/chosen": -187.53866577148438, "logps/rejected": -313.997802734375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.0736889839172363, "rewards/margins": 8.26505184173584, "rewards/rejected": -10.338741302490234, "step": 10092 }, { "epoch": 1.57, "learning_rate": 6.744719124435915e-06, "logits/chosen": -2.418771505355835, "logits/rejected": -2.9243319034576416, "logps/chosen": -157.33935546875, "logps/rejected": -389.1441650390625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.7505526542663574, "rewards/margins": 6.119223594665527, "rewards/rejected": -9.869775772094727, "step": 10093 }, { "epoch": 1.57, "learning_rate": 6.743985683904767e-06, "logits/chosen": -1.7954975366592407, "logits/rejected": -2.9364633560180664, "logps/chosen": -36.49346160888672, "logps/rejected": -250.50604248046875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -2.1639270782470703, "rewards/margins": 5.391305923461914, "rewards/rejected": -7.555233001708984, "step": 10094 }, { "epoch": 1.57, "learning_rate": 6.7432522433736186e-06, "logits/chosen": -3.0851552486419678, "logits/rejected": -2.7014689445495605, "logps/chosen": -224.7822723388672, "logps/rejected": -190.4579315185547, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -4.457675933837891, "rewards/margins": 4.754241943359375, "rewards/rejected": -9.211917877197266, "step": 10095 }, { "epoch": 1.57, "learning_rate": 6.742518802842471e-06, "logits/chosen": -1.5951963663101196, "logits/rejected": -2.894540786743164, "logps/chosen": -139.16680908203125, "logps/rejected": -169.455322265625, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -3.1955339908599854, "rewards/margins": 4.5169997215271, "rewards/rejected": -7.712533950805664, "step": 10096 }, { "epoch": 1.57, "learning_rate": 6.741785362311324e-06, "logits/chosen": -2.896052122116089, "logits/rejected": -1.337052822113037, "logps/chosen": -276.6792297363281, "logps/rejected": -126.97029113769531, "loss": 0.5966, "rewards/accuracies": 0.5, "rewards/chosen": -3.497920036315918, "rewards/margins": 2.2908825874328613, "rewards/rejected": -5.788802623748779, "step": 10097 }, { "epoch": 1.57, "learning_rate": 6.741051921780176e-06, "logits/chosen": -2.8669304847717285, "logits/rejected": -2.5599398612976074, "logps/chosen": -225.01705932617188, "logps/rejected": -330.43768310546875, "loss": 0.6341, "rewards/accuracies": 0.5, "rewards/chosen": -5.986097812652588, "rewards/margins": 4.842316627502441, "rewards/rejected": -10.828414916992188, "step": 10098 }, { "epoch": 1.57, "learning_rate": 6.740318481249028e-06, "logits/chosen": -2.688326835632324, "logits/rejected": -2.695033550262451, "logps/chosen": -276.09588623046875, "logps/rejected": -187.0292205810547, "loss": 3.0506, "rewards/accuracies": 0.5, "rewards/chosen": -5.658968448638916, "rewards/margins": -0.7291905879974365, "rewards/rejected": -4.9297776222229, "step": 10099 }, { "epoch": 1.57, "learning_rate": 6.73958504071788e-06, "logits/chosen": -2.663970708847046, "logits/rejected": -3.141402244567871, "logps/chosen": -76.54481506347656, "logps/rejected": -276.871826171875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.916325569152832, "rewards/margins": 7.132502555847168, "rewards/rejected": -9.048828125, "step": 10100 }, { "epoch": 1.57, "learning_rate": 6.7388516001867315e-06, "logits/chosen": -1.2380417585372925, "logits/rejected": -2.6064062118530273, "logps/chosen": -58.116432189941406, "logps/rejected": -254.89862060546875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.4957282543182373, "rewards/margins": 6.235598564147949, "rewards/rejected": -9.731327056884766, "step": 10101 }, { "epoch": 1.57, "learning_rate": 6.738118159655584e-06, "logits/chosen": -2.8025903701782227, "logits/rejected": -2.86503267288208, "logps/chosen": -460.07745361328125, "logps/rejected": -416.227783203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.904069900512695, "rewards/margins": 7.220555305480957, "rewards/rejected": -12.124624252319336, "step": 10102 }, { "epoch": 1.57, "learning_rate": 6.737384719124436e-06, "logits/chosen": -2.5956153869628906, "logits/rejected": -3.247262477874756, "logps/chosen": -94.86841583251953, "logps/rejected": -373.6108703613281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1109304428100586, "rewards/margins": 10.297544479370117, "rewards/rejected": -13.408474922180176, "step": 10103 }, { "epoch": 1.57, "learning_rate": 6.736651278593288e-06, "logits/chosen": -2.2741317749023438, "logits/rejected": -2.8084354400634766, "logps/chosen": -262.48565673828125, "logps/rejected": -275.73260498046875, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": -4.175559043884277, "rewards/margins": 4.666584014892578, "rewards/rejected": -8.842143058776855, "step": 10104 }, { "epoch": 1.57, "learning_rate": 6.73591783806214e-06, "logits/chosen": -3.080690622329712, "logits/rejected": -2.864109754562378, "logps/chosen": -155.0675048828125, "logps/rejected": -151.22152709960938, "loss": 0.28, "rewards/accuracies": 1.0, "rewards/chosen": -4.401966094970703, "rewards/margins": 2.8504374027252197, "rewards/rejected": -7.252403259277344, "step": 10105 }, { "epoch": 1.57, "learning_rate": 6.735184397530993e-06, "logits/chosen": -2.61948561668396, "logits/rejected": -1.772700309753418, "logps/chosen": -234.14886474609375, "logps/rejected": -256.4044189453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.560089111328125, "rewards/margins": 9.363812446594238, "rewards/rejected": -12.923901557922363, "step": 10106 }, { "epoch": 1.57, "learning_rate": 6.7344509569998445e-06, "logits/chosen": -1.707337498664856, "logits/rejected": -2.656663656234741, "logps/chosen": -303.97576904296875, "logps/rejected": -398.61102294921875, "loss": 1.3296, "rewards/accuracies": 0.5, "rewards/chosen": -4.765939712524414, "rewards/margins": 2.408329486846924, "rewards/rejected": -7.174269199371338, "step": 10107 }, { "epoch": 1.57, "learning_rate": 6.733717516468696e-06, "logits/chosen": -2.607983112335205, "logits/rejected": -3.0480148792266846, "logps/chosen": -51.85420227050781, "logps/rejected": -330.3404541015625, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.5254344940185547, "rewards/margins": 5.577023029327393, "rewards/rejected": -8.102457046508789, "step": 10108 }, { "epoch": 1.57, "learning_rate": 6.732984075937548e-06, "logits/chosen": -2.64371395111084, "logits/rejected": -2.609762191772461, "logps/chosen": -410.02752685546875, "logps/rejected": -505.0755920410156, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.4955506324768066, "rewards/margins": 7.138574600219727, "rewards/rejected": -9.634124755859375, "step": 10109 }, { "epoch": 1.57, "learning_rate": 6.7322506354064e-06, "logits/chosen": -2.4391047954559326, "logits/rejected": -2.997124433517456, "logps/chosen": -586.4344482421875, "logps/rejected": -502.21527099609375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9705629348754883, "rewards/margins": 9.869455337524414, "rewards/rejected": -13.840018272399902, "step": 10110 }, { "epoch": 1.57, "learning_rate": 6.731517194875253e-06, "logits/chosen": -2.2888846397399902, "logits/rejected": -2.804283618927002, "logps/chosen": -324.5108337402344, "logps/rejected": -500.1678466796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.3089823722839355, "rewards/margins": 6.402912139892578, "rewards/rejected": -9.711894989013672, "step": 10111 }, { "epoch": 1.57, "learning_rate": 6.730783754344105e-06, "logits/chosen": -2.1390974521636963, "logits/rejected": -2.7313356399536133, "logps/chosen": -660.6422729492188, "logps/rejected": -749.985107421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6845207214355469, "rewards/margins": 8.735227584838867, "rewards/rejected": -10.419748306274414, "step": 10112 }, { "epoch": 1.57, "learning_rate": 6.7300503138129575e-06, "logits/chosen": -3.0515973567962646, "logits/rejected": -3.120025396347046, "logps/chosen": -302.85382080078125, "logps/rejected": -375.8736877441406, "loss": 0.9432, "rewards/accuracies": 0.5, "rewards/chosen": -4.563920497894287, "rewards/margins": 3.652038812637329, "rewards/rejected": -8.215959548950195, "step": 10113 }, { "epoch": 1.57, "learning_rate": 6.729316873281809e-06, "logits/chosen": -2.045097827911377, "logits/rejected": -2.781425952911377, "logps/chosen": -259.92633056640625, "logps/rejected": -337.708740234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.518051862716675, "rewards/margins": 7.989108562469482, "rewards/rejected": -10.507160186767578, "step": 10114 }, { "epoch": 1.57, "learning_rate": 6.728583432750662e-06, "logits/chosen": -1.7029809951782227, "logits/rejected": -2.600128650665283, "logps/chosen": -145.2084197998047, "logps/rejected": -374.4736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.876910448074341, "rewards/margins": 10.18274211883545, "rewards/rejected": -13.059652328491211, "step": 10115 }, { "epoch": 1.57, "learning_rate": 6.727849992219514e-06, "logits/chosen": -3.128730058670044, "logits/rejected": -3.1047000885009766, "logps/chosen": -175.12229919433594, "logps/rejected": -213.5989227294922, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -5.725546360015869, "rewards/margins": 5.72784423828125, "rewards/rejected": -11.453390121459961, "step": 10116 }, { "epoch": 1.57, "learning_rate": 6.727116551688366e-06, "logits/chosen": -1.885454773902893, "logits/rejected": -2.8543055057525635, "logps/chosen": -281.14666748046875, "logps/rejected": -508.0269775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4469573497772217, "rewards/margins": 10.343708992004395, "rewards/rejected": -12.790666580200195, "step": 10117 }, { "epoch": 1.57, "learning_rate": 6.726383111157218e-06, "logits/chosen": -2.331657648086548, "logits/rejected": -2.883122205734253, "logps/chosen": -221.55490112304688, "logps/rejected": -457.0468444824219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.4263542890548706, "rewards/margins": 9.313405990600586, "rewards/rejected": -10.73975944519043, "step": 10118 }, { "epoch": 1.57, "learning_rate": 6.72564967062607e-06, "logits/chosen": -2.5975725650787354, "logits/rejected": -2.873305320739746, "logps/chosen": -281.5340576171875, "logps/rejected": -374.7445068359375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.952730655670166, "rewards/margins": 7.353211879730225, "rewards/rejected": -10.30594253540039, "step": 10119 }, { "epoch": 1.57, "learning_rate": 6.724916230094922e-06, "logits/chosen": -2.8899056911468506, "logits/rejected": -3.1368229389190674, "logps/chosen": -129.15426635742188, "logps/rejected": -287.16473388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.4891063868999481, "rewards/margins": 9.101088523864746, "rewards/rejected": -9.590194702148438, "step": 10120 }, { "epoch": 1.57, "learning_rate": 6.724182789563774e-06, "logits/chosen": -3.0397789478302, "logits/rejected": -3.0317447185516357, "logps/chosen": -73.87493133544922, "logps/rejected": -179.56356811523438, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0051313638687134, "rewards/margins": 8.459234237670898, "rewards/rejected": -9.46436595916748, "step": 10121 }, { "epoch": 1.57, "learning_rate": 6.723449349032626e-06, "logits/chosen": -2.8057942390441895, "logits/rejected": -3.0792651176452637, "logps/chosen": -165.3600311279297, "logps/rejected": -278.2544250488281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1002284288406372, "rewards/margins": 9.092144012451172, "rewards/rejected": -10.192373275756836, "step": 10122 }, { "epoch": 1.57, "learning_rate": 6.722715908501478e-06, "logits/chosen": -2.6959664821624756, "logits/rejected": -2.805400848388672, "logps/chosen": -537.394775390625, "logps/rejected": -471.1512756347656, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -4.334944725036621, "rewards/margins": 3.918297052383423, "rewards/rejected": -8.253242492675781, "step": 10123 }, { "epoch": 1.57, "learning_rate": 6.721982467970331e-06, "logits/chosen": -2.305926561355591, "logits/rejected": -2.917116403579712, "logps/chosen": -325.14532470703125, "logps/rejected": -627.916259765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.93557071685791, "rewards/margins": 6.762551307678223, "rewards/rejected": -10.698122024536133, "step": 10124 }, { "epoch": 1.57, "learning_rate": 6.7212490274391825e-06, "logits/chosen": -2.364741563796997, "logits/rejected": -3.1977474689483643, "logps/chosen": -465.6289367675781, "logps/rejected": -602.8017578125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.591371536254883, "rewards/margins": 6.0752482414245605, "rewards/rejected": -10.666620254516602, "step": 10125 }, { "epoch": 1.57, "learning_rate": 6.7205155869080344e-06, "logits/chosen": -2.5580077171325684, "logits/rejected": -2.904547929763794, "logps/chosen": -191.21832275390625, "logps/rejected": -313.1142883300781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0625412464141846, "rewards/margins": 8.940942764282227, "rewards/rejected": -10.003484725952148, "step": 10126 }, { "epoch": 1.57, "learning_rate": 6.719782146376886e-06, "logits/chosen": -2.25042462348938, "logits/rejected": -2.6629340648651123, "logps/chosen": -158.55169677734375, "logps/rejected": -358.7275390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.9201977252960205, "rewards/margins": 8.376581192016602, "rewards/rejected": -11.296778678894043, "step": 10127 }, { "epoch": 1.58, "learning_rate": 6.719048705845738e-06, "logits/chosen": -2.559467315673828, "logits/rejected": -2.8551251888275146, "logps/chosen": -201.7867431640625, "logps/rejected": -373.4505615234375, "loss": 0.0915, "rewards/accuracies": 1.0, "rewards/chosen": -3.8136415481567383, "rewards/margins": 5.848893165588379, "rewards/rejected": -9.662534713745117, "step": 10128 }, { "epoch": 1.58, "learning_rate": 6.718315265314591e-06, "logits/chosen": -2.306305170059204, "logits/rejected": -3.215324878692627, "logps/chosen": -138.05169677734375, "logps/rejected": -247.87619018554688, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -3.274625778198242, "rewards/margins": 5.345758438110352, "rewards/rejected": -8.620384216308594, "step": 10129 }, { "epoch": 1.58, "learning_rate": 6.717581824783444e-06, "logits/chosen": -3.0505828857421875, "logits/rejected": -2.9284422397613525, "logps/chosen": -852.0695190429688, "logps/rejected": -519.296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.4866943359375, "rewards/margins": 8.059072494506836, "rewards/rejected": -13.545766830444336, "step": 10130 }, { "epoch": 1.58, "learning_rate": 6.7168483842522955e-06, "logits/chosen": -3.1267316341400146, "logits/rejected": -3.1804559230804443, "logps/chosen": -236.85647583007812, "logps/rejected": -219.1439971923828, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.921567440032959, "rewards/margins": 7.840453624725342, "rewards/rejected": -9.7620210647583, "step": 10131 }, { "epoch": 1.58, "learning_rate": 6.716114943721147e-06, "logits/chosen": -2.2967517375946045, "logits/rejected": -2.8991692066192627, "logps/chosen": -965.3134155273438, "logps/rejected": -920.6190185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.654754638671875, "rewards/margins": 11.076764106750488, "rewards/rejected": -13.731518745422363, "step": 10132 }, { "epoch": 1.58, "learning_rate": 6.71538150319e-06, "logits/chosen": -2.641326427459717, "logits/rejected": -2.9100818634033203, "logps/chosen": -743.6988525390625, "logps/rejected": -677.41552734375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.5286288261413574, "rewards/margins": 6.588774681091309, "rewards/rejected": -10.117403030395508, "step": 10133 }, { "epoch": 1.58, "learning_rate": 6.714648062658852e-06, "logits/chosen": -2.853133201599121, "logits/rejected": -2.8897056579589844, "logps/chosen": -620.206298828125, "logps/rejected": -660.2697143554688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.9687896966934204, "rewards/margins": 8.560258865356445, "rewards/rejected": -10.529048919677734, "step": 10134 }, { "epoch": 1.58, "learning_rate": 6.713914622127704e-06, "logits/chosen": -1.3741120100021362, "logits/rejected": -2.9443676471710205, "logps/chosen": -37.10809326171875, "logps/rejected": -491.0126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.068480968475342, "rewards/margins": 13.88421630859375, "rewards/rejected": -15.95269775390625, "step": 10135 }, { "epoch": 1.58, "learning_rate": 6.713181181596556e-06, "logits/chosen": -2.5785164833068848, "logits/rejected": -2.5387790203094482, "logps/chosen": -257.09564208984375, "logps/rejected": -372.1461486816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.771490573883057, "rewards/margins": 11.412909507751465, "rewards/rejected": -16.18440055847168, "step": 10136 }, { "epoch": 1.58, "learning_rate": 6.712447741065408e-06, "logits/chosen": -2.1426947116851807, "logits/rejected": -2.9810123443603516, "logps/chosen": -97.40135192871094, "logps/rejected": -327.6454772949219, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/chosen": -3.2089309692382812, "rewards/margins": 5.157218933105469, "rewards/rejected": -8.36614990234375, "step": 10137 }, { "epoch": 1.58, "learning_rate": 6.71171430053426e-06, "logits/chosen": -2.999739408493042, "logits/rejected": -2.4299299716949463, "logps/chosen": -387.512451171875, "logps/rejected": -362.5057373046875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.203176975250244, "rewards/margins": 6.129275798797607, "rewards/rejected": -9.332452774047852, "step": 10138 }, { "epoch": 1.58, "learning_rate": 6.710980860003112e-06, "logits/chosen": -2.8283402919769287, "logits/rejected": -3.163516044616699, "logps/chosen": -342.4367980957031, "logps/rejected": -501.524169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1020607948303223, "rewards/margins": 11.61902904510498, "rewards/rejected": -14.721089363098145, "step": 10139 }, { "epoch": 1.58, "learning_rate": 6.710247419471964e-06, "logits/chosen": -3.021984577178955, "logits/rejected": -2.443312883377075, "logps/chosen": -707.8104248046875, "logps/rejected": -453.49993896484375, "loss": 2.4642, "rewards/accuracies": 0.5, "rewards/chosen": -5.915598392486572, "rewards/margins": 1.98368501663208, "rewards/rejected": -7.899283409118652, "step": 10140 }, { "epoch": 1.58, "learning_rate": 6.709513978940816e-06, "logits/chosen": -2.6632304191589355, "logits/rejected": -1.9719003438949585, "logps/chosen": -162.77989196777344, "logps/rejected": -235.63134765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.402043104171753, "rewards/margins": 8.74643611907959, "rewards/rejected": -11.148479461669922, "step": 10141 }, { "epoch": 1.58, "learning_rate": 6.708780538409669e-06, "logits/chosen": -2.3865065574645996, "logits/rejected": -2.932692289352417, "logps/chosen": -178.34645080566406, "logps/rejected": -316.2943115234375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.3278651237487793, "rewards/margins": 5.580936431884766, "rewards/rejected": -8.908802032470703, "step": 10142 }, { "epoch": 1.58, "learning_rate": 6.708047097878521e-06, "logits/chosen": -2.08742094039917, "logits/rejected": -2.6310839653015137, "logps/chosen": -244.21905517578125, "logps/rejected": -312.2261047363281, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -3.6253254413604736, "rewards/margins": 4.992293834686279, "rewards/rejected": -8.617619514465332, "step": 10143 }, { "epoch": 1.58, "learning_rate": 6.7073136573473725e-06, "logits/chosen": -1.8227424621582031, "logits/rejected": -2.735513925552368, "logps/chosen": -111.83599853515625, "logps/rejected": -312.98046875, "loss": 0.1459, "rewards/accuracies": 1.0, "rewards/chosen": -4.53065824508667, "rewards/margins": 4.656611442565918, "rewards/rejected": -9.18726921081543, "step": 10144 }, { "epoch": 1.58, "learning_rate": 6.706580216816224e-06, "logits/chosen": -3.1326823234558105, "logits/rejected": -3.3415021896362305, "logps/chosen": -152.98428344726562, "logps/rejected": -238.41709899902344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.473480224609375, "rewards/margins": 8.106422424316406, "rewards/rejected": -9.579902648925781, "step": 10145 }, { "epoch": 1.58, "learning_rate": 6.705846776285077e-06, "logits/chosen": -1.865660309791565, "logits/rejected": -2.8798913955688477, "logps/chosen": -216.09799194335938, "logps/rejected": -474.7046813964844, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.032438039779663, "rewards/margins": 7.253369331359863, "rewards/rejected": -10.285807609558105, "step": 10146 }, { "epoch": 1.58, "learning_rate": 6.70511333575393e-06, "logits/chosen": -2.3093831539154053, "logits/rejected": -2.050956964492798, "logps/chosen": -411.6170959472656, "logps/rejected": -325.7279052734375, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -4.587332248687744, "rewards/margins": 4.681278228759766, "rewards/rejected": -9.268610000610352, "step": 10147 }, { "epoch": 1.58, "learning_rate": 6.704379895222782e-06, "logits/chosen": -2.082007884979248, "logits/rejected": -3.0030276775360107, "logps/chosen": -242.17303466796875, "logps/rejected": -427.5044250488281, "loss": 0.8559, "rewards/accuracies": 0.5, "rewards/chosen": -5.13045597076416, "rewards/margins": 3.877676010131836, "rewards/rejected": -9.008131980895996, "step": 10148 }, { "epoch": 1.58, "learning_rate": 6.7036464546916336e-06, "logits/chosen": -2.82415509223938, "logits/rejected": -3.0670790672302246, "logps/chosen": -181.1366729736328, "logps/rejected": -227.5984344482422, "loss": 0.1597, "rewards/accuracies": 1.0, "rewards/chosen": -4.380887985229492, "rewards/margins": 3.132631301879883, "rewards/rejected": -7.513519287109375, "step": 10149 }, { "epoch": 1.58, "learning_rate": 6.7029130141604854e-06, "logits/chosen": -2.0252177715301514, "logits/rejected": -2.855586290359497, "logps/chosen": -208.01589965820312, "logps/rejected": -319.0550537109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.517697334289551, "rewards/margins": 8.590389251708984, "rewards/rejected": -12.108086585998535, "step": 10150 }, { "epoch": 1.58, "learning_rate": 6.702179573629338e-06, "logits/chosen": -2.4894211292266846, "logits/rejected": -2.3006324768066406, "logps/chosen": -472.5476989746094, "logps/rejected": -534.1171264648438, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -2.763503074645996, "rewards/margins": 8.105294227600098, "rewards/rejected": -10.868797302246094, "step": 10151 }, { "epoch": 1.58, "learning_rate": 6.70144613309819e-06, "logits/chosen": -2.845818519592285, "logits/rejected": -3.3024818897247314, "logps/chosen": -133.3073272705078, "logps/rejected": -413.8167419433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1039605140686035, "rewards/margins": 10.279356956481934, "rewards/rejected": -12.383317947387695, "step": 10152 }, { "epoch": 1.58, "learning_rate": 6.700712692567042e-06, "logits/chosen": -1.7549675703048706, "logits/rejected": -2.953712224960327, "logps/chosen": -141.10275268554688, "logps/rejected": -439.37139892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5443817377090454, "rewards/margins": 10.400863647460938, "rewards/rejected": -11.945244789123535, "step": 10153 }, { "epoch": 1.58, "learning_rate": 6.699979252035894e-06, "logits/chosen": -2.3457303047180176, "logits/rejected": -2.9811418056488037, "logps/chosen": -165.0704345703125, "logps/rejected": -473.5888671875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.7816262245178223, "rewards/margins": 7.410680294036865, "rewards/rejected": -9.192306518554688, "step": 10154 }, { "epoch": 1.58, "learning_rate": 6.6992458115047465e-06, "logits/chosen": -2.662257671356201, "logits/rejected": -3.1040515899658203, "logps/chosen": -264.489013671875, "logps/rejected": -461.9148864746094, "loss": 0.1209, "rewards/accuracies": 1.0, "rewards/chosen": -5.8547258377075195, "rewards/margins": 3.0471408367156982, "rewards/rejected": -8.901866912841797, "step": 10155 }, { "epoch": 1.58, "learning_rate": 6.698512370973598e-06, "logits/chosen": -2.290604591369629, "logits/rejected": -2.9644322395324707, "logps/chosen": -47.267723083496094, "logps/rejected": -369.05926513671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.061973810195923, "rewards/margins": 10.71208667755127, "rewards/rejected": -13.77406120300293, "step": 10156 }, { "epoch": 1.58, "learning_rate": 6.69777893044245e-06, "logits/chosen": -2.8358514308929443, "logits/rejected": -3.1680405139923096, "logps/chosen": -274.7996826171875, "logps/rejected": -315.7744140625, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -2.5437402725219727, "rewards/margins": 7.345131874084473, "rewards/rejected": -9.888872146606445, "step": 10157 }, { "epoch": 1.58, "learning_rate": 6.697045489911302e-06, "logits/chosen": -2.876776933670044, "logits/rejected": -1.9211821556091309, "logps/chosen": -460.2132568359375, "logps/rejected": -332.1459655761719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0914113521575928, "rewards/margins": 9.59068775177002, "rewards/rejected": -11.682099342346191, "step": 10158 }, { "epoch": 1.58, "learning_rate": 6.696312049380154e-06, "logits/chosen": -1.875958800315857, "logits/rejected": -2.723323106765747, "logps/chosen": -94.53955841064453, "logps/rejected": -467.77117919921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.0262937545776367, "rewards/margins": 11.810823440551758, "rewards/rejected": -14.837117195129395, "step": 10159 }, { "epoch": 1.58, "learning_rate": 6.695578608849007e-06, "logits/chosen": -2.469590663909912, "logits/rejected": -2.8982722759246826, "logps/chosen": -184.7091522216797, "logps/rejected": -241.4350128173828, "loss": 0.268, "rewards/accuracies": 1.0, "rewards/chosen": -4.291125297546387, "rewards/margins": 4.051459789276123, "rewards/rejected": -8.342585563659668, "step": 10160 }, { "epoch": 1.58, "learning_rate": 6.694845168317859e-06, "logits/chosen": -2.783848285675049, "logits/rejected": -2.756930351257324, "logps/chosen": -68.69996643066406, "logps/rejected": -206.33462524414062, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2352747917175293, "rewards/margins": 7.302762031555176, "rewards/rejected": -9.538036346435547, "step": 10161 }, { "epoch": 1.58, "learning_rate": 6.6941117277867105e-06, "logits/chosen": -2.6528561115264893, "logits/rejected": -2.9509449005126953, "logps/chosen": -1123.008544921875, "logps/rejected": -876.5494384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6056456565856934, "rewards/margins": 14.93193531036377, "rewards/rejected": -18.537582397460938, "step": 10162 }, { "epoch": 1.58, "learning_rate": 6.693378287255563e-06, "logits/chosen": -2.307718515396118, "logits/rejected": -2.9462778568267822, "logps/chosen": -191.6383056640625, "logps/rejected": -371.1155700683594, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -4.978386878967285, "rewards/margins": 6.308877944946289, "rewards/rejected": -11.287264823913574, "step": 10163 }, { "epoch": 1.58, "learning_rate": 6.692644846724416e-06, "logits/chosen": -2.407381057739258, "logits/rejected": -2.999147653579712, "logps/chosen": -337.37005615234375, "logps/rejected": -405.5069580078125, "loss": 0.3076, "rewards/accuracies": 1.0, "rewards/chosen": -3.4655442237854004, "rewards/margins": 3.679190158843994, "rewards/rejected": -7.1447343826293945, "step": 10164 }, { "epoch": 1.58, "learning_rate": 6.691911406193268e-06, "logits/chosen": -2.2749078273773193, "logits/rejected": -3.118236780166626, "logps/chosen": -171.46185302734375, "logps/rejected": -445.87567138671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.562373399734497, "rewards/margins": 8.033390045166016, "rewards/rejected": -11.595763206481934, "step": 10165 }, { "epoch": 1.58, "learning_rate": 6.69117796566212e-06, "logits/chosen": -3.026858329772949, "logits/rejected": -3.362786293029785, "logps/chosen": -208.36505126953125, "logps/rejected": -345.7076416015625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -4.822226524353027, "rewards/margins": 5.296574592590332, "rewards/rejected": -10.11880111694336, "step": 10166 }, { "epoch": 1.58, "learning_rate": 6.690444525130972e-06, "logits/chosen": -2.0923972129821777, "logits/rejected": -3.134965181350708, "logps/chosen": -267.7187194824219, "logps/rejected": -426.3236389160156, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -4.638161659240723, "rewards/margins": 4.848386287689209, "rewards/rejected": -9.486547470092773, "step": 10167 }, { "epoch": 1.58, "learning_rate": 6.6897110845998235e-06, "logits/chosen": -2.9881138801574707, "logits/rejected": -2.9535739421844482, "logps/chosen": -133.1522216796875, "logps/rejected": -180.89260864257812, "loss": 1.5961, "rewards/accuracies": 0.5, "rewards/chosen": -6.365822792053223, "rewards/margins": 2.7142109870910645, "rewards/rejected": -9.080034255981445, "step": 10168 }, { "epoch": 1.58, "learning_rate": 6.688977644068676e-06, "logits/chosen": -2.663557767868042, "logits/rejected": -2.7248055934906006, "logps/chosen": -169.88937377929688, "logps/rejected": -275.79486083984375, "loss": 0.9924, "rewards/accuracies": 0.5, "rewards/chosen": -6.1725006103515625, "rewards/margins": 6.253849983215332, "rewards/rejected": -12.426349639892578, "step": 10169 }, { "epoch": 1.58, "learning_rate": 6.688244203537528e-06, "logits/chosen": -2.171302556991577, "logits/rejected": -2.6732370853424072, "logps/chosen": -312.37664794921875, "logps/rejected": -448.35321044921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5214076042175293, "rewards/margins": 9.876581192016602, "rewards/rejected": -13.397988319396973, "step": 10170 }, { "epoch": 1.58, "learning_rate": 6.68751076300638e-06, "logits/chosen": -1.261557936668396, "logits/rejected": -1.779380440711975, "logps/chosen": -235.83570861816406, "logps/rejected": -477.4297790527344, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.301234245300293, "rewards/margins": 7.801374435424805, "rewards/rejected": -12.102609634399414, "step": 10171 }, { "epoch": 1.58, "learning_rate": 6.686777322475232e-06, "logits/chosen": -2.8984782695770264, "logits/rejected": -2.563145875930786, "logps/chosen": -123.70124816894531, "logps/rejected": -188.97935485839844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.7576959133148193, "rewards/margins": 7.190921783447266, "rewards/rejected": -9.948617935180664, "step": 10172 }, { "epoch": 1.58, "learning_rate": 6.6860438819440846e-06, "logits/chosen": -2.6997060775756836, "logits/rejected": -1.6818809509277344, "logps/chosen": -318.8727722167969, "logps/rejected": -254.5341339111328, "loss": 1.5438, "rewards/accuracies": 0.5, "rewards/chosen": -5.284274101257324, "rewards/margins": 2.3218209743499756, "rewards/rejected": -7.606095314025879, "step": 10173 }, { "epoch": 1.58, "learning_rate": 6.6853104414129364e-06, "logits/chosen": -2.339811086654663, "logits/rejected": -3.0364019870758057, "logps/chosen": -70.11165618896484, "logps/rejected": -271.88165283203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.169487953186035, "rewards/margins": 8.138711929321289, "rewards/rejected": -10.30820083618164, "step": 10174 }, { "epoch": 1.58, "learning_rate": 6.684577000881788e-06, "logits/chosen": -1.5924378633499146, "logits/rejected": -1.1975246667861938, "logps/chosen": -414.84674072265625, "logps/rejected": -702.0516357421875, "loss": 0.7219, "rewards/accuracies": 0.5, "rewards/chosen": -6.130439758300781, "rewards/margins": 2.8788154125213623, "rewards/rejected": -9.009255409240723, "step": 10175 }, { "epoch": 1.58, "learning_rate": 6.68384356035064e-06, "logits/chosen": -3.0112173557281494, "logits/rejected": -2.4551870822906494, "logps/chosen": -575.6002197265625, "logps/rejected": -452.29608154296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.528193712234497, "rewards/margins": 9.107528686523438, "rewards/rejected": -12.635722160339355, "step": 10176 }, { "epoch": 1.58, "learning_rate": 6.683110119819492e-06, "logits/chosen": -2.036252737045288, "logits/rejected": -2.4344193935394287, "logps/chosen": -121.676025390625, "logps/rejected": -305.78076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.824190616607666, "rewards/margins": 11.733952522277832, "rewards/rejected": -14.558143615722656, "step": 10177 }, { "epoch": 1.58, "learning_rate": 6.682376679288345e-06, "logits/chosen": -2.8731656074523926, "logits/rejected": -2.58166766166687, "logps/chosen": -546.3864135742188, "logps/rejected": -535.1206665039062, "loss": 2.4242, "rewards/accuracies": 0.5, "rewards/chosen": -6.944247722625732, "rewards/margins": -0.46045470237731934, "rewards/rejected": -6.483793258666992, "step": 10178 }, { "epoch": 1.58, "learning_rate": 6.681643238757197e-06, "logits/chosen": -2.645592451095581, "logits/rejected": -2.857355833053589, "logps/chosen": -87.97967529296875, "logps/rejected": -190.74234008789062, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.527827024459839, "rewards/margins": 6.633355140686035, "rewards/rejected": -10.161182403564453, "step": 10179 }, { "epoch": 1.58, "learning_rate": 6.680909798226049e-06, "logits/chosen": -2.6027796268463135, "logits/rejected": -2.039701461791992, "logps/chosen": -222.69903564453125, "logps/rejected": -372.0810546875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.861220121383667, "rewards/margins": 5.948333740234375, "rewards/rejected": -9.809553146362305, "step": 10180 }, { "epoch": 1.58, "learning_rate": 6.680176357694901e-06, "logits/chosen": -3.127497911453247, "logits/rejected": -2.7737157344818115, "logps/chosen": -637.6552734375, "logps/rejected": -620.951171875, "loss": 0.2721, "rewards/accuracies": 1.0, "rewards/chosen": -8.201489448547363, "rewards/margins": 2.927523136138916, "rewards/rejected": -11.129013061523438, "step": 10181 }, { "epoch": 1.58, "learning_rate": 6.679442917163754e-06, "logits/chosen": -1.569785237312317, "logits/rejected": -2.8791210651397705, "logps/chosen": -167.08590698242188, "logps/rejected": -564.447021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4349899291992188, "rewards/margins": 14.807037353515625, "rewards/rejected": -17.242027282714844, "step": 10182 }, { "epoch": 1.58, "learning_rate": 6.678709476632606e-06, "logits/chosen": -1.5122636556625366, "logits/rejected": -1.681166172027588, "logps/chosen": -299.49249267578125, "logps/rejected": -450.27960205078125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -6.328303813934326, "rewards/margins": 4.921820163726807, "rewards/rejected": -11.250123977661133, "step": 10183 }, { "epoch": 1.58, "learning_rate": 6.677976036101458e-06, "logits/chosen": -2.732185125350952, "logits/rejected": -3.1369822025299072, "logps/chosen": -259.89031982421875, "logps/rejected": -335.8833312988281, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -5.850710868835449, "rewards/margins": 4.2575531005859375, "rewards/rejected": -10.108263969421387, "step": 10184 }, { "epoch": 1.58, "learning_rate": 6.67724259557031e-06, "logits/chosen": -2.91213059425354, "logits/rejected": -3.0254430770874023, "logps/chosen": -520.7274780273438, "logps/rejected": -392.54046630859375, "loss": 1.4829, "rewards/accuracies": 0.5, "rewards/chosen": -7.611551284790039, "rewards/margins": 2.428793430328369, "rewards/rejected": -10.04034423828125, "step": 10185 }, { "epoch": 1.58, "learning_rate": 6.6765091550391615e-06, "logits/chosen": -2.9223952293395996, "logits/rejected": -1.9600913524627686, "logps/chosen": -407.8336486816406, "logps/rejected": -330.54656982421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9952635765075684, "rewards/margins": 7.963460445404053, "rewards/rejected": -9.958724021911621, "step": 10186 }, { "epoch": 1.58, "learning_rate": 6.675775714508014e-06, "logits/chosen": -1.9686014652252197, "logits/rejected": -2.701878786087036, "logps/chosen": -219.01211547851562, "logps/rejected": -481.6482238769531, "loss": 0.0746, "rewards/accuracies": 1.0, "rewards/chosen": -4.765135765075684, "rewards/margins": 8.60639476776123, "rewards/rejected": -13.371530532836914, "step": 10187 }, { "epoch": 1.58, "learning_rate": 6.675042273976866e-06, "logits/chosen": -2.4104628562927246, "logits/rejected": -3.002721071243286, "logps/chosen": -137.4473876953125, "logps/rejected": -345.3505859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.2466487884521484, "rewards/margins": 8.920675277709961, "rewards/rejected": -11.16732406616211, "step": 10188 }, { "epoch": 1.58, "learning_rate": 6.674308833445718e-06, "logits/chosen": -2.6163644790649414, "logits/rejected": -3.2295360565185547, "logps/chosen": -584.5907592773438, "logps/rejected": -684.2000122070312, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.7370200157165527, "rewards/margins": 5.796784400939941, "rewards/rejected": -9.533803939819336, "step": 10189 }, { "epoch": 1.58, "learning_rate": 6.67357539291457e-06, "logits/chosen": -2.8272101879119873, "logits/rejected": -3.17411470413208, "logps/chosen": -329.38916015625, "logps/rejected": -465.74169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2351434230804443, "rewards/margins": 10.652606964111328, "rewards/rejected": -12.887750625610352, "step": 10190 }, { "epoch": 1.58, "learning_rate": 6.672841952383423e-06, "logits/chosen": -2.6971487998962402, "logits/rejected": -2.8822693824768066, "logps/chosen": -253.35292053222656, "logps/rejected": -215.1229248046875, "loss": 0.2872, "rewards/accuracies": 1.0, "rewards/chosen": -5.056356906890869, "rewards/margins": 2.2776458263397217, "rewards/rejected": -7.334002494812012, "step": 10191 }, { "epoch": 1.59, "learning_rate": 6.6721085118522745e-06, "logits/chosen": -2.7284066677093506, "logits/rejected": -3.393375873565674, "logps/chosen": -38.54570007324219, "logps/rejected": -283.92193603515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.32660174369812, "rewards/margins": 9.086915969848633, "rewards/rejected": -11.413517951965332, "step": 10192 }, { "epoch": 1.59, "learning_rate": 6.671375071321126e-06, "logits/chosen": -2.6640567779541016, "logits/rejected": -2.8364005088806152, "logps/chosen": -194.783935546875, "logps/rejected": -222.17103576660156, "loss": 0.9711, "rewards/accuracies": 0.5, "rewards/chosen": -5.169581413269043, "rewards/margins": 1.6790940761566162, "rewards/rejected": -6.848675727844238, "step": 10193 }, { "epoch": 1.59, "learning_rate": 6.670641630789978e-06, "logits/chosen": -2.8955721855163574, "logits/rejected": -3.0387368202209473, "logps/chosen": -115.609130859375, "logps/rejected": -220.96237182617188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.117394208908081, "rewards/margins": 7.847774505615234, "rewards/rejected": -9.965168952941895, "step": 10194 }, { "epoch": 1.59, "learning_rate": 6.669908190258831e-06, "logits/chosen": -2.1711645126342773, "logits/rejected": -2.904414415359497, "logps/chosen": -48.56666946411133, "logps/rejected": -192.78781127929688, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.369904041290283, "rewards/margins": 6.006999969482422, "rewards/rejected": -9.376904487609863, "step": 10195 }, { "epoch": 1.59, "learning_rate": 6.669174749727683e-06, "logits/chosen": -2.81402850151062, "logits/rejected": -2.257969379425049, "logps/chosen": -244.6969757080078, "logps/rejected": -299.4123229980469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.61395263671875, "rewards/margins": 8.129030227661133, "rewards/rejected": -10.742981910705566, "step": 10196 }, { "epoch": 1.59, "learning_rate": 6.6684413091965356e-06, "logits/chosen": -2.580705404281616, "logits/rejected": -3.184375524520874, "logps/chosen": -79.81797790527344, "logps/rejected": -416.1990051269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.697767734527588, "rewards/margins": 13.251226425170898, "rewards/rejected": -15.948993682861328, "step": 10197 }, { "epoch": 1.59, "learning_rate": 6.6677078686653874e-06, "logits/chosen": -1.9579819440841675, "logits/rejected": -2.8288021087646484, "logps/chosen": -91.27857208251953, "logps/rejected": -300.8166809082031, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.887019157409668, "rewards/margins": 6.726849555969238, "rewards/rejected": -10.613868713378906, "step": 10198 }, { "epoch": 1.59, "learning_rate": 6.666974428134239e-06, "logits/chosen": -2.3858587741851807, "logits/rejected": -3.1058576107025146, "logps/chosen": -158.876708984375, "logps/rejected": -369.9344482421875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -4.220391750335693, "rewards/margins": 8.380470275878906, "rewards/rejected": -12.600862503051758, "step": 10199 }, { "epoch": 1.59, "learning_rate": 6.666240987603092e-06, "logits/chosen": -1.8571829795837402, "logits/rejected": -2.9490458965301514, "logps/chosen": -249.12185668945312, "logps/rejected": -421.178955078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.034043788909912, "rewards/margins": 6.680572509765625, "rewards/rejected": -10.714616775512695, "step": 10200 }, { "epoch": 1.59, "learning_rate": 6.665507547071944e-06, "logits/chosen": -3.2144205570220947, "logits/rejected": -3.1436803340911865, "logps/chosen": -108.85307312011719, "logps/rejected": -181.12518310546875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.5904502868652344, "rewards/margins": 6.408254146575928, "rewards/rejected": -9.99870491027832, "step": 10201 }, { "epoch": 1.59, "learning_rate": 6.664774106540796e-06, "logits/chosen": -2.743272304534912, "logits/rejected": -3.34938383102417, "logps/chosen": -542.1915893554688, "logps/rejected": -560.3194580078125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -4.5052642822265625, "rewards/margins": 6.051299095153809, "rewards/rejected": -10.556563377380371, "step": 10202 }, { "epoch": 1.59, "learning_rate": 6.664040666009648e-06, "logits/chosen": -2.561328172683716, "logits/rejected": -1.1529461145401, "logps/chosen": -309.6759033203125, "logps/rejected": -251.54034423828125, "loss": 0.2931, "rewards/accuracies": 1.0, "rewards/chosen": -4.235086441040039, "rewards/margins": 4.57124137878418, "rewards/rejected": -8.806327819824219, "step": 10203 }, { "epoch": 1.59, "learning_rate": 6.6633072254785e-06, "logits/chosen": -3.127232789993286, "logits/rejected": -3.224745035171509, "logps/chosen": -544.1845703125, "logps/rejected": -305.1001892089844, "loss": 1.0527, "rewards/accuracies": 0.5, "rewards/chosen": -7.158477783203125, "rewards/margins": 3.13321852684021, "rewards/rejected": -10.291696548461914, "step": 10204 }, { "epoch": 1.59, "learning_rate": 6.662573784947352e-06, "logits/chosen": -2.073115348815918, "logits/rejected": -3.0697085857391357, "logps/chosen": -155.86309814453125, "logps/rejected": -445.384521484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2532310485839844, "rewards/margins": 9.505464553833008, "rewards/rejected": -12.758695602416992, "step": 10205 }, { "epoch": 1.59, "learning_rate": 6.661840344416204e-06, "logits/chosen": -2.0407371520996094, "logits/rejected": -3.05531644821167, "logps/chosen": -72.28812408447266, "logps/rejected": -306.7886962890625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -5.419870853424072, "rewards/margins": 7.0972394943237305, "rewards/rejected": -12.517110824584961, "step": 10206 }, { "epoch": 1.59, "learning_rate": 6.661106903885056e-06, "logits/chosen": -2.682392120361328, "logits/rejected": -2.769395112991333, "logps/chosen": -443.35015869140625, "logps/rejected": -423.39666748046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.727941870689392, "rewards/margins": 10.159395217895508, "rewards/rejected": -11.887336730957031, "step": 10207 }, { "epoch": 1.59, "learning_rate": 6.660373463353908e-06, "logits/chosen": -3.1617491245269775, "logits/rejected": -3.054853677749634, "logps/chosen": -220.64694213867188, "logps/rejected": -236.74740600585938, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -3.048753023147583, "rewards/margins": 7.01107931137085, "rewards/rejected": -10.059832572937012, "step": 10208 }, { "epoch": 1.59, "learning_rate": 6.659640022822761e-06, "logits/chosen": -3.0801193714141846, "logits/rejected": -2.1290016174316406, "logps/chosen": -358.5933837890625, "logps/rejected": -191.78497314453125, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -1.2743209600448608, "rewards/margins": 3.5465757846832275, "rewards/rejected": -4.820896625518799, "step": 10209 }, { "epoch": 1.59, "learning_rate": 6.6589065822916125e-06, "logits/chosen": -2.6144282817840576, "logits/rejected": -2.4144158363342285, "logps/chosen": -281.5396423339844, "logps/rejected": -320.26885986328125, "loss": 0.1136, "rewards/accuracies": 1.0, "rewards/chosen": -4.523171901702881, "rewards/margins": 2.193053722381592, "rewards/rejected": -6.716225624084473, "step": 10210 }, { "epoch": 1.59, "learning_rate": 6.658173141760464e-06, "logits/chosen": -2.680203676223755, "logits/rejected": -2.1843652725219727, "logps/chosen": -164.4386444091797, "logps/rejected": -195.71983337402344, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.222796678543091, "rewards/margins": 6.39334774017334, "rewards/rejected": -8.616144180297852, "step": 10211 }, { "epoch": 1.59, "learning_rate": 6.657439701229316e-06, "logits/chosen": -1.8459044694900513, "logits/rejected": -2.4916436672210693, "logps/chosen": -196.00698852539062, "logps/rejected": -322.6636962890625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.076545715332031, "rewards/margins": 6.068255424499512, "rewards/rejected": -11.144800186157227, "step": 10212 }, { "epoch": 1.59, "learning_rate": 6.656706260698169e-06, "logits/chosen": -2.1150870323181152, "logits/rejected": -2.9244942665100098, "logps/chosen": -132.013916015625, "logps/rejected": -333.9742431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7212233543395996, "rewards/margins": 10.073744773864746, "rewards/rejected": -12.794967651367188, "step": 10213 }, { "epoch": 1.59, "learning_rate": 6.655972820167022e-06, "logits/chosen": -3.2687535285949707, "logits/rejected": -3.397942543029785, "logps/chosen": -121.90162658691406, "logps/rejected": -226.78179931640625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -3.5775251388549805, "rewards/margins": 5.07181453704834, "rewards/rejected": -8.64933967590332, "step": 10214 }, { "epoch": 1.59, "learning_rate": 6.655239379635874e-06, "logits/chosen": -2.26304030418396, "logits/rejected": -2.7771785259246826, "logps/chosen": -278.35650634765625, "logps/rejected": -418.8594665527344, "loss": 0.0692, "rewards/accuracies": 1.0, "rewards/chosen": -3.3391356468200684, "rewards/margins": 7.295372009277344, "rewards/rejected": -10.63450813293457, "step": 10215 }, { "epoch": 1.59, "learning_rate": 6.6545059391047255e-06, "logits/chosen": -2.7007803916931152, "logits/rejected": -2.0037150382995605, "logps/chosen": -290.21038818359375, "logps/rejected": -377.21429443359375, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -1.8679707050323486, "rewards/margins": 7.717585563659668, "rewards/rejected": -9.585556030273438, "step": 10216 }, { "epoch": 1.59, "learning_rate": 6.653772498573577e-06, "logits/chosen": -2.918548822402954, "logits/rejected": -2.5506231784820557, "logps/chosen": -653.8726806640625, "logps/rejected": -504.4670715332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.7976013422012329, "rewards/margins": 14.594608306884766, "rewards/rejected": -15.392210006713867, "step": 10217 }, { "epoch": 1.59, "learning_rate": 6.65303905804243e-06, "logits/chosen": -2.9719350337982178, "logits/rejected": -3.098726511001587, "logps/chosen": -69.49575805664062, "logps/rejected": -209.9433135986328, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.7453956604003906, "rewards/margins": 7.842526912689209, "rewards/rejected": -9.587923049926758, "step": 10218 }, { "epoch": 1.59, "learning_rate": 6.652305617511282e-06, "logits/chosen": -2.56919527053833, "logits/rejected": -3.048457145690918, "logps/chosen": -79.46133422851562, "logps/rejected": -214.89163208007812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.525452136993408, "rewards/margins": 6.413627624511719, "rewards/rejected": -9.939080238342285, "step": 10219 }, { "epoch": 1.59, "learning_rate": 6.651572176980134e-06, "logits/chosen": -2.735413074493408, "logits/rejected": -2.6151862144470215, "logps/chosen": -554.4075927734375, "logps/rejected": -755.202392578125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -3.216827392578125, "rewards/margins": 4.506656646728516, "rewards/rejected": -7.723484039306641, "step": 10220 }, { "epoch": 1.59, "learning_rate": 6.650838736448986e-06, "logits/chosen": -2.6402227878570557, "logits/rejected": -2.571733236312866, "logps/chosen": -133.9258575439453, "logps/rejected": -199.7584228515625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.792973518371582, "rewards/margins": 5.906388282775879, "rewards/rejected": -10.699361801147461, "step": 10221 }, { "epoch": 1.59, "learning_rate": 6.6501052959178385e-06, "logits/chosen": -2.6272754669189453, "logits/rejected": -2.317783832550049, "logps/chosen": -430.2168273925781, "logps/rejected": -339.02386474609375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.5389506816864014, "rewards/margins": 5.971432685852051, "rewards/rejected": -9.510383605957031, "step": 10222 }, { "epoch": 1.59, "learning_rate": 6.64937185538669e-06, "logits/chosen": -2.931915044784546, "logits/rejected": -3.0278818607330322, "logps/chosen": -172.23565673828125, "logps/rejected": -348.99298095703125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.5646262168884277, "rewards/margins": 7.9256062507629395, "rewards/rejected": -10.490232467651367, "step": 10223 }, { "epoch": 1.59, "learning_rate": 6.648638414855542e-06, "logits/chosen": -2.836474657058716, "logits/rejected": -2.4224798679351807, "logps/chosen": -368.3157958984375, "logps/rejected": -209.773681640625, "loss": 1.2729, "rewards/accuracies": 0.5, "rewards/chosen": -5.990269660949707, "rewards/margins": 1.1939232349395752, "rewards/rejected": -7.184192657470703, "step": 10224 }, { "epoch": 1.59, "learning_rate": 6.647904974324394e-06, "logits/chosen": -2.927156448364258, "logits/rejected": -1.8161437511444092, "logps/chosen": -636.4467163085938, "logps/rejected": -348.01812744140625, "loss": 0.0483, "rewards/accuracies": 1.0, "rewards/chosen": -3.111804246902466, "rewards/margins": 3.4444761276245117, "rewards/rejected": -6.556280136108398, "step": 10225 }, { "epoch": 1.59, "learning_rate": 6.647171533793246e-06, "logits/chosen": -2.9093785285949707, "logits/rejected": -2.5414843559265137, "logps/chosen": -157.12025451660156, "logps/rejected": -115.66949462890625, "loss": 0.6785, "rewards/accuracies": 0.5, "rewards/chosen": -5.1469407081604, "rewards/margins": 3.4519243240356445, "rewards/rejected": -8.598865509033203, "step": 10226 }, { "epoch": 1.59, "learning_rate": 6.646438093262099e-06, "logits/chosen": -3.100691318511963, "logits/rejected": -3.1246554851531982, "logps/chosen": -455.92913818359375, "logps/rejected": -578.7138671875, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -3.927182912826538, "rewards/margins": 6.604009628295898, "rewards/rejected": -10.531192779541016, "step": 10227 }, { "epoch": 1.59, "learning_rate": 6.6457046527309506e-06, "logits/chosen": -2.6334712505340576, "logits/rejected": -1.4133546352386475, "logps/chosen": -147.11203002929688, "logps/rejected": -137.8803253173828, "loss": 0.0953, "rewards/accuracies": 1.0, "rewards/chosen": -4.729308128356934, "rewards/margins": 3.9644458293914795, "rewards/rejected": -8.693754196166992, "step": 10228 }, { "epoch": 1.59, "learning_rate": 6.6449712121998025e-06, "logits/chosen": -2.567178726196289, "logits/rejected": -3.184912919998169, "logps/chosen": -103.31123352050781, "logps/rejected": -296.40179443359375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.31634259223938, "rewards/margins": 7.560649394989014, "rewards/rejected": -9.876992225646973, "step": 10229 }, { "epoch": 1.59, "learning_rate": 6.644237771668655e-06, "logits/chosen": -3.1425838470458984, "logits/rejected": -2.914884328842163, "logps/chosen": -169.41921997070312, "logps/rejected": -348.75439453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.183009624481201, "rewards/margins": 6.71124792098999, "rewards/rejected": -9.894257545471191, "step": 10230 }, { "epoch": 1.59, "learning_rate": 6.643504331137508e-06, "logits/chosen": -2.8614869117736816, "logits/rejected": -2.415057420730591, "logps/chosen": -147.54705810546875, "logps/rejected": -206.17205810546875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -4.829781532287598, "rewards/margins": 4.732975006103516, "rewards/rejected": -9.562756538391113, "step": 10231 }, { "epoch": 1.59, "learning_rate": 6.64277089060636e-06, "logits/chosen": -1.9965306520462036, "logits/rejected": -2.9789302349090576, "logps/chosen": -162.2322998046875, "logps/rejected": -496.5774841308594, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.175997257232666, "rewards/margins": 8.697304725646973, "rewards/rejected": -11.873302459716797, "step": 10232 }, { "epoch": 1.59, "learning_rate": 6.642037450075212e-06, "logits/chosen": -2.4847476482391357, "logits/rejected": -2.9982922077178955, "logps/chosen": -122.53712463378906, "logps/rejected": -240.00552368164062, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.695145845413208, "rewards/margins": 7.064662933349609, "rewards/rejected": -9.759809494018555, "step": 10233 }, { "epoch": 1.59, "learning_rate": 6.6413040095440635e-06, "logits/chosen": -2.7837271690368652, "logits/rejected": -2.048234701156616, "logps/chosen": -224.38156127929688, "logps/rejected": -145.6961212158203, "loss": 1.8222, "rewards/accuracies": 0.5, "rewards/chosen": -6.375372409820557, "rewards/margins": -1.3701444864273071, "rewards/rejected": -5.005228042602539, "step": 10234 }, { "epoch": 1.59, "learning_rate": 6.640570569012916e-06, "logits/chosen": -0.9071651697158813, "logits/rejected": -2.9482486248016357, "logps/chosen": -147.91893005371094, "logps/rejected": -601.8720703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.016319274902344, "rewards/margins": 6.6490278244018555, "rewards/rejected": -11.665348052978516, "step": 10235 }, { "epoch": 1.59, "learning_rate": 6.639837128481768e-06, "logits/chosen": -2.8464958667755127, "logits/rejected": -2.9376776218414307, "logps/chosen": -221.25819396972656, "logps/rejected": -259.4044189453125, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -3.952078342437744, "rewards/margins": 5.0454864501953125, "rewards/rejected": -8.997565269470215, "step": 10236 }, { "epoch": 1.59, "learning_rate": 6.63910368795062e-06, "logits/chosen": -1.0568158626556396, "logits/rejected": -2.2828354835510254, "logps/chosen": -294.04510498046875, "logps/rejected": -696.72900390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.312941074371338, "rewards/margins": 8.776872634887695, "rewards/rejected": -13.089813232421875, "step": 10237 }, { "epoch": 1.59, "learning_rate": 6.638370247419472e-06, "logits/chosen": -2.84842848777771, "logits/rejected": -2.8504064083099365, "logps/chosen": -115.64539337158203, "logps/rejected": -168.10931396484375, "loss": 2.201, "rewards/accuracies": 0.5, "rewards/chosen": -5.257100582122803, "rewards/margins": -0.4619448184967041, "rewards/rejected": -4.7951555252075195, "step": 10238 }, { "epoch": 1.59, "learning_rate": 6.637636806888324e-06, "logits/chosen": -2.930283546447754, "logits/rejected": -2.475414991378784, "logps/chosen": -765.7247314453125, "logps/rejected": -539.2916870117188, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.995361328125, "rewards/margins": 5.549875259399414, "rewards/rejected": -8.545236587524414, "step": 10239 }, { "epoch": 1.59, "learning_rate": 6.6369033663571765e-06, "logits/chosen": -2.8909802436828613, "logits/rejected": -3.047438621520996, "logps/chosen": -246.1375732421875, "logps/rejected": -369.128173828125, "loss": 0.4616, "rewards/accuracies": 0.5, "rewards/chosen": -4.7914581298828125, "rewards/margins": 3.3275623321533203, "rewards/rejected": -8.119020462036133, "step": 10240 }, { "epoch": 1.59, "learning_rate": 6.636169925826028e-06, "logits/chosen": -2.367981433868408, "logits/rejected": -3.035210371017456, "logps/chosen": -69.36339569091797, "logps/rejected": -407.04901123046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4082179069519043, "rewards/margins": 8.918962478637695, "rewards/rejected": -11.327180862426758, "step": 10241 }, { "epoch": 1.59, "learning_rate": 6.63543648529488e-06, "logits/chosen": -2.1108012199401855, "logits/rejected": -1.3424826860427856, "logps/chosen": -429.98516845703125, "logps/rejected": -258.42889404296875, "loss": 1.1143, "rewards/accuracies": 0.5, "rewards/chosen": -5.144331455230713, "rewards/margins": 2.5265555381774902, "rewards/rejected": -7.670886993408203, "step": 10242 }, { "epoch": 1.59, "learning_rate": 6.634703044763732e-06, "logits/chosen": -2.39941668510437, "logits/rejected": -2.3615896701812744, "logps/chosen": -115.57681274414062, "logps/rejected": -197.06959533691406, "loss": 1.3939, "rewards/accuracies": 0.5, "rewards/chosen": -5.432950496673584, "rewards/margins": 1.834371566772461, "rewards/rejected": -7.267322063446045, "step": 10243 }, { "epoch": 1.59, "learning_rate": 6.633969604232585e-06, "logits/chosen": -2.7293856143951416, "logits/rejected": -3.0114147663116455, "logps/chosen": -84.35910034179688, "logps/rejected": -261.21240234375, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": -5.453298091888428, "rewards/margins": 3.5319790840148926, "rewards/rejected": -8.98527717590332, "step": 10244 }, { "epoch": 1.59, "learning_rate": 6.633236163701437e-06, "logits/chosen": -2.087402820587158, "logits/rejected": -2.576270341873169, "logps/chosen": -193.64144897460938, "logps/rejected": -212.5467529296875, "loss": 0.3262, "rewards/accuracies": 1.0, "rewards/chosen": -4.980739593505859, "rewards/margins": 2.0430502891540527, "rewards/rejected": -7.02379035949707, "step": 10245 }, { "epoch": 1.59, "learning_rate": 6.632502723170289e-06, "logits/chosen": -2.711242437362671, "logits/rejected": -2.986036539077759, "logps/chosen": -375.62811279296875, "logps/rejected": -444.94329833984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.4405159950256348, "rewards/margins": 8.268585205078125, "rewards/rejected": -10.709101676940918, "step": 10246 }, { "epoch": 1.59, "learning_rate": 6.631769282639141e-06, "logits/chosen": -1.8136862516403198, "logits/rejected": -2.7172763347625732, "logps/chosen": -234.592041015625, "logps/rejected": -657.694580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6373249292373657, "rewards/margins": 10.776698112487793, "rewards/rejected": -12.414022445678711, "step": 10247 }, { "epoch": 1.59, "learning_rate": 6.631035842107993e-06, "logits/chosen": -2.443239212036133, "logits/rejected": -2.8386435508728027, "logps/chosen": -192.43563842773438, "logps/rejected": -232.9563751220703, "loss": 3.1916, "rewards/accuracies": 0.5, "rewards/chosen": -6.484750747680664, "rewards/margins": 0.9625203609466553, "rewards/rejected": -7.447271347045898, "step": 10248 }, { "epoch": 1.59, "learning_rate": 6.630302401576846e-06, "logits/chosen": -2.756457567214966, "logits/rejected": -1.6425590515136719, "logps/chosen": -459.3217468261719, "logps/rejected": -318.15093994140625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -6.3211774826049805, "rewards/margins": 4.370702266693115, "rewards/rejected": -10.691879272460938, "step": 10249 }, { "epoch": 1.59, "learning_rate": 6.629568961045698e-06, "logits/chosen": -1.7128527164459229, "logits/rejected": -3.168062686920166, "logps/chosen": -63.339134216308594, "logps/rejected": -458.7348937988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4788665771484375, "rewards/margins": 10.825159072875977, "rewards/rejected": -13.304025650024414, "step": 10250 }, { "epoch": 1.59, "learning_rate": 6.62883552051455e-06, "logits/chosen": -2.852870225906372, "logits/rejected": -2.5264439582824707, "logps/chosen": -358.4471130371094, "logps/rejected": -322.6256408691406, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -3.6096713542938232, "rewards/margins": 4.535985946655273, "rewards/rejected": -8.145657539367676, "step": 10251 }, { "epoch": 1.59, "learning_rate": 6.628102079983402e-06, "logits/chosen": -2.9492626190185547, "logits/rejected": -1.8496861457824707, "logps/chosen": -218.2338409423828, "logps/rejected": -216.80059814453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.741971731185913, "rewards/margins": 6.978021621704102, "rewards/rejected": -10.719993591308594, "step": 10252 }, { "epoch": 1.59, "learning_rate": 6.627368639452254e-06, "logits/chosen": -2.0298774242401123, "logits/rejected": -3.0460453033447266, "logps/chosen": -127.70748901367188, "logps/rejected": -367.6822204589844, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -2.1543970108032227, "rewards/margins": 7.781822204589844, "rewards/rejected": -9.936219215393066, "step": 10253 }, { "epoch": 1.59, "learning_rate": 6.626635198921106e-06, "logits/chosen": -2.2495481967926025, "logits/rejected": -3.161076307296753, "logps/chosen": -131.19029235839844, "logps/rejected": -348.74072265625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -2.8076772689819336, "rewards/margins": 6.474032402038574, "rewards/rejected": -9.281709671020508, "step": 10254 }, { "epoch": 1.59, "learning_rate": 6.625901758389958e-06, "logits/chosen": -2.591782331466675, "logits/rejected": -2.920924425125122, "logps/chosen": -593.2877197265625, "logps/rejected": -590.8088989257812, "loss": 0.5091, "rewards/accuracies": 0.5, "rewards/chosen": -5.849652290344238, "rewards/margins": 1.7679975032806396, "rewards/rejected": -7.617650032043457, "step": 10255 }, { "epoch": 1.6, "learning_rate": 6.62516831785881e-06, "logits/chosen": -2.3641397953033447, "logits/rejected": -3.089439630508423, "logps/chosen": -196.7987518310547, "logps/rejected": -323.68389892578125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.483511447906494, "rewards/margins": 8.258682250976562, "rewards/rejected": -10.742193222045898, "step": 10256 }, { "epoch": 1.6, "learning_rate": 6.624434877327662e-06, "logits/chosen": -2.861597776412964, "logits/rejected": -2.4879815578460693, "logps/chosen": -101.14697265625, "logps/rejected": -323.7166748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2034881114959717, "rewards/margins": 12.243833541870117, "rewards/rejected": -13.447320938110352, "step": 10257 }, { "epoch": 1.6, "learning_rate": 6.6237014367965145e-06, "logits/chosen": -1.4419718980789185, "logits/rejected": -2.654780387878418, "logps/chosen": -154.4718017578125, "logps/rejected": -385.22711181640625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.7585206031799316, "rewards/margins": 6.92504358291626, "rewards/rejected": -10.683564186096191, "step": 10258 }, { "epoch": 1.6, "learning_rate": 6.6229679962653664e-06, "logits/chosen": -3.0291907787323, "logits/rejected": -2.9135630130767822, "logps/chosen": -186.8028564453125, "logps/rejected": -354.8160400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8905727863311768, "rewards/margins": 11.082128524780273, "rewards/rejected": -14.972702026367188, "step": 10259 }, { "epoch": 1.6, "learning_rate": 6.622234555734218e-06, "logits/chosen": -2.942617416381836, "logits/rejected": -2.0457284450531006, "logps/chosen": -306.3492431640625, "logps/rejected": -202.80224609375, "loss": 3.6481, "rewards/accuracies": 0.5, "rewards/chosen": -6.528216361999512, "rewards/margins": -0.6898958683013916, "rewards/rejected": -5.838320255279541, "step": 10260 }, { "epoch": 1.6, "learning_rate": 6.62150111520307e-06, "logits/chosen": -2.2057945728302, "logits/rejected": -2.6853866577148438, "logps/chosen": -354.10162353515625, "logps/rejected": -455.2839660644531, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -6.396814823150635, "rewards/margins": 6.429724216461182, "rewards/rejected": -12.826539039611816, "step": 10261 }, { "epoch": 1.6, "learning_rate": 6.620767674671923e-06, "logits/chosen": -2.536874532699585, "logits/rejected": -2.773796558380127, "logps/chosen": -107.74683380126953, "logps/rejected": -215.7848358154297, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.740755558013916, "rewards/margins": 8.346504211425781, "rewards/rejected": -11.087259292602539, "step": 10262 }, { "epoch": 1.6, "learning_rate": 6.620034234140775e-06, "logits/chosen": -1.5500738620758057, "logits/rejected": -2.4821815490722656, "logps/chosen": -141.1268310546875, "logps/rejected": -319.6030578613281, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.1289267539978027, "rewards/margins": 6.83668327331543, "rewards/rejected": -9.96561050415039, "step": 10263 }, { "epoch": 1.6, "learning_rate": 6.6193007936096275e-06, "logits/chosen": -3.0862696170806885, "logits/rejected": -2.521865129470825, "logps/chosen": -147.82571411132812, "logps/rejected": -328.57861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.440422296524048, "rewards/margins": 10.42605972290039, "rewards/rejected": -13.86648178100586, "step": 10264 }, { "epoch": 1.6, "learning_rate": 6.618567353078479e-06, "logits/chosen": -1.706526517868042, "logits/rejected": -2.290415048599243, "logps/chosen": -232.2025146484375, "logps/rejected": -272.4396057128906, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.993213653564453, "rewards/margins": 6.454240798950195, "rewards/rejected": -9.447454452514648, "step": 10265 }, { "epoch": 1.6, "learning_rate": 6.617833912547331e-06, "logits/chosen": -2.8207554817199707, "logits/rejected": -2.611177682876587, "logps/chosen": -439.4951171875, "logps/rejected": -472.48724365234375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.374499797821045, "rewards/margins": 7.116610527038574, "rewards/rejected": -11.491109848022461, "step": 10266 }, { "epoch": 1.6, "learning_rate": 6.617100472016184e-06, "logits/chosen": -2.2115962505340576, "logits/rejected": -2.8969500064849854, "logps/chosen": -355.1963806152344, "logps/rejected": -601.96484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.206761360168457, "rewards/margins": 8.132988929748535, "rewards/rejected": -11.339750289916992, "step": 10267 }, { "epoch": 1.6, "learning_rate": 6.616367031485036e-06, "logits/chosen": -2.3961451053619385, "logits/rejected": -2.8088557720184326, "logps/chosen": -228.27957153320312, "logps/rejected": -397.19781494140625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.843791961669922, "rewards/margins": 8.473920822143555, "rewards/rejected": -11.317712783813477, "step": 10268 }, { "epoch": 1.6, "learning_rate": 6.615633590953888e-06, "logits/chosen": -2.722245454788208, "logits/rejected": -2.6131954193115234, "logps/chosen": -482.99432373046875, "logps/rejected": -446.5860290527344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.2119150161743164, "rewards/margins": 7.493715286254883, "rewards/rejected": -9.705629348754883, "step": 10269 }, { "epoch": 1.6, "learning_rate": 6.61490015042274e-06, "logits/chosen": -3.150160074234009, "logits/rejected": -3.0987014770507812, "logps/chosen": -578.8826904296875, "logps/rejected": -509.34735107421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.61490797996521, "rewards/margins": 7.964398384094238, "rewards/rejected": -10.579305648803711, "step": 10270 }, { "epoch": 1.6, "learning_rate": 6.614166709891592e-06, "logits/chosen": -2.2719533443450928, "logits/rejected": -2.950573205947876, "logps/chosen": -519.3594970703125, "logps/rejected": -577.8704833984375, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -2.7441656589508057, "rewards/margins": 5.185153484344482, "rewards/rejected": -7.929319381713867, "step": 10271 }, { "epoch": 1.6, "learning_rate": 6.613433269360444e-06, "logits/chosen": -2.0515503883361816, "logits/rejected": -2.8914077281951904, "logps/chosen": -494.75213623046875, "logps/rejected": -718.7574462890625, "loss": 0.1375, "rewards/accuracies": 1.0, "rewards/chosen": -4.129245281219482, "rewards/margins": 4.250927925109863, "rewards/rejected": -8.380172729492188, "step": 10272 }, { "epoch": 1.6, "learning_rate": 6.612699828829296e-06, "logits/chosen": -3.1371350288391113, "logits/rejected": -2.7290937900543213, "logps/chosen": -376.8401184082031, "logps/rejected": -262.3609924316406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.0391969680786133, "rewards/margins": 6.594766616821289, "rewards/rejected": -9.633964538574219, "step": 10273 }, { "epoch": 1.6, "learning_rate": 6.611966388298148e-06, "logits/chosen": -1.3663334846496582, "logits/rejected": -2.709672451019287, "logps/chosen": -106.40907287597656, "logps/rejected": -273.0523986816406, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.230511665344238, "rewards/margins": 7.275008678436279, "rewards/rejected": -11.50551986694336, "step": 10274 }, { "epoch": 1.6, "learning_rate": 6.611232947767e-06, "logits/chosen": -1.6665401458740234, "logits/rejected": -2.9603166580200195, "logps/chosen": -173.48175048828125, "logps/rejected": -306.1156005859375, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -4.442103862762451, "rewards/margins": 5.979270935058594, "rewards/rejected": -10.421375274658203, "step": 10275 }, { "epoch": 1.6, "learning_rate": 6.610499507235853e-06, "logits/chosen": -2.606452226638794, "logits/rejected": -2.8781933784484863, "logps/chosen": -159.6378173828125, "logps/rejected": -270.943359375, "loss": 1.6769, "rewards/accuracies": 0.5, "rewards/chosen": -7.237803936004639, "rewards/margins": 4.117641448974609, "rewards/rejected": -11.355445861816406, "step": 10276 }, { "epoch": 1.6, "learning_rate": 6.6097660667047045e-06, "logits/chosen": -2.4753055572509766, "logits/rejected": -2.9593615531921387, "logps/chosen": -143.30987548828125, "logps/rejected": -230.79580688476562, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -4.291073799133301, "rewards/margins": 5.913887977600098, "rewards/rejected": -10.204961776733398, "step": 10277 }, { "epoch": 1.6, "learning_rate": 6.609032626173556e-06, "logits/chosen": -1.290068507194519, "logits/rejected": -2.9455506801605225, "logps/chosen": -436.93450927734375, "logps/rejected": -837.8694458007812, "loss": 0.1459, "rewards/accuracies": 1.0, "rewards/chosen": -6.2919816970825195, "rewards/margins": 2.5456349849700928, "rewards/rejected": -8.837615966796875, "step": 10278 }, { "epoch": 1.6, "learning_rate": 6.608299185642408e-06, "logits/chosen": -2.2980895042419434, "logits/rejected": -2.6250290870666504, "logps/chosen": -106.61518096923828, "logps/rejected": -322.877685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.651177167892456, "rewards/margins": 11.395933151245117, "rewards/rejected": -13.047109603881836, "step": 10279 }, { "epoch": 1.6, "learning_rate": 6.607565745111261e-06, "logits/chosen": -1.9499174356460571, "logits/rejected": -2.8073971271514893, "logps/chosen": -385.7192077636719, "logps/rejected": -431.00067138671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.5291748046875, "rewards/margins": 8.132412910461426, "rewards/rejected": -12.661587715148926, "step": 10280 }, { "epoch": 1.6, "learning_rate": 6.606832304580114e-06, "logits/chosen": -1.3756276369094849, "logits/rejected": -2.866738796234131, "logps/chosen": -108.77871704101562, "logps/rejected": -371.85009765625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.966245174407959, "rewards/margins": 5.775216102600098, "rewards/rejected": -9.741460800170898, "step": 10281 }, { "epoch": 1.6, "learning_rate": 6.6060988640489656e-06, "logits/chosen": -1.6075342893600464, "logits/rejected": -2.780137300491333, "logps/chosen": -144.78250122070312, "logps/rejected": -311.49237060546875, "loss": 0.4145, "rewards/accuracies": 0.5, "rewards/chosen": -3.954707622528076, "rewards/margins": 4.250528335571289, "rewards/rejected": -8.205236434936523, "step": 10282 }, { "epoch": 1.6, "learning_rate": 6.6053654235178174e-06, "logits/chosen": -2.2820589542388916, "logits/rejected": -2.851638078689575, "logps/chosen": -70.46223449707031, "logps/rejected": -233.2530517578125, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -5.484706878662109, "rewards/margins": 5.951735496520996, "rewards/rejected": -11.436442375183105, "step": 10283 }, { "epoch": 1.6, "learning_rate": 6.60463198298667e-06, "logits/chosen": -2.992154836654663, "logits/rejected": -2.4580936431884766, "logps/chosen": -264.800048828125, "logps/rejected": -198.41168212890625, "loss": 0.9009, "rewards/accuracies": 0.5, "rewards/chosen": -6.293676376342773, "rewards/margins": 2.104692220687866, "rewards/rejected": -8.398368835449219, "step": 10284 }, { "epoch": 1.6, "learning_rate": 6.603898542455522e-06, "logits/chosen": -2.423675060272217, "logits/rejected": -3.0746676921844482, "logps/chosen": -191.06375122070312, "logps/rejected": -291.9749755859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.238214015960693, "rewards/margins": 6.793742656707764, "rewards/rejected": -11.031956672668457, "step": 10285 }, { "epoch": 1.6, "learning_rate": 6.603165101924374e-06, "logits/chosen": -2.4223721027374268, "logits/rejected": -2.741698741912842, "logps/chosen": -108.40773010253906, "logps/rejected": -223.6891326904297, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.8616065979003906, "rewards/margins": 5.804393768310547, "rewards/rejected": -8.666000366210938, "step": 10286 }, { "epoch": 1.6, "learning_rate": 6.602431661393226e-06, "logits/chosen": -2.1649110317230225, "logits/rejected": -2.8963818550109863, "logps/chosen": -267.57373046875, "logps/rejected": -488.0201416015625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -4.065333843231201, "rewards/margins": 5.302166938781738, "rewards/rejected": -9.367500305175781, "step": 10287 }, { "epoch": 1.6, "learning_rate": 6.601698220862078e-06, "logits/chosen": -3.10207200050354, "logits/rejected": -2.5789084434509277, "logps/chosen": -520.9890747070312, "logps/rejected": -270.3243408203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.3301941156387329, "rewards/margins": 8.424665451049805, "rewards/rejected": -8.754859924316406, "step": 10288 }, { "epoch": 1.6, "learning_rate": 6.60096478033093e-06, "logits/chosen": -2.4266273975372314, "logits/rejected": -3.0867135524749756, "logps/chosen": -127.98762512207031, "logps/rejected": -331.3045654296875, "loss": 0.1501, "rewards/accuracies": 1.0, "rewards/chosen": -3.717644214630127, "rewards/margins": 7.387509346008301, "rewards/rejected": -11.10515308380127, "step": 10289 }, { "epoch": 1.6, "learning_rate": 6.600231339799782e-06, "logits/chosen": -2.425420045852661, "logits/rejected": -3.1229774951934814, "logps/chosen": -88.74458312988281, "logps/rejected": -315.31976318359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.5396528244018555, "rewards/margins": 8.322813034057617, "rewards/rejected": -10.862464904785156, "step": 10290 }, { "epoch": 1.6, "learning_rate": 6.599497899268634e-06, "logits/chosen": -1.6652532815933228, "logits/rejected": -3.211693286895752, "logps/chosen": -147.66329956054688, "logps/rejected": -513.16015625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.643507957458496, "rewards/margins": 7.569791793823242, "rewards/rejected": -12.213299751281738, "step": 10291 }, { "epoch": 1.6, "learning_rate": 6.598764458737486e-06, "logits/chosen": -2.896570920944214, "logits/rejected": -2.210688352584839, "logps/chosen": -200.76611328125, "logps/rejected": -252.3555450439453, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6266491413116455, "rewards/margins": 9.013612747192383, "rewards/rejected": -10.640262603759766, "step": 10292 }, { "epoch": 1.6, "learning_rate": 6.598031018206339e-06, "logits/chosen": -2.546257495880127, "logits/rejected": -2.846282720565796, "logps/chosen": -428.02044677734375, "logps/rejected": -442.8675842285156, "loss": 0.4243, "rewards/accuracies": 0.5, "rewards/chosen": -3.457162618637085, "rewards/margins": 6.305533409118652, "rewards/rejected": -9.762696266174316, "step": 10293 }, { "epoch": 1.6, "learning_rate": 6.597297577675191e-06, "logits/chosen": -1.7153509855270386, "logits/rejected": -2.420011520385742, "logps/chosen": -117.38289642333984, "logps/rejected": -330.7190856933594, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.994790554046631, "rewards/margins": 7.7173542976379395, "rewards/rejected": -11.71214485168457, "step": 10294 }, { "epoch": 1.6, "learning_rate": 6.5965641371440425e-06, "logits/chosen": -3.199897527694702, "logits/rejected": -3.008336067199707, "logps/chosen": -882.7092895507812, "logps/rejected": -623.9268798828125, "loss": 0.4112, "rewards/accuracies": 0.5, "rewards/chosen": -7.072809219360352, "rewards/margins": 2.5234007835388184, "rewards/rejected": -9.596209526062012, "step": 10295 }, { "epoch": 1.6, "learning_rate": 6.595830696612894e-06, "logits/chosen": -1.35910964012146, "logits/rejected": -2.6780874729156494, "logps/chosen": -148.8595733642578, "logps/rejected": -215.8096466064453, "loss": 0.0526, "rewards/accuracies": 1.0, "rewards/chosen": -3.2473034858703613, "rewards/margins": 4.5373077392578125, "rewards/rejected": -7.784611701965332, "step": 10296 }, { "epoch": 1.6, "learning_rate": 6.595097256081747e-06, "logits/chosen": -3.0976502895355225, "logits/rejected": -2.8097496032714844, "logps/chosen": -318.66485595703125, "logps/rejected": -416.620361328125, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -6.302397727966309, "rewards/margins": 4.594087600708008, "rewards/rejected": -10.896484375, "step": 10297 }, { "epoch": 1.6, "learning_rate": 6.5943638155506e-06, "logits/chosen": -1.7732338905334473, "logits/rejected": -3.0019278526306152, "logps/chosen": -116.894775390625, "logps/rejected": -318.51220703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.856034517288208, "rewards/margins": 8.560331344604492, "rewards/rejected": -10.416366577148438, "step": 10298 }, { "epoch": 1.6, "learning_rate": 6.593630375019452e-06, "logits/chosen": -2.5218071937561035, "logits/rejected": -2.1992039680480957, "logps/chosen": -470.9851989746094, "logps/rejected": -447.011962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.0483016967773438, "rewards/margins": 9.215652465820312, "rewards/rejected": -11.263954162597656, "step": 10299 }, { "epoch": 1.6, "learning_rate": 6.592896934488304e-06, "logits/chosen": -2.409752130508423, "logits/rejected": -3.1778552532196045, "logps/chosen": -352.79095458984375, "logps/rejected": -528.9625854492188, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -3.4552576541900635, "rewards/margins": 3.2058815956115723, "rewards/rejected": -6.661139488220215, "step": 10300 }, { "epoch": 1.6, "learning_rate": 6.5921634939571555e-06, "logits/chosen": -2.5588412284851074, "logits/rejected": -2.5460407733917236, "logps/chosen": -138.86032104492188, "logps/rejected": -348.92254638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4250082969665527, "rewards/margins": 9.836103439331055, "rewards/rejected": -13.26111125946045, "step": 10301 }, { "epoch": 1.6, "learning_rate": 6.591430053426008e-06, "logits/chosen": -1.982398271560669, "logits/rejected": -2.3956406116485596, "logps/chosen": -170.1221466064453, "logps/rejected": -431.57257080078125, "loss": 0.2568, "rewards/accuracies": 1.0, "rewards/chosen": -5.756821632385254, "rewards/margins": 6.061460494995117, "rewards/rejected": -11.818282127380371, "step": 10302 }, { "epoch": 1.6, "learning_rate": 6.59069661289486e-06, "logits/chosen": -1.8394989967346191, "logits/rejected": -3.1615512371063232, "logps/chosen": -70.3246841430664, "logps/rejected": -322.77154541015625, "loss": 0.192, "rewards/accuracies": 1.0, "rewards/chosen": -3.2913103103637695, "rewards/margins": 5.581600666046143, "rewards/rejected": -8.87291145324707, "step": 10303 }, { "epoch": 1.6, "learning_rate": 6.589963172363712e-06, "logits/chosen": -2.033118963241577, "logits/rejected": -2.803476095199585, "logps/chosen": -428.59661865234375, "logps/rejected": -400.26190185546875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.7700469493865967, "rewards/margins": 6.579495429992676, "rewards/rejected": -10.349542617797852, "step": 10304 }, { "epoch": 1.6, "learning_rate": 6.589229731832564e-06, "logits/chosen": -1.7650114297866821, "logits/rejected": -2.9200527667999268, "logps/chosen": -257.92181396484375, "logps/rejected": -532.8909301757812, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.907124042510986, "rewards/margins": 8.125516891479492, "rewards/rejected": -13.032642364501953, "step": 10305 }, { "epoch": 1.6, "learning_rate": 6.588496291301416e-06, "logits/chosen": -2.8036231994628906, "logits/rejected": -2.2882604598999023, "logps/chosen": -377.0539855957031, "logps/rejected": -521.9866943359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5280604362487793, "rewards/margins": 8.94831657409668, "rewards/rejected": -12.4763765335083, "step": 10306 }, { "epoch": 1.6, "learning_rate": 6.5877628507702684e-06, "logits/chosen": -2.3448779582977295, "logits/rejected": -2.8517091274261475, "logps/chosen": -722.1124267578125, "logps/rejected": -835.3390502929688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.242031097412109, "rewards/margins": 9.124524116516113, "rewards/rejected": -13.366555213928223, "step": 10307 }, { "epoch": 1.6, "learning_rate": 6.58702941023912e-06, "logits/chosen": -2.8927676677703857, "logits/rejected": -3.4242324829101562, "logps/chosen": -123.6874008178711, "logps/rejected": -478.79644775390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.184094429016113, "rewards/margins": 8.867387771606445, "rewards/rejected": -15.051483154296875, "step": 10308 }, { "epoch": 1.6, "learning_rate": 6.586295969707972e-06, "logits/chosen": -2.7485973834991455, "logits/rejected": -3.145524740219116, "logps/chosen": -139.5059814453125, "logps/rejected": -268.1474304199219, "loss": 0.1463, "rewards/accuracies": 1.0, "rewards/chosen": -4.520083427429199, "rewards/margins": 2.3079562187194824, "rewards/rejected": -6.828039169311523, "step": 10309 }, { "epoch": 1.6, "learning_rate": 6.585562529176824e-06, "logits/chosen": -2.861600637435913, "logits/rejected": -1.8012527227401733, "logps/chosen": -330.24066162109375, "logps/rejected": -289.87042236328125, "loss": 1.6882, "rewards/accuracies": 0.5, "rewards/chosen": -3.8502397537231445, "rewards/margins": 2.3854143619537354, "rewards/rejected": -6.235653877258301, "step": 10310 }, { "epoch": 1.6, "learning_rate": 6.584829088645677e-06, "logits/chosen": -2.670776844024658, "logits/rejected": -3.310382604598999, "logps/chosen": -294.326171875, "logps/rejected": -585.560791015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.3887739181518555, "rewards/margins": 7.9975056648254395, "rewards/rejected": -12.386280059814453, "step": 10311 }, { "epoch": 1.6, "learning_rate": 6.584095648114529e-06, "logits/chosen": -1.9690020084381104, "logits/rejected": -2.858227491378784, "logps/chosen": -352.5604248046875, "logps/rejected": -375.2241516113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4004364013671875, "rewards/margins": 10.780094146728516, "rewards/rejected": -13.180530548095703, "step": 10312 }, { "epoch": 1.6, "learning_rate": 6.5833622075833806e-06, "logits/chosen": -2.3294854164123535, "logits/rejected": -2.777951240539551, "logps/chosen": -123.38773345947266, "logps/rejected": -250.2132110595703, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -3.6724624633789062, "rewards/margins": 5.998106479644775, "rewards/rejected": -9.670568466186523, "step": 10313 }, { "epoch": 1.6, "learning_rate": 6.582628767052233e-06, "logits/chosen": -2.9646847248077393, "logits/rejected": -2.82422137260437, "logps/chosen": -151.2864990234375, "logps/rejected": -181.66598510742188, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2116432189941406, "rewards/margins": 6.700913906097412, "rewards/rejected": -9.912557601928711, "step": 10314 }, { "epoch": 1.6, "learning_rate": 6.581895326521085e-06, "logits/chosen": -2.953049659729004, "logits/rejected": -2.8760933876037598, "logps/chosen": -113.22917175292969, "logps/rejected": -318.8406982421875, "loss": 0.1154, "rewards/accuracies": 1.0, "rewards/chosen": -4.117189407348633, "rewards/margins": 8.437094688415527, "rewards/rejected": -12.55428409576416, "step": 10315 }, { "epoch": 1.6, "learning_rate": 6.581161885989938e-06, "logits/chosen": -3.1164021492004395, "logits/rejected": -3.0769639015197754, "logps/chosen": -352.605712890625, "logps/rejected": -320.65863037109375, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -5.1451311111450195, "rewards/margins": 4.891144752502441, "rewards/rejected": -10.036275863647461, "step": 10316 }, { "epoch": 1.6, "learning_rate": 6.58042844545879e-06, "logits/chosen": -2.612851142883301, "logits/rejected": -3.1169497966766357, "logps/chosen": -373.01495361328125, "logps/rejected": -579.233642578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.8956046104431152, "rewards/margins": 10.225456237792969, "rewards/rejected": -13.121061325073242, "step": 10317 }, { "epoch": 1.6, "learning_rate": 6.579695004927642e-06, "logits/chosen": -2.020329475402832, "logits/rejected": -2.2875583171844482, "logps/chosen": -329.0171203613281, "logps/rejected": -402.44915771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.595254898071289, "rewards/margins": 11.690540313720703, "rewards/rejected": -16.28579330444336, "step": 10318 }, { "epoch": 1.6, "learning_rate": 6.5789615643964935e-06, "logits/chosen": -2.2535104751586914, "logits/rejected": -2.723977565765381, "logps/chosen": -301.7396240234375, "logps/rejected": -344.56732177734375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -7.290288925170898, "rewards/margins": 5.268046855926514, "rewards/rejected": -12.55833625793457, "step": 10319 }, { "epoch": 1.6, "learning_rate": 6.578228123865346e-06, "logits/chosen": -2.9793291091918945, "logits/rejected": -2.9488587379455566, "logps/chosen": -340.9678955078125, "logps/rejected": -323.4001159667969, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -4.007752895355225, "rewards/margins": 4.603575706481934, "rewards/rejected": -8.611329078674316, "step": 10320 }, { "epoch": 1.61, "learning_rate": 6.577494683334198e-06, "logits/chosen": -2.797640800476074, "logits/rejected": -3.1210341453552246, "logps/chosen": -111.43240356445312, "logps/rejected": -317.86328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.2429542541503906, "rewards/margins": 7.690061569213867, "rewards/rejected": -10.933015823364258, "step": 10321 }, { "epoch": 1.61, "learning_rate": 6.57676124280305e-06, "logits/chosen": -1.8004124164581299, "logits/rejected": -2.905548572540283, "logps/chosen": -243.7419891357422, "logps/rejected": -468.7674560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1710808277130127, "rewards/margins": 9.462606430053711, "rewards/rejected": -12.633687019348145, "step": 10322 }, { "epoch": 1.61, "learning_rate": 6.576027802271902e-06, "logits/chosen": -3.3474583625793457, "logits/rejected": -3.370422124862671, "logps/chosen": -144.0534210205078, "logps/rejected": -272.5306396484375, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -5.422774314880371, "rewards/margins": 5.754000663757324, "rewards/rejected": -11.176774978637695, "step": 10323 }, { "epoch": 1.61, "learning_rate": 6.575294361740754e-06, "logits/chosen": -2.9123001098632812, "logits/rejected": -2.792349100112915, "logps/chosen": -281.4879455566406, "logps/rejected": -217.6191864013672, "loss": 0.377, "rewards/accuracies": 0.5, "rewards/chosen": -4.676372051239014, "rewards/margins": 2.0483028888702393, "rewards/rejected": -6.724675178527832, "step": 10324 }, { "epoch": 1.61, "learning_rate": 6.5745609212096065e-06, "logits/chosen": -2.430729389190674, "logits/rejected": -2.9013924598693848, "logps/chosen": -155.19900512695312, "logps/rejected": -501.04241943359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.227278232574463, "rewards/margins": 7.911749362945557, "rewards/rejected": -11.13902759552002, "step": 10325 }, { "epoch": 1.61, "learning_rate": 6.573827480678458e-06, "logits/chosen": -2.0338234901428223, "logits/rejected": -2.993074417114258, "logps/chosen": -354.1626281738281, "logps/rejected": -642.306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.952225685119629, "rewards/margins": 9.943907737731934, "rewards/rejected": -13.896133422851562, "step": 10326 }, { "epoch": 1.61, "learning_rate": 6.57309404014731e-06, "logits/chosen": -2.174252510070801, "logits/rejected": -2.637598991394043, "logps/chosen": -323.86029052734375, "logps/rejected": -346.57562255859375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -3.3022446632385254, "rewards/margins": 4.138205528259277, "rewards/rejected": -7.4404497146606445, "step": 10327 }, { "epoch": 1.61, "learning_rate": 6.572360599616162e-06, "logits/chosen": -3.023391008377075, "logits/rejected": -2.218508720397949, "logps/chosen": -637.0887451171875, "logps/rejected": -498.79766845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.712493896484375, "rewards/margins": 9.349199295043945, "rewards/rejected": -12.06169319152832, "step": 10328 }, { "epoch": 1.61, "learning_rate": 6.571627159085015e-06, "logits/chosen": -2.2780232429504395, "logits/rejected": -2.793649911880493, "logps/chosen": -133.91751098632812, "logps/rejected": -457.55828857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.928341865539551, "rewards/margins": 13.146648406982422, "rewards/rejected": -16.07499122619629, "step": 10329 }, { "epoch": 1.61, "learning_rate": 6.570893718553867e-06, "logits/chosen": -2.0957376956939697, "logits/rejected": -2.6822869777679443, "logps/chosen": -140.16082763671875, "logps/rejected": -321.6622314453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7674782276153564, "rewards/margins": 8.80485725402832, "rewards/rejected": -11.572335243225098, "step": 10330 }, { "epoch": 1.61, "learning_rate": 6.5701602780227194e-06, "logits/chosen": -2.60552978515625, "logits/rejected": -2.287496566772461, "logps/chosen": -421.99761962890625, "logps/rejected": -435.64654541015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8684539794921875, "rewards/margins": 9.340948104858398, "rewards/rejected": -11.209402084350586, "step": 10331 }, { "epoch": 1.61, "learning_rate": 6.569426837491571e-06, "logits/chosen": -2.6107401847839355, "logits/rejected": -2.912095308303833, "logps/chosen": -64.96293640136719, "logps/rejected": -340.6566162109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6203579902648926, "rewards/margins": 9.12708854675293, "rewards/rejected": -12.747446060180664, "step": 10332 }, { "epoch": 1.61, "learning_rate": 6.568693396960424e-06, "logits/chosen": -1.5959835052490234, "logits/rejected": -2.8617444038391113, "logps/chosen": -121.09127807617188, "logps/rejected": -577.9354248046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.182072162628174, "rewards/margins": 10.528892517089844, "rewards/rejected": -13.71096420288086, "step": 10333 }, { "epoch": 1.61, "learning_rate": 6.567959956429276e-06, "logits/chosen": -2.424765110015869, "logits/rejected": -2.837859630584717, "logps/chosen": -129.21694946289062, "logps/rejected": -344.81732177734375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.9334030151367188, "rewards/margins": 6.597442626953125, "rewards/rejected": -10.530845642089844, "step": 10334 }, { "epoch": 1.61, "learning_rate": 6.567226515898128e-06, "logits/chosen": -1.3670762777328491, "logits/rejected": -3.004134178161621, "logps/chosen": -98.08622741699219, "logps/rejected": -506.5400390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9328713417053223, "rewards/margins": 11.540660858154297, "rewards/rejected": -15.473532676696777, "step": 10335 }, { "epoch": 1.61, "learning_rate": 6.56649307536698e-06, "logits/chosen": -1.69540274143219, "logits/rejected": -2.7721781730651855, "logps/chosen": -261.0842590332031, "logps/rejected": -472.0885009765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.245646953582764, "rewards/margins": 8.11406421661377, "rewards/rejected": -12.359710693359375, "step": 10336 }, { "epoch": 1.61, "learning_rate": 6.5657596348358316e-06, "logits/chosen": -2.6184184551239014, "logits/rejected": -2.9370787143707275, "logps/chosen": -68.25273895263672, "logps/rejected": -212.90109252929688, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.3372366428375244, "rewards/margins": 6.209253311157227, "rewards/rejected": -9.546489715576172, "step": 10337 }, { "epoch": 1.61, "learning_rate": 6.565026194304684e-06, "logits/chosen": -2.7808055877685547, "logits/rejected": -2.286149024963379, "logps/chosen": -400.3960876464844, "logps/rejected": -495.5682067871094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.210131883621216, "rewards/margins": 10.816328048706055, "rewards/rejected": -13.026460647583008, "step": 10338 }, { "epoch": 1.61, "learning_rate": 6.564292753773536e-06, "logits/chosen": -2.4525346755981445, "logits/rejected": -2.9636433124542236, "logps/chosen": -152.14706420898438, "logps/rejected": -368.68731689453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.868412494659424, "rewards/margins": 8.121268272399902, "rewards/rejected": -12.989681243896484, "step": 10339 }, { "epoch": 1.61, "learning_rate": 6.563559313242388e-06, "logits/chosen": -1.566918969154358, "logits/rejected": -2.785003900527954, "logps/chosen": -536.0789794921875, "logps/rejected": -519.79833984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.560665130615234, "rewards/margins": 7.715860843658447, "rewards/rejected": -13.276525497436523, "step": 10340 }, { "epoch": 1.61, "learning_rate": 6.56282587271124e-06, "logits/chosen": -2.913315773010254, "logits/rejected": -2.9270823001861572, "logps/chosen": -259.7318115234375, "logps/rejected": -321.59368896484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -0.851409912109375, "rewards/margins": 8.285200119018555, "rewards/rejected": -9.13661003112793, "step": 10341 }, { "epoch": 1.61, "learning_rate": 6.562092432180093e-06, "logits/chosen": -2.6216158866882324, "logits/rejected": -2.9337005615234375, "logps/chosen": -142.12896728515625, "logps/rejected": -322.54034423828125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.1453566551208496, "rewards/margins": 6.652040481567383, "rewards/rejected": -8.79739761352539, "step": 10342 }, { "epoch": 1.61, "learning_rate": 6.5613589916489445e-06, "logits/chosen": -2.838005542755127, "logits/rejected": -2.6267693042755127, "logps/chosen": -323.4002685546875, "logps/rejected": -293.079345703125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -4.246618747711182, "rewards/margins": 6.204467296600342, "rewards/rejected": -10.451086044311523, "step": 10343 }, { "epoch": 1.61, "learning_rate": 6.560625551117796e-06, "logits/chosen": -2.7986464500427246, "logits/rejected": -2.381229877471924, "logps/chosen": -151.24424743652344, "logps/rejected": -308.8482360839844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.818204879760742, "rewards/margins": 9.378320693969727, "rewards/rejected": -13.196525573730469, "step": 10344 }, { "epoch": 1.61, "learning_rate": 6.559892110586648e-06, "logits/chosen": -2.7612290382385254, "logits/rejected": -2.456148147583008, "logps/chosen": -336.3909912109375, "logps/rejected": -573.2980346679688, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.318583965301514, "rewards/margins": 6.619070053100586, "rewards/rejected": -10.937654495239258, "step": 10345 }, { "epoch": 1.61, "learning_rate": 6.5591586700555e-06, "logits/chosen": -2.143157958984375, "logits/rejected": -2.9304237365722656, "logps/chosen": -194.73008728027344, "logps/rejected": -364.54541015625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -4.234625816345215, "rewards/margins": 5.932649612426758, "rewards/rejected": -10.167275428771973, "step": 10346 }, { "epoch": 1.61, "learning_rate": 6.558425229524353e-06, "logits/chosen": -2.890324592590332, "logits/rejected": -2.1471590995788574, "logps/chosen": -830.6371459960938, "logps/rejected": -577.6419067382812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5650839805603027, "rewards/margins": 10.464193344116211, "rewards/rejected": -14.029276847839355, "step": 10347 }, { "epoch": 1.61, "learning_rate": 6.557691788993206e-06, "logits/chosen": -2.921732187271118, "logits/rejected": -1.828757643699646, "logps/chosen": -348.1240539550781, "logps/rejected": -266.78173828125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.0817840099334717, "rewards/margins": 7.272188186645508, "rewards/rejected": -8.353972434997559, "step": 10348 }, { "epoch": 1.61, "learning_rate": 6.5569583484620575e-06, "logits/chosen": -2.7382185459136963, "logits/rejected": -2.3018312454223633, "logps/chosen": -196.16131591796875, "logps/rejected": -218.93624877929688, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -3.384171485900879, "rewards/margins": 4.531989574432373, "rewards/rejected": -7.91616153717041, "step": 10349 }, { "epoch": 1.61, "learning_rate": 6.556224907930909e-06, "logits/chosen": -1.8964895009994507, "logits/rejected": -2.893695116043091, "logps/chosen": -382.8702087402344, "logps/rejected": -488.15216064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.938380241394043, "rewards/margins": 11.682019233703613, "rewards/rejected": -17.620399475097656, "step": 10350 }, { "epoch": 1.61, "learning_rate": 6.555491467399762e-06, "logits/chosen": -1.6370841264724731, "logits/rejected": -2.8491644859313965, "logps/chosen": -361.97100830078125, "logps/rejected": -523.531494140625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -4.076632022857666, "rewards/margins": 6.695456504821777, "rewards/rejected": -10.772089004516602, "step": 10351 }, { "epoch": 1.61, "learning_rate": 6.554758026868614e-06, "logits/chosen": -2.5576486587524414, "logits/rejected": -2.842691659927368, "logps/chosen": -312.56201171875, "logps/rejected": -453.4119873046875, "loss": 0.5768, "rewards/accuracies": 0.5, "rewards/chosen": -7.70575475692749, "rewards/margins": 5.522223472595215, "rewards/rejected": -13.227977752685547, "step": 10352 }, { "epoch": 1.61, "learning_rate": 6.554024586337466e-06, "logits/chosen": -1.7075676918029785, "logits/rejected": -2.8582377433776855, "logps/chosen": -151.3492431640625, "logps/rejected": -550.5982666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.369325160980225, "rewards/margins": 14.66316032409668, "rewards/rejected": -19.032485961914062, "step": 10353 }, { "epoch": 1.61, "learning_rate": 6.553291145806318e-06, "logits/chosen": -2.6203548908233643, "logits/rejected": -3.3619582653045654, "logps/chosen": -284.1982421875, "logps/rejected": -383.9872741699219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.2674667835235596, "rewards/margins": 8.557934761047363, "rewards/rejected": -11.825401306152344, "step": 10354 }, { "epoch": 1.61, "learning_rate": 6.55255770527517e-06, "logits/chosen": -2.517397165298462, "logits/rejected": -2.7213006019592285, "logps/chosen": -102.60247039794922, "logps/rejected": -169.0601043701172, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.438781261444092, "rewards/margins": 7.473113059997559, "rewards/rejected": -9.911893844604492, "step": 10355 }, { "epoch": 1.61, "learning_rate": 6.551824264744022e-06, "logits/chosen": -2.779855728149414, "logits/rejected": -3.0833213329315186, "logps/chosen": -152.5101776123047, "logps/rejected": -349.9261169433594, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.744377851486206, "rewards/margins": 7.3039655685424805, "rewards/rejected": -9.048343658447266, "step": 10356 }, { "epoch": 1.61, "learning_rate": 6.551090824212874e-06, "logits/chosen": -2.218812942504883, "logits/rejected": -2.702927589416504, "logps/chosen": -122.39347839355469, "logps/rejected": -371.6627502441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9784538745880127, "rewards/margins": 11.390357971191406, "rewards/rejected": -14.36881160736084, "step": 10357 }, { "epoch": 1.61, "learning_rate": 6.550357383681726e-06, "logits/chosen": -1.8501230478286743, "logits/rejected": -2.9419000148773193, "logps/chosen": -149.4071807861328, "logps/rejected": -351.8829650878906, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -4.3749613761901855, "rewards/margins": 7.164124011993408, "rewards/rejected": -11.539085388183594, "step": 10358 }, { "epoch": 1.61, "learning_rate": 6.549623943150578e-06, "logits/chosen": -2.9431569576263428, "logits/rejected": -2.9093830585479736, "logps/chosen": -201.3719940185547, "logps/rejected": -260.06402587890625, "loss": 0.9625, "rewards/accuracies": 0.5, "rewards/chosen": -6.313197135925293, "rewards/margins": 4.336549282073975, "rewards/rejected": -10.649746894836426, "step": 10359 }, { "epoch": 1.61, "learning_rate": 6.548890502619431e-06, "logits/chosen": -2.8593530654907227, "logits/rejected": -2.3702993392944336, "logps/chosen": -135.17120361328125, "logps/rejected": -258.245849609375, "loss": 1.7527, "rewards/accuracies": 0.5, "rewards/chosen": -3.9544482231140137, "rewards/margins": 3.5811777114868164, "rewards/rejected": -7.53562593460083, "step": 10360 }, { "epoch": 1.61, "learning_rate": 6.5481570620882826e-06, "logits/chosen": -1.8196535110473633, "logits/rejected": -2.7440967559814453, "logps/chosen": -132.75836181640625, "logps/rejected": -292.81781005859375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -2.862974166870117, "rewards/margins": 5.391142845153809, "rewards/rejected": -8.254117965698242, "step": 10361 }, { "epoch": 1.61, "learning_rate": 6.5474236215571345e-06, "logits/chosen": -1.9884024858474731, "logits/rejected": -2.9449210166931152, "logps/chosen": -152.9649200439453, "logps/rejected": -269.570068359375, "loss": 1.2343, "rewards/accuracies": 0.5, "rewards/chosen": -6.365179061889648, "rewards/margins": 2.5480573177337646, "rewards/rejected": -8.913236618041992, "step": 10362 }, { "epoch": 1.61, "learning_rate": 6.546690181025986e-06, "logits/chosen": -1.0589962005615234, "logits/rejected": -2.8136184215545654, "logps/chosen": -111.6502685546875, "logps/rejected": -317.89862060546875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.533001184463501, "rewards/margins": 8.383440017700195, "rewards/rejected": -10.916440963745117, "step": 10363 }, { "epoch": 1.61, "learning_rate": 6.545956740494839e-06, "logits/chosen": -2.8006882667541504, "logits/rejected": -2.464613914489746, "logps/chosen": -384.2733154296875, "logps/rejected": -519.4574584960938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.058367967605591, "rewards/margins": 10.589816093444824, "rewards/rejected": -13.648183822631836, "step": 10364 }, { "epoch": 1.61, "learning_rate": 6.545223299963692e-06, "logits/chosen": -2.3632521629333496, "logits/rejected": -2.805248498916626, "logps/chosen": -148.44564819335938, "logps/rejected": -329.351806640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.8232418298721313, "rewards/margins": 8.624205589294434, "rewards/rejected": -10.447447776794434, "step": 10365 }, { "epoch": 1.61, "learning_rate": 6.544489859432544e-06, "logits/chosen": -1.9917067289352417, "logits/rejected": -3.0171518325805664, "logps/chosen": -222.62991333007812, "logps/rejected": -498.16790771484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.719083309173584, "rewards/margins": 9.658817291259766, "rewards/rejected": -14.377901077270508, "step": 10366 }, { "epoch": 1.61, "learning_rate": 6.5437564189013955e-06, "logits/chosen": -1.735796570777893, "logits/rejected": -2.162105083465576, "logps/chosen": -288.3821105957031, "logps/rejected": -354.8460388183594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.5881917476654053, "rewards/margins": 8.549609184265137, "rewards/rejected": -11.137800216674805, "step": 10367 }, { "epoch": 1.61, "learning_rate": 6.543022978370247e-06, "logits/chosen": -2.806638240814209, "logits/rejected": -2.633725166320801, "logps/chosen": -524.3565673828125, "logps/rejected": -692.2401123046875, "loss": 0.4878, "rewards/accuracies": 0.5, "rewards/chosen": -5.421402931213379, "rewards/margins": 3.2460622787475586, "rewards/rejected": -8.667465209960938, "step": 10368 }, { "epoch": 1.61, "learning_rate": 6.5422895378391e-06, "logits/chosen": -2.668194532394409, "logits/rejected": -3.1903645992279053, "logps/chosen": -80.88909149169922, "logps/rejected": -326.5390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -1.1390489339828491, "rewards/margins": 9.391667366027832, "rewards/rejected": -10.530715942382812, "step": 10369 }, { "epoch": 1.61, "learning_rate": 6.541556097307952e-06, "logits/chosen": -2.7975692749023438, "logits/rejected": -2.638775587081909, "logps/chosen": -214.23541259765625, "logps/rejected": -208.90872192382812, "loss": 3.6564, "rewards/accuracies": 0.5, "rewards/chosen": -6.48024320602417, "rewards/margins": 0.41048669815063477, "rewards/rejected": -6.890729904174805, "step": 10370 }, { "epoch": 1.61, "learning_rate": 6.540822656776804e-06, "logits/chosen": -2.74666690826416, "logits/rejected": -1.7228399515151978, "logps/chosen": -230.6292266845703, "logps/rejected": -189.2001953125, "loss": 0.104, "rewards/accuracies": 1.0, "rewards/chosen": -2.5081353187561035, "rewards/margins": 4.604447841644287, "rewards/rejected": -7.112583160400391, "step": 10371 }, { "epoch": 1.61, "learning_rate": 6.540089216245656e-06, "logits/chosen": -2.224773645401001, "logits/rejected": -2.586381435394287, "logps/chosen": -380.6746520996094, "logps/rejected": -400.8035888671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.997133255004883, "rewards/margins": 7.273944854736328, "rewards/rejected": -12.271078109741211, "step": 10372 }, { "epoch": 1.61, "learning_rate": 6.539355775714508e-06, "logits/chosen": -0.9917596578598022, "logits/rejected": -2.9697787761688232, "logps/chosen": -116.74129486083984, "logps/rejected": -653.0125732421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.776911735534668, "rewards/margins": 8.924200057983398, "rewards/rejected": -13.70111083984375, "step": 10373 }, { "epoch": 1.61, "learning_rate": 6.53862233518336e-06, "logits/chosen": -2.989931583404541, "logits/rejected": -3.1969385147094727, "logps/chosen": -209.6304473876953, "logps/rejected": -424.5289306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7386164665222168, "rewards/margins": 9.048587799072266, "rewards/rejected": -10.78720474243164, "step": 10374 }, { "epoch": 1.61, "learning_rate": 6.537888894652212e-06, "logits/chosen": -2.050567388534546, "logits/rejected": -2.9340875148773193, "logps/chosen": -117.86590576171875, "logps/rejected": -343.06097412109375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -4.666557788848877, "rewards/margins": 8.169215202331543, "rewards/rejected": -12.835773468017578, "step": 10375 }, { "epoch": 1.61, "learning_rate": 6.537155454121064e-06, "logits/chosen": -2.751194477081299, "logits/rejected": -2.1171791553497314, "logps/chosen": -281.5668029785156, "logps/rejected": -244.49009704589844, "loss": 1.4642, "rewards/accuracies": 0.5, "rewards/chosen": -4.49324893951416, "rewards/margins": 4.060705661773682, "rewards/rejected": -8.553954124450684, "step": 10376 }, { "epoch": 1.61, "learning_rate": 6.536422013589916e-06, "logits/chosen": -2.762725830078125, "logits/rejected": -2.4651122093200684, "logps/chosen": -161.43246459960938, "logps/rejected": -198.53419494628906, "loss": 0.6618, "rewards/accuracies": 0.5, "rewards/chosen": -3.921581268310547, "rewards/margins": 2.7919085025787354, "rewards/rejected": -6.713489532470703, "step": 10377 }, { "epoch": 1.61, "learning_rate": 6.535688573058769e-06, "logits/chosen": -2.8112668991088867, "logits/rejected": -2.478761911392212, "logps/chosen": -343.5451965332031, "logps/rejected": -370.80047607421875, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -3.506533622741699, "rewards/margins": 6.328627586364746, "rewards/rejected": -9.835161209106445, "step": 10378 }, { "epoch": 1.61, "learning_rate": 6.534955132527621e-06, "logits/chosen": -2.217465877532959, "logits/rejected": -3.1635260581970215, "logps/chosen": -129.54478454589844, "logps/rejected": -508.3409729003906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.8284807205200195, "rewards/margins": 9.179911613464355, "rewards/rejected": -13.008392333984375, "step": 10379 }, { "epoch": 1.61, "learning_rate": 6.5342216919964725e-06, "logits/chosen": -2.7362096309661865, "logits/rejected": -2.7321510314941406, "logps/chosen": -366.970947265625, "logps/rejected": -377.044189453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.900595188140869, "rewards/margins": 8.912501335144043, "rewards/rejected": -12.81309700012207, "step": 10380 }, { "epoch": 1.61, "learning_rate": 6.533488251465325e-06, "logits/chosen": -2.558751344680786, "logits/rejected": -3.040562868118286, "logps/chosen": -201.9708251953125, "logps/rejected": -226.66702270507812, "loss": 3.1594, "rewards/accuracies": 0.5, "rewards/chosen": -6.963503837585449, "rewards/margins": -0.09563231468200684, "rewards/rejected": -6.867871284484863, "step": 10381 }, { "epoch": 1.61, "learning_rate": 6.532754810934178e-06, "logits/chosen": -3.1113924980163574, "logits/rejected": -2.7816429138183594, "logps/chosen": -455.8013916015625, "logps/rejected": -535.8541259765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.32706880569458, "rewards/margins": 8.49103832244873, "rewards/rejected": -13.818107604980469, "step": 10382 }, { "epoch": 1.61, "learning_rate": 6.53202137040303e-06, "logits/chosen": -2.2998313903808594, "logits/rejected": -3.173888683319092, "logps/chosen": -207.71304321289062, "logps/rejected": -347.6425476074219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.5971741676330566, "rewards/margins": 9.18986701965332, "rewards/rejected": -11.787040710449219, "step": 10383 }, { "epoch": 1.61, "learning_rate": 6.531287929871882e-06, "logits/chosen": -2.4163506031036377, "logits/rejected": -2.563753604888916, "logps/chosen": -256.6015625, "logps/rejected": -365.85919189453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8062806129455566, "rewards/margins": 9.684896469116211, "rewards/rejected": -12.491177558898926, "step": 10384 }, { "epoch": 1.62, "learning_rate": 6.530554489340734e-06, "logits/chosen": -2.8947997093200684, "logits/rejected": -2.8911898136138916, "logps/chosen": -139.50473022460938, "logps/rejected": -240.8690185546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9794071912765503, "rewards/margins": 8.527914047241211, "rewards/rejected": -9.50732135772705, "step": 10385 }, { "epoch": 1.62, "learning_rate": 6.5298210488095855e-06, "logits/chosen": -2.775949478149414, "logits/rejected": -2.6848599910736084, "logps/chosen": -256.3762512207031, "logps/rejected": -279.9439392089844, "loss": 2.3307, "rewards/accuracies": 0.5, "rewards/chosen": -6.570940017700195, "rewards/margins": 2.3276660442352295, "rewards/rejected": -8.898606300354004, "step": 10386 }, { "epoch": 1.62, "learning_rate": 6.529087608278438e-06, "logits/chosen": -1.5106019973754883, "logits/rejected": -2.878535747528076, "logps/chosen": -80.26737976074219, "logps/rejected": -277.0684814453125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.697706699371338, "rewards/margins": 6.007719039916992, "rewards/rejected": -8.705425262451172, "step": 10387 }, { "epoch": 1.62, "learning_rate": 6.52835416774729e-06, "logits/chosen": -2.8480935096740723, "logits/rejected": -0.8369225859642029, "logps/chosen": -490.9522399902344, "logps/rejected": -236.1099090576172, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -4.105236053466797, "rewards/margins": 5.678912162780762, "rewards/rejected": -9.784148216247559, "step": 10388 }, { "epoch": 1.62, "learning_rate": 6.527620727216142e-06, "logits/chosen": -2.636876106262207, "logits/rejected": -2.977055311203003, "logps/chosen": -89.34213256835938, "logps/rejected": -129.48561096191406, "loss": 0.4062, "rewards/accuracies": 0.5, "rewards/chosen": -4.091514587402344, "rewards/margins": 3.52968692779541, "rewards/rejected": -7.621201515197754, "step": 10389 }, { "epoch": 1.62, "learning_rate": 6.526887286684994e-06, "logits/chosen": -2.8261032104492188, "logits/rejected": -2.861185312271118, "logps/chosen": -141.01583862304688, "logps/rejected": -235.87977600097656, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -2.612624168395996, "rewards/margins": 6.963598251342773, "rewards/rejected": -9.57622241973877, "step": 10390 }, { "epoch": 1.62, "learning_rate": 6.5261538461538465e-06, "logits/chosen": -2.116386651992798, "logits/rejected": -3.015352249145508, "logps/chosen": -611.2615356445312, "logps/rejected": -712.3468017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.417851448059082, "rewards/margins": 10.339590072631836, "rewards/rejected": -13.757442474365234, "step": 10391 }, { "epoch": 1.62, "learning_rate": 6.5254204056226984e-06, "logits/chosen": -2.811936616897583, "logits/rejected": -2.4989125728607178, "logps/chosen": -212.14434814453125, "logps/rejected": -287.717529296875, "loss": 0.8709, "rewards/accuracies": 0.5, "rewards/chosen": -7.015888214111328, "rewards/margins": 1.3764152526855469, "rewards/rejected": -8.392303466796875, "step": 10392 }, { "epoch": 1.62, "learning_rate": 6.52468696509155e-06, "logits/chosen": -2.9374945163726807, "logits/rejected": -3.0707132816314697, "logps/chosen": -115.43207550048828, "logps/rejected": -213.78211975097656, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.2790236473083496, "rewards/margins": 6.617559909820557, "rewards/rejected": -8.896583557128906, "step": 10393 }, { "epoch": 1.62, "learning_rate": 6.523953524560402e-06, "logits/chosen": -3.14145827293396, "logits/rejected": -3.2252659797668457, "logps/chosen": -83.32117462158203, "logps/rejected": -176.80087280273438, "loss": 0.0228, "rewards/accuracies": 1.0, "rewards/chosen": -3.9093334674835205, "rewards/margins": 4.435816764831543, "rewards/rejected": -8.345149993896484, "step": 10394 }, { "epoch": 1.62, "learning_rate": 6.523220084029254e-06, "logits/chosen": -2.4506783485412598, "logits/rejected": -2.8547873497009277, "logps/chosen": -70.00140380859375, "logps/rejected": -281.56341552734375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -1.9736785888671875, "rewards/margins": 7.877943515777588, "rewards/rejected": -9.851621627807617, "step": 10395 }, { "epoch": 1.62, "learning_rate": 6.522486643498107e-06, "logits/chosen": -2.569485902786255, "logits/rejected": -2.502824544906616, "logps/chosen": -388.008056640625, "logps/rejected": -494.3985900878906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5596668720245361, "rewards/margins": 9.224417686462402, "rewards/rejected": -10.78408432006836, "step": 10396 }, { "epoch": 1.62, "learning_rate": 6.521753202966959e-06, "logits/chosen": -2.9240729808807373, "logits/rejected": -3.0589771270751953, "logps/chosen": -125.15850830078125, "logps/rejected": -212.0557403564453, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -3.777918815612793, "rewards/margins": 4.461444854736328, "rewards/rejected": -8.239363670349121, "step": 10397 }, { "epoch": 1.62, "learning_rate": 6.521019762435811e-06, "logits/chosen": -2.5161032676696777, "logits/rejected": -2.714118242263794, "logps/chosen": -120.2730941772461, "logps/rejected": -323.11590576171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.946420669555664, "rewards/margins": 8.229293823242188, "rewards/rejected": -10.175714492797852, "step": 10398 }, { "epoch": 1.62, "learning_rate": 6.520286321904663e-06, "logits/chosen": -1.8531116247177124, "logits/rejected": -3.052823066711426, "logps/chosen": -128.59716796875, "logps/rejected": -521.3538208007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9661022424697876, "rewards/margins": 11.949339866638184, "rewards/rejected": -13.915441513061523, "step": 10399 }, { "epoch": 1.62, "learning_rate": 6.519552881373516e-06, "logits/chosen": -2.2120537757873535, "logits/rejected": -2.9365787506103516, "logps/chosen": -345.01300048828125, "logps/rejected": -509.9636535644531, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -4.262453556060791, "rewards/margins": 5.517972946166992, "rewards/rejected": -9.780426025390625, "step": 10400 }, { "epoch": 1.62, "learning_rate": 6.518819440842368e-06, "logits/chosen": -2.7830774784088135, "logits/rejected": -2.897810697555542, "logps/chosen": -142.55447387695312, "logps/rejected": -294.2337646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2351582050323486, "rewards/margins": 9.439233779907227, "rewards/rejected": -11.674391746520996, "step": 10401 }, { "epoch": 1.62, "learning_rate": 6.51808600031122e-06, "logits/chosen": -1.9541517496109009, "logits/rejected": -2.9244320392608643, "logps/chosen": -55.557579040527344, "logps/rejected": -333.5268249511719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.098556995391846, "rewards/margins": 8.635286331176758, "rewards/rejected": -12.733842849731445, "step": 10402 }, { "epoch": 1.62, "learning_rate": 6.517352559780072e-06, "logits/chosen": -1.606019377708435, "logits/rejected": -2.959012985229492, "logps/chosen": -114.61490631103516, "logps/rejected": -293.5654296875, "loss": 1.932, "rewards/accuracies": 0.5, "rewards/chosen": -5.159690856933594, "rewards/margins": 2.692713975906372, "rewards/rejected": -7.852405071258545, "step": 10403 }, { "epoch": 1.62, "learning_rate": 6.5166191192489235e-06, "logits/chosen": -2.986785411834717, "logits/rejected": -2.1731173992156982, "logps/chosen": -352.1881408691406, "logps/rejected": -345.58349609375, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -3.1488022804260254, "rewards/margins": 5.641750335693359, "rewards/rejected": -8.790553092956543, "step": 10404 }, { "epoch": 1.62, "learning_rate": 6.515885678717776e-06, "logits/chosen": -2.7015979290008545, "logits/rejected": -3.003239870071411, "logps/chosen": -78.91766357421875, "logps/rejected": -185.7963409423828, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.285740852355957, "rewards/margins": 7.707018852233887, "rewards/rejected": -11.992759704589844, "step": 10405 }, { "epoch": 1.62, "learning_rate": 6.515152238186628e-06, "logits/chosen": -2.7946441173553467, "logits/rejected": -2.9555699825286865, "logps/chosen": -543.7987060546875, "logps/rejected": -577.70556640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.675494432449341, "rewards/margins": 12.043581008911133, "rewards/rejected": -14.719076156616211, "step": 10406 }, { "epoch": 1.62, "learning_rate": 6.51441879765548e-06, "logits/chosen": -3.0868377685546875, "logits/rejected": -2.610291004180908, "logps/chosen": -252.74398803710938, "logps/rejected": -222.83099365234375, "loss": 2.4177, "rewards/accuracies": 0.5, "rewards/chosen": -6.469268798828125, "rewards/margins": 0.8123977184295654, "rewards/rejected": -7.2816667556762695, "step": 10407 }, { "epoch": 1.62, "learning_rate": 6.513685357124332e-06, "logits/chosen": -1.324953556060791, "logits/rejected": -2.377570629119873, "logps/chosen": -204.1605224609375, "logps/rejected": -478.5443420410156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.9786643981933594, "rewards/margins": 9.133369445800781, "rewards/rejected": -12.11203384399414, "step": 10408 }, { "epoch": 1.62, "learning_rate": 6.512951916593185e-06, "logits/chosen": -3.108445405960083, "logits/rejected": -2.176238536834717, "logps/chosen": -448.66339111328125, "logps/rejected": -294.9033203125, "loss": 0.1096, "rewards/accuracies": 1.0, "rewards/chosen": -5.268459320068359, "rewards/margins": 4.292148113250732, "rewards/rejected": -9.56060791015625, "step": 10409 }, { "epoch": 1.62, "learning_rate": 6.5122184760620365e-06, "logits/chosen": -2.036557674407959, "logits/rejected": -3.120431900024414, "logps/chosen": -94.075439453125, "logps/rejected": -286.440673828125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -5.767053127288818, "rewards/margins": 4.790950298309326, "rewards/rejected": -10.558003425598145, "step": 10410 }, { "epoch": 1.62, "learning_rate": 6.511485035530888e-06, "logits/chosen": -2.9154646396636963, "logits/rejected": -2.6907496452331543, "logps/chosen": -417.1631774902344, "logps/rejected": -361.62445068359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.247879981994629, "rewards/margins": 7.302303314208984, "rewards/rejected": -10.550183296203613, "step": 10411 }, { "epoch": 1.62, "learning_rate": 6.51075159499974e-06, "logits/chosen": -2.8790488243103027, "logits/rejected": -2.067258358001709, "logps/chosen": -412.05029296875, "logps/rejected": -294.4029541015625, "loss": 0.4215, "rewards/accuracies": 0.5, "rewards/chosen": -7.0228776931762695, "rewards/margins": 3.25879168510437, "rewards/rejected": -10.281669616699219, "step": 10412 }, { "epoch": 1.62, "learning_rate": 6.510018154468592e-06, "logits/chosen": -2.212345600128174, "logits/rejected": -2.727388620376587, "logps/chosen": -137.76712036132812, "logps/rejected": -459.0361328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.4847283363342285, "rewards/margins": 8.914959907531738, "rewards/rejected": -12.399688720703125, "step": 10413 }, { "epoch": 1.62, "learning_rate": 6.509284713937445e-06, "logits/chosen": -2.7342875003814697, "logits/rejected": -3.0341968536376953, "logps/chosen": -164.25088500976562, "logps/rejected": -308.3893127441406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.090956926345825, "rewards/margins": 7.4539618492126465, "rewards/rejected": -9.544918060302734, "step": 10414 }, { "epoch": 1.62, "learning_rate": 6.508551273406297e-06, "logits/chosen": -3.0999908447265625, "logits/rejected": -3.0474231243133545, "logps/chosen": -166.7108612060547, "logps/rejected": -313.2799072265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.5540804862976074, "rewards/margins": 8.963030815124512, "rewards/rejected": -10.517110824584961, "step": 10415 }, { "epoch": 1.62, "learning_rate": 6.5078178328751494e-06, "logits/chosen": -1.8732798099517822, "logits/rejected": -3.0283761024475098, "logps/chosen": -134.0282745361328, "logps/rejected": -362.5421447753906, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.107876777648926, "rewards/margins": 7.356119632720947, "rewards/rejected": -10.463996887207031, "step": 10416 }, { "epoch": 1.62, "learning_rate": 6.507084392344001e-06, "logits/chosen": -2.470641613006592, "logits/rejected": -2.849310874938965, "logps/chosen": -468.3721923828125, "logps/rejected": -504.78961181640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.493274450302124, "rewards/margins": 11.521894454956055, "rewards/rejected": -13.015169143676758, "step": 10417 }, { "epoch": 1.62, "learning_rate": 6.506350951812854e-06, "logits/chosen": -2.0323774814605713, "logits/rejected": -2.811307907104492, "logps/chosen": -472.92510986328125, "logps/rejected": -504.5244140625, "loss": 1.5588, "rewards/accuracies": 0.5, "rewards/chosen": -5.300742149353027, "rewards/margins": 2.627950668334961, "rewards/rejected": -7.928692817687988, "step": 10418 }, { "epoch": 1.62, "learning_rate": 6.505617511281706e-06, "logits/chosen": -2.8210763931274414, "logits/rejected": -2.475191354751587, "logps/chosen": -283.2444763183594, "logps/rejected": -355.02142333984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.5051307678222656, "rewards/margins": 7.653674602508545, "rewards/rejected": -10.158805847167969, "step": 10419 }, { "epoch": 1.62, "learning_rate": 6.504884070750558e-06, "logits/chosen": -2.8864922523498535, "logits/rejected": -2.8806068897247314, "logps/chosen": -319.934814453125, "logps/rejected": -314.4825439453125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.363532304763794, "rewards/margins": 6.396991729736328, "rewards/rejected": -8.76052474975586, "step": 10420 }, { "epoch": 1.62, "learning_rate": 6.50415063021941e-06, "logits/chosen": -2.8650059700012207, "logits/rejected": -2.8792829513549805, "logps/chosen": -154.92164611816406, "logps/rejected": -339.23565673828125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -2.9492392539978027, "rewards/margins": 7.652446746826172, "rewards/rejected": -10.601686477661133, "step": 10421 }, { "epoch": 1.62, "learning_rate": 6.503417189688262e-06, "logits/chosen": -2.797910690307617, "logits/rejected": -1.845629096031189, "logps/chosen": -369.08392333984375, "logps/rejected": -198.22500610351562, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -4.306140422821045, "rewards/margins": 3.6946027278900146, "rewards/rejected": -8.00074291229248, "step": 10422 }, { "epoch": 1.62, "learning_rate": 6.502683749157114e-06, "logits/chosen": -2.17909574508667, "logits/rejected": -2.753821849822998, "logps/chosen": -181.76336669921875, "logps/rejected": -292.0964050292969, "loss": 0.3246, "rewards/accuracies": 1.0, "rewards/chosen": -4.5946221351623535, "rewards/margins": 4.784363746643066, "rewards/rejected": -9.378986358642578, "step": 10423 }, { "epoch": 1.62, "learning_rate": 6.501950308625966e-06, "logits/chosen": -2.994662284851074, "logits/rejected": -2.087704658508301, "logps/chosen": -222.22763061523438, "logps/rejected": -263.5497131347656, "loss": 2.3065, "rewards/accuracies": 0.5, "rewards/chosen": -4.6392645835876465, "rewards/margins": 5.183173179626465, "rewards/rejected": -9.82243824005127, "step": 10424 }, { "epoch": 1.62, "learning_rate": 6.501216868094818e-06, "logits/chosen": -2.6383883953094482, "logits/rejected": -3.1193137168884277, "logps/chosen": -134.3408203125, "logps/rejected": -308.8225402832031, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.736483097076416, "rewards/margins": 6.807160377502441, "rewards/rejected": -11.543643951416016, "step": 10425 }, { "epoch": 1.62, "learning_rate": 6.50048342756367e-06, "logits/chosen": -1.447955846786499, "logits/rejected": -2.8048994541168213, "logps/chosen": -106.0477294921875, "logps/rejected": -371.3137512207031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.8226661682128906, "rewards/margins": 8.75643539428711, "rewards/rejected": -11.5791015625, "step": 10426 }, { "epoch": 1.62, "learning_rate": 6.499749987032523e-06, "logits/chosen": -0.9920923113822937, "logits/rejected": -2.354048252105713, "logps/chosen": -114.71522521972656, "logps/rejected": -390.05615234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.798163890838623, "rewards/margins": 10.674395561218262, "rewards/rejected": -14.472558975219727, "step": 10427 }, { "epoch": 1.62, "learning_rate": 6.4990165465013745e-06, "logits/chosen": -2.490081787109375, "logits/rejected": -2.756983757019043, "logps/chosen": -90.98114776611328, "logps/rejected": -183.1840362548828, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.2866668701171875, "rewards/margins": 6.452578067779541, "rewards/rejected": -9.73924446105957, "step": 10428 }, { "epoch": 1.62, "learning_rate": 6.498283105970226e-06, "logits/chosen": -2.1735875606536865, "logits/rejected": -2.836346387863159, "logps/chosen": -146.86151123046875, "logps/rejected": -296.4676513671875, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -4.249419212341309, "rewards/margins": 5.596076965332031, "rewards/rejected": -9.84549617767334, "step": 10429 }, { "epoch": 1.62, "learning_rate": 6.497549665439078e-06, "logits/chosen": -2.6584157943725586, "logits/rejected": -2.1152994632720947, "logps/chosen": -155.98890686035156, "logps/rejected": -163.98068237304688, "loss": 0.1573, "rewards/accuracies": 1.0, "rewards/chosen": -4.687784194946289, "rewards/margins": 4.3860697746276855, "rewards/rejected": -9.073854446411133, "step": 10430 }, { "epoch": 1.62, "learning_rate": 6.496816224907931e-06, "logits/chosen": -2.943282127380371, "logits/rejected": -1.853572130203247, "logps/chosen": -421.59600830078125, "logps/rejected": -273.74102783203125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -3.563164710998535, "rewards/margins": 5.9745097160339355, "rewards/rejected": -9.537673950195312, "step": 10431 }, { "epoch": 1.62, "learning_rate": 6.496082784376783e-06, "logits/chosen": -2.9635138511657715, "logits/rejected": -2.06526517868042, "logps/chosen": -504.2747802734375, "logps/rejected": -304.2945251464844, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -5.942741394042969, "rewards/margins": 5.8041887283325195, "rewards/rejected": -11.746930122375488, "step": 10432 }, { "epoch": 1.62, "learning_rate": 6.495349343845636e-06, "logits/chosen": -2.6317877769470215, "logits/rejected": -1.8866667747497559, "logps/chosen": -213.48338317871094, "logps/rejected": -265.6465759277344, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -2.072986602783203, "rewards/margins": 6.971372604370117, "rewards/rejected": -9.04435920715332, "step": 10433 }, { "epoch": 1.62, "learning_rate": 6.4946159033144875e-06, "logits/chosen": -2.381500720977783, "logits/rejected": -2.870232343673706, "logps/chosen": -349.36822509765625, "logps/rejected": -463.48199462890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1556992530822754, "rewards/margins": 8.522308349609375, "rewards/rejected": -9.678007125854492, "step": 10434 }, { "epoch": 1.62, "learning_rate": 6.493882462783339e-06, "logits/chosen": -2.4218204021453857, "logits/rejected": -2.709864616394043, "logps/chosen": -548.4678955078125, "logps/rejected": -557.383544921875, "loss": 0.3936, "rewards/accuracies": 0.5, "rewards/chosen": -4.792954444885254, "rewards/margins": 5.08983850479126, "rewards/rejected": -9.882792472839355, "step": 10435 }, { "epoch": 1.62, "learning_rate": 6.493149022252192e-06, "logits/chosen": -2.0923657417297363, "logits/rejected": -3.1409506797790527, "logps/chosen": -76.63395690917969, "logps/rejected": -374.183349609375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.03402042388916, "rewards/margins": 5.765030384063721, "rewards/rejected": -8.799051284790039, "step": 10436 }, { "epoch": 1.62, "learning_rate": 6.492415581721044e-06, "logits/chosen": -1.1834748983383179, "logits/rejected": -2.8344311714172363, "logps/chosen": -206.85043334960938, "logps/rejected": -533.0108032226562, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.4802956581115723, "rewards/margins": 6.802065372467041, "rewards/rejected": -10.282361030578613, "step": 10437 }, { "epoch": 1.62, "learning_rate": 6.491682141189896e-06, "logits/chosen": -2.2866899967193604, "logits/rejected": -2.0390264987945557, "logps/chosen": -527.8154907226562, "logps/rejected": -381.636474609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.814718008041382, "rewards/margins": 7.8026123046875, "rewards/rejected": -11.617330551147461, "step": 10438 }, { "epoch": 1.62, "learning_rate": 6.490948700658748e-06, "logits/chosen": -2.1982760429382324, "logits/rejected": -3.003394365310669, "logps/chosen": -99.78169250488281, "logps/rejected": -265.74969482421875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -1.9644888639450073, "rewards/margins": 6.337843894958496, "rewards/rejected": -8.302332878112793, "step": 10439 }, { "epoch": 1.62, "learning_rate": 6.4902152601276004e-06, "logits/chosen": -3.2018706798553467, "logits/rejected": -2.9188907146453857, "logps/chosen": -354.3004150390625, "logps/rejected": -217.28189086914062, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.4482336044311523, "rewards/margins": 6.281816482543945, "rewards/rejected": -9.730051040649414, "step": 10440 }, { "epoch": 1.62, "learning_rate": 6.489481819596452e-06, "logits/chosen": -2.6474649906158447, "logits/rejected": -2.9709408283233643, "logps/chosen": -346.19256591796875, "logps/rejected": -417.162353515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.9204590320587158, "rewards/margins": 8.096324920654297, "rewards/rejected": -10.016783714294434, "step": 10441 }, { "epoch": 1.62, "learning_rate": 6.488748379065304e-06, "logits/chosen": -2.726989269256592, "logits/rejected": -2.8631818294525146, "logps/chosen": -92.15278625488281, "logps/rejected": -191.15884399414062, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -2.9917759895324707, "rewards/margins": 4.612865924835205, "rewards/rejected": -7.604641914367676, "step": 10442 }, { "epoch": 1.62, "learning_rate": 6.488014938534156e-06, "logits/chosen": -2.874539613723755, "logits/rejected": -3.4084360599517822, "logps/chosen": -72.54116821289062, "logps/rejected": -239.4332733154297, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.911149501800537, "rewards/margins": 6.270611763000488, "rewards/rejected": -10.181760787963867, "step": 10443 }, { "epoch": 1.62, "learning_rate": 6.487281498003008e-06, "logits/chosen": -2.8189821243286133, "logits/rejected": -2.514141321182251, "logps/chosen": -217.75814819335938, "logps/rejected": -230.68833923339844, "loss": 0.124, "rewards/accuracies": 1.0, "rewards/chosen": -1.1546837091445923, "rewards/margins": 3.2882063388824463, "rewards/rejected": -4.442890167236328, "step": 10444 }, { "epoch": 1.62, "learning_rate": 6.486548057471861e-06, "logits/chosen": -2.9372711181640625, "logits/rejected": -2.548508644104004, "logps/chosen": -192.31790161132812, "logps/rejected": -117.40808868408203, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.8090927600860596, "rewards/margins": 4.2016921043396, "rewards/rejected": -5.010785102844238, "step": 10445 }, { "epoch": 1.62, "learning_rate": 6.4858146169407126e-06, "logits/chosen": -2.313506841659546, "logits/rejected": -3.1723392009735107, "logps/chosen": -163.50900268554688, "logps/rejected": -317.44769287109375, "loss": 0.1045, "rewards/accuracies": 1.0, "rewards/chosen": -4.737630844116211, "rewards/margins": 5.1530327796936035, "rewards/rejected": -9.890663146972656, "step": 10446 }, { "epoch": 1.62, "learning_rate": 6.4850811764095644e-06, "logits/chosen": -2.867462158203125, "logits/rejected": -1.645866870880127, "logps/chosen": -226.13400268554688, "logps/rejected": -152.29022216796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.2283124923706055, "rewards/margins": 8.36530590057373, "rewards/rejected": -10.593618392944336, "step": 10447 }, { "epoch": 1.62, "learning_rate": 6.484347735878416e-06, "logits/chosen": -2.7216172218322754, "logits/rejected": -2.860013484954834, "logps/chosen": -380.3643798828125, "logps/rejected": -308.31695556640625, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -2.7023682594299316, "rewards/margins": 6.162854194641113, "rewards/rejected": -8.865222930908203, "step": 10448 }, { "epoch": 1.63, "learning_rate": 6.483614295347269e-06, "logits/chosen": -1.6890476942062378, "logits/rejected": -2.8180301189422607, "logps/chosen": -90.14469909667969, "logps/rejected": -315.456787109375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -3.857724666595459, "rewards/margins": 5.58006477355957, "rewards/rejected": -9.437788963317871, "step": 10449 }, { "epoch": 1.63, "learning_rate": 6.482880854816122e-06, "logits/chosen": -2.7047078609466553, "logits/rejected": -2.1564366817474365, "logps/chosen": -298.89764404296875, "logps/rejected": -287.4994201660156, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -4.369797229766846, "rewards/margins": 4.9272847175598145, "rewards/rejected": -9.29708194732666, "step": 10450 }, { "epoch": 1.63, "learning_rate": 6.482147414284974e-06, "logits/chosen": -3.190133810043335, "logits/rejected": -2.0079104900360107, "logps/chosen": -963.7244262695312, "logps/rejected": -321.169677734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8790175914764404, "rewards/margins": 8.563372611999512, "rewards/rejected": -12.442390441894531, "step": 10451 }, { "epoch": 1.63, "learning_rate": 6.4814139737538255e-06, "logits/chosen": -2.047991991043091, "logits/rejected": -2.76849102973938, "logps/chosen": -116.94945526123047, "logps/rejected": -327.347900390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.146026611328125, "rewards/margins": 7.500934600830078, "rewards/rejected": -11.646961212158203, "step": 10452 }, { "epoch": 1.63, "learning_rate": 6.480680533222677e-06, "logits/chosen": -1.993842363357544, "logits/rejected": -2.8508341312408447, "logps/chosen": -330.8032531738281, "logps/rejected": -485.9950256347656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.50114369392395, "rewards/margins": 7.964632034301758, "rewards/rejected": -10.465776443481445, "step": 10453 }, { "epoch": 1.63, "learning_rate": 6.47994709269153e-06, "logits/chosen": -2.708064556121826, "logits/rejected": -3.180309534072876, "logps/chosen": -517.49951171875, "logps/rejected": -544.9435424804688, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.111626625061035, "rewards/margins": 6.6323394775390625, "rewards/rejected": -10.743965148925781, "step": 10454 }, { "epoch": 1.63, "learning_rate": 6.479213652160382e-06, "logits/chosen": -2.665512800216675, "logits/rejected": -2.4665067195892334, "logps/chosen": -875.2277221679688, "logps/rejected": -643.7808227539062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.8347039222717285, "rewards/margins": 8.147404670715332, "rewards/rejected": -10.982109069824219, "step": 10455 }, { "epoch": 1.63, "learning_rate": 6.478480211629234e-06, "logits/chosen": -1.1066956520080566, "logits/rejected": -2.75675892829895, "logps/chosen": -185.50347900390625, "logps/rejected": -329.08477783203125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -2.81750226020813, "rewards/margins": 7.610107898712158, "rewards/rejected": -10.427610397338867, "step": 10456 }, { "epoch": 1.63, "learning_rate": 6.477746771098086e-06, "logits/chosen": -2.8945581912994385, "logits/rejected": -2.8129963874816895, "logps/chosen": -166.14007568359375, "logps/rejected": -182.3401641845703, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -3.4078874588012695, "rewards/margins": 5.894814491271973, "rewards/rejected": -9.302701950073242, "step": 10457 }, { "epoch": 1.63, "learning_rate": 6.4770133305669385e-06, "logits/chosen": -2.6025753021240234, "logits/rejected": -3.1036596298217773, "logps/chosen": -129.92945861816406, "logps/rejected": -249.67465209960938, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.322982311248779, "rewards/margins": 6.3399810791015625, "rewards/rejected": -10.6629638671875, "step": 10458 }, { "epoch": 1.63, "learning_rate": 6.47627989003579e-06, "logits/chosen": -2.644880533218384, "logits/rejected": -2.779585838317871, "logps/chosen": -167.79922485351562, "logps/rejected": -296.8916015625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.9793198108673096, "rewards/margins": 4.995461463928223, "rewards/rejected": -7.974781036376953, "step": 10459 }, { "epoch": 1.63, "learning_rate": 6.475546449504642e-06, "logits/chosen": -2.0080857276916504, "logits/rejected": -2.819934129714966, "logps/chosen": -270.84588623046875, "logps/rejected": -326.8673095703125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -3.5968682765960693, "rewards/margins": 5.77083683013916, "rewards/rejected": -9.367704391479492, "step": 10460 }, { "epoch": 1.63, "learning_rate": 6.474813008973494e-06, "logits/chosen": -2.6946802139282227, "logits/rejected": -2.885037660598755, "logps/chosen": -102.90606689453125, "logps/rejected": -202.09375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -2.6149370670318604, "rewards/margins": 5.8355560302734375, "rewards/rejected": -8.450492858886719, "step": 10461 }, { "epoch": 1.63, "learning_rate": 6.474079568442346e-06, "logits/chosen": -1.7619019746780396, "logits/rejected": -2.795398235321045, "logps/chosen": -169.8021240234375, "logps/rejected": -274.16796875, "loss": 0.22, "rewards/accuracies": 1.0, "rewards/chosen": -3.046816349029541, "rewards/margins": 2.535616397857666, "rewards/rejected": -5.582432746887207, "step": 10462 }, { "epoch": 1.63, "learning_rate": 6.473346127911199e-06, "logits/chosen": -3.07331919670105, "logits/rejected": -2.5619072914123535, "logps/chosen": -473.59130859375, "logps/rejected": -337.5037536621094, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -3.311403751373291, "rewards/margins": 4.484644889831543, "rewards/rejected": -7.796048164367676, "step": 10463 }, { "epoch": 1.63, "learning_rate": 6.472612687380051e-06, "logits/chosen": -2.9480679035186768, "logits/rejected": -2.84903883934021, "logps/chosen": -275.10821533203125, "logps/rejected": -275.01068115234375, "loss": 0.7547, "rewards/accuracies": 0.5, "rewards/chosen": -4.260000705718994, "rewards/margins": 2.902445077896118, "rewards/rejected": -7.162446022033691, "step": 10464 }, { "epoch": 1.63, "learning_rate": 6.4718792468489025e-06, "logits/chosen": -2.3631412982940674, "logits/rejected": -3.2620110511779785, "logps/chosen": -54.64896774291992, "logps/rejected": -235.6004180908203, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.8580472469329834, "rewards/margins": 7.331286430358887, "rewards/rejected": -9.189332962036133, "step": 10465 }, { "epoch": 1.63, "learning_rate": 6.471145806317755e-06, "logits/chosen": -1.9806506633758545, "logits/rejected": -3.1130177974700928, "logps/chosen": -192.37942504882812, "logps/rejected": -400.34649658203125, "loss": 2.4543, "rewards/accuracies": 0.5, "rewards/chosen": -5.855649948120117, "rewards/margins": 2.1489827632904053, "rewards/rejected": -8.004632949829102, "step": 10466 }, { "epoch": 1.63, "learning_rate": 6.470412365786608e-06, "logits/chosen": -2.4685866832733154, "logits/rejected": -1.873630404472351, "logps/chosen": -176.09939575195312, "logps/rejected": -333.82769775390625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -4.150693893432617, "rewards/margins": 5.881294250488281, "rewards/rejected": -10.031988143920898, "step": 10467 }, { "epoch": 1.63, "learning_rate": 6.46967892525546e-06, "logits/chosen": -2.89812970161438, "logits/rejected": -2.0514707565307617, "logps/chosen": -201.91299438476562, "logps/rejected": -217.2801513671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.019339084625244, "rewards/margins": 6.932543754577637, "rewards/rejected": -8.951883316040039, "step": 10468 }, { "epoch": 1.63, "learning_rate": 6.468945484724312e-06, "logits/chosen": -2.761793375015259, "logits/rejected": -1.8751417398452759, "logps/chosen": -228.80224609375, "logps/rejected": -307.35235595703125, "loss": 0.1581, "rewards/accuracies": 1.0, "rewards/chosen": -2.564197540283203, "rewards/margins": 4.723236083984375, "rewards/rejected": -7.287433624267578, "step": 10469 }, { "epoch": 1.63, "learning_rate": 6.4682120441931636e-06, "logits/chosen": -2.9497835636138916, "logits/rejected": -3.0869150161743164, "logps/chosen": -96.2357177734375, "logps/rejected": -175.26486206054688, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.2161574363708496, "rewards/margins": 5.424439907073975, "rewards/rejected": -8.640597343444824, "step": 10470 }, { "epoch": 1.63, "learning_rate": 6.467478603662016e-06, "logits/chosen": -2.8724281787872314, "logits/rejected": -2.6469430923461914, "logps/chosen": -402.08172607421875, "logps/rejected": -259.7821350097656, "loss": 2.7528, "rewards/accuracies": 0.5, "rewards/chosen": -6.241203784942627, "rewards/margins": -0.12645578384399414, "rewards/rejected": -6.114748001098633, "step": 10471 }, { "epoch": 1.63, "learning_rate": 6.466745163130868e-06, "logits/chosen": -2.7127647399902344, "logits/rejected": -2.856339931488037, "logps/chosen": -303.3927001953125, "logps/rejected": -522.6614379882812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.600308656692505, "rewards/margins": 9.803899765014648, "rewards/rejected": -13.40420913696289, "step": 10472 }, { "epoch": 1.63, "learning_rate": 6.46601172259972e-06, "logits/chosen": -1.8948220014572144, "logits/rejected": -3.131246328353882, "logps/chosen": -112.69588470458984, "logps/rejected": -445.6861572265625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -3.5534093379974365, "rewards/margins": 5.174515724182129, "rewards/rejected": -8.727924346923828, "step": 10473 }, { "epoch": 1.63, "learning_rate": 6.465278282068572e-06, "logits/chosen": -2.201756715774536, "logits/rejected": -2.8064727783203125, "logps/chosen": -274.93743896484375, "logps/rejected": -352.83074951171875, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -4.677186489105225, "rewards/margins": 3.6372361183166504, "rewards/rejected": -8.314422607421875, "step": 10474 }, { "epoch": 1.63, "learning_rate": 6.464544841537424e-06, "logits/chosen": -2.8433823585510254, "logits/rejected": -2.954296827316284, "logps/chosen": -117.0394515991211, "logps/rejected": -152.3600311279297, "loss": 0.9974, "rewards/accuracies": 0.5, "rewards/chosen": -4.6373748779296875, "rewards/margins": 3.1635615825653076, "rewards/rejected": -7.800936698913574, "step": 10475 }, { "epoch": 1.63, "learning_rate": 6.4638114010062765e-06, "logits/chosen": -2.753021240234375, "logits/rejected": -1.7496404647827148, "logps/chosen": -237.37872314453125, "logps/rejected": -197.21739196777344, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.8913589715957642, "rewards/margins": 7.850092887878418, "rewards/rejected": -8.741451263427734, "step": 10476 }, { "epoch": 1.63, "learning_rate": 6.463077960475128e-06, "logits/chosen": -2.919391632080078, "logits/rejected": -3.0889625549316406, "logps/chosen": -457.13031005859375, "logps/rejected": -399.11309814453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.7891876697540283, "rewards/margins": 7.66711950302124, "rewards/rejected": -9.456306457519531, "step": 10477 }, { "epoch": 1.63, "learning_rate": 6.46234451994398e-06, "logits/chosen": -2.1158807277679443, "logits/rejected": -3.4128806591033936, "logps/chosen": -73.2362060546875, "logps/rejected": -409.31085205078125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -4.708600044250488, "rewards/margins": 7.614518642425537, "rewards/rejected": -12.323118209838867, "step": 10478 }, { "epoch": 1.63, "learning_rate": 6.461611079412832e-06, "logits/chosen": -2.612233877182007, "logits/rejected": -3.1286847591400146, "logps/chosen": -459.4698181152344, "logps/rejected": -528.6790771484375, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -2.5559003353118896, "rewards/margins": 5.998689651489258, "rewards/rejected": -8.554590225219727, "step": 10479 }, { "epoch": 1.63, "learning_rate": 6.460877638881685e-06, "logits/chosen": -1.9362833499908447, "logits/rejected": -2.9199111461639404, "logps/chosen": -200.5138397216797, "logps/rejected": -357.01385498046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.351221561431885, "rewards/margins": 9.287437438964844, "rewards/rejected": -13.63865852355957, "step": 10480 }, { "epoch": 1.63, "learning_rate": 6.460144198350537e-06, "logits/chosen": -2.77266788482666, "logits/rejected": -3.179762125015259, "logps/chosen": -184.568115234375, "logps/rejected": -303.83251953125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -5.114407062530518, "rewards/margins": 5.044651031494141, "rewards/rejected": -10.1590576171875, "step": 10481 }, { "epoch": 1.63, "learning_rate": 6.459410757819389e-06, "logits/chosen": -2.1893558502197266, "logits/rejected": -1.736862063407898, "logps/chosen": -558.4638671875, "logps/rejected": -529.1245727539062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6561501026153564, "rewards/margins": 14.361602783203125, "rewards/rejected": -16.017751693725586, "step": 10482 }, { "epoch": 1.63, "learning_rate": 6.458677317288241e-06, "logits/chosen": -3.018272638320923, "logits/rejected": -1.868105411529541, "logps/chosen": -283.86614990234375, "logps/rejected": -157.88319396972656, "loss": 0.5255, "rewards/accuracies": 0.5, "rewards/chosen": -3.5506863594055176, "rewards/margins": 3.52913236618042, "rewards/rejected": -7.0798187255859375, "step": 10483 }, { "epoch": 1.63, "learning_rate": 6.457943876757093e-06, "logits/chosen": -2.0759384632110596, "logits/rejected": -3.123509407043457, "logps/chosen": -259.0517883300781, "logps/rejected": -541.6370849609375, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -2.6871304512023926, "rewards/margins": 4.2905426025390625, "rewards/rejected": -6.977673530578613, "step": 10484 }, { "epoch": 1.63, "learning_rate": 6.457210436225946e-06, "logits/chosen": -2.8637588024139404, "logits/rejected": -2.4330854415893555, "logps/chosen": -158.65127563476562, "logps/rejected": -374.16326904296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.169647216796875, "rewards/margins": 10.625259399414062, "rewards/rejected": -13.794906616210938, "step": 10485 }, { "epoch": 1.63, "learning_rate": 6.456476995694798e-06, "logits/chosen": -2.3043885231018066, "logits/rejected": -2.8204801082611084, "logps/chosen": -161.90719604492188, "logps/rejected": -352.93414306640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.2714622020721436, "rewards/margins": 7.8207807540893555, "rewards/rejected": -9.092243194580078, "step": 10486 }, { "epoch": 1.63, "learning_rate": 6.45574355516365e-06, "logits/chosen": -1.4749469757080078, "logits/rejected": -2.686023473739624, "logps/chosen": -261.64599609375, "logps/rejected": -508.5378723144531, "loss": 0.0737, "rewards/accuracies": 1.0, "rewards/chosen": -4.444972038269043, "rewards/margins": 5.9985127449035645, "rewards/rejected": -10.443485260009766, "step": 10487 }, { "epoch": 1.63, "learning_rate": 6.455010114632502e-06, "logits/chosen": -2.8252992630004883, "logits/rejected": -3.0046353340148926, "logps/chosen": -52.78961181640625, "logps/rejected": -227.00677490234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.0884292125701904, "rewards/margins": 8.55660629272461, "rewards/rejected": -10.645035743713379, "step": 10488 }, { "epoch": 1.63, "learning_rate": 6.454276674101354e-06, "logits/chosen": -2.4972405433654785, "logits/rejected": -3.169987440109253, "logps/chosen": -86.27445220947266, "logps/rejected": -324.5435485839844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.111859917640686, "rewards/margins": 9.042720794677734, "rewards/rejected": -10.154581069946289, "step": 10489 }, { "epoch": 1.63, "learning_rate": 6.453543233570206e-06, "logits/chosen": -3.175874948501587, "logits/rejected": -2.8509068489074707, "logps/chosen": -399.92523193359375, "logps/rejected": -185.92372131347656, "loss": 0.3023, "rewards/accuracies": 1.0, "rewards/chosen": -5.898645401000977, "rewards/margins": 2.167649269104004, "rewards/rejected": -8.06629467010498, "step": 10490 }, { "epoch": 1.63, "learning_rate": 6.452809793039058e-06, "logits/chosen": -2.928074836730957, "logits/rejected": -2.729969024658203, "logps/chosen": -156.8076629638672, "logps/rejected": -206.8299560546875, "loss": 1.0224, "rewards/accuracies": 0.5, "rewards/chosen": -5.548892021179199, "rewards/margins": 3.294182062149048, "rewards/rejected": -8.843073844909668, "step": 10491 }, { "epoch": 1.63, "learning_rate": 6.45207635250791e-06, "logits/chosen": -2.827343702316284, "logits/rejected": -2.926001787185669, "logps/chosen": -386.91387939453125, "logps/rejected": -511.1931457519531, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -5.207751750946045, "rewards/margins": 4.88181734085083, "rewards/rejected": -10.089569091796875, "step": 10492 }, { "epoch": 1.63, "learning_rate": 6.451342911976762e-06, "logits/chosen": -2.303691864013672, "logits/rejected": -2.963047981262207, "logps/chosen": -87.18885803222656, "logps/rejected": -386.3775329589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.64764928817749, "rewards/margins": 10.577520370483398, "rewards/rejected": -15.225170135498047, "step": 10493 }, { "epoch": 1.63, "learning_rate": 6.4506094714456146e-06, "logits/chosen": -2.213458776473999, "logits/rejected": -2.88700532913208, "logps/chosen": -94.95671081542969, "logps/rejected": -194.45816040039062, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.9052672386169434, "rewards/margins": 6.344660758972168, "rewards/rejected": -10.249927520751953, "step": 10494 }, { "epoch": 1.63, "learning_rate": 6.4498760309144665e-06, "logits/chosen": -2.125392198562622, "logits/rejected": -2.722097396850586, "logps/chosen": -112.54684448242188, "logps/rejected": -146.53219604492188, "loss": 0.2598, "rewards/accuracies": 1.0, "rewards/chosen": -4.347516059875488, "rewards/margins": 2.7822299003601074, "rewards/rejected": -7.129745960235596, "step": 10495 }, { "epoch": 1.63, "learning_rate": 6.449142590383318e-06, "logits/chosen": -2.929509162902832, "logits/rejected": -1.8537085056304932, "logps/chosen": -516.9876098632812, "logps/rejected": -430.3788757324219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5817818641662598, "rewards/margins": 10.635963439941406, "rewards/rejected": -14.21774673461914, "step": 10496 }, { "epoch": 1.63, "learning_rate": 6.44840914985217e-06, "logits/chosen": -2.084284543991089, "logits/rejected": -2.682431697845459, "logps/chosen": -221.84413146972656, "logps/rejected": -283.31195068359375, "loss": 0.4113, "rewards/accuracies": 0.5, "rewards/chosen": -5.176408290863037, "rewards/margins": 4.898458480834961, "rewards/rejected": -10.074867248535156, "step": 10497 }, { "epoch": 1.63, "learning_rate": 6.447675709321023e-06, "logits/chosen": -1.3985662460327148, "logits/rejected": -2.6959030628204346, "logps/chosen": -115.65121459960938, "logps/rejected": -317.26580810546875, "loss": 0.1352, "rewards/accuracies": 1.0, "rewards/chosen": -4.858042240142822, "rewards/margins": 6.268266677856445, "rewards/rejected": -11.126309394836426, "step": 10498 }, { "epoch": 1.63, "learning_rate": 6.446942268789875e-06, "logits/chosen": -3.2224888801574707, "logits/rejected": -2.418449878692627, "logps/chosen": -1156.459228515625, "logps/rejected": -520.9732666015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.354474067687988, "rewards/margins": 8.133607864379883, "rewards/rejected": -13.488080978393555, "step": 10499 }, { "epoch": 1.63, "learning_rate": 6.4462088282587275e-06, "logits/chosen": -1.519089698791504, "logits/rejected": -2.9179282188415527, "logps/chosen": -69.40728759765625, "logps/rejected": -256.8260498046875, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -2.4319701194763184, "rewards/margins": 4.310994625091553, "rewards/rejected": -6.742964744567871, "step": 10500 }, { "epoch": 1.63, "learning_rate": 6.445475387727579e-06, "logits/chosen": -3.0315091609954834, "logits/rejected": -1.9006680250167847, "logps/chosen": -256.7426452636719, "logps/rejected": -206.19859313964844, "loss": 0.0792, "rewards/accuracies": 1.0, "rewards/chosen": -2.0365819931030273, "rewards/margins": 4.371209144592285, "rewards/rejected": -6.4077911376953125, "step": 10501 }, { "epoch": 1.63, "learning_rate": 6.444741947196431e-06, "logits/chosen": -2.35957932472229, "logits/rejected": -2.825298547744751, "logps/chosen": -320.381591796875, "logps/rejected": -374.0940246582031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7853939533233643, "rewards/margins": 10.147109985351562, "rewards/rejected": -12.932504653930664, "step": 10502 }, { "epoch": 1.63, "learning_rate": 6.444008506665284e-06, "logits/chosen": -1.6608145236968994, "logits/rejected": -2.8798704147338867, "logps/chosen": -177.37615966796875, "logps/rejected": -405.1504821777344, "loss": 0.5258, "rewards/accuracies": 0.5, "rewards/chosen": -4.557278156280518, "rewards/margins": 5.622908592224121, "rewards/rejected": -10.18018627166748, "step": 10503 }, { "epoch": 1.63, "learning_rate": 6.443275066134136e-06, "logits/chosen": -2.6075146198272705, "logits/rejected": -3.1298177242279053, "logps/chosen": -201.24435424804688, "logps/rejected": -327.8515319824219, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.278529644012451, "rewards/margins": 5.864972114562988, "rewards/rejected": -9.143501281738281, "step": 10504 }, { "epoch": 1.63, "learning_rate": 6.442541625602988e-06, "logits/chosen": -2.8660073280334473, "logits/rejected": -2.867241621017456, "logps/chosen": -340.4390563964844, "logps/rejected": -471.4577941894531, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.322108268737793, "rewards/margins": 8.386570930480957, "rewards/rejected": -12.70867919921875, "step": 10505 }, { "epoch": 1.63, "learning_rate": 6.44180818507184e-06, "logits/chosen": -2.892956256866455, "logits/rejected": -1.8292630910873413, "logps/chosen": -148.80975341796875, "logps/rejected": -136.30792236328125, "loss": 0.9325, "rewards/accuracies": 0.5, "rewards/chosen": -4.446280479431152, "rewards/margins": 4.722928047180176, "rewards/rejected": -9.169208526611328, "step": 10506 }, { "epoch": 1.63, "learning_rate": 6.441074744540692e-06, "logits/chosen": -2.8747339248657227, "logits/rejected": -2.750258207321167, "logps/chosen": -321.25567626953125, "logps/rejected": -307.6234130859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.8684738278388977, "rewards/margins": 6.966372013092041, "rewards/rejected": -7.834845542907715, "step": 10507 }, { "epoch": 1.63, "learning_rate": 6.440341304009544e-06, "logits/chosen": -1.8780343532562256, "logits/rejected": -2.6201066970825195, "logps/chosen": -291.0634765625, "logps/rejected": -621.2431030273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0874557495117188, "rewards/margins": 11.097677230834961, "rewards/rejected": -14.18513298034668, "step": 10508 }, { "epoch": 1.63, "learning_rate": 6.439607863478396e-06, "logits/chosen": -1.1778136491775513, "logits/rejected": -2.628141403198242, "logps/chosen": -87.60247802734375, "logps/rejected": -358.8109436035156, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -6.473626613616943, "rewards/margins": 7.194800853729248, "rewards/rejected": -13.668427467346191, "step": 10509 }, { "epoch": 1.63, "learning_rate": 6.438874422947248e-06, "logits/chosen": -3.109123706817627, "logits/rejected": -3.121088981628418, "logps/chosen": -239.85789489746094, "logps/rejected": -404.3448791503906, "loss": 0.4585, "rewards/accuracies": 0.5, "rewards/chosen": -2.544106960296631, "rewards/margins": 8.69946575164795, "rewards/rejected": -11.243573188781738, "step": 10510 }, { "epoch": 1.63, "learning_rate": 6.4381409824161e-06, "logits/chosen": -2.750356912612915, "logits/rejected": -2.7471444606781006, "logps/chosen": -114.43450927734375, "logps/rejected": -254.2890167236328, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.965794086456299, "rewards/margins": 7.347195625305176, "rewards/rejected": -11.312989234924316, "step": 10511 }, { "epoch": 1.63, "learning_rate": 6.437407541884953e-06, "logits/chosen": -1.8842833042144775, "logits/rejected": -2.709021806716919, "logps/chosen": -92.56913757324219, "logps/rejected": -251.74234008789062, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -4.738608360290527, "rewards/margins": 5.061374664306641, "rewards/rejected": -9.799983024597168, "step": 10512 }, { "epoch": 1.63, "learning_rate": 6.4366741013538045e-06, "logits/chosen": -1.7531288862228394, "logits/rejected": -2.637352466583252, "logps/chosen": -195.28851318359375, "logps/rejected": -484.30706787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5089737176895142, "rewards/margins": 10.665204048156738, "rewards/rejected": -12.174177169799805, "step": 10513 }, { "epoch": 1.64, "learning_rate": 6.435940660822656e-06, "logits/chosen": -3.129915952682495, "logits/rejected": -2.9734716415405273, "logps/chosen": -246.46286010742188, "logps/rejected": -200.924072265625, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -4.118564605712891, "rewards/margins": 3.703019857406616, "rewards/rejected": -7.821584701538086, "step": 10514 }, { "epoch": 1.64, "learning_rate": 6.435207220291508e-06, "logits/chosen": -2.3957579135894775, "logits/rejected": -2.8799798488616943, "logps/chosen": -171.27928161621094, "logps/rejected": -402.5741882324219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.375582218170166, "rewards/margins": 9.781694412231445, "rewards/rejected": -13.15727710723877, "step": 10515 }, { "epoch": 1.64, "learning_rate": 6.434473779760361e-06, "logits/chosen": -2.559091329574585, "logits/rejected": -2.88978910446167, "logps/chosen": -178.95494079589844, "logps/rejected": -359.598876953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.9840400218963623, "rewards/margins": 6.744606971740723, "rewards/rejected": -8.728647232055664, "step": 10516 }, { "epoch": 1.64, "learning_rate": 6.433740339229214e-06, "logits/chosen": -2.4259095191955566, "logits/rejected": -3.088143825531006, "logps/chosen": -145.0125732421875, "logps/rejected": -442.3009948730469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.1646511554718018, "rewards/margins": 9.769759178161621, "rewards/rejected": -10.934410095214844, "step": 10517 }, { "epoch": 1.64, "learning_rate": 6.433006898698066e-06, "logits/chosen": -2.536252737045288, "logits/rejected": -2.8805243968963623, "logps/chosen": -245.85421752929688, "logps/rejected": -533.3768920898438, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.0895877480506897, "rewards/margins": 8.362516403198242, "rewards/rejected": -8.452104568481445, "step": 10518 }, { "epoch": 1.64, "learning_rate": 6.4322734581669175e-06, "logits/chosen": -2.548142433166504, "logits/rejected": -2.944516181945801, "logps/chosen": -267.67498779296875, "logps/rejected": -284.1274719238281, "loss": 1.3205, "rewards/accuracies": 0.5, "rewards/chosen": -5.820237159729004, "rewards/margins": 0.9602210521697998, "rewards/rejected": -6.780458450317383, "step": 10519 }, { "epoch": 1.64, "learning_rate": 6.43154001763577e-06, "logits/chosen": -2.8317368030548096, "logits/rejected": -2.453279733657837, "logps/chosen": -745.9507446289062, "logps/rejected": -739.7235107421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.7691726684570312, "rewards/margins": 9.978666305541992, "rewards/rejected": -12.747838973999023, "step": 10520 }, { "epoch": 1.64, "learning_rate": 6.430806577104622e-06, "logits/chosen": -2.8126633167266846, "logits/rejected": -3.23199200630188, "logps/chosen": -179.09725952148438, "logps/rejected": -374.2160339355469, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.1724343299865723, "rewards/margins": 6.043787002563477, "rewards/rejected": -9.21622085571289, "step": 10521 }, { "epoch": 1.64, "learning_rate": 6.430073136573474e-06, "logits/chosen": -3.2183563709259033, "logits/rejected": -2.2069568634033203, "logps/chosen": -511.1990051269531, "logps/rejected": -489.36993408203125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": 0.9185196161270142, "rewards/margins": 9.996177673339844, "rewards/rejected": -9.077657699584961, "step": 10522 }, { "epoch": 1.64, "learning_rate": 6.429339696042326e-06, "logits/chosen": -2.3494622707366943, "logits/rejected": -2.551968574523926, "logps/chosen": -546.5384521484375, "logps/rejected": -647.9288940429688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.629289150238037, "rewards/margins": 9.819440841674805, "rewards/rejected": -13.448729515075684, "step": 10523 }, { "epoch": 1.64, "learning_rate": 6.428606255511178e-06, "logits/chosen": -2.9777963161468506, "logits/rejected": -2.9689512252807617, "logps/chosen": -315.1422119140625, "logps/rejected": -222.5126953125, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": -2.0495765209198, "rewards/margins": 2.8284478187561035, "rewards/rejected": -4.878024101257324, "step": 10524 }, { "epoch": 1.64, "learning_rate": 6.4278728149800304e-06, "logits/chosen": -2.41300368309021, "logits/rejected": -3.0691230297088623, "logps/chosen": -63.70762634277344, "logps/rejected": -291.8953552246094, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.8604364395141602, "rewards/margins": 6.765374660491943, "rewards/rejected": -8.625810623168945, "step": 10525 }, { "epoch": 1.64, "learning_rate": 6.427139374448882e-06, "logits/chosen": -1.7725186347961426, "logits/rejected": -2.8985543251037598, "logps/chosen": -687.0386352539062, "logps/rejected": -489.910888671875, "loss": 1.9524, "rewards/accuracies": 0.5, "rewards/chosen": -7.809136390686035, "rewards/margins": 1.559330701828003, "rewards/rejected": -9.368467330932617, "step": 10526 }, { "epoch": 1.64, "learning_rate": 6.426405933917734e-06, "logits/chosen": -3.2120585441589355, "logits/rejected": -2.8327155113220215, "logps/chosen": -392.5469665527344, "logps/rejected": -336.61981201171875, "loss": 0.4046, "rewards/accuracies": 0.5, "rewards/chosen": -3.5953805446624756, "rewards/margins": 3.5206923484802246, "rewards/rejected": -7.116072654724121, "step": 10527 }, { "epoch": 1.64, "learning_rate": 6.425672493386586e-06, "logits/chosen": -2.7808501720428467, "logits/rejected": -2.7897279262542725, "logps/chosen": -204.91650390625, "logps/rejected": -287.0544128417969, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.7636921405792236, "rewards/margins": 6.830038547515869, "rewards/rejected": -10.593730926513672, "step": 10528 }, { "epoch": 1.64, "learning_rate": 6.424939052855439e-06, "logits/chosen": -1.83145272731781, "logits/rejected": -2.9205751419067383, "logps/chosen": -87.888427734375, "logps/rejected": -405.31988525390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.0167901515960693, "rewards/margins": 10.74836540222168, "rewards/rejected": -13.765155792236328, "step": 10529 }, { "epoch": 1.64, "learning_rate": 6.424205612324291e-06, "logits/chosen": -1.6686956882476807, "logits/rejected": -2.8600289821624756, "logps/chosen": -112.56239318847656, "logps/rejected": -415.4574890136719, "loss": 0.9056, "rewards/accuracies": 0.5, "rewards/chosen": -5.385437965393066, "rewards/margins": 7.923898696899414, "rewards/rejected": -13.30933666229248, "step": 10530 }, { "epoch": 1.64, "learning_rate": 6.4234721717931425e-06, "logits/chosen": -3.328077793121338, "logits/rejected": -2.8671112060546875, "logps/chosen": -300.4111633300781, "logps/rejected": -354.5950622558594, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.455278396606445, "rewards/margins": 6.69046688079834, "rewards/rejected": -11.145745277404785, "step": 10531 }, { "epoch": 1.64, "learning_rate": 6.422738731261994e-06, "logits/chosen": -2.299206018447876, "logits/rejected": -2.958099365234375, "logps/chosen": -96.98324584960938, "logps/rejected": -368.4328308105469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.400564193725586, "rewards/margins": 9.45401382446289, "rewards/rejected": -12.854578018188477, "step": 10532 }, { "epoch": 1.64, "learning_rate": 6.422005290730847e-06, "logits/chosen": -2.6484603881835938, "logits/rejected": -2.724430799484253, "logps/chosen": -109.982177734375, "logps/rejected": -194.42019653320312, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.7477493286132812, "rewards/margins": 6.935882091522217, "rewards/rejected": -9.683631896972656, "step": 10533 }, { "epoch": 1.64, "learning_rate": 6.4212718501997e-06, "logits/chosen": -1.6928067207336426, "logits/rejected": -3.1537888050079346, "logps/chosen": -71.79420471191406, "logps/rejected": -368.2872009277344, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -3.042015552520752, "rewards/margins": 6.507322311401367, "rewards/rejected": -9.549337387084961, "step": 10534 }, { "epoch": 1.64, "learning_rate": 6.420538409668552e-06, "logits/chosen": -2.9321718215942383, "logits/rejected": -2.445740222930908, "logps/chosen": -235.39610290527344, "logps/rejected": -277.39117431640625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.7923552989959717, "rewards/margins": 6.79353666305542, "rewards/rejected": -8.585891723632812, "step": 10535 }, { "epoch": 1.64, "learning_rate": 6.419804969137404e-06, "logits/chosen": -2.8118231296539307, "logits/rejected": -3.129991292953491, "logps/chosen": -189.28872680664062, "logps/rejected": -314.3861083984375, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -4.112602710723877, "rewards/margins": 3.743083953857422, "rewards/rejected": -7.855687141418457, "step": 10536 }, { "epoch": 1.64, "learning_rate": 6.4190715286062555e-06, "logits/chosen": -2.8961374759674072, "logits/rejected": -2.245394468307495, "logps/chosen": -774.9058227539062, "logps/rejected": -555.1256713867188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.8913068771362305, "rewards/margins": 7.066431045532227, "rewards/rejected": -11.957738876342773, "step": 10537 }, { "epoch": 1.64, "learning_rate": 6.418338088075108e-06, "logits/chosen": -3.151453971862793, "logits/rejected": -3.2810473442077637, "logps/chosen": -100.97346496582031, "logps/rejected": -193.86502075195312, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -1.7530763149261475, "rewards/margins": 5.161347389221191, "rewards/rejected": -6.91442346572876, "step": 10538 }, { "epoch": 1.64, "learning_rate": 6.41760464754396e-06, "logits/chosen": -2.703451156616211, "logits/rejected": -2.302781105041504, "logps/chosen": -231.48623657226562, "logps/rejected": -270.25079345703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.5578453540802, "rewards/margins": 7.868950843811035, "rewards/rejected": -10.426795959472656, "step": 10539 }, { "epoch": 1.64, "learning_rate": 6.416871207012812e-06, "logits/chosen": -2.5727827548980713, "logits/rejected": -3.303114891052246, "logps/chosen": -118.50714111328125, "logps/rejected": -367.4685974121094, "loss": 2.306, "rewards/accuracies": 0.5, "rewards/chosen": -7.6728715896606445, "rewards/margins": 3.481572151184082, "rewards/rejected": -11.154443740844727, "step": 10540 }, { "epoch": 1.64, "learning_rate": 6.416137766481664e-06, "logits/chosen": -2.9944517612457275, "logits/rejected": -2.8350319862365723, "logps/chosen": -347.2225341796875, "logps/rejected": -288.43353271484375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -4.543687343597412, "rewards/margins": 5.655853271484375, "rewards/rejected": -10.199541091918945, "step": 10541 }, { "epoch": 1.64, "learning_rate": 6.415404325950516e-06, "logits/chosen": -1.8607285022735596, "logits/rejected": -2.524747848510742, "logps/chosen": -241.63650512695312, "logps/rejected": -313.17132568359375, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": -4.711341381072998, "rewards/margins": 5.315664291381836, "rewards/rejected": -10.027006149291992, "step": 10542 }, { "epoch": 1.64, "learning_rate": 6.4146708854193685e-06, "logits/chosen": -2.8340861797332764, "logits/rejected": -3.0802812576293945, "logps/chosen": -562.7067260742188, "logps/rejected": -478.8236999511719, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.078157424926758, "rewards/margins": 7.5096354484558105, "rewards/rejected": -12.587793350219727, "step": 10543 }, { "epoch": 1.64, "learning_rate": 6.41393744488822e-06, "logits/chosen": -1.7527155876159668, "logits/rejected": -2.208711862564087, "logps/chosen": -360.8553771972656, "logps/rejected": -618.0921630859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.629964351654053, "rewards/margins": 9.349635124206543, "rewards/rejected": -13.979598999023438, "step": 10544 }, { "epoch": 1.64, "learning_rate": 6.413204004357072e-06, "logits/chosen": -3.0071723461151123, "logits/rejected": -2.6954636573791504, "logps/chosen": -625.4658203125, "logps/rejected": -549.4447021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 2.0419602394104004, "rewards/margins": 15.454421997070312, "rewards/rejected": -13.41246223449707, "step": 10545 }, { "epoch": 1.64, "learning_rate": 6.412470563825924e-06, "logits/chosen": -3.0190389156341553, "logits/rejected": -3.0964534282684326, "logps/chosen": -585.6265869140625, "logps/rejected": -544.5040283203125, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -5.6274847984313965, "rewards/margins": 3.6274752616882324, "rewards/rejected": -9.254960060119629, "step": 10546 }, { "epoch": 1.64, "learning_rate": 6.411737123294777e-06, "logits/chosen": -3.356997013092041, "logits/rejected": -2.7008883953094482, "logps/chosen": -174.9820556640625, "logps/rejected": -149.37930297851562, "loss": 0.3026, "rewards/accuracies": 1.0, "rewards/chosen": -5.773372173309326, "rewards/margins": 4.418372631072998, "rewards/rejected": -10.191744804382324, "step": 10547 }, { "epoch": 1.64, "learning_rate": 6.411003682763629e-06, "logits/chosen": -2.129044532775879, "logits/rejected": -2.6343610286712646, "logps/chosen": -130.7159423828125, "logps/rejected": -372.2566833496094, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.119246006011963, "rewards/margins": 8.385934829711914, "rewards/rejected": -9.505180358886719, "step": 10548 }, { "epoch": 1.64, "learning_rate": 6.410270242232481e-06, "logits/chosen": -3.255000114440918, "logits/rejected": -2.5976643562316895, "logps/chosen": -362.0653076171875, "logps/rejected": -379.97796630859375, "loss": 0.576, "rewards/accuracies": 0.5, "rewards/chosen": -4.207696437835693, "rewards/margins": 3.481482982635498, "rewards/rejected": -7.689179420471191, "step": 10549 }, { "epoch": 1.64, "learning_rate": 6.409536801701333e-06, "logits/chosen": -1.4630858898162842, "logits/rejected": -2.7967662811279297, "logps/chosen": -85.91313934326172, "logps/rejected": -407.15167236328125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.490295648574829, "rewards/margins": 9.863293647766113, "rewards/rejected": -12.353589057922363, "step": 10550 }, { "epoch": 1.64, "learning_rate": 6.408803361170185e-06, "logits/chosen": -2.396622896194458, "logits/rejected": -2.9468064308166504, "logps/chosen": -391.41357421875, "logps/rejected": -388.98016357421875, "loss": 2.0994, "rewards/accuracies": 0.0, "rewards/chosen": -10.849394798278809, "rewards/margins": -1.7966420650482178, "rewards/rejected": -9.052752494812012, "step": 10551 }, { "epoch": 1.64, "learning_rate": 6.408069920639038e-06, "logits/chosen": -1.8346216678619385, "logits/rejected": -3.0789122581481934, "logps/chosen": -57.44149398803711, "logps/rejected": -236.74957275390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9229657649993896, "rewards/margins": 8.368868827819824, "rewards/rejected": -10.291833877563477, "step": 10552 }, { "epoch": 1.64, "learning_rate": 6.40733648010789e-06, "logits/chosen": -2.5695641040802, "logits/rejected": -2.681089401245117, "logps/chosen": -449.57720947265625, "logps/rejected": -651.310546875, "loss": 0.1924, "rewards/accuracies": 1.0, "rewards/chosen": -5.198899269104004, "rewards/margins": 4.186304092407227, "rewards/rejected": -9.38520336151123, "step": 10553 }, { "epoch": 1.64, "learning_rate": 6.406603039576742e-06, "logits/chosen": -2.7152099609375, "logits/rejected": -2.7748427391052246, "logps/chosen": -149.73541259765625, "logps/rejected": -248.5844268798828, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.197700023651123, "rewards/margins": 5.970889568328857, "rewards/rejected": -11.16858959197998, "step": 10554 }, { "epoch": 1.64, "learning_rate": 6.4058695990455935e-06, "logits/chosen": -2.168323278427124, "logits/rejected": -2.9750802516937256, "logps/chosen": -220.10914611816406, "logps/rejected": -401.61865234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.6179451942443848, "rewards/margins": 9.646726608276367, "rewards/rejected": -13.264671325683594, "step": 10555 }, { "epoch": 1.64, "learning_rate": 6.405136158514446e-06, "logits/chosen": -1.9140015840530396, "logits/rejected": -3.197563409805298, "logps/chosen": -154.40524291992188, "logps/rejected": -426.15240478515625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.386179447174072, "rewards/margins": 7.302543640136719, "rewards/rejected": -11.688722610473633, "step": 10556 }, { "epoch": 1.64, "learning_rate": 6.404402717983298e-06, "logits/chosen": -2.7772743701934814, "logits/rejected": -2.9142849445343018, "logps/chosen": -74.24552917480469, "logps/rejected": -207.49819946289062, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -3.0459301471710205, "rewards/margins": 5.819720268249512, "rewards/rejected": -8.865650177001953, "step": 10557 }, { "epoch": 1.64, "learning_rate": 6.40366927745215e-06, "logits/chosen": -2.6363413333892822, "logits/rejected": -3.30501127243042, "logps/chosen": -110.26754760742188, "logps/rejected": -325.18035888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8536860942840576, "rewards/margins": 10.47237491607666, "rewards/rejected": -12.326061248779297, "step": 10558 }, { "epoch": 1.64, "learning_rate": 6.402935836921002e-06, "logits/chosen": -3.0030746459960938, "logits/rejected": -3.009721279144287, "logps/chosen": -343.68487548828125, "logps/rejected": -378.657470703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.8001511096954346, "rewards/margins": 7.219394683837891, "rewards/rejected": -10.019545555114746, "step": 10559 }, { "epoch": 1.64, "learning_rate": 6.402202396389854e-06, "logits/chosen": -2.621344566345215, "logits/rejected": -3.040841579437256, "logps/chosen": -140.95574951171875, "logps/rejected": -364.26275634765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.615927219390869, "rewards/margins": 9.019246101379395, "rewards/rejected": -12.635173797607422, "step": 10560 }, { "epoch": 1.64, "learning_rate": 6.4014689558587065e-06, "logits/chosen": -2.1981983184814453, "logits/rejected": -3.0930120944976807, "logps/chosen": -173.62379455566406, "logps/rejected": -491.2417907714844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.649509906768799, "rewards/margins": 7.520268440246582, "rewards/rejected": -13.169778823852539, "step": 10561 }, { "epoch": 1.64, "learning_rate": 6.400735515327558e-06, "logits/chosen": -2.8776285648345947, "logits/rejected": -3.0058341026306152, "logps/chosen": -142.55467224121094, "logps/rejected": -265.4429931640625, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -1.932117223739624, "rewards/margins": 5.052570343017578, "rewards/rejected": -6.984687805175781, "step": 10562 }, { "epoch": 1.64, "learning_rate": 6.40000207479641e-06, "logits/chosen": -2.7568259239196777, "logits/rejected": -2.6004717350006104, "logps/chosen": -475.4236755371094, "logps/rejected": -486.7549743652344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.430326461791992, "rewards/margins": 8.692151069641113, "rewards/rejected": -14.122478485107422, "step": 10563 }, { "epoch": 1.64, "learning_rate": 6.399268634265262e-06, "logits/chosen": -1.8606088161468506, "logits/rejected": -3.0675384998321533, "logps/chosen": -87.77708435058594, "logps/rejected": -278.359130859375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.48480224609375, "rewards/margins": 6.0724053382873535, "rewards/rejected": -8.557207107543945, "step": 10564 }, { "epoch": 1.64, "learning_rate": 6.398535193734115e-06, "logits/chosen": -2.6307902336120605, "logits/rejected": -2.857522487640381, "logps/chosen": -132.22528076171875, "logps/rejected": -322.186767578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.1479485034942627, "rewards/margins": 9.888705253601074, "rewards/rejected": -11.036653518676758, "step": 10565 }, { "epoch": 1.64, "learning_rate": 6.397801753202967e-06, "logits/chosen": -2.2310914993286133, "logits/rejected": -2.950639247894287, "logps/chosen": -264.69561767578125, "logps/rejected": -462.681396484375, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -3.956211805343628, "rewards/margins": 8.017364501953125, "rewards/rejected": -11.973576545715332, "step": 10566 }, { "epoch": 1.64, "learning_rate": 6.3970683126718195e-06, "logits/chosen": -3.194904327392578, "logits/rejected": -2.849222421646118, "logps/chosen": -217.97763061523438, "logps/rejected": -118.8167724609375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.2650306224822998, "rewards/margins": 6.414422988891602, "rewards/rejected": -7.679453372955322, "step": 10567 }, { "epoch": 1.64, "learning_rate": 6.396334872140671e-06, "logits/chosen": -2.8441104888916016, "logits/rejected": -2.8350255489349365, "logps/chosen": -368.7459716796875, "logps/rejected": -433.09979248046875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.960514068603516, "rewards/margins": 6.553571701049805, "rewards/rejected": -12.51408576965332, "step": 10568 }, { "epoch": 1.64, "learning_rate": 6.395601431609524e-06, "logits/chosen": -1.9126116037368774, "logits/rejected": -2.7553982734680176, "logps/chosen": -176.29788208007812, "logps/rejected": -296.7195739746094, "loss": 0.1135, "rewards/accuracies": 1.0, "rewards/chosen": -3.858109474182129, "rewards/margins": 5.218009948730469, "rewards/rejected": -9.076119422912598, "step": 10569 }, { "epoch": 1.64, "learning_rate": 6.394867991078376e-06, "logits/chosen": -2.4920225143432617, "logits/rejected": -2.2042720317840576, "logps/chosen": -249.56405639648438, "logps/rejected": -304.83123779296875, "loss": 0.7472, "rewards/accuracies": 0.5, "rewards/chosen": -5.676766872406006, "rewards/margins": 3.0882303714752197, "rewards/rejected": -8.764997482299805, "step": 10570 }, { "epoch": 1.64, "learning_rate": 6.394134550547228e-06, "logits/chosen": -2.762816905975342, "logits/rejected": -2.856152296066284, "logps/chosen": -283.85888671875, "logps/rejected": -497.81134033203125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -4.154559135437012, "rewards/margins": 9.842273712158203, "rewards/rejected": -13.996832847595215, "step": 10571 }, { "epoch": 1.64, "learning_rate": 6.39340111001608e-06, "logits/chosen": -1.8242852687835693, "logits/rejected": -2.8764522075653076, "logps/chosen": -251.21780395507812, "logps/rejected": -468.888427734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.9938979148864746, "rewards/margins": 8.024290084838867, "rewards/rejected": -12.0181884765625, "step": 10572 }, { "epoch": 1.64, "learning_rate": 6.392667669484932e-06, "logits/chosen": -2.485618829727173, "logits/rejected": -2.9737606048583984, "logps/chosen": -39.04579544067383, "logps/rejected": -155.276123046875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.4145946502685547, "rewards/margins": 5.302590847015381, "rewards/rejected": -7.717185974121094, "step": 10573 }, { "epoch": 1.64, "learning_rate": 6.391934228953784e-06, "logits/chosen": -2.527919292449951, "logits/rejected": -3.0308315753936768, "logps/chosen": -295.78350830078125, "logps/rejected": -371.65960693359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.3408188819885254, "rewards/margins": 8.15234375, "rewards/rejected": -11.493162155151367, "step": 10574 }, { "epoch": 1.64, "learning_rate": 6.391200788422636e-06, "logits/chosen": -3.2723548412323, "logits/rejected": -2.9831156730651855, "logps/chosen": -652.200927734375, "logps/rejected": -480.22906494140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.331521511077881, "rewards/margins": 6.619054317474365, "rewards/rejected": -12.950575828552246, "step": 10575 }, { "epoch": 1.64, "learning_rate": 6.390467347891488e-06, "logits/chosen": -2.6886494159698486, "logits/rejected": -3.4217796325683594, "logps/chosen": -102.63470458984375, "logps/rejected": -433.810546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.9718852043151855, "rewards/margins": 9.340391159057617, "rewards/rejected": -12.312275886535645, "step": 10576 }, { "epoch": 1.64, "learning_rate": 6.38973390736034e-06, "logits/chosen": -2.3939270973205566, "logits/rejected": -3.1243648529052734, "logps/chosen": -176.96417236328125, "logps/rejected": -354.7181396484375, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -5.011618137359619, "rewards/margins": 3.6612343788146973, "rewards/rejected": -8.672852516174316, "step": 10577 }, { "epoch": 1.65, "learning_rate": 6.389000466829193e-06, "logits/chosen": -2.957977771759033, "logits/rejected": -2.5309433937072754, "logps/chosen": -160.6422119140625, "logps/rejected": -161.7157745361328, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/chosen": -5.100603103637695, "rewards/margins": 2.529597759246826, "rewards/rejected": -7.630200386047363, "step": 10578 }, { "epoch": 1.65, "learning_rate": 6.3882670262980446e-06, "logits/chosen": -1.655909776687622, "logits/rejected": -2.9166247844696045, "logps/chosen": -55.50544738769531, "logps/rejected": -339.0382080078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.6580514907836914, "rewards/margins": 7.52786922454834, "rewards/rejected": -11.185920715332031, "step": 10579 }, { "epoch": 1.65, "learning_rate": 6.3875335857668964e-06, "logits/chosen": -2.9384307861328125, "logits/rejected": -3.016080617904663, "logps/chosen": -95.50016021728516, "logps/rejected": -206.02069091796875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -2.666071891784668, "rewards/margins": 5.889649391174316, "rewards/rejected": -8.555721282958984, "step": 10580 }, { "epoch": 1.65, "learning_rate": 6.386800145235748e-06, "logits/chosen": -1.997527003288269, "logits/rejected": -3.1096413135528564, "logps/chosen": -170.1683349609375, "logps/rejected": -518.08203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.363864898681641, "rewards/margins": 9.030397415161133, "rewards/rejected": -13.39426326751709, "step": 10581 }, { "epoch": 1.65, "learning_rate": 6.3860667047046e-06, "logits/chosen": -2.191218852996826, "logits/rejected": -3.147881031036377, "logps/chosen": -234.73410034179688, "logps/rejected": -500.5914001464844, "loss": 0.1917, "rewards/accuracies": 1.0, "rewards/chosen": -3.6237244606018066, "rewards/margins": 4.884772300720215, "rewards/rejected": -8.508496284484863, "step": 10582 }, { "epoch": 1.65, "learning_rate": 6.385333264173453e-06, "logits/chosen": -2.700794219970703, "logits/rejected": -3.2889859676361084, "logps/chosen": -758.3584594726562, "logps/rejected": -709.178955078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.1606690883636475, "rewards/margins": 8.064457893371582, "rewards/rejected": -10.225126266479492, "step": 10583 }, { "epoch": 1.65, "learning_rate": 6.384599823642306e-06, "logits/chosen": -2.972482919692993, "logits/rejected": -2.3790833950042725, "logps/chosen": -213.59364318847656, "logps/rejected": -281.4006042480469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.0838828086853027, "rewards/margins": 7.7104387283325195, "rewards/rejected": -9.794321060180664, "step": 10584 }, { "epoch": 1.65, "learning_rate": 6.3838663831111575e-06, "logits/chosen": -2.705216407775879, "logits/rejected": -2.214017152786255, "logps/chosen": -646.43505859375, "logps/rejected": -488.6669921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5405502319335938, "rewards/margins": 9.239938735961914, "rewards/rejected": -11.780488967895508, "step": 10585 }, { "epoch": 1.65, "learning_rate": 6.383132942580009e-06, "logits/chosen": -2.4007041454315186, "logits/rejected": -2.8351991176605225, "logps/chosen": -602.8643798828125, "logps/rejected": -527.770263671875, "loss": 1.5794, "rewards/accuracies": 0.5, "rewards/chosen": -4.046724319458008, "rewards/margins": 4.194709300994873, "rewards/rejected": -8.241433143615723, "step": 10586 }, { "epoch": 1.65, "learning_rate": 6.382399502048862e-06, "logits/chosen": -2.8640143871307373, "logits/rejected": -3.250537633895874, "logps/chosen": -121.55014038085938, "logps/rejected": -272.8404541015625, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/chosen": -3.2294888496398926, "rewards/margins": 5.611127853393555, "rewards/rejected": -8.840616226196289, "step": 10587 }, { "epoch": 1.65, "learning_rate": 6.381666061517714e-06, "logits/chosen": -2.6358914375305176, "logits/rejected": -2.979177236557007, "logps/chosen": -634.078369140625, "logps/rejected": -650.6962280273438, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.241727828979492, "rewards/margins": 8.802048683166504, "rewards/rejected": -13.04377555847168, "step": 10588 }, { "epoch": 1.65, "learning_rate": 6.380932620986566e-06, "logits/chosen": -2.3559882640838623, "logits/rejected": -2.8921728134155273, "logps/chosen": -85.8120346069336, "logps/rejected": -213.7626495361328, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -3.3993639945983887, "rewards/margins": 4.1466522216796875, "rewards/rejected": -7.546015739440918, "step": 10589 }, { "epoch": 1.65, "learning_rate": 6.380199180455418e-06, "logits/chosen": -1.3468232154846191, "logits/rejected": -2.3444302082061768, "logps/chosen": -170.129150390625, "logps/rejected": -402.30438232421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.55537223815918, "rewards/margins": 8.815085411071777, "rewards/rejected": -13.370457649230957, "step": 10590 }, { "epoch": 1.65, "learning_rate": 6.37946573992427e-06, "logits/chosen": -2.7651846408843994, "logits/rejected": -2.988623857498169, "logps/chosen": -225.363037109375, "logps/rejected": -397.3504943847656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.395878553390503, "rewards/margins": 11.401020050048828, "rewards/rejected": -13.796899795532227, "step": 10591 }, { "epoch": 1.65, "learning_rate": 6.378732299393122e-06, "logits/chosen": -2.8163511753082275, "logits/rejected": -2.720834255218506, "logps/chosen": -229.92984008789062, "logps/rejected": -233.24508666992188, "loss": 0.0834, "rewards/accuracies": 1.0, "rewards/chosen": -4.108922481536865, "rewards/margins": 5.614494323730469, "rewards/rejected": -9.723417282104492, "step": 10592 }, { "epoch": 1.65, "learning_rate": 6.377998858861974e-06, "logits/chosen": -2.9933230876922607, "logits/rejected": -3.0961616039276123, "logps/chosen": -59.05009841918945, "logps/rejected": -215.80361938476562, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.7174980640411377, "rewards/margins": 8.417739868164062, "rewards/rejected": -10.135237693786621, "step": 10593 }, { "epoch": 1.65, "learning_rate": 6.377265418330826e-06, "logits/chosen": -2.8695640563964844, "logits/rejected": -3.000631093978882, "logps/chosen": -129.61915588378906, "logps/rejected": -271.2860107421875, "loss": 0.2197, "rewards/accuracies": 1.0, "rewards/chosen": -4.277809143066406, "rewards/margins": 7.600240707397461, "rewards/rejected": -11.878049850463867, "step": 10594 }, { "epoch": 1.65, "learning_rate": 6.376531977799678e-06, "logits/chosen": -2.8984694480895996, "logits/rejected": -2.958129644393921, "logps/chosen": -59.11644744873047, "logps/rejected": -156.08644104003906, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -2.835230588912964, "rewards/margins": 7.934095859527588, "rewards/rejected": -10.769326210021973, "step": 10595 }, { "epoch": 1.65, "learning_rate": 6.375798537268531e-06, "logits/chosen": -1.4911913871765137, "logits/rejected": -2.7276976108551025, "logps/chosen": -243.50521850585938, "logps/rejected": -577.2579345703125, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": -5.668275833129883, "rewards/margins": 4.830720901489258, "rewards/rejected": -10.49899673461914, "step": 10596 }, { "epoch": 1.65, "learning_rate": 6.375065096737383e-06, "logits/chosen": -2.530280113220215, "logits/rejected": -1.6500380039215088, "logps/chosen": -227.2679443359375, "logps/rejected": -285.6579895019531, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.5144801139831543, "rewards/margins": 5.934447288513184, "rewards/rejected": -8.44892692565918, "step": 10597 }, { "epoch": 1.65, "learning_rate": 6.3743316562062345e-06, "logits/chosen": -2.6246132850646973, "logits/rejected": -2.872577428817749, "logps/chosen": -278.33709716796875, "logps/rejected": -347.45977783203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -0.0589652955532074, "rewards/margins": 8.47659683227539, "rewards/rejected": -8.535562515258789, "step": 10598 }, { "epoch": 1.65, "learning_rate": 6.373598215675086e-06, "logits/chosen": -2.7426857948303223, "logits/rejected": -1.2873893976211548, "logps/chosen": -226.21209716796875, "logps/rejected": -248.9234161376953, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -3.895012855529785, "rewards/margins": 8.902660369873047, "rewards/rejected": -12.797673225402832, "step": 10599 }, { "epoch": 1.65, "learning_rate": 6.372864775143939e-06, "logits/chosen": -1.394637942314148, "logits/rejected": -2.97001576423645, "logps/chosen": -105.78599548339844, "logps/rejected": -237.81521606445312, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -2.785808801651001, "rewards/margins": 6.30386209487915, "rewards/rejected": -9.08967113494873, "step": 10600 }, { "epoch": 1.65, "learning_rate": 6.372131334612792e-06, "logits/chosen": -2.0577824115753174, "logits/rejected": -2.791480779647827, "logps/chosen": -176.0946044921875, "logps/rejected": -339.019287109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.6452083587646484, "rewards/margins": 7.354790210723877, "rewards/rejected": -9.999998092651367, "step": 10601 }, { "epoch": 1.65, "learning_rate": 6.371397894081644e-06, "logits/chosen": -2.627997398376465, "logits/rejected": -3.1201939582824707, "logps/chosen": -78.60423278808594, "logps/rejected": -354.4200439453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.5087960958480835, "rewards/margins": 8.309900283813477, "rewards/rejected": -8.818696022033691, "step": 10602 }, { "epoch": 1.65, "learning_rate": 6.3706644535504956e-06, "logits/chosen": -2.2294654846191406, "logits/rejected": -3.2272815704345703, "logps/chosen": -155.29087829589844, "logps/rejected": -579.9522705078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5773019790649414, "rewards/margins": 9.710453987121582, "rewards/rejected": -13.287755966186523, "step": 10603 }, { "epoch": 1.65, "learning_rate": 6.3699310130193474e-06, "logits/chosen": -2.1860339641571045, "logits/rejected": -2.857942581176758, "logps/chosen": -80.0193099975586, "logps/rejected": -407.59197998046875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.3228330612182617, "rewards/margins": 6.920198440551758, "rewards/rejected": -9.24303150177002, "step": 10604 }, { "epoch": 1.65, "learning_rate": 6.3691975724882e-06, "logits/chosen": -1.2581011056900024, "logits/rejected": -3.0059995651245117, "logps/chosen": -69.45503234863281, "logps/rejected": -623.8453369140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.3336386680603027, "rewards/margins": 12.409605026245117, "rewards/rejected": -14.743244171142578, "step": 10605 }, { "epoch": 1.65, "learning_rate": 6.368464131957052e-06, "logits/chosen": -2.5101587772369385, "logits/rejected": -3.236590623855591, "logps/chosen": -263.9754638671875, "logps/rejected": -476.74334716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1203413009643555, "rewards/margins": 10.157184600830078, "rewards/rejected": -12.277524948120117, "step": 10606 }, { "epoch": 1.65, "learning_rate": 6.367730691425904e-06, "logits/chosen": -2.9190077781677246, "logits/rejected": -1.8528172969818115, "logps/chosen": -396.7474060058594, "logps/rejected": -218.2003173828125, "loss": 1.1666, "rewards/accuracies": 0.5, "rewards/chosen": -3.9584717750549316, "rewards/margins": -0.5636283159255981, "rewards/rejected": -3.394843339920044, "step": 10607 }, { "epoch": 1.65, "learning_rate": 6.366997250894756e-06, "logits/chosen": -2.153470277786255, "logits/rejected": -2.8569560050964355, "logps/chosen": -195.518798828125, "logps/rejected": -185.93701171875, "loss": 0.2639, "rewards/accuracies": 1.0, "rewards/chosen": -3.272097587585449, "rewards/margins": 3.610954999923706, "rewards/rejected": -6.883052349090576, "step": 10608 }, { "epoch": 1.65, "learning_rate": 6.3662638103636085e-06, "logits/chosen": -2.818207263946533, "logits/rejected": -3.1811368465423584, "logps/chosen": -197.1813507080078, "logps/rejected": -375.2598876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.6564910411834717, "rewards/margins": 8.643054962158203, "rewards/rejected": -10.299545288085938, "step": 10609 }, { "epoch": 1.65, "learning_rate": 6.36553036983246e-06, "logits/chosen": -1.8719110488891602, "logits/rejected": -3.0797011852264404, "logps/chosen": -89.97703552246094, "logps/rejected": -397.601806640625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -2.8234708309173584, "rewards/margins": 8.878194808959961, "rewards/rejected": -11.701665878295898, "step": 10610 }, { "epoch": 1.65, "learning_rate": 6.364796929301312e-06, "logits/chosen": -2.709129571914673, "logits/rejected": -2.968780517578125, "logps/chosen": -60.92621612548828, "logps/rejected": -286.105224609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.376244068145752, "rewards/margins": 10.200170516967773, "rewards/rejected": -13.576414108276367, "step": 10611 }, { "epoch": 1.65, "learning_rate": 6.364063488770164e-06, "logits/chosen": -2.8807532787323, "logits/rejected": -2.6129918098449707, "logps/chosen": -660.8525390625, "logps/rejected": -546.9104614257812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.80047607421875, "rewards/margins": 10.54926872253418, "rewards/rejected": -16.34974479675293, "step": 10612 }, { "epoch": 1.65, "learning_rate": 6.363330048239016e-06, "logits/chosen": -2.8098442554473877, "logits/rejected": -2.2026314735412598, "logps/chosen": -152.89419555664062, "logps/rejected": -192.42205810546875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.513995409011841, "rewards/margins": 7.352893829345703, "rewards/rejected": -9.866888999938965, "step": 10613 }, { "epoch": 1.65, "learning_rate": 6.362596607707869e-06, "logits/chosen": -2.9544427394866943, "logits/rejected": -3.039140462875366, "logps/chosen": -895.9417114257812, "logps/rejected": -855.345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7678771018981934, "rewards/margins": 10.499160766601562, "rewards/rejected": -14.267038345336914, "step": 10614 }, { "epoch": 1.65, "learning_rate": 6.361863167176721e-06, "logits/chosen": -2.829075813293457, "logits/rejected": -2.2407310009002686, "logps/chosen": -626.6409301757812, "logps/rejected": -636.0665283203125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -5.555453777313232, "rewards/margins": 6.842207908630371, "rewards/rejected": -12.397661209106445, "step": 10615 }, { "epoch": 1.65, "learning_rate": 6.3611297266455725e-06, "logits/chosen": -2.8128745555877686, "logits/rejected": -2.8875458240509033, "logps/chosen": -182.39166259765625, "logps/rejected": -298.77490234375, "loss": 2.2283, "rewards/accuracies": 0.5, "rewards/chosen": -5.486421585083008, "rewards/margins": 3.3067736625671387, "rewards/rejected": -8.793194770812988, "step": 10616 }, { "epoch": 1.65, "learning_rate": 6.360396286114425e-06, "logits/chosen": -2.6667964458465576, "logits/rejected": -2.899101734161377, "logps/chosen": -200.662841796875, "logps/rejected": -457.0050354003906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.160759687423706, "rewards/margins": 7.846107006072998, "rewards/rejected": -11.006866455078125, "step": 10617 }, { "epoch": 1.65, "learning_rate": 6.359662845583278e-06, "logits/chosen": -0.4927465617656708, "logits/rejected": -2.227985143661499, "logps/chosen": -124.087890625, "logps/rejected": -852.222900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.656282424926758, "rewards/margins": 17.113658905029297, "rewards/rejected": -20.769941329956055, "step": 10618 }, { "epoch": 1.65, "learning_rate": 6.35892940505213e-06, "logits/chosen": -0.5121625065803528, "logits/rejected": -2.7383999824523926, "logps/chosen": -133.97811889648438, "logps/rejected": -336.08514404296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.161299705505371, "rewards/margins": 12.188834190368652, "rewards/rejected": -16.350133895874023, "step": 10619 }, { "epoch": 1.65, "learning_rate": 6.358195964520982e-06, "logits/chosen": -2.9660372734069824, "logits/rejected": -3.305532217025757, "logps/chosen": -366.72003173828125, "logps/rejected": -435.9539794921875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.5311646461486816, "rewards/margins": 7.970022678375244, "rewards/rejected": -10.501187324523926, "step": 10620 }, { "epoch": 1.65, "learning_rate": 6.357462523989834e-06, "logits/chosen": -1.589275598526001, "logits/rejected": -2.7814273834228516, "logps/chosen": -127.68252563476562, "logps/rejected": -365.29669189453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.1351362466812134, "rewards/margins": 7.414433479309082, "rewards/rejected": -8.549570083618164, "step": 10621 }, { "epoch": 1.65, "learning_rate": 6.3567290834586855e-06, "logits/chosen": -1.995863676071167, "logits/rejected": -2.887176275253296, "logps/chosen": -158.93478393554688, "logps/rejected": -276.47515869140625, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -2.7447736263275146, "rewards/margins": 7.910053730010986, "rewards/rejected": -10.654827117919922, "step": 10622 }, { "epoch": 1.65, "learning_rate": 6.355995642927538e-06, "logits/chosen": -2.6370623111724854, "logits/rejected": -2.2939255237579346, "logps/chosen": -94.5708999633789, "logps/rejected": -207.19187927246094, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.3332316875457764, "rewards/margins": 7.889857292175293, "rewards/rejected": -11.223089218139648, "step": 10623 }, { "epoch": 1.65, "learning_rate": 6.35526220239639e-06, "logits/chosen": -3.1635727882385254, "logits/rejected": -3.3711414337158203, "logps/chosen": -44.826656341552734, "logps/rejected": -176.72579956054688, "loss": 0.0232, "rewards/accuracies": 1.0, "rewards/chosen": -2.8559017181396484, "rewards/margins": 4.789915084838867, "rewards/rejected": -7.645816802978516, "step": 10624 }, { "epoch": 1.65, "learning_rate": 6.354528761865242e-06, "logits/chosen": -2.2864468097686768, "logits/rejected": -3.0135228633880615, "logps/chosen": -447.0925598144531, "logps/rejected": -536.4758911132812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2385568618774414, "rewards/margins": 8.522558212280273, "rewards/rejected": -11.761114120483398, "step": 10625 }, { "epoch": 1.65, "learning_rate": 6.353795321334094e-06, "logits/chosen": -2.772247552871704, "logits/rejected": -2.3873190879821777, "logps/chosen": -557.136474609375, "logps/rejected": -413.30792236328125, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -4.364073276519775, "rewards/margins": 6.243470668792725, "rewards/rejected": -10.6075439453125, "step": 10626 }, { "epoch": 1.65, "learning_rate": 6.3530618808029466e-06, "logits/chosen": -2.859846591949463, "logits/rejected": -2.8982832431793213, "logps/chosen": -152.30917358398438, "logps/rejected": -336.4164733886719, "loss": 0.1378, "rewards/accuracies": 1.0, "rewards/chosen": -4.744351387023926, "rewards/margins": 4.63582181930542, "rewards/rejected": -9.380172729492188, "step": 10627 }, { "epoch": 1.65, "learning_rate": 6.3523284402717985e-06, "logits/chosen": -2.674715518951416, "logits/rejected": -0.8539997339248657, "logps/chosen": -413.73065185546875, "logps/rejected": -295.8190612792969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.881290912628174, "rewards/margins": 8.652934074401855, "rewards/rejected": -13.534225463867188, "step": 10628 }, { "epoch": 1.65, "learning_rate": 6.35159499974065e-06, "logits/chosen": -2.6642751693725586, "logits/rejected": -2.907071590423584, "logps/chosen": -586.8712768554688, "logps/rejected": -390.0715637207031, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.4032721519470215, "rewards/margins": 7.5421977043151855, "rewards/rejected": -12.945469856262207, "step": 10629 }, { "epoch": 1.65, "learning_rate": 6.350861559209502e-06, "logits/chosen": -2.984950065612793, "logits/rejected": -3.3193159103393555, "logps/chosen": -56.75419235229492, "logps/rejected": -223.73870849609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.8531455993652344, "rewards/margins": 9.4490327835083, "rewards/rejected": -11.302178382873535, "step": 10630 }, { "epoch": 1.65, "learning_rate": 6.350128118678354e-06, "logits/chosen": -3.127293109893799, "logits/rejected": -3.288821220397949, "logps/chosen": -547.397705078125, "logps/rejected": -543.7730712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.36381077766418457, "rewards/margins": 12.631237983703613, "rewards/rejected": -12.995048522949219, "step": 10631 }, { "epoch": 1.65, "learning_rate": 6.349394678147207e-06, "logits/chosen": -2.8813130855560303, "logits/rejected": -3.060690402984619, "logps/chosen": -695.919189453125, "logps/rejected": -938.2623291015625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.087457656860352, "rewards/margins": 7.516168594360352, "rewards/rejected": -11.603626251220703, "step": 10632 }, { "epoch": 1.65, "learning_rate": 6.348661237616059e-06, "logits/chosen": -2.5578901767730713, "logits/rejected": -3.0902152061462402, "logps/chosen": -172.5105743408203, "logps/rejected": -442.49346923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0210564136505127, "rewards/margins": 11.050840377807617, "rewards/rejected": -14.071897506713867, "step": 10633 }, { "epoch": 1.65, "learning_rate": 6.347927797084911e-06, "logits/chosen": -1.6629565954208374, "logits/rejected": -2.3323237895965576, "logps/chosen": -187.72381591796875, "logps/rejected": -312.69415283203125, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.008946418762207, "rewards/margins": 6.969164848327637, "rewards/rejected": -10.978111267089844, "step": 10634 }, { "epoch": 1.65, "learning_rate": 6.347194356553763e-06, "logits/chosen": -2.1699881553649902, "logits/rejected": -2.7719650268554688, "logps/chosen": -41.711490631103516, "logps/rejected": -284.40338134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2279911041259766, "rewards/margins": 9.275686264038086, "rewards/rejected": -11.503677368164062, "step": 10635 }, { "epoch": 1.65, "learning_rate": 6.346460916022616e-06, "logits/chosen": -2.91874098777771, "logits/rejected": -3.1717305183410645, "logps/chosen": -170.66534423828125, "logps/rejected": -398.870849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.182807207107544, "rewards/margins": 11.035924911499023, "rewards/rejected": -12.218731880187988, "step": 10636 }, { "epoch": 1.65, "learning_rate": 6.345727475491468e-06, "logits/chosen": -2.4835402965545654, "logits/rejected": -2.946160316467285, "logps/chosen": -105.80874633789062, "logps/rejected": -245.79953002929688, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.449300527572632, "rewards/margins": 9.33997917175293, "rewards/rejected": -11.78927993774414, "step": 10637 }, { "epoch": 1.65, "learning_rate": 6.34499403496032e-06, "logits/chosen": -1.5039986371994019, "logits/rejected": -1.793595314025879, "logps/chosen": -228.20867919921875, "logps/rejected": -397.101318359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.52534556388855, "rewards/margins": 9.120887756347656, "rewards/rejected": -11.646233558654785, "step": 10638 }, { "epoch": 1.65, "learning_rate": 6.344260594429172e-06, "logits/chosen": -1.5756433010101318, "logits/rejected": -2.9110960960388184, "logps/chosen": -161.9945068359375, "logps/rejected": -396.3316650390625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -2.6329805850982666, "rewards/margins": 10.23130989074707, "rewards/rejected": -12.864290237426758, "step": 10639 }, { "epoch": 1.65, "learning_rate": 6.3435271538980235e-06, "logits/chosen": -2.5698604583740234, "logits/rejected": -2.925222158432007, "logps/chosen": -137.59327697753906, "logps/rejected": -370.1925354003906, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -4.559730529785156, "rewards/margins": 7.720743656158447, "rewards/rejected": -12.280473709106445, "step": 10640 }, { "epoch": 1.65, "learning_rate": 6.342793713366876e-06, "logits/chosen": -2.995853900909424, "logits/rejected": -2.028452157974243, "logps/chosen": -593.7227172851562, "logps/rejected": -614.4781494140625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -5.489834785461426, "rewards/margins": 8.299027442932129, "rewards/rejected": -13.788862228393555, "step": 10641 }, { "epoch": 1.66, "learning_rate": 6.342060272835728e-06, "logits/chosen": -1.4508452415466309, "logits/rejected": -3.1720616817474365, "logps/chosen": -104.21066284179688, "logps/rejected": -416.36669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1047983169555664, "rewards/margins": 10.141305923461914, "rewards/rejected": -12.246105194091797, "step": 10642 }, { "epoch": 1.66, "learning_rate": 6.34132683230458e-06, "logits/chosen": -1.9193800687789917, "logits/rejected": -2.879305839538574, "logps/chosen": -205.34732055664062, "logps/rejected": -486.053955078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.1641173362731934, "rewards/margins": 8.977006912231445, "rewards/rejected": -12.141124725341797, "step": 10643 }, { "epoch": 1.66, "learning_rate": 6.340593391773432e-06, "logits/chosen": -1.9936704635620117, "logits/rejected": -2.960116386413574, "logps/chosen": -177.6436004638672, "logps/rejected": -359.773193359375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.935258388519287, "rewards/margins": 7.763807773590088, "rewards/rejected": -11.699066162109375, "step": 10644 }, { "epoch": 1.66, "learning_rate": 6.339859951242285e-06, "logits/chosen": -2.1297833919525146, "logits/rejected": -2.6410348415374756, "logps/chosen": -263.7144775390625, "logps/rejected": -333.8397216796875, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -1.8815727233886719, "rewards/margins": 7.103486061096191, "rewards/rejected": -8.985058784484863, "step": 10645 }, { "epoch": 1.66, "learning_rate": 6.3391265107111365e-06, "logits/chosen": -2.7797582149505615, "logits/rejected": -2.1721644401550293, "logps/chosen": -139.33409118652344, "logps/rejected": -203.01734924316406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.6829596757888794, "rewards/margins": 7.464392185211182, "rewards/rejected": -9.14735221862793, "step": 10646 }, { "epoch": 1.66, "learning_rate": 6.338393070179988e-06, "logits/chosen": -2.724257230758667, "logits/rejected": -2.960714340209961, "logps/chosen": -140.4021453857422, "logps/rejected": -423.1585998535156, "loss": 0.0844, "rewards/accuracies": 1.0, "rewards/chosen": -2.1411571502685547, "rewards/margins": 6.173731803894043, "rewards/rejected": -8.314888954162598, "step": 10647 }, { "epoch": 1.66, "learning_rate": 6.33765962964884e-06, "logits/chosen": -1.5597468614578247, "logits/rejected": -2.761244535446167, "logps/chosen": -188.55010986328125, "logps/rejected": -376.9844970703125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -3.6793527603149414, "rewards/margins": 6.6168646812438965, "rewards/rejected": -10.29621696472168, "step": 10648 }, { "epoch": 1.66, "learning_rate": 6.336926189117692e-06, "logits/chosen": -2.92657470703125, "logits/rejected": -2.6002562046051025, "logps/chosen": -245.73291015625, "logps/rejected": -343.17578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.5631208419799805, "rewards/margins": 6.295431137084961, "rewards/rejected": -10.858551025390625, "step": 10649 }, { "epoch": 1.66, "learning_rate": 6.336192748586545e-06, "logits/chosen": -2.490330696105957, "logits/rejected": -2.958566188812256, "logps/chosen": -172.0217742919922, "logps/rejected": -270.260986328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.530503511428833, "rewards/margins": 10.001520156860352, "rewards/rejected": -11.532024383544922, "step": 10650 }, { "epoch": 1.66, "learning_rate": 6.335459308055398e-06, "logits/chosen": -2.684901237487793, "logits/rejected": -2.6839139461517334, "logps/chosen": -530.3181762695312, "logps/rejected": -448.4968566894531, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -3.3768391609191895, "rewards/margins": 5.6248979568481445, "rewards/rejected": -9.001737594604492, "step": 10651 }, { "epoch": 1.66, "learning_rate": 6.3347258675242495e-06, "logits/chosen": -2.228724479675293, "logits/rejected": -3.036144256591797, "logps/chosen": -220.365478515625, "logps/rejected": -370.6744079589844, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -3.2185873985290527, "rewards/margins": 8.884674072265625, "rewards/rejected": -12.10326099395752, "step": 10652 }, { "epoch": 1.66, "learning_rate": 6.333992426993101e-06, "logits/chosen": -2.6268787384033203, "logits/rejected": -1.7189124822616577, "logps/chosen": -359.0103454589844, "logps/rejected": -263.51641845703125, "loss": 0.9015, "rewards/accuracies": 0.5, "rewards/chosen": -3.091045379638672, "rewards/margins": 2.6630911827087402, "rewards/rejected": -5.754136562347412, "step": 10653 }, { "epoch": 1.66, "learning_rate": 6.333258986461954e-06, "logits/chosen": -2.1293461322784424, "logits/rejected": -3.078784704208374, "logps/chosen": -290.1286926269531, "logps/rejected": -586.002685546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7999839782714844, "rewards/margins": 7.348330497741699, "rewards/rejected": -9.148314476013184, "step": 10654 }, { "epoch": 1.66, "learning_rate": 6.332525545930806e-06, "logits/chosen": -3.1179792881011963, "logits/rejected": -1.727903127670288, "logps/chosen": -310.10064697265625, "logps/rejected": -329.28033447265625, "loss": 1.9942, "rewards/accuracies": 0.5, "rewards/chosen": -4.984718322753906, "rewards/margins": 3.6895503997802734, "rewards/rejected": -8.67426872253418, "step": 10655 }, { "epoch": 1.66, "learning_rate": 6.331792105399658e-06, "logits/chosen": -1.8726688623428345, "logits/rejected": -2.855212688446045, "logps/chosen": -215.9971160888672, "logps/rejected": -380.997802734375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -3.5820724964141846, "rewards/margins": 8.192442893981934, "rewards/rejected": -11.774515151977539, "step": 10656 }, { "epoch": 1.66, "learning_rate": 6.33105866486851e-06, "logits/chosen": -2.412760019302368, "logits/rejected": -3.1209659576416016, "logps/chosen": -154.9306640625, "logps/rejected": -427.50701904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0615787506103516, "rewards/margins": 14.441271781921387, "rewards/rejected": -16.502849578857422, "step": 10657 }, { "epoch": 1.66, "learning_rate": 6.330325224337362e-06, "logits/chosen": -2.5213570594787598, "logits/rejected": -3.1232759952545166, "logps/chosen": -271.6094665527344, "logps/rejected": -362.2099304199219, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.3896836042404175, "rewards/margins": 7.1252546310424805, "rewards/rejected": -8.514938354492188, "step": 10658 }, { "epoch": 1.66, "learning_rate": 6.329591783806214e-06, "logits/chosen": -1.4719384908676147, "logits/rejected": -2.762408494949341, "logps/chosen": -529.0546875, "logps/rejected": -624.74658203125, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -6.4285054206848145, "rewards/margins": 5.148918151855469, "rewards/rejected": -11.577423095703125, "step": 10659 }, { "epoch": 1.66, "learning_rate": 6.328858343275066e-06, "logits/chosen": -2.818948745727539, "logits/rejected": -2.3211913108825684, "logps/chosen": -488.81011962890625, "logps/rejected": -581.6363525390625, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": -6.714636325836182, "rewards/margins": 2.812619686126709, "rewards/rejected": -9.52725601196289, "step": 10660 }, { "epoch": 1.66, "learning_rate": 6.328124902743918e-06, "logits/chosen": -2.156167507171631, "logits/rejected": -3.2432491779327393, "logps/chosen": -253.35015869140625, "logps/rejected": -629.143310546875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -4.252856254577637, "rewards/margins": 4.640397071838379, "rewards/rejected": -8.893253326416016, "step": 10661 }, { "epoch": 1.66, "learning_rate": 6.32739146221277e-06, "logits/chosen": -2.822831869125366, "logits/rejected": -2.6229429244995117, "logps/chosen": -480.83642578125, "logps/rejected": -451.0718688964844, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -7.1761155128479, "rewards/margins": 7.874486923217773, "rewards/rejected": -15.050601959228516, "step": 10662 }, { "epoch": 1.66, "learning_rate": 6.326658021681623e-06, "logits/chosen": -2.487936496734619, "logits/rejected": -3.0350546836853027, "logps/chosen": -177.7729034423828, "logps/rejected": -291.3185119628906, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.3904972076416016, "rewards/margins": 6.495623588562012, "rewards/rejected": -9.886120796203613, "step": 10663 }, { "epoch": 1.66, "learning_rate": 6.3259245811504745e-06, "logits/chosen": -1.1406952142715454, "logits/rejected": -2.8482580184936523, "logps/chosen": -95.34622192382812, "logps/rejected": -489.3304443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8375015258789062, "rewards/margins": 10.406559944152832, "rewards/rejected": -13.244061470031738, "step": 10664 }, { "epoch": 1.66, "learning_rate": 6.325191140619326e-06, "logits/chosen": -2.747058868408203, "logits/rejected": -2.67580509185791, "logps/chosen": -556.2692260742188, "logps/rejected": -626.3320922851562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5705347061157227, "rewards/margins": 9.582557678222656, "rewards/rejected": -12.153093338012695, "step": 10665 }, { "epoch": 1.66, "learning_rate": 6.324457700088178e-06, "logits/chosen": -3.0212008953094482, "logits/rejected": -2.467332124710083, "logps/chosen": -459.4064636230469, "logps/rejected": -369.44580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9777512550354004, "rewards/margins": 9.963860511779785, "rewards/rejected": -12.941612243652344, "step": 10666 }, { "epoch": 1.66, "learning_rate": 6.323724259557031e-06, "logits/chosen": -2.7175228595733643, "logits/rejected": -2.8811984062194824, "logps/chosen": -61.397457122802734, "logps/rejected": -149.44921875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.053422689437866, "rewards/margins": 5.6128740310668945, "rewards/rejected": -8.66629695892334, "step": 10667 }, { "epoch": 1.66, "learning_rate": 6.322990819025884e-06, "logits/chosen": -2.613525629043579, "logits/rejected": -3.088622570037842, "logps/chosen": -82.64372253417969, "logps/rejected": -270.50311279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8106985092163086, "rewards/margins": 9.327045440673828, "rewards/rejected": -10.13774299621582, "step": 10668 }, { "epoch": 1.66, "learning_rate": 6.322257378494736e-06, "logits/chosen": -2.1807451248168945, "logits/rejected": -3.2085018157958984, "logps/chosen": -92.40520477294922, "logps/rejected": -254.30526733398438, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.3299269676208496, "rewards/margins": 6.249238014221191, "rewards/rejected": -9.5791654586792, "step": 10669 }, { "epoch": 1.66, "learning_rate": 6.3215239379635875e-06, "logits/chosen": -1.8038575649261475, "logits/rejected": -2.6735215187072754, "logps/chosen": -82.08413696289062, "logps/rejected": -200.821044921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.819361686706543, "rewards/margins": 7.084902763366699, "rewards/rejected": -10.904264450073242, "step": 10670 }, { "epoch": 1.66, "learning_rate": 6.320790497432439e-06, "logits/chosen": -2.5218758583068848, "logits/rejected": -2.89504075050354, "logps/chosen": -370.8673400878906, "logps/rejected": -682.7214965820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.962482452392578, "rewards/margins": 10.886444091796875, "rewards/rejected": -14.848926544189453, "step": 10671 }, { "epoch": 1.66, "learning_rate": 6.320057056901292e-06, "logits/chosen": -2.8755931854248047, "logits/rejected": -1.9584869146347046, "logps/chosen": -379.12872314453125, "logps/rejected": -354.0133361816406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7258567810058594, "rewards/margins": 9.987785339355469, "rewards/rejected": -12.713642120361328, "step": 10672 }, { "epoch": 1.66, "learning_rate": 6.319323616370144e-06, "logits/chosen": -2.9148104190826416, "logits/rejected": -3.383484125137329, "logps/chosen": -48.62460708618164, "logps/rejected": -364.4671936035156, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.488614082336426, "rewards/margins": 8.861175537109375, "rewards/rejected": -12.349790573120117, "step": 10673 }, { "epoch": 1.66, "learning_rate": 6.318590175838996e-06, "logits/chosen": -2.80790114402771, "logits/rejected": -3.0372138023376465, "logps/chosen": -99.11738586425781, "logps/rejected": -272.5223693847656, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.273991584777832, "rewards/margins": 7.589773654937744, "rewards/rejected": -9.863765716552734, "step": 10674 }, { "epoch": 1.66, "learning_rate": 6.317856735307848e-06, "logits/chosen": -2.1989243030548096, "logits/rejected": -3.022313356399536, "logps/chosen": -176.54425048828125, "logps/rejected": -266.0501708984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.1113743782043457, "rewards/margins": 8.026833534240723, "rewards/rejected": -10.13820743560791, "step": 10675 }, { "epoch": 1.66, "learning_rate": 6.3171232947767005e-06, "logits/chosen": -3.0547842979431152, "logits/rejected": -3.170902967453003, "logps/chosen": -82.58135986328125, "logps/rejected": -186.58184814453125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.2661867141723633, "rewards/margins": 8.405179977416992, "rewards/rejected": -11.671367645263672, "step": 10676 }, { "epoch": 1.66, "learning_rate": 6.316389854245552e-06, "logits/chosen": -2.897569179534912, "logits/rejected": -2.7300641536712646, "logps/chosen": -548.188232421875, "logps/rejected": -659.4164428710938, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.607062339782715, "rewards/margins": 6.469861030578613, "rewards/rejected": -12.076923370361328, "step": 10677 }, { "epoch": 1.66, "learning_rate": 6.315656413714404e-06, "logits/chosen": -2.777218818664551, "logits/rejected": -3.061950206756592, "logps/chosen": -196.17507934570312, "logps/rejected": -361.78851318359375, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -3.6348869800567627, "rewards/margins": 6.907530784606934, "rewards/rejected": -10.542417526245117, "step": 10678 }, { "epoch": 1.66, "learning_rate": 6.314922973183256e-06, "logits/chosen": -1.8176229000091553, "logits/rejected": -3.122650384902954, "logps/chosen": -316.5506896972656, "logps/rejected": -429.93670654296875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.857102870941162, "rewards/margins": 7.556954383850098, "rewards/rejected": -10.414057731628418, "step": 10679 }, { "epoch": 1.66, "learning_rate": 6.314189532652108e-06, "logits/chosen": -2.6290953159332275, "logits/rejected": -3.1082534790039062, "logps/chosen": -98.83224487304688, "logps/rejected": -356.21173095703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.7771210670471191, "rewards/margins": 10.142949104309082, "rewards/rejected": -11.92007064819336, "step": 10680 }, { "epoch": 1.66, "learning_rate": 6.313456092120961e-06, "logits/chosen": -2.8618578910827637, "logits/rejected": -2.690037250518799, "logps/chosen": -158.47100830078125, "logps/rejected": -220.27984619140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.964268207550049, "rewards/margins": 7.153258800506592, "rewards/rejected": -11.11752700805664, "step": 10681 }, { "epoch": 1.66, "learning_rate": 6.312722651589813e-06, "logits/chosen": -2.237088680267334, "logits/rejected": -3.1531224250793457, "logps/chosen": -67.34842681884766, "logps/rejected": -251.49295043945312, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.4246902465820312, "rewards/margins": 6.027705192565918, "rewards/rejected": -8.45239543914795, "step": 10682 }, { "epoch": 1.66, "learning_rate": 6.3119892110586645e-06, "logits/chosen": -2.8588597774505615, "logits/rejected": -2.609405755996704, "logps/chosen": -177.52011108398438, "logps/rejected": -239.22598266601562, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.15432071685791, "rewards/margins": 7.062828540802002, "rewards/rejected": -10.21714973449707, "step": 10683 }, { "epoch": 1.66, "learning_rate": 6.311255770527517e-06, "logits/chosen": -1.3480825424194336, "logits/rejected": -2.404097080230713, "logps/chosen": -94.38700103759766, "logps/rejected": -350.0467529296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.320324420928955, "rewards/margins": 11.607858657836914, "rewards/rejected": -13.928182601928711, "step": 10684 }, { "epoch": 1.66, "learning_rate": 6.31052232999637e-06, "logits/chosen": -2.839184284210205, "logits/rejected": -1.550521969795227, "logps/chosen": -475.6471862792969, "logps/rejected": -340.47430419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9443297386169434, "rewards/margins": 11.085529327392578, "rewards/rejected": -14.029858589172363, "step": 10685 }, { "epoch": 1.66, "learning_rate": 6.309788889465222e-06, "logits/chosen": -1.9994233846664429, "logits/rejected": -2.8489794731140137, "logps/chosen": -251.6665496826172, "logps/rejected": -401.61688232421875, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/chosen": -4.982285976409912, "rewards/margins": 5.011882305145264, "rewards/rejected": -9.994168281555176, "step": 10686 }, { "epoch": 1.66, "learning_rate": 6.309055448934074e-06, "logits/chosen": -1.663183331489563, "logits/rejected": -2.623947858810425, "logps/chosen": -122.48698425292969, "logps/rejected": -239.0381622314453, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -2.3889970779418945, "rewards/margins": 9.067327499389648, "rewards/rejected": -11.456324577331543, "step": 10687 }, { "epoch": 1.66, "learning_rate": 6.3083220084029255e-06, "logits/chosen": -1.727899432182312, "logits/rejected": -2.8146636486053467, "logps/chosen": -83.51582336425781, "logps/rejected": -325.16741943359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.9955310821533203, "rewards/margins": 10.388671875, "rewards/rejected": -12.38420295715332, "step": 10688 }, { "epoch": 1.66, "learning_rate": 6.3075885678717774e-06, "logits/chosen": -2.354217052459717, "logits/rejected": -3.058544635772705, "logps/chosen": -506.1763916015625, "logps/rejected": -684.9306640625, "loss": 0.2945, "rewards/accuracies": 1.0, "rewards/chosen": -8.491605758666992, "rewards/margins": 3.819279670715332, "rewards/rejected": -12.31088638305664, "step": 10689 }, { "epoch": 1.66, "learning_rate": 6.30685512734063e-06, "logits/chosen": -2.1250102519989014, "logits/rejected": -2.8070871829986572, "logps/chosen": -339.08544921875, "logps/rejected": -681.6046142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0062226057052612, "rewards/margins": 13.362678527832031, "rewards/rejected": -14.368901252746582, "step": 10690 }, { "epoch": 1.66, "learning_rate": 6.306121686809482e-06, "logits/chosen": -2.199890375137329, "logits/rejected": -3.174142837524414, "logps/chosen": -152.3267822265625, "logps/rejected": -489.1998596191406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2919280529022217, "rewards/margins": 8.430906295776367, "rewards/rejected": -10.722835540771484, "step": 10691 }, { "epoch": 1.66, "learning_rate": 6.305388246278334e-06, "logits/chosen": -2.7695446014404297, "logits/rejected": -2.3704519271850586, "logps/chosen": -279.4171447753906, "logps/rejected": -287.3306884765625, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": -5.077455520629883, "rewards/margins": 6.153595924377441, "rewards/rejected": -11.231051445007324, "step": 10692 }, { "epoch": 1.66, "learning_rate": 6.304654805747186e-06, "logits/chosen": -2.669039487838745, "logits/rejected": -2.925696849822998, "logps/chosen": -101.9311294555664, "logps/rejected": -220.35000610351562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.5311856269836426, "rewards/margins": 7.707615375518799, "rewards/rejected": -10.238801002502441, "step": 10693 }, { "epoch": 1.66, "learning_rate": 6.3039213652160385e-06, "logits/chosen": -3.1877832412719727, "logits/rejected": -2.570436954498291, "logps/chosen": -312.4941711425781, "logps/rejected": -162.55169677734375, "loss": 1.9284, "rewards/accuracies": 0.5, "rewards/chosen": -4.163822174072266, "rewards/margins": 2.0312068462371826, "rewards/rejected": -6.195029258728027, "step": 10694 }, { "epoch": 1.66, "learning_rate": 6.30318792468489e-06, "logits/chosen": -2.445333957672119, "logits/rejected": -2.8584938049316406, "logps/chosen": -281.2569274902344, "logps/rejected": -375.91162109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2710800170898438, "rewards/margins": 8.971485137939453, "rewards/rejected": -11.242565155029297, "step": 10695 }, { "epoch": 1.66, "learning_rate": 6.302454484153742e-06, "logits/chosen": -2.9583804607391357, "logits/rejected": -3.1529030799865723, "logps/chosen": -136.31890869140625, "logps/rejected": -364.144287109375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.212235689163208, "rewards/margins": 8.014052391052246, "rewards/rejected": -10.226287841796875, "step": 10696 }, { "epoch": 1.66, "learning_rate": 6.301721043622594e-06, "logits/chosen": -1.8811099529266357, "logits/rejected": -2.708115577697754, "logps/chosen": -103.17437744140625, "logps/rejected": -410.03857421875, "loss": 0.0273, "rewards/accuracies": 1.0, "rewards/chosen": -2.1351685523986816, "rewards/margins": 11.527525901794434, "rewards/rejected": -13.662694931030273, "step": 10697 }, { "epoch": 1.66, "learning_rate": 6.300987603091446e-06, "logits/chosen": -2.804161548614502, "logits/rejected": -2.0992555618286133, "logps/chosen": -166.0774688720703, "logps/rejected": -260.382568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.584148406982422, "rewards/margins": 10.14460563659668, "rewards/rejected": -12.728754043579102, "step": 10698 }, { "epoch": 1.66, "learning_rate": 6.300254162560299e-06, "logits/chosen": -2.298121452331543, "logits/rejected": -2.905531167984009, "logps/chosen": -72.99421691894531, "logps/rejected": -236.17532348632812, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.028388500213623, "rewards/margins": 8.523416519165039, "rewards/rejected": -11.55180549621582, "step": 10699 }, { "epoch": 1.66, "learning_rate": 6.299520722029151e-06, "logits/chosen": -1.4945262670516968, "logits/rejected": -2.751460313796997, "logps/chosen": -195.5111846923828, "logps/rejected": -359.07147216796875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -2.9400253295898438, "rewards/margins": 5.557114601135254, "rewards/rejected": -8.497139930725098, "step": 10700 }, { "epoch": 1.66, "learning_rate": 6.298787281498003e-06, "logits/chosen": -1.9842244386672974, "logits/rejected": -3.1240949630737305, "logps/chosen": -146.6315460205078, "logps/rejected": -454.22442626953125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -3.9277265071868896, "rewards/margins": 7.265769958496094, "rewards/rejected": -11.193496704101562, "step": 10701 }, { "epoch": 1.66, "learning_rate": 6.298053840966855e-06, "logits/chosen": -2.7825255393981934, "logits/rejected": -2.5212392807006836, "logps/chosen": -423.6036071777344, "logps/rejected": -455.5275573730469, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.3566131591796875, "rewards/margins": 7.481025695800781, "rewards/rejected": -9.837638854980469, "step": 10702 }, { "epoch": 1.66, "learning_rate": 6.297320400435708e-06, "logits/chosen": -3.022082805633545, "logits/rejected": -2.690717935562134, "logps/chosen": -455.16058349609375, "logps/rejected": -309.14801025390625, "loss": 0.0787, "rewards/accuracies": 1.0, "rewards/chosen": -2.8544113636016846, "rewards/margins": 4.881946086883545, "rewards/rejected": -7.73635721206665, "step": 10703 }, { "epoch": 1.66, "learning_rate": 6.29658695990456e-06, "logits/chosen": -1.4553805589675903, "logits/rejected": -2.866511106491089, "logps/chosen": -115.45991516113281, "logps/rejected": -333.15899658203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.7403664588928223, "rewards/margins": 7.485841274261475, "rewards/rejected": -10.226207733154297, "step": 10704 }, { "epoch": 1.66, "learning_rate": 6.295853519373412e-06, "logits/chosen": -2.794891834259033, "logits/rejected": -2.132927417755127, "logps/chosen": -139.8120574951172, "logps/rejected": -223.97604370117188, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.221508026123047, "rewards/margins": 8.66690444946289, "rewards/rejected": -11.888412475585938, "step": 10705 }, { "epoch": 1.67, "learning_rate": 6.295120078842264e-06, "logits/chosen": -2.2714128494262695, "logits/rejected": -3.0753557682037354, "logps/chosen": -76.49562072753906, "logps/rejected": -174.683349609375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.420748710632324, "rewards/margins": 6.55798864364624, "rewards/rejected": -8.978736877441406, "step": 10706 }, { "epoch": 1.67, "learning_rate": 6.294386638311116e-06, "logits/chosen": -2.853661060333252, "logits/rejected": -1.7585880756378174, "logps/chosen": -463.66473388671875, "logps/rejected": -233.40219116210938, "loss": 0.0905, "rewards/accuracies": 1.0, "rewards/chosen": -1.886102318763733, "rewards/margins": 4.373331069946289, "rewards/rejected": -6.259433269500732, "step": 10707 }, { "epoch": 1.67, "learning_rate": 6.293653197779968e-06, "logits/chosen": -2.8734018802642822, "logits/rejected": -1.777835488319397, "logps/chosen": -898.5216064453125, "logps/rejected": -403.6822509765625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -4.690057277679443, "rewards/margins": 5.689457893371582, "rewards/rejected": -10.379514694213867, "step": 10708 }, { "epoch": 1.67, "learning_rate": 6.29291975724882e-06, "logits/chosen": -2.8171558380126953, "logits/rejected": -2.0266857147216797, "logps/chosen": -450.1046447753906, "logps/rejected": -416.6697692871094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.418088436126709, "rewards/margins": 7.173295497894287, "rewards/rejected": -10.591383934020996, "step": 10709 }, { "epoch": 1.67, "learning_rate": 6.292186316717672e-06, "logits/chosen": -2.663745403289795, "logits/rejected": -2.0145204067230225, "logps/chosen": -832.0087890625, "logps/rejected": -499.50006103515625, "loss": 1.7675, "rewards/accuracies": 0.0, "rewards/chosen": -6.746264457702637, "rewards/margins": -1.4028944969177246, "rewards/rejected": -5.34337043762207, "step": 10710 }, { "epoch": 1.67, "learning_rate": 6.291452876186524e-06, "logits/chosen": -3.161839246749878, "logits/rejected": -1.7039350271224976, "logps/chosen": -391.8238525390625, "logps/rejected": -133.6079559326172, "loss": 0.0876, "rewards/accuracies": 1.0, "rewards/chosen": -4.119316101074219, "rewards/margins": 3.2904253005981445, "rewards/rejected": -7.409741401672363, "step": 10711 }, { "epoch": 1.67, "learning_rate": 6.2907194356553766e-06, "logits/chosen": -2.863837480545044, "logits/rejected": -1.7859667539596558, "logps/chosen": -390.5055236816406, "logps/rejected": -324.32342529296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.333330154418945, "rewards/margins": 7.454780578613281, "rewards/rejected": -12.788110733032227, "step": 10712 }, { "epoch": 1.67, "learning_rate": 6.2899859951242284e-06, "logits/chosen": -1.8160663843154907, "logits/rejected": -2.8114686012268066, "logps/chosen": -105.51148223876953, "logps/rejected": -241.49993896484375, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -3.173597574234009, "rewards/margins": 5.434291839599609, "rewards/rejected": -8.607889175415039, "step": 10713 }, { "epoch": 1.67, "learning_rate": 6.28925255459308e-06, "logits/chosen": -2.6772491931915283, "logits/rejected": -1.5550944805145264, "logps/chosen": -349.40802001953125, "logps/rejected": -374.04425048828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6651439666748047, "rewards/margins": 10.59078311920166, "rewards/rejected": -12.255927085876465, "step": 10714 }, { "epoch": 1.67, "learning_rate": 6.288519114061932e-06, "logits/chosen": -1.5085482597351074, "logits/rejected": -2.4481074810028076, "logps/chosen": -235.37423706054688, "logps/rejected": -442.9757995605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.488846778869629, "rewards/margins": 11.509662628173828, "rewards/rejected": -13.998509407043457, "step": 10715 }, { "epoch": 1.67, "learning_rate": 6.287785673530785e-06, "logits/chosen": -2.6590139865875244, "logits/rejected": -2.1418585777282715, "logps/chosen": -184.5895538330078, "logps/rejected": -233.5737762451172, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.8101940155029297, "rewards/margins": 7.7213592529296875, "rewards/rejected": -10.531553268432617, "step": 10716 }, { "epoch": 1.67, "learning_rate": 6.287052232999637e-06, "logits/chosen": -1.4555399417877197, "logits/rejected": -2.8718924522399902, "logps/chosen": -216.77899169921875, "logps/rejected": -563.048583984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.8567960262298584, "rewards/margins": 9.585874557495117, "rewards/rejected": -11.442670822143555, "step": 10717 }, { "epoch": 1.67, "learning_rate": 6.2863187924684895e-06, "logits/chosen": -2.9270966053009033, "logits/rejected": -2.21010422706604, "logps/chosen": -439.6203918457031, "logps/rejected": -389.369140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.798214912414551, "rewards/margins": 8.520560264587402, "rewards/rejected": -11.318775177001953, "step": 10718 }, { "epoch": 1.67, "learning_rate": 6.285585351937341e-06, "logits/chosen": -1.8023957014083862, "logits/rejected": -2.5947933197021484, "logps/chosen": -134.4725341796875, "logps/rejected": -254.14422607421875, "loss": 0.0189, "rewards/accuracies": 1.0, "rewards/chosen": -2.1490120887756348, "rewards/margins": 5.555655479431152, "rewards/rejected": -7.704668045043945, "step": 10719 }, { "epoch": 1.67, "learning_rate": 6.284851911406193e-06, "logits/chosen": -1.8726041316986084, "logits/rejected": -2.6195802688598633, "logps/chosen": -266.7281494140625, "logps/rejected": -381.4961242675781, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -3.9078855514526367, "rewards/margins": 7.199615478515625, "rewards/rejected": -11.107501029968262, "step": 10720 }, { "epoch": 1.67, "learning_rate": 6.284118470875046e-06, "logits/chosen": -2.5482048988342285, "logits/rejected": -3.0530967712402344, "logps/chosen": -111.77940368652344, "logps/rejected": -492.13397216796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.843480110168457, "rewards/margins": 11.15040111541748, "rewards/rejected": -14.993881225585938, "step": 10721 }, { "epoch": 1.67, "learning_rate": 6.283385030343898e-06, "logits/chosen": -2.6429829597473145, "logits/rejected": -3.092986583709717, "logps/chosen": -168.02626037597656, "logps/rejected": -504.5460205078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.64128577709198, "rewards/margins": 7.942522048950195, "rewards/rejected": -9.583807945251465, "step": 10722 }, { "epoch": 1.67, "learning_rate": 6.28265158981275e-06, "logits/chosen": -2.413477897644043, "logits/rejected": -2.898193597793579, "logps/chosen": -189.08441162109375, "logps/rejected": -333.91876220703125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.2589404582977295, "rewards/margins": 6.010918140411377, "rewards/rejected": -8.269859313964844, "step": 10723 }, { "epoch": 1.67, "learning_rate": 6.281918149281602e-06, "logits/chosen": -2.726591110229492, "logits/rejected": -2.885979175567627, "logps/chosen": -110.03934478759766, "logps/rejected": -252.47943115234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.221553325653076, "rewards/margins": 7.719820976257324, "rewards/rejected": -11.941373825073242, "step": 10724 }, { "epoch": 1.67, "learning_rate": 6.281184708750454e-06, "logits/chosen": -2.8168067932128906, "logits/rejected": -3.0827267169952393, "logps/chosen": -224.592529296875, "logps/rejected": -349.5539855957031, "loss": 0.2832, "rewards/accuracies": 1.0, "rewards/chosen": -3.469719648361206, "rewards/margins": 2.683530569076538, "rewards/rejected": -6.153250217437744, "step": 10725 }, { "epoch": 1.67, "learning_rate": 6.280451268219306e-06, "logits/chosen": -3.1660261154174805, "logits/rejected": -3.2300641536712646, "logps/chosen": -458.0118103027344, "logps/rejected": -461.8792724609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.267324924468994, "rewards/margins": 9.204374313354492, "rewards/rejected": -11.471699714660645, "step": 10726 }, { "epoch": 1.67, "learning_rate": 6.279717827688158e-06, "logits/chosen": -2.744842290878296, "logits/rejected": -2.026502847671509, "logps/chosen": -571.0156860351562, "logps/rejected": -519.3618774414062, "loss": 0.1244, "rewards/accuracies": 1.0, "rewards/chosen": -3.865297794342041, "rewards/margins": 2.558528184890747, "rewards/rejected": -6.423826217651367, "step": 10727 }, { "epoch": 1.67, "learning_rate": 6.27898438715701e-06, "logits/chosen": -2.476262092590332, "logits/rejected": -2.6403353214263916, "logps/chosen": -143.05621337890625, "logps/rejected": -256.14300537109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.615480899810791, "rewards/margins": 7.925596237182617, "rewards/rejected": -10.54107666015625, "step": 10728 }, { "epoch": 1.67, "learning_rate": 6.278250946625862e-06, "logits/chosen": -2.2365894317626953, "logits/rejected": -2.9988327026367188, "logps/chosen": -167.92630004882812, "logps/rejected": -599.1375732421875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8933991193771362, "rewards/margins": 10.105599403381348, "rewards/rejected": -11.998998641967773, "step": 10729 }, { "epoch": 1.67, "learning_rate": 6.277517506094715e-06, "logits/chosen": -1.8837467432022095, "logits/rejected": -3.191668748855591, "logps/chosen": -451.43603515625, "logps/rejected": -477.7712097167969, "loss": 0.1174, "rewards/accuracies": 1.0, "rewards/chosen": -5.703346252441406, "rewards/margins": 6.185691833496094, "rewards/rejected": -11.8890380859375, "step": 10730 }, { "epoch": 1.67, "learning_rate": 6.2767840655635665e-06, "logits/chosen": -2.8020896911621094, "logits/rejected": -3.091932535171509, "logps/chosen": -46.35187530517578, "logps/rejected": -274.0537109375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.0040085315704346, "rewards/margins": 7.998292922973633, "rewards/rejected": -10.002302169799805, "step": 10731 }, { "epoch": 1.67, "learning_rate": 6.276050625032418e-06, "logits/chosen": -1.95490562915802, "logits/rejected": -2.0417892932891846, "logps/chosen": -277.16717529296875, "logps/rejected": -526.2037353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.417552947998047, "rewards/margins": 13.204893112182617, "rewards/rejected": -17.622446060180664, "step": 10732 }, { "epoch": 1.67, "learning_rate": 6.27531718450127e-06, "logits/chosen": -2.752197027206421, "logits/rejected": -3.2726645469665527, "logps/chosen": -284.1859130859375, "logps/rejected": -392.28192138671875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.8995394706726074, "rewards/margins": 7.490049362182617, "rewards/rejected": -10.389588356018066, "step": 10733 }, { "epoch": 1.67, "learning_rate": 6.274583743970123e-06, "logits/chosen": -2.9788830280303955, "logits/rejected": -3.3622796535491943, "logps/chosen": -48.13014221191406, "logps/rejected": -194.74412536621094, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.465458869934082, "rewards/margins": 6.982636451721191, "rewards/rejected": -9.448095321655273, "step": 10734 }, { "epoch": 1.67, "learning_rate": 6.273850303438976e-06, "logits/chosen": -2.1298789978027344, "logits/rejected": -2.7664573192596436, "logps/chosen": -332.498291015625, "logps/rejected": -533.0323486328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.5555107593536377, "rewards/margins": 9.889184951782227, "rewards/rejected": -13.444696426391602, "step": 10735 }, { "epoch": 1.67, "learning_rate": 6.2731168629078276e-06, "logits/chosen": -2.5264484882354736, "logits/rejected": -3.096719980239868, "logps/chosen": -386.31976318359375, "logps/rejected": -529.4457397460938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.8603134155273438, "rewards/margins": 7.953176498413086, "rewards/rejected": -9.81348991394043, "step": 10736 }, { "epoch": 1.67, "learning_rate": 6.2723834223766794e-06, "logits/chosen": -2.6613173484802246, "logits/rejected": -2.865543842315674, "logps/chosen": -306.9914245605469, "logps/rejected": -461.84869384765625, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -4.141770362854004, "rewards/margins": 4.535279273986816, "rewards/rejected": -8.67704963684082, "step": 10737 }, { "epoch": 1.67, "learning_rate": 6.271649981845531e-06, "logits/chosen": -2.6026906967163086, "logits/rejected": -2.9683144092559814, "logps/chosen": -366.9122314453125, "logps/rejected": -407.7798767089844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8195388317108154, "rewards/margins": 8.625210762023926, "rewards/rejected": -11.44474983215332, "step": 10738 }, { "epoch": 1.67, "learning_rate": 6.270916541314384e-06, "logits/chosen": -2.0354979038238525, "logits/rejected": -2.8665263652801514, "logps/chosen": -76.65867614746094, "logps/rejected": -308.81884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.860687017440796, "rewards/margins": 11.449175834655762, "rewards/rejected": -13.30986213684082, "step": 10739 }, { "epoch": 1.67, "learning_rate": 6.270183100783236e-06, "logits/chosen": -1.507081151008606, "logits/rejected": -2.950077533721924, "logps/chosen": -196.603515625, "logps/rejected": -457.728271484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.106419801712036, "rewards/margins": 8.36790657043457, "rewards/rejected": -11.474325180053711, "step": 10740 }, { "epoch": 1.67, "learning_rate": 6.269449660252088e-06, "logits/chosen": -2.8293871879577637, "logits/rejected": -1.5261316299438477, "logps/chosen": -467.4112243652344, "logps/rejected": -108.1749038696289, "loss": 0.7729, "rewards/accuracies": 0.5, "rewards/chosen": -3.54241943359375, "rewards/margins": 0.7690757513046265, "rewards/rejected": -4.311495304107666, "step": 10741 }, { "epoch": 1.67, "learning_rate": 6.26871621972094e-06, "logits/chosen": -2.712368965148926, "logits/rejected": -3.043365716934204, "logps/chosen": -133.32391357421875, "logps/rejected": -292.56475830078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.7486400604248047, "rewards/margins": 8.642674446105957, "rewards/rejected": -10.391314506530762, "step": 10742 }, { "epoch": 1.67, "learning_rate": 6.267982779189792e-06, "logits/chosen": -2.5805559158325195, "logits/rejected": -2.4435815811157227, "logps/chosen": -402.32501220703125, "logps/rejected": -518.156005859375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -0.9287971258163452, "rewards/margins": 8.05300521850586, "rewards/rejected": -8.981802940368652, "step": 10743 }, { "epoch": 1.67, "learning_rate": 6.267249338658644e-06, "logits/chosen": -2.4018468856811523, "logits/rejected": -2.7699203491210938, "logps/chosen": -247.83837890625, "logps/rejected": -460.1749267578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.128211975097656, "rewards/margins": 9.441736221313477, "rewards/rejected": -14.569948196411133, "step": 10744 }, { "epoch": 1.67, "learning_rate": 6.266515898127496e-06, "logits/chosen": -2.040365695953369, "logits/rejected": -2.588961601257324, "logps/chosen": -153.1545867919922, "logps/rejected": -275.9336242675781, "loss": 1.3601, "rewards/accuracies": 0.5, "rewards/chosen": -4.124640941619873, "rewards/margins": 5.586537837982178, "rewards/rejected": -9.71117877960205, "step": 10745 }, { "epoch": 1.67, "learning_rate": 6.265782457596348e-06, "logits/chosen": -1.4812253713607788, "logits/rejected": -2.9958598613739014, "logps/chosen": -58.88292694091797, "logps/rejected": -282.9625244140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9566189050674438, "rewards/margins": 8.495595932006836, "rewards/rejected": -10.452214241027832, "step": 10746 }, { "epoch": 1.67, "learning_rate": 6.2650490170652e-06, "logits/chosen": -1.1102616786956787, "logits/rejected": -3.0053436756134033, "logps/chosen": -124.65459442138672, "logps/rejected": -363.573486328125, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": -6.4321184158325195, "rewards/margins": 5.24104118347168, "rewards/rejected": -11.6731595993042, "step": 10747 }, { "epoch": 1.67, "learning_rate": 6.264315576534053e-06, "logits/chosen": -2.810462713241577, "logits/rejected": -2.349658727645874, "logps/chosen": -103.49458312988281, "logps/rejected": -206.84762573242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.823986768722534, "rewards/margins": 10.771785736083984, "rewards/rejected": -13.595772743225098, "step": 10748 }, { "epoch": 1.67, "learning_rate": 6.2635821360029045e-06, "logits/chosen": -3.016662120819092, "logits/rejected": -3.0878140926361084, "logps/chosen": -195.19512939453125, "logps/rejected": -224.97308349609375, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/chosen": -1.829568862915039, "rewards/margins": 6.893716812133789, "rewards/rejected": -8.723285675048828, "step": 10749 }, { "epoch": 1.67, "learning_rate": 6.262848695471756e-06, "logits/chosen": -2.7582144737243652, "logits/rejected": -1.7614001035690308, "logps/chosen": -162.8755340576172, "logps/rejected": -169.16720581054688, "loss": 0.1582, "rewards/accuracies": 1.0, "rewards/chosen": -5.2415080070495605, "rewards/margins": 5.028948783874512, "rewards/rejected": -10.270456314086914, "step": 10750 }, { "epoch": 1.67, "learning_rate": 6.262115254940609e-06, "logits/chosen": -1.0831701755523682, "logits/rejected": -2.6270534992218018, "logps/chosen": -45.073951721191406, "logps/rejected": -413.6401672363281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.8248424530029297, "rewards/margins": 10.223653793334961, "rewards/rejected": -13.04849624633789, "step": 10751 }, { "epoch": 1.67, "learning_rate": 6.261381814409462e-06, "logits/chosen": -2.6507158279418945, "logits/rejected": -2.736096143722534, "logps/chosen": -313.494140625, "logps/rejected": -440.4173583984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0446404218673706, "rewards/margins": 9.991281509399414, "rewards/rejected": -11.035922050476074, "step": 10752 }, { "epoch": 1.67, "learning_rate": 6.260648373878314e-06, "logits/chosen": -2.5268771648406982, "logits/rejected": -3.035701274871826, "logps/chosen": -382.11822509765625, "logps/rejected": -563.4859619140625, "loss": 0.0271, "rewards/accuracies": 1.0, "rewards/chosen": -6.060295104980469, "rewards/margins": 6.188319683074951, "rewards/rejected": -12.248615264892578, "step": 10753 }, { "epoch": 1.67, "learning_rate": 6.259914933347166e-06, "logits/chosen": -1.5602697134017944, "logits/rejected": -2.932964563369751, "logps/chosen": -119.5912094116211, "logps/rejected": -358.04345703125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.2610249519348145, "rewards/margins": 7.972346305847168, "rewards/rejected": -14.23337173461914, "step": 10754 }, { "epoch": 1.67, "learning_rate": 6.2591814928160175e-06, "logits/chosen": -1.3277443647384644, "logits/rejected": -1.6549066305160522, "logps/chosen": -301.87591552734375, "logps/rejected": -397.89825439453125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.3775696754455566, "rewards/margins": 9.896939277648926, "rewards/rejected": -13.27450942993164, "step": 10755 }, { "epoch": 1.67, "learning_rate": 6.25844805228487e-06, "logits/chosen": -1.886452555656433, "logits/rejected": -2.6123745441436768, "logps/chosen": -139.67532348632812, "logps/rejected": -349.29840087890625, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -1.8043087720870972, "rewards/margins": 9.919536590576172, "rewards/rejected": -11.723844528198242, "step": 10756 }, { "epoch": 1.67, "learning_rate": 6.257714611753722e-06, "logits/chosen": -2.214151382446289, "logits/rejected": -2.8652617931365967, "logps/chosen": -53.82362365722656, "logps/rejected": -242.73635864257812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.7998249530792236, "rewards/margins": 8.053993225097656, "rewards/rejected": -10.8538179397583, "step": 10757 }, { "epoch": 1.67, "learning_rate": 6.256981171222574e-06, "logits/chosen": -2.2491636276245117, "logits/rejected": -2.9694559574127197, "logps/chosen": -83.69950103759766, "logps/rejected": -290.61907958984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.3725333213806152, "rewards/margins": 9.222097396850586, "rewards/rejected": -12.594630241394043, "step": 10758 }, { "epoch": 1.67, "learning_rate": 6.256247730691426e-06, "logits/chosen": -2.4835188388824463, "logits/rejected": -2.6007919311523438, "logps/chosen": -204.87161254882812, "logps/rejected": -368.4408874511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3909530639648438, "rewards/margins": 12.052827835083008, "rewards/rejected": -14.443780899047852, "step": 10759 }, { "epoch": 1.67, "learning_rate": 6.255514290160278e-06, "logits/chosen": -2.3990638256073, "logits/rejected": -2.923358678817749, "logps/chosen": -88.93621826171875, "logps/rejected": -334.9185485839844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.589841842651367, "rewards/margins": 9.57576847076416, "rewards/rejected": -13.165611267089844, "step": 10760 }, { "epoch": 1.67, "learning_rate": 6.2547808496291305e-06, "logits/chosen": -2.557007312774658, "logits/rejected": -3.0326335430145264, "logps/chosen": -182.84841918945312, "logps/rejected": -340.5311584472656, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.670428514480591, "rewards/margins": 9.454827308654785, "rewards/rejected": -12.125255584716797, "step": 10761 }, { "epoch": 1.67, "learning_rate": 6.254047409097982e-06, "logits/chosen": -2.65749454498291, "logits/rejected": -1.3074465990066528, "logps/chosen": -681.0134887695312, "logps/rejected": -396.04461669921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.758084297180176, "rewards/margins": 10.325508117675781, "rewards/rejected": -14.083593368530273, "step": 10762 }, { "epoch": 1.67, "learning_rate": 6.253313968566834e-06, "logits/chosen": -3.0018150806427, "logits/rejected": -3.059464454650879, "logps/chosen": -407.87628173828125, "logps/rejected": -475.385498046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.590752124786377, "rewards/margins": 9.015535354614258, "rewards/rejected": -12.606287002563477, "step": 10763 }, { "epoch": 1.67, "learning_rate": 6.252580528035686e-06, "logits/chosen": -1.834147334098816, "logits/rejected": -3.132099151611328, "logps/chosen": -135.30886840820312, "logps/rejected": -430.66864013671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.997321367263794, "rewards/margins": 8.086559295654297, "rewards/rejected": -11.083881378173828, "step": 10764 }, { "epoch": 1.67, "learning_rate": 6.251847087504539e-06, "logits/chosen": -2.6918118000030518, "logits/rejected": -2.9415833950042725, "logps/chosen": -125.38298034667969, "logps/rejected": -255.17608642578125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7048180103302002, "rewards/margins": 7.1183319091796875, "rewards/rejected": -8.823149681091309, "step": 10765 }, { "epoch": 1.67, "learning_rate": 6.251113646973391e-06, "logits/chosen": -2.9236066341400146, "logits/rejected": -2.342132568359375, "logps/chosen": -665.930908203125, "logps/rejected": -526.661376953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.651618480682373, "rewards/margins": 7.142472267150879, "rewards/rejected": -11.794090270996094, "step": 10766 }, { "epoch": 1.67, "learning_rate": 6.2503802064422426e-06, "logits/chosen": -3.1248536109924316, "logits/rejected": -2.4278879165649414, "logps/chosen": -535.6883544921875, "logps/rejected": -133.99456787109375, "loss": 1.7062, "rewards/accuracies": 0.5, "rewards/chosen": -7.675732612609863, "rewards/margins": 1.2771828174591064, "rewards/rejected": -8.95291519165039, "step": 10767 }, { "epoch": 1.67, "learning_rate": 6.249646765911095e-06, "logits/chosen": -3.213327646255493, "logits/rejected": -3.1513772010803223, "logps/chosen": -422.38116455078125, "logps/rejected": -705.2265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.800006866455078, "rewards/margins": 9.173312187194824, "rewards/rejected": -13.973319053649902, "step": 10768 }, { "epoch": 1.67, "learning_rate": 6.248913325379947e-06, "logits/chosen": -1.8973472118377686, "logits/rejected": -2.9369418621063232, "logps/chosen": -211.97964477539062, "logps/rejected": -498.00909423828125, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -5.009169101715088, "rewards/margins": 8.66850471496582, "rewards/rejected": -13.67767333984375, "step": 10769 }, { "epoch": 1.67, "learning_rate": 6.2481798848488e-06, "logits/chosen": -2.862410068511963, "logits/rejected": -3.133107900619507, "logps/chosen": -229.84957885742188, "logps/rejected": -388.0404052734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.741421699523926, "rewards/margins": 7.261007785797119, "rewards/rejected": -10.002429962158203, "step": 10770 }, { "epoch": 1.68, "learning_rate": 6.247446444317652e-06, "logits/chosen": -1.4117661714553833, "logits/rejected": -2.850799083709717, "logps/chosen": -44.92803192138672, "logps/rejected": -373.3714599609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.351423740386963, "rewards/margins": 8.968839645385742, "rewards/rejected": -11.320262908935547, "step": 10771 }, { "epoch": 1.68, "learning_rate": 6.246713003786504e-06, "logits/chosen": -1.0289171934127808, "logits/rejected": -2.802098035812378, "logps/chosen": -102.02203369140625, "logps/rejected": -376.2160339355469, "loss": 0.967, "rewards/accuracies": 0.5, "rewards/chosen": -6.677608013153076, "rewards/margins": 3.8824920654296875, "rewards/rejected": -10.560100555419922, "step": 10772 }, { "epoch": 1.68, "learning_rate": 6.2459795632553555e-06, "logits/chosen": -2.4834704399108887, "logits/rejected": -2.9049232006073, "logps/chosen": -288.3396911621094, "logps/rejected": -413.81243896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.148321628570557, "rewards/margins": 11.56022834777832, "rewards/rejected": -16.70854949951172, "step": 10773 }, { "epoch": 1.68, "learning_rate": 6.245246122724208e-06, "logits/chosen": -2.265759229660034, "logits/rejected": -2.86503529548645, "logps/chosen": -126.50083923339844, "logps/rejected": -414.4963073730469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.22158145904541, "rewards/margins": 10.97186279296875, "rewards/rejected": -13.19344425201416, "step": 10774 }, { "epoch": 1.68, "learning_rate": 6.24451268219306e-06, "logits/chosen": -2.428467035293579, "logits/rejected": -1.4215646982192993, "logps/chosen": -118.40303039550781, "logps/rejected": -203.39122009277344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5521576404571533, "rewards/margins": 10.538663864135742, "rewards/rejected": -12.090821266174316, "step": 10775 }, { "epoch": 1.68, "learning_rate": 6.243779241661912e-06, "logits/chosen": -2.8073902130126953, "logits/rejected": -1.6551740169525146, "logps/chosen": -137.50558471679688, "logps/rejected": -151.09385681152344, "loss": 0.9412, "rewards/accuracies": 0.5, "rewards/chosen": -4.097092628479004, "rewards/margins": 4.153428554534912, "rewards/rejected": -8.250521659851074, "step": 10776 }, { "epoch": 1.68, "learning_rate": 6.243045801130764e-06, "logits/chosen": -2.992375612258911, "logits/rejected": -2.946903944015503, "logps/chosen": -694.7906494140625, "logps/rejected": -730.497802734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.029504299163818, "rewards/margins": 8.689214706420898, "rewards/rejected": -12.718719482421875, "step": 10777 }, { "epoch": 1.68, "learning_rate": 6.242312360599616e-06, "logits/chosen": -3.045198440551758, "logits/rejected": -2.891522169113159, "logps/chosen": -196.29847717285156, "logps/rejected": -252.36001586914062, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.4808571338653564, "rewards/margins": 6.024935245513916, "rewards/rejected": -9.505792617797852, "step": 10778 }, { "epoch": 1.68, "learning_rate": 6.2415789200684685e-06, "logits/chosen": -2.208031177520752, "logits/rejected": -3.1105663776397705, "logps/chosen": -252.37025451660156, "logps/rejected": -428.356689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.5568091869354248, "rewards/margins": 10.009590148925781, "rewards/rejected": -11.566398620605469, "step": 10779 }, { "epoch": 1.68, "learning_rate": 6.24084547953732e-06, "logits/chosen": -2.8802311420440674, "logits/rejected": -2.739962577819824, "logps/chosen": -139.56556701660156, "logps/rejected": -238.35052490234375, "loss": 0.3927, "rewards/accuracies": 0.5, "rewards/chosen": -4.358857154846191, "rewards/margins": 2.8907980918884277, "rewards/rejected": -7.249655246734619, "step": 10780 }, { "epoch": 1.68, "learning_rate": 6.240112039006172e-06, "logits/chosen": -2.2628021240234375, "logits/rejected": -2.927690267562866, "logps/chosen": -318.0587463378906, "logps/rejected": -445.6861572265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.704042434692383, "rewards/margins": 6.6974873542785645, "rewards/rejected": -10.401529312133789, "step": 10781 }, { "epoch": 1.68, "learning_rate": 6.239378598475024e-06, "logits/chosen": -2.571749448776245, "logits/rejected": -2.611156702041626, "logps/chosen": -239.7135772705078, "logps/rejected": -435.58489990234375, "loss": 0.1665, "rewards/accuracies": 1.0, "rewards/chosen": -4.229477882385254, "rewards/margins": 9.890972137451172, "rewards/rejected": -14.120450019836426, "step": 10782 }, { "epoch": 1.68, "learning_rate": 6.238645157943877e-06, "logits/chosen": -1.7884535789489746, "logits/rejected": -2.944087028503418, "logps/chosen": -62.05143737792969, "logps/rejected": -184.16259765625, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -4.650283336639404, "rewards/margins": 4.9207682609558105, "rewards/rejected": -9.571051597595215, "step": 10783 }, { "epoch": 1.68, "learning_rate": 6.237911717412729e-06, "logits/chosen": -3.0379984378814697, "logits/rejected": -3.2586026191711426, "logps/chosen": -460.1297607421875, "logps/rejected": -572.8329467773438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9915573596954346, "rewards/margins": 10.171609878540039, "rewards/rejected": -12.163167953491211, "step": 10784 }, { "epoch": 1.68, "learning_rate": 6.237178276881581e-06, "logits/chosen": -1.3816407918930054, "logits/rejected": -2.3438611030578613, "logps/chosen": -251.19967651367188, "logps/rejected": -618.577880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4172168970108032, "rewards/margins": 12.793760299682617, "rewards/rejected": -14.210977554321289, "step": 10785 }, { "epoch": 1.68, "learning_rate": 6.236444836350433e-06, "logits/chosen": -2.767188310623169, "logits/rejected": -2.8670082092285156, "logps/chosen": -101.20362854003906, "logps/rejected": -123.73192596435547, "loss": 3.0365, "rewards/accuracies": 0.5, "rewards/chosen": -6.547944068908691, "rewards/margins": 1.213160753250122, "rewards/rejected": -7.761104583740234, "step": 10786 }, { "epoch": 1.68, "learning_rate": 6.235711395819285e-06, "logits/chosen": -2.673013925552368, "logits/rejected": -2.0266644954681396, "logps/chosen": -181.87783813476562, "logps/rejected": -183.1708221435547, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": -4.328096389770508, "rewards/margins": 3.4576170444488525, "rewards/rejected": -7.785713195800781, "step": 10787 }, { "epoch": 1.68, "learning_rate": 6.234977955288138e-06, "logits/chosen": -2.7884647846221924, "logits/rejected": -1.98909592628479, "logps/chosen": -778.364013671875, "logps/rejected": -464.67242431640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.000808238983154, "rewards/margins": 7.1143646240234375, "rewards/rejected": -12.115172386169434, "step": 10788 }, { "epoch": 1.68, "learning_rate": 6.23424451475699e-06, "logits/chosen": -2.8582265377044678, "logits/rejected": -2.4236555099487305, "logps/chosen": -191.41683959960938, "logps/rejected": -350.0458984375, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -3.4640793800354004, "rewards/margins": 6.359343528747559, "rewards/rejected": -9.823423385620117, "step": 10789 }, { "epoch": 1.68, "learning_rate": 6.233511074225842e-06, "logits/chosen": -1.7020865678787231, "logits/rejected": -2.6674163341522217, "logps/chosen": -68.028076171875, "logps/rejected": -316.2757568359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.8040446043014526, "rewards/margins": 8.289381980895996, "rewards/rejected": -10.093425750732422, "step": 10790 }, { "epoch": 1.68, "learning_rate": 6.2327776336946936e-06, "logits/chosen": -1.8955986499786377, "logits/rejected": -2.42728328704834, "logps/chosen": -436.9691467285156, "logps/rejected": -696.076904296875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.398708343505859, "rewards/margins": 9.127192497253418, "rewards/rejected": -13.525900840759277, "step": 10791 }, { "epoch": 1.68, "learning_rate": 6.232044193163546e-06, "logits/chosen": -1.7551467418670654, "logits/rejected": -2.424534559249878, "logps/chosen": -289.7640380859375, "logps/rejected": -401.1578063964844, "loss": 1.5478, "rewards/accuracies": 0.5, "rewards/chosen": -7.619372844696045, "rewards/margins": 4.498984336853027, "rewards/rejected": -12.118356704711914, "step": 10792 }, { "epoch": 1.68, "learning_rate": 6.231310752632398e-06, "logits/chosen": -1.9095559120178223, "logits/rejected": -3.0702059268951416, "logps/chosen": -187.1212158203125, "logps/rejected": -397.939697265625, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -1.3756614923477173, "rewards/margins": 5.899527549743652, "rewards/rejected": -7.275189399719238, "step": 10793 }, { "epoch": 1.68, "learning_rate": 6.23057731210125e-06, "logits/chosen": -3.1900954246520996, "logits/rejected": -3.4189233779907227, "logps/chosen": -97.8254165649414, "logps/rejected": -239.58865356445312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.773460388183594, "rewards/margins": 7.547725200653076, "rewards/rejected": -13.321186065673828, "step": 10794 }, { "epoch": 1.68, "learning_rate": 6.229843871570102e-06, "logits/chosen": -2.6711103916168213, "logits/rejected": -3.064152479171753, "logps/chosen": -62.239967346191406, "logps/rejected": -303.441650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.241218566894531, "rewards/margins": 10.970404624938965, "rewards/rejected": -15.211623191833496, "step": 10795 }, { "epoch": 1.68, "learning_rate": 6.229110431038955e-06, "logits/chosen": -2.647193431854248, "logits/rejected": -1.6660068035125732, "logps/chosen": -295.36993408203125, "logps/rejected": -128.40139770507812, "loss": 2.1216, "rewards/accuracies": 0.5, "rewards/chosen": -6.929290771484375, "rewards/margins": -1.0863587856292725, "rewards/rejected": -5.842932224273682, "step": 10796 }, { "epoch": 1.68, "learning_rate": 6.2283769905078065e-06, "logits/chosen": -2.8692378997802734, "logits/rejected": -2.7586305141448975, "logps/chosen": -175.20790100097656, "logps/rejected": -200.7126922607422, "loss": 1.0041, "rewards/accuracies": 0.5, "rewards/chosen": -4.036980628967285, "rewards/margins": 2.5652151107788086, "rewards/rejected": -6.602196216583252, "step": 10797 }, { "epoch": 1.68, "learning_rate": 6.227643549976658e-06, "logits/chosen": -2.391632318496704, "logits/rejected": -2.717412233352661, "logps/chosen": -117.72103118896484, "logps/rejected": -372.921630859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.314537525177002, "rewards/margins": 9.852163314819336, "rewards/rejected": -12.16670036315918, "step": 10798 }, { "epoch": 1.68, "learning_rate": 6.22691010944551e-06, "logits/chosen": -1.8076103925704956, "logits/rejected": -2.962552070617676, "logps/chosen": -74.26933288574219, "logps/rejected": -364.6800842285156, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.369041442871094, "rewards/margins": 7.962411880493164, "rewards/rejected": -13.331453323364258, "step": 10799 }, { "epoch": 1.68, "learning_rate": 6.226176668914362e-06, "logits/chosen": -1.6017866134643555, "logits/rejected": -2.9518282413482666, "logps/chosen": -248.54910278320312, "logps/rejected": -564.7573852539062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5350379943847656, "rewards/margins": 11.037654876708984, "rewards/rejected": -14.57269287109375, "step": 10800 }, { "epoch": 1.68, "learning_rate": 6.225443228383215e-06, "logits/chosen": -0.8355585336685181, "logits/rejected": -2.153290271759033, "logps/chosen": -300.8649597167969, "logps/rejected": -600.685546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.610817909240723, "rewards/margins": 8.017334938049316, "rewards/rejected": -12.628152847290039, "step": 10801 }, { "epoch": 1.68, "learning_rate": 6.224709787852067e-06, "logits/chosen": -0.9014008641242981, "logits/rejected": -2.9325098991394043, "logps/chosen": -129.2828826904297, "logps/rejected": -480.32171630859375, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": -6.659111499786377, "rewards/margins": 2.82350754737854, "rewards/rejected": -9.482619285583496, "step": 10802 }, { "epoch": 1.68, "learning_rate": 6.2239763473209195e-06, "logits/chosen": -2.9186065196990967, "logits/rejected": -2.850712537765503, "logps/chosen": -595.4114990234375, "logps/rejected": -598.101318359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.2765092849731445, "rewards/margins": 10.286120414733887, "rewards/rejected": -14.562629699707031, "step": 10803 }, { "epoch": 1.68, "learning_rate": 6.223242906789771e-06, "logits/chosen": -3.0446956157684326, "logits/rejected": -2.94191837310791, "logps/chosen": -201.18748474121094, "logps/rejected": -195.80020141601562, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -5.636016845703125, "rewards/margins": 5.1041693687438965, "rewards/rejected": -10.74018669128418, "step": 10804 }, { "epoch": 1.68, "learning_rate": 6.222509466258624e-06, "logits/chosen": -2.653346300125122, "logits/rejected": -2.9071290493011475, "logps/chosen": -81.65498352050781, "logps/rejected": -214.3319091796875, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -6.133038520812988, "rewards/margins": 5.387831687927246, "rewards/rejected": -11.520870208740234, "step": 10805 }, { "epoch": 1.68, "learning_rate": 6.221776025727476e-06, "logits/chosen": -2.806380271911621, "logits/rejected": -3.2465336322784424, "logps/chosen": -65.74787139892578, "logps/rejected": -240.59548950195312, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -5.22440242767334, "rewards/margins": 4.701286315917969, "rewards/rejected": -9.925689697265625, "step": 10806 }, { "epoch": 1.68, "learning_rate": 6.221042585196328e-06, "logits/chosen": -2.6173346042633057, "logits/rejected": -2.238779067993164, "logps/chosen": -449.5023498535156, "logps/rejected": -485.9434814453125, "loss": 3.5024, "rewards/accuracies": 0.5, "rewards/chosen": -7.3216400146484375, "rewards/margins": 4.622741222381592, "rewards/rejected": -11.944381713867188, "step": 10807 }, { "epoch": 1.68, "learning_rate": 6.22030914466518e-06, "logits/chosen": -1.9753400087356567, "logits/rejected": -2.477635622024536, "logps/chosen": -213.55014038085938, "logps/rejected": -370.2567138671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.877387285232544, "rewards/margins": 8.087774276733398, "rewards/rejected": -10.96516227722168, "step": 10808 }, { "epoch": 1.68, "learning_rate": 6.219575704134032e-06, "logits/chosen": -2.99741792678833, "logits/rejected": -2.101787805557251, "logps/chosen": -311.3468017578125, "logps/rejected": -259.51776123046875, "loss": 1.1016, "rewards/accuracies": 0.5, "rewards/chosen": -7.429760932922363, "rewards/margins": 3.4884159564971924, "rewards/rejected": -10.918176651000977, "step": 10809 }, { "epoch": 1.68, "learning_rate": 6.218842263602884e-06, "logits/chosen": -1.294244408607483, "logits/rejected": -2.7145283222198486, "logps/chosen": -123.36309051513672, "logps/rejected": -274.5433654785156, "loss": 1.1241, "rewards/accuracies": 0.5, "rewards/chosen": -4.123762607574463, "rewards/margins": 2.692878484725952, "rewards/rejected": -6.816641330718994, "step": 10810 }, { "epoch": 1.68, "learning_rate": 6.218108823071736e-06, "logits/chosen": -0.6457659602165222, "logits/rejected": -2.5167787075042725, "logps/chosen": -82.16397094726562, "logps/rejected": -422.9080810546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.6379284858703613, "rewards/margins": 9.642724990844727, "rewards/rejected": -13.28065299987793, "step": 10811 }, { "epoch": 1.68, "learning_rate": 6.217375382540588e-06, "logits/chosen": -2.2916183471679688, "logits/rejected": -3.001183271408081, "logps/chosen": -166.98239135742188, "logps/rejected": -319.00750732421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.1175127029418945, "rewards/margins": 8.146617889404297, "rewards/rejected": -10.264131546020508, "step": 10812 }, { "epoch": 1.68, "learning_rate": 6.21664194200944e-06, "logits/chosen": -2.7142744064331055, "logits/rejected": -3.2527167797088623, "logps/chosen": -76.45755767822266, "logps/rejected": -290.014892578125, "loss": 0.6473, "rewards/accuracies": 0.5, "rewards/chosen": -6.162833213806152, "rewards/margins": 3.319901466369629, "rewards/rejected": -9.482734680175781, "step": 10813 }, { "epoch": 1.68, "learning_rate": 6.215908501478293e-06, "logits/chosen": -2.8413052558898926, "logits/rejected": -2.608145236968994, "logps/chosen": -168.09136962890625, "logps/rejected": -306.72747802734375, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -3.002479314804077, "rewards/margins": 7.5817155838012695, "rewards/rejected": -10.584195137023926, "step": 10814 }, { "epoch": 1.68, "learning_rate": 6.215175060947145e-06, "logits/chosen": -2.6942105293273926, "logits/rejected": -2.0757195949554443, "logps/chosen": -500.0770263671875, "logps/rejected": -483.5216979980469, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -5.932911396026611, "rewards/margins": 5.7718586921691895, "rewards/rejected": -11.7047700881958, "step": 10815 }, { "epoch": 1.68, "learning_rate": 6.2144416204159965e-06, "logits/chosen": -3.113919734954834, "logits/rejected": -2.2586262226104736, "logps/chosen": -426.17755126953125, "logps/rejected": -302.7351379394531, "loss": 1.1785, "rewards/accuracies": 0.5, "rewards/chosen": -5.0940446853637695, "rewards/margins": 1.1993671655654907, "rewards/rejected": -6.293411731719971, "step": 10816 }, { "epoch": 1.68, "learning_rate": 6.213708179884848e-06, "logits/chosen": -2.639044761657715, "logits/rejected": -3.0228099822998047, "logps/chosen": -787.4608154296875, "logps/rejected": -693.6666870117188, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -5.711728096008301, "rewards/margins": 6.477434158325195, "rewards/rejected": -12.18916130065918, "step": 10817 }, { "epoch": 1.68, "learning_rate": 6.2129747393537e-06, "logits/chosen": -2.8826746940612793, "logits/rejected": -2.245455026626587, "logps/chosen": -113.60340881347656, "logps/rejected": -191.51356506347656, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.4435184001922607, "rewards/margins": 6.9329400062561035, "rewards/rejected": -10.376459121704102, "step": 10818 }, { "epoch": 1.68, "learning_rate": 6.212241298822553e-06, "logits/chosen": -2.8164660930633545, "logits/rejected": -2.9723105430603027, "logps/chosen": -192.14129638671875, "logps/rejected": -183.76800537109375, "loss": 0.3007, "rewards/accuracies": 1.0, "rewards/chosen": -5.511960983276367, "rewards/margins": 1.6481504440307617, "rewards/rejected": -7.160111427307129, "step": 10819 }, { "epoch": 1.68, "learning_rate": 6.211507858291406e-06, "logits/chosen": -2.3703911304473877, "logits/rejected": -2.660871982574463, "logps/chosen": -133.5554962158203, "logps/rejected": -281.95562744140625, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -2.826948642730713, "rewards/margins": 6.127696514129639, "rewards/rejected": -8.954645156860352, "step": 10820 }, { "epoch": 1.68, "learning_rate": 6.2107744177602575e-06, "logits/chosen": -2.1030964851379395, "logits/rejected": -2.934267282485962, "logps/chosen": -272.4989013671875, "logps/rejected": -551.5551147460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8869529962539673, "rewards/margins": 11.599076271057129, "rewards/rejected": -13.486028671264648, "step": 10821 }, { "epoch": 1.68, "learning_rate": 6.2100409772291094e-06, "logits/chosen": -3.0565178394317627, "logits/rejected": -2.6889395713806152, "logps/chosen": -394.4930725097656, "logps/rejected": -296.5777893066406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.190355062484741, "rewards/margins": 8.63286018371582, "rewards/rejected": -11.82321548461914, "step": 10822 }, { "epoch": 1.68, "learning_rate": 6.209307536697962e-06, "logits/chosen": -2.6979050636291504, "logits/rejected": -1.6732465028762817, "logps/chosen": -385.05999755859375, "logps/rejected": -343.7640380859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.704010009765625, "rewards/margins": 8.44559097290039, "rewards/rejected": -12.149600982666016, "step": 10823 }, { "epoch": 1.68, "learning_rate": 6.208574096166814e-06, "logits/chosen": -2.2983946800231934, "logits/rejected": -2.976092576980591, "logps/chosen": -170.08267211914062, "logps/rejected": -273.7072448730469, "loss": 0.1118, "rewards/accuracies": 1.0, "rewards/chosen": -3.2663681507110596, "rewards/margins": 4.981047630310059, "rewards/rejected": -8.247415542602539, "step": 10824 }, { "epoch": 1.68, "learning_rate": 6.207840655635666e-06, "logits/chosen": -2.201404333114624, "logits/rejected": -3.1193439960479736, "logps/chosen": -189.07504272460938, "logps/rejected": -169.54983520507812, "loss": 4.1399, "rewards/accuracies": 0.5, "rewards/chosen": -7.424682140350342, "rewards/margins": -0.06369686126708984, "rewards/rejected": -7.36098575592041, "step": 10825 }, { "epoch": 1.68, "learning_rate": 6.207107215104518e-06, "logits/chosen": -1.4719043970108032, "logits/rejected": -2.876718759536743, "logps/chosen": -152.76943969726562, "logps/rejected": -343.0727844238281, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -1.7041854858398438, "rewards/margins": 6.628348350524902, "rewards/rejected": -8.332533836364746, "step": 10826 }, { "epoch": 1.68, "learning_rate": 6.20637377457337e-06, "logits/chosen": -3.0477676391601562, "logits/rejected": -2.434938430786133, "logps/chosen": -705.3071899414062, "logps/rejected": -349.4591979980469, "loss": 0.8066, "rewards/accuracies": 0.5, "rewards/chosen": -5.488485336303711, "rewards/margins": 1.1921579837799072, "rewards/rejected": -6.680643081665039, "step": 10827 }, { "epoch": 1.68, "learning_rate": 6.205640334042222e-06, "logits/chosen": -2.739046812057495, "logits/rejected": -2.3666789531707764, "logps/chosen": -523.5728759765625, "logps/rejected": -453.9984130859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.604661226272583, "rewards/margins": 6.471141815185547, "rewards/rejected": -9.07580280303955, "step": 10828 }, { "epoch": 1.68, "learning_rate": 6.204906893511074e-06, "logits/chosen": -2.716768264770508, "logits/rejected": -2.9015069007873535, "logps/chosen": -103.43280029296875, "logps/rejected": -422.3738708496094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.255212783813477, "rewards/margins": 13.586026191711426, "rewards/rejected": -17.84123992919922, "step": 10829 }, { "epoch": 1.68, "learning_rate": 6.204173452979926e-06, "logits/chosen": -2.7287094593048096, "logits/rejected": -2.845653772354126, "logps/chosen": -197.68722534179688, "logps/rejected": -329.41986083984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -0.9211775064468384, "rewards/margins": 10.009525299072266, "rewards/rejected": -10.930702209472656, "step": 10830 }, { "epoch": 1.68, "learning_rate": 6.203440012448778e-06, "logits/chosen": -2.595903158187866, "logits/rejected": -2.7536940574645996, "logps/chosen": -187.7503662109375, "logps/rejected": -483.5855712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.978377103805542, "rewards/margins": 13.845929145812988, "rewards/rejected": -16.82430648803711, "step": 10831 }, { "epoch": 1.68, "learning_rate": 6.202706571917631e-06, "logits/chosen": -3.088876724243164, "logits/rejected": -2.8029260635375977, "logps/chosen": -230.42825317382812, "logps/rejected": -172.09425354003906, "loss": 0.0945, "rewards/accuracies": 1.0, "rewards/chosen": -4.264901161193848, "rewards/margins": 5.576015472412109, "rewards/rejected": -9.840916633605957, "step": 10832 }, { "epoch": 1.68, "learning_rate": 6.201973131386483e-06, "logits/chosen": -0.5869777798652649, "logits/rejected": -1.4908009767532349, "logps/chosen": -211.134521484375, "logps/rejected": -525.104248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0026671886444092, "rewards/margins": 15.788951873779297, "rewards/rejected": -16.79161834716797, "step": 10833 }, { "epoch": 1.68, "learning_rate": 6.2012396908553345e-06, "logits/chosen": -2.587646961212158, "logits/rejected": -3.132986068725586, "logps/chosen": -60.14558792114258, "logps/rejected": -294.625732421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.766258716583252, "rewards/margins": 10.45500373840332, "rewards/rejected": -13.221261978149414, "step": 10834 }, { "epoch": 1.69, "learning_rate": 6.200506250324186e-06, "logits/chosen": -2.5735819339752197, "logits/rejected": -2.5998525619506836, "logps/chosen": -121.35948944091797, "logps/rejected": -242.83639526367188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.5981690883636475, "rewards/margins": 7.92449951171875, "rewards/rejected": -9.522668838500977, "step": 10835 }, { "epoch": 1.69, "learning_rate": 6.199772809793039e-06, "logits/chosen": -1.913511872291565, "logits/rejected": -2.843036651611328, "logps/chosen": -412.2605285644531, "logps/rejected": -608.5914306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9206981658935547, "rewards/margins": 11.907378196716309, "rewards/rejected": -13.828076362609863, "step": 10836 }, { "epoch": 1.69, "learning_rate": 6.199039369261892e-06, "logits/chosen": -1.3233822584152222, "logits/rejected": -2.7542459964752197, "logps/chosen": -104.60049438476562, "logps/rejected": -543.5213012695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7319040298461914, "rewards/margins": 14.516822814941406, "rewards/rejected": -18.248727798461914, "step": 10837 }, { "epoch": 1.69, "learning_rate": 6.198305928730744e-06, "logits/chosen": -2.4531779289245605, "logits/rejected": -2.8012588024139404, "logps/chosen": -806.5136108398438, "logps/rejected": -578.244140625, "loss": 1.2465, "rewards/accuracies": 0.5, "rewards/chosen": -5.270483016967773, "rewards/margins": 3.9955532550811768, "rewards/rejected": -9.266036033630371, "step": 10838 }, { "epoch": 1.69, "learning_rate": 6.197572488199596e-06, "logits/chosen": -2.6070165634155273, "logits/rejected": -2.7524473667144775, "logps/chosen": -442.0042419433594, "logps/rejected": -576.511474609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.428450584411621, "rewards/margins": 6.621638298034668, "rewards/rejected": -11.050088882446289, "step": 10839 }, { "epoch": 1.69, "learning_rate": 6.1968390476684475e-06, "logits/chosen": -1.4313825368881226, "logits/rejected": -2.7494120597839355, "logps/chosen": -119.3835220336914, "logps/rejected": -394.231689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.290553092956543, "rewards/margins": 13.59483528137207, "rewards/rejected": -15.885388374328613, "step": 10840 }, { "epoch": 1.69, "learning_rate": 6.1961056071373e-06, "logits/chosen": -2.9176366329193115, "logits/rejected": -2.6797916889190674, "logps/chosen": -217.3780975341797, "logps/rejected": -292.5634460449219, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -1.0561249256134033, "rewards/margins": 6.647864818572998, "rewards/rejected": -7.703989505767822, "step": 10841 }, { "epoch": 1.69, "learning_rate": 6.195372166606152e-06, "logits/chosen": -2.0096352100372314, "logits/rejected": -2.752113103866577, "logps/chosen": -186.16561889648438, "logps/rejected": -412.5348205566406, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -5.242351531982422, "rewards/margins": 4.464250564575195, "rewards/rejected": -9.706602096557617, "step": 10842 }, { "epoch": 1.69, "learning_rate": 6.194638726075004e-06, "logits/chosen": -2.6846516132354736, "logits/rejected": -1.8249386548995972, "logps/chosen": -344.6217346191406, "logps/rejected": -296.6273498535156, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.498785972595215, "rewards/margins": 7.19523286819458, "rewards/rejected": -9.694019317626953, "step": 10843 }, { "epoch": 1.69, "learning_rate": 6.193905285543856e-06, "logits/chosen": -2.457690477371216, "logits/rejected": -2.873636484146118, "logps/chosen": -314.0335693359375, "logps/rejected": -292.451171875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -2.799205780029297, "rewards/margins": 6.214200019836426, "rewards/rejected": -9.013406753540039, "step": 10844 }, { "epoch": 1.69, "learning_rate": 6.1931718450127086e-06, "logits/chosen": -1.4499701261520386, "logits/rejected": -2.8094778060913086, "logps/chosen": -102.8810806274414, "logps/rejected": -523.510498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.18047833442688, "rewards/margins": 12.241230010986328, "rewards/rejected": -14.421708106994629, "step": 10845 }, { "epoch": 1.69, "learning_rate": 6.1924384044815604e-06, "logits/chosen": -1.418777346611023, "logits/rejected": -2.9258341789245605, "logps/chosen": -436.6952819824219, "logps/rejected": -690.6302490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.191693067550659, "rewards/margins": 11.86888599395752, "rewards/rejected": -15.060579299926758, "step": 10846 }, { "epoch": 1.69, "learning_rate": 6.191704963950412e-06, "logits/chosen": -2.214341878890991, "logits/rejected": -3.0986578464508057, "logps/chosen": -62.71276092529297, "logps/rejected": -304.66424560546875, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -3.4249167442321777, "rewards/margins": 5.0092010498046875, "rewards/rejected": -8.434118270874023, "step": 10847 }, { "epoch": 1.69, "learning_rate": 6.190971523419264e-06, "logits/chosen": -2.9022834300994873, "logits/rejected": -3.266860246658325, "logps/chosen": -199.01133728027344, "logps/rejected": -260.25518798828125, "loss": 0.0817, "rewards/accuracies": 1.0, "rewards/chosen": -3.6139588356018066, "rewards/margins": 3.573922872543335, "rewards/rejected": -7.1878814697265625, "step": 10848 }, { "epoch": 1.69, "learning_rate": 6.190238082888116e-06, "logits/chosen": -2.125976800918579, "logits/rejected": -2.6529226303100586, "logps/chosen": -144.1723175048828, "logps/rejected": -172.14181518554688, "loss": 0.2694, "rewards/accuracies": 1.0, "rewards/chosen": -4.452751636505127, "rewards/margins": 4.07157039642334, "rewards/rejected": -8.524322509765625, "step": 10849 }, { "epoch": 1.69, "learning_rate": 6.189504642356969e-06, "logits/chosen": -1.8365418910980225, "logits/rejected": -2.7854671478271484, "logps/chosen": -169.81761169433594, "logps/rejected": -328.62744140625, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/chosen": -5.626627445220947, "rewards/margins": 3.9981558322906494, "rewards/rejected": -9.624783515930176, "step": 10850 }, { "epoch": 1.69, "learning_rate": 6.188771201825821e-06, "logits/chosen": -2.6760172843933105, "logits/rejected": -1.9284138679504395, "logps/chosen": -432.17572021484375, "logps/rejected": -313.27374267578125, "loss": 0.9345, "rewards/accuracies": 0.5, "rewards/chosen": -4.425241470336914, "rewards/margins": 2.9585304260253906, "rewards/rejected": -7.383771896362305, "step": 10851 }, { "epoch": 1.69, "learning_rate": 6.1880377612946726e-06, "logits/chosen": -2.461472749710083, "logits/rejected": -3.2720530033111572, "logps/chosen": -279.9180603027344, "logps/rejected": -532.9512939453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.992144823074341, "rewards/margins": 7.983692169189453, "rewards/rejected": -10.975837707519531, "step": 10852 }, { "epoch": 1.69, "learning_rate": 6.187304320763525e-06, "logits/chosen": -2.7008750438690186, "logits/rejected": -2.7545456886291504, "logps/chosen": -180.22067260742188, "logps/rejected": -357.6634521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.36483097076416, "rewards/margins": 9.994555473327637, "rewards/rejected": -13.359386444091797, "step": 10853 }, { "epoch": 1.69, "learning_rate": 6.186570880232378e-06, "logits/chosen": -2.481889486312866, "logits/rejected": -2.9821436405181885, "logps/chosen": -246.89596557617188, "logps/rejected": -359.48291015625, "loss": 0.5136, "rewards/accuracies": 0.5, "rewards/chosen": -3.8346140384674072, "rewards/margins": 6.524819850921631, "rewards/rejected": -10.359434127807617, "step": 10854 }, { "epoch": 1.69, "learning_rate": 6.18583743970123e-06, "logits/chosen": -2.1951918601989746, "logits/rejected": -2.9666619300842285, "logps/chosen": -798.78759765625, "logps/rejected": -730.8958740234375, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -5.703176975250244, "rewards/margins": 3.969965934753418, "rewards/rejected": -9.67314338684082, "step": 10855 }, { "epoch": 1.69, "learning_rate": 6.185103999170082e-06, "logits/chosen": -2.8224408626556396, "logits/rejected": -3.3734772205352783, "logps/chosen": -114.04283142089844, "logps/rejected": -252.90911865234375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.7834081649780273, "rewards/margins": 6.113333702087402, "rewards/rejected": -8.89674186706543, "step": 10856 }, { "epoch": 1.69, "learning_rate": 6.184370558638934e-06, "logits/chosen": -1.3777694702148438, "logits/rejected": -2.6472933292388916, "logps/chosen": -117.0567398071289, "logps/rejected": -383.0271911621094, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.378430366516113, "rewards/margins": 8.376160621643066, "rewards/rejected": -14.75459098815918, "step": 10857 }, { "epoch": 1.69, "learning_rate": 6.1836371181077855e-06, "logits/chosen": -2.64018177986145, "logits/rejected": -2.849876880645752, "logps/chosen": -120.3033676147461, "logps/rejected": -252.47232055664062, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.668798446655273, "rewards/margins": 7.169038772583008, "rewards/rejected": -12.837837219238281, "step": 10858 }, { "epoch": 1.69, "learning_rate": 6.182903677576638e-06, "logits/chosen": -2.0057873725891113, "logits/rejected": -2.985381841659546, "logps/chosen": -216.69784545898438, "logps/rejected": -427.17962646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.157019853591919, "rewards/margins": 11.861602783203125, "rewards/rejected": -14.018622398376465, "step": 10859 }, { "epoch": 1.69, "learning_rate": 6.18217023704549e-06, "logits/chosen": -2.957641124725342, "logits/rejected": -1.922726035118103, "logps/chosen": -280.7475280761719, "logps/rejected": -110.02906036376953, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": 0.014857172966003418, "rewards/margins": 7.516725540161133, "rewards/rejected": -7.50186824798584, "step": 10860 }, { "epoch": 1.69, "learning_rate": 6.181436796514342e-06, "logits/chosen": -2.9692254066467285, "logits/rejected": -1.8441996574401855, "logps/chosen": -247.24765014648438, "logps/rejected": -169.27047729492188, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.362708568572998, "rewards/margins": 6.198493957519531, "rewards/rejected": -9.561203002929688, "step": 10861 }, { "epoch": 1.69, "learning_rate": 6.180703355983194e-06, "logits/chosen": -2.054342746734619, "logits/rejected": -2.8606176376342773, "logps/chosen": -103.63714599609375, "logps/rejected": -483.26373291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.179507732391357, "rewards/margins": 11.796812057495117, "rewards/rejected": -15.976318359375, "step": 10862 }, { "epoch": 1.69, "learning_rate": 6.179969915452047e-06, "logits/chosen": -2.689406633377075, "logits/rejected": -3.027639150619507, "logps/chosen": -127.22447204589844, "logps/rejected": -266.82537841796875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.3401479721069336, "rewards/margins": 9.789321899414062, "rewards/rejected": -12.12946891784668, "step": 10863 }, { "epoch": 1.69, "learning_rate": 6.1792364749208985e-06, "logits/chosen": -2.73305606842041, "logits/rejected": -1.1488860845565796, "logps/chosen": -595.1033325195312, "logps/rejected": -343.03875732421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.399065017700195, "rewards/margins": 8.932668685913086, "rewards/rejected": -13.331733703613281, "step": 10864 }, { "epoch": 1.69, "learning_rate": 6.17850303438975e-06, "logits/chosen": -2.651203155517578, "logits/rejected": -2.759596824645996, "logps/chosen": -475.0354309082031, "logps/rejected": -534.55517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0775628089904785, "rewards/margins": 11.062814712524414, "rewards/rejected": -14.14037799835205, "step": 10865 }, { "epoch": 1.69, "learning_rate": 6.177769593858602e-06, "logits/chosen": -1.9923946857452393, "logits/rejected": -2.6169676780700684, "logps/chosen": -492.7415466308594, "logps/rejected": -747.1444091796875, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -6.721866607666016, "rewards/margins": 7.397009372711182, "rewards/rejected": -14.118875503540039, "step": 10866 }, { "epoch": 1.69, "learning_rate": 6.177036153327454e-06, "logits/chosen": -1.3415175676345825, "logits/rejected": -2.8018994331359863, "logps/chosen": -174.45263671875, "logps/rejected": -587.1824951171875, "loss": 0.1741, "rewards/accuracies": 1.0, "rewards/chosen": -4.753396034240723, "rewards/margins": 4.954160690307617, "rewards/rejected": -9.707555770874023, "step": 10867 }, { "epoch": 1.69, "learning_rate": 6.176302712796307e-06, "logits/chosen": -2.2365498542785645, "logits/rejected": -2.9394118785858154, "logps/chosen": -270.2251281738281, "logps/rejected": -342.4302062988281, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -4.107934474945068, "rewards/margins": 7.031455993652344, "rewards/rejected": -11.13939094543457, "step": 10868 }, { "epoch": 1.69, "learning_rate": 6.175569272265159e-06, "logits/chosen": -2.426586627960205, "logits/rejected": -2.951301336288452, "logps/chosen": -59.00880813598633, "logps/rejected": -216.79022216796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.049875497817993, "rewards/margins": 7.856127738952637, "rewards/rejected": -10.90600299835205, "step": 10869 }, { "epoch": 1.69, "learning_rate": 6.1748358317340114e-06, "logits/chosen": -1.3760530948638916, "logits/rejected": -2.020498037338257, "logps/chosen": -449.2525329589844, "logps/rejected": -422.3839416503906, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -4.0544939041137695, "rewards/margins": 9.444714546203613, "rewards/rejected": -13.499208450317383, "step": 10870 }, { "epoch": 1.69, "learning_rate": 6.174102391202863e-06, "logits/chosen": -1.6752945184707642, "logits/rejected": -3.006222724914551, "logps/chosen": -85.93707275390625, "logps/rejected": -325.4495544433594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.528038024902344, "rewards/margins": 8.898237228393555, "rewards/rejected": -13.426274299621582, "step": 10871 }, { "epoch": 1.69, "learning_rate": 6.173368950671716e-06, "logits/chosen": -2.7675302028656006, "logits/rejected": -2.7493157386779785, "logps/chosen": -186.9518585205078, "logps/rejected": -303.7368469238281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.72711181640625, "rewards/margins": 9.124963760375977, "rewards/rejected": -11.852075576782227, "step": 10872 }, { "epoch": 1.69, "learning_rate": 6.172635510140568e-06, "logits/chosen": -2.5969226360321045, "logits/rejected": -2.4080758094787598, "logps/chosen": -247.94406127929688, "logps/rejected": -415.018310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.332617282867432, "rewards/margins": 10.087638854980469, "rewards/rejected": -16.420257568359375, "step": 10873 }, { "epoch": 1.69, "learning_rate": 6.17190206960942e-06, "logits/chosen": -0.9040087461471558, "logits/rejected": -2.597686767578125, "logps/chosen": -140.7441864013672, "logps/rejected": -420.5755615234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.939833164215088, "rewards/margins": 11.03978443145752, "rewards/rejected": -13.979618072509766, "step": 10874 }, { "epoch": 1.69, "learning_rate": 6.171168629078272e-06, "logits/chosen": -2.3970515727996826, "logits/rejected": -3.068471908569336, "logps/chosen": -281.41839599609375, "logps/rejected": -685.1907958984375, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -4.354857921600342, "rewards/margins": 6.2365593910217285, "rewards/rejected": -10.59141731262207, "step": 10875 }, { "epoch": 1.69, "learning_rate": 6.1704351885471236e-06, "logits/chosen": -2.3784847259521484, "logits/rejected": -3.0097453594207764, "logps/chosen": -76.10074615478516, "logps/rejected": -317.61920166015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.4499764442443848, "rewards/margins": 7.96270751953125, "rewards/rejected": -10.412683486938477, "step": 10876 }, { "epoch": 1.69, "learning_rate": 6.169701748015976e-06, "logits/chosen": -2.4355061054229736, "logits/rejected": -2.9982142448425293, "logps/chosen": -97.12825012207031, "logps/rejected": -298.9502258300781, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1597771644592285, "rewards/margins": 6.326508522033691, "rewards/rejected": -8.486286163330078, "step": 10877 }, { "epoch": 1.69, "learning_rate": 6.168968307484828e-06, "logits/chosen": -1.947801113128662, "logits/rejected": -2.7379379272460938, "logps/chosen": -185.1217803955078, "logps/rejected": -258.1996154785156, "loss": 0.1941, "rewards/accuracies": 1.0, "rewards/chosen": -3.439828395843506, "rewards/margins": 4.050084114074707, "rewards/rejected": -7.489912986755371, "step": 10878 }, { "epoch": 1.69, "learning_rate": 6.16823486695368e-06, "logits/chosen": -2.945216417312622, "logits/rejected": -2.511498212814331, "logps/chosen": -316.7283020019531, "logps/rejected": -276.08038330078125, "loss": 0.029, "rewards/accuracies": 1.0, "rewards/chosen": -2.9154648780822754, "rewards/margins": 4.544851779937744, "rewards/rejected": -7.4603166580200195, "step": 10879 }, { "epoch": 1.69, "learning_rate": 6.167501426422532e-06, "logits/chosen": -2.3745548725128174, "logits/rejected": -2.750690221786499, "logps/chosen": -214.44078063964844, "logps/rejected": -393.92138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6871864795684814, "rewards/margins": 9.602862358093262, "rewards/rejected": -13.290048599243164, "step": 10880 }, { "epoch": 1.69, "learning_rate": 6.166767985891385e-06, "logits/chosen": -2.6845953464508057, "logits/rejected": -3.266183614730835, "logps/chosen": -84.46250915527344, "logps/rejected": -317.6230163574219, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -4.039806365966797, "rewards/margins": 6.757022857666016, "rewards/rejected": -10.796829223632812, "step": 10881 }, { "epoch": 1.69, "learning_rate": 6.1660345453602365e-06, "logits/chosen": -2.9072139263153076, "logits/rejected": -3.0338847637176514, "logps/chosen": -147.86923217773438, "logps/rejected": -393.36090087890625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -3.11553955078125, "rewards/margins": 7.919985294342041, "rewards/rejected": -11.035524368286133, "step": 10882 }, { "epoch": 1.69, "learning_rate": 6.165301104829088e-06, "logits/chosen": -2.8592123985290527, "logits/rejected": -2.0999295711517334, "logps/chosen": -324.5708923339844, "logps/rejected": -261.4825744628906, "loss": 0.1377, "rewards/accuracies": 1.0, "rewards/chosen": -3.048034429550171, "rewards/margins": 2.7937471866607666, "rewards/rejected": -5.8417816162109375, "step": 10883 }, { "epoch": 1.69, "learning_rate": 6.16456766429794e-06, "logits/chosen": -2.6634252071380615, "logits/rejected": -2.7186686992645264, "logps/chosen": -309.8637390136719, "logps/rejected": -263.56744384765625, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -3.1192374229431152, "rewards/margins": 4.909407615661621, "rewards/rejected": -8.028645515441895, "step": 10884 }, { "epoch": 1.69, "learning_rate": 6.163834223766792e-06, "logits/chosen": -2.0229406356811523, "logits/rejected": -3.2786054611206055, "logps/chosen": -313.2373352050781, "logps/rejected": -423.66265869140625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -2.525324821472168, "rewards/margins": 5.781456470489502, "rewards/rejected": -8.306780815124512, "step": 10885 }, { "epoch": 1.69, "learning_rate": 6.163100783235645e-06, "logits/chosen": -2.812046766281128, "logits/rejected": -2.663567066192627, "logps/chosen": -504.754638671875, "logps/rejected": -476.7560119628906, "loss": 0.024, "rewards/accuracies": 1.0, "rewards/chosen": -4.597320556640625, "rewards/margins": 4.9261322021484375, "rewards/rejected": -9.523452758789062, "step": 10886 }, { "epoch": 1.69, "learning_rate": 6.162367342704498e-06, "logits/chosen": -2.2270922660827637, "logits/rejected": -3.240725517272949, "logps/chosen": -97.03968811035156, "logps/rejected": -324.85504150390625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -4.086528778076172, "rewards/margins": 6.127619743347168, "rewards/rejected": -10.21414852142334, "step": 10887 }, { "epoch": 1.69, "learning_rate": 6.1616339021733495e-06, "logits/chosen": -2.7870535850524902, "logits/rejected": -2.995328187942505, "logps/chosen": -79.99346160888672, "logps/rejected": -288.09649658203125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.8849992752075195, "rewards/margins": 7.702306747436523, "rewards/rejected": -10.58730697631836, "step": 10888 }, { "epoch": 1.69, "learning_rate": 6.160900461642201e-06, "logits/chosen": -2.788954257965088, "logits/rejected": -2.109055519104004, "logps/chosen": -314.5106201171875, "logps/rejected": -316.6148376464844, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -5.714129447937012, "rewards/margins": 3.432123899459839, "rewards/rejected": -9.14625358581543, "step": 10889 }, { "epoch": 1.69, "learning_rate": 6.160167021111054e-06, "logits/chosen": -2.248544931411743, "logits/rejected": -2.6855618953704834, "logps/chosen": -115.98245239257812, "logps/rejected": -161.3458709716797, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -4.438507080078125, "rewards/margins": 5.2995147705078125, "rewards/rejected": -9.738021850585938, "step": 10890 }, { "epoch": 1.69, "learning_rate": 6.159433580579906e-06, "logits/chosen": -2.7016849517822266, "logits/rejected": -3.1659884452819824, "logps/chosen": -134.16925048828125, "logps/rejected": -343.96759033203125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.807661533355713, "rewards/margins": 6.306361198425293, "rewards/rejected": -11.114023208618164, "step": 10891 }, { "epoch": 1.69, "learning_rate": 6.158700140048758e-06, "logits/chosen": -2.809735059738159, "logits/rejected": -1.741709589958191, "logps/chosen": -218.2144775390625, "logps/rejected": -250.54888916015625, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -5.201125144958496, "rewards/margins": 6.845568656921387, "rewards/rejected": -12.046693801879883, "step": 10892 }, { "epoch": 1.69, "learning_rate": 6.15796669951761e-06, "logits/chosen": -2.4614577293395996, "logits/rejected": -2.8918509483337402, "logps/chosen": -268.6326904296875, "logps/rejected": -344.0433654785156, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -4.570389747619629, "rewards/margins": 4.267263412475586, "rewards/rejected": -8.837652206420898, "step": 10893 }, { "epoch": 1.69, "learning_rate": 6.1572332589864625e-06, "logits/chosen": -2.913717031478882, "logits/rejected": -3.024705648422241, "logps/chosen": -121.72337341308594, "logps/rejected": -358.04766845703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.809109926223755, "rewards/margins": 9.248296737670898, "rewards/rejected": -13.057406425476074, "step": 10894 }, { "epoch": 1.69, "learning_rate": 6.156499818455314e-06, "logits/chosen": -2.961230993270874, "logits/rejected": -2.7768640518188477, "logps/chosen": -150.06768798828125, "logps/rejected": -180.47683715820312, "loss": 1.094, "rewards/accuracies": 0.5, "rewards/chosen": -4.1309494972229, "rewards/margins": 3.256127119064331, "rewards/rejected": -7.387076377868652, "step": 10895 }, { "epoch": 1.69, "learning_rate": 6.155766377924166e-06, "logits/chosen": -3.0974035263061523, "logits/rejected": -2.667891025543213, "logps/chosen": -278.2371826171875, "logps/rejected": -310.7112121582031, "loss": 0.7193, "rewards/accuracies": 0.5, "rewards/chosen": -4.892392635345459, "rewards/margins": 4.509397506713867, "rewards/rejected": -9.401790618896484, "step": 10896 }, { "epoch": 1.69, "learning_rate": 6.155032937393018e-06, "logits/chosen": -3.0738208293914795, "logits/rejected": -3.010117769241333, "logps/chosen": -409.0560302734375, "logps/rejected": -360.5486755371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7601203918457031, "rewards/margins": 11.340576171875, "rewards/rejected": -13.100696563720703, "step": 10897 }, { "epoch": 1.69, "learning_rate": 6.15429949686187e-06, "logits/chosen": -2.347465753555298, "logits/rejected": -2.8692641258239746, "logps/chosen": -122.53195190429688, "logps/rejected": -277.12701416015625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -4.311362266540527, "rewards/margins": 7.127834796905518, "rewards/rejected": -11.439197540283203, "step": 10898 }, { "epoch": 1.7, "learning_rate": 6.153566056330723e-06, "logits/chosen": -2.804978370666504, "logits/rejected": -2.6360127925872803, "logps/chosen": -303.9836120605469, "logps/rejected": -345.22314453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.9485886096954346, "rewards/margins": 6.436141014099121, "rewards/rejected": -8.384729385375977, "step": 10899 }, { "epoch": 1.7, "learning_rate": 6.1528326157995746e-06, "logits/chosen": -2.890031576156616, "logits/rejected": -2.1866989135742188, "logps/chosen": -589.9911499023438, "logps/rejected": -429.4452209472656, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -4.152887344360352, "rewards/margins": 5.598483085632324, "rewards/rejected": -9.751370429992676, "step": 10900 }, { "epoch": 1.7, "learning_rate": 6.1520991752684264e-06, "logits/chosen": -1.7355923652648926, "logits/rejected": -2.7337241172790527, "logps/chosen": -139.92526245117188, "logps/rejected": -316.35498046875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.254408836364746, "rewards/margins": 7.625942230224609, "rewards/rejected": -11.880352020263672, "step": 10901 }, { "epoch": 1.7, "learning_rate": 6.151365734737278e-06, "logits/chosen": -2.47698974609375, "logits/rejected": -2.8381295204162598, "logps/chosen": -104.62126159667969, "logps/rejected": -155.19195556640625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -2.689115524291992, "rewards/margins": 5.856512546539307, "rewards/rejected": -8.54562759399414, "step": 10902 }, { "epoch": 1.7, "learning_rate": 6.150632294206131e-06, "logits/chosen": -2.5457258224487305, "logits/rejected": -2.7247872352600098, "logps/chosen": -307.7568359375, "logps/rejected": -466.16693115234375, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -3.259871482849121, "rewards/margins": 3.978231430053711, "rewards/rejected": -7.238102912902832, "step": 10903 }, { "epoch": 1.7, "learning_rate": 6.149898853674984e-06, "logits/chosen": -2.440958261489868, "logits/rejected": -2.954634666442871, "logps/chosen": -62.10832214355469, "logps/rejected": -276.202880859375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -3.910398006439209, "rewards/margins": 7.589253902435303, "rewards/rejected": -11.499651908874512, "step": 10904 }, { "epoch": 1.7, "learning_rate": 6.149165413143836e-06, "logits/chosen": -2.8576695919036865, "logits/rejected": -3.211489677429199, "logps/chosen": -521.3135375976562, "logps/rejected": -684.3335571289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8699760437011719, "rewards/margins": 12.101016998291016, "rewards/rejected": -12.970993041992188, "step": 10905 }, { "epoch": 1.7, "learning_rate": 6.1484319726126875e-06, "logits/chosen": -1.903252363204956, "logits/rejected": -1.4150676727294922, "logps/chosen": -407.19903564453125, "logps/rejected": -461.70635986328125, "loss": 2.1873, "rewards/accuracies": 0.5, "rewards/chosen": -7.23110294342041, "rewards/margins": 2.0905580520629883, "rewards/rejected": -9.321660995483398, "step": 10906 }, { "epoch": 1.7, "learning_rate": 6.147698532081539e-06, "logits/chosen": -2.536017656326294, "logits/rejected": -3.06638765335083, "logps/chosen": -191.93765258789062, "logps/rejected": -386.7962341308594, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.0821871757507324, "rewards/margins": 7.7035980224609375, "rewards/rejected": -10.785785675048828, "step": 10907 }, { "epoch": 1.7, "learning_rate": 6.146965091550392e-06, "logits/chosen": -2.338972806930542, "logits/rejected": -2.4997408390045166, "logps/chosen": -289.5912170410156, "logps/rejected": -281.77923583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.9454796314239502, "rewards/margins": 10.719255447387695, "rewards/rejected": -11.664735794067383, "step": 10908 }, { "epoch": 1.7, "learning_rate": 6.146231651019244e-06, "logits/chosen": -1.0306724309921265, "logits/rejected": -3.0976903438568115, "logps/chosen": -93.4330825805664, "logps/rejected": -468.8119812011719, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": -7.492239475250244, "rewards/margins": 4.339926242828369, "rewards/rejected": -11.832165718078613, "step": 10909 }, { "epoch": 1.7, "learning_rate": 6.145498210488096e-06, "logits/chosen": -2.4565796852111816, "logits/rejected": -3.0520339012145996, "logps/chosen": -221.52659606933594, "logps/rejected": -411.663818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.455655574798584, "rewards/margins": 11.452695846557617, "rewards/rejected": -13.90835189819336, "step": 10910 }, { "epoch": 1.7, "learning_rate": 6.144764769956948e-06, "logits/chosen": -2.1435797214508057, "logits/rejected": -2.9558212757110596, "logps/chosen": -129.01405334472656, "logps/rejected": -240.3277587890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0130844116210938, "rewards/margins": 8.86376953125, "rewards/rejected": -10.876853942871094, "step": 10911 }, { "epoch": 1.7, "learning_rate": 6.1440313294258005e-06, "logits/chosen": -2.0539445877075195, "logits/rejected": -3.009145975112915, "logps/chosen": -109.32359313964844, "logps/rejected": -315.1376953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.9810216426849365, "rewards/margins": 7.195512771606445, "rewards/rejected": -10.176534652709961, "step": 10912 }, { "epoch": 1.7, "learning_rate": 6.143297888894652e-06, "logits/chosen": -1.4849683046340942, "logits/rejected": -2.738398551940918, "logps/chosen": -177.1346435546875, "logps/rejected": -292.06298828125, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -6.6153364181518555, "rewards/margins": 6.230555534362793, "rewards/rejected": -12.845891952514648, "step": 10913 }, { "epoch": 1.7, "learning_rate": 6.142564448363504e-06, "logits/chosen": -2.393669366836548, "logits/rejected": -3.161005973815918, "logps/chosen": -126.7360610961914, "logps/rejected": -303.732666015625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -4.833686828613281, "rewards/margins": 5.153835296630859, "rewards/rejected": -9.98752212524414, "step": 10914 }, { "epoch": 1.7, "learning_rate": 6.141831007832356e-06, "logits/chosen": -2.3361988067626953, "logits/rejected": -3.005889654159546, "logps/chosen": -75.22537994384766, "logps/rejected": -455.67498779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.547402858734131, "rewards/margins": 11.41733169555664, "rewards/rejected": -15.96473503112793, "step": 10915 }, { "epoch": 1.7, "learning_rate": 6.141097567301208e-06, "logits/chosen": -3.0383684635162354, "logits/rejected": -1.906392216682434, "logps/chosen": -260.7189025878906, "logps/rejected": -214.46548461914062, "loss": 0.082, "rewards/accuracies": 1.0, "rewards/chosen": -4.248904705047607, "rewards/margins": 3.1497631072998047, "rewards/rejected": -7.398667812347412, "step": 10916 }, { "epoch": 1.7, "learning_rate": 6.140364126770061e-06, "logits/chosen": -3.0741820335388184, "logits/rejected": -2.4933254718780518, "logps/chosen": -712.21240234375, "logps/rejected": -417.41644287109375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -3.556872606277466, "rewards/margins": 8.862699508666992, "rewards/rejected": -12.419572830200195, "step": 10917 }, { "epoch": 1.7, "learning_rate": 6.139630686238913e-06, "logits/chosen": -2.117405414581299, "logits/rejected": -2.9645776748657227, "logps/chosen": -144.70249938964844, "logps/rejected": -402.412109375, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -2.7363388538360596, "rewards/margins": 7.759705543518066, "rewards/rejected": -10.496045112609863, "step": 10918 }, { "epoch": 1.7, "learning_rate": 6.1388972457077645e-06, "logits/chosen": -2.51344895362854, "logits/rejected": -2.6650571823120117, "logps/chosen": -472.3789978027344, "logps/rejected": -465.1386413574219, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.2228217124938965, "rewards/margins": 7.492616176605225, "rewards/rejected": -12.715438842773438, "step": 10919 }, { "epoch": 1.7, "learning_rate": 6.138163805176617e-06, "logits/chosen": -2.205303430557251, "logits/rejected": -2.9179821014404297, "logps/chosen": -50.63349151611328, "logps/rejected": -313.3334045410156, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.2891688346862793, "rewards/margins": 9.779800415039062, "rewards/rejected": -12.0689697265625, "step": 10920 }, { "epoch": 1.7, "learning_rate": 6.13743036464547e-06, "logits/chosen": -2.879211187362671, "logits/rejected": -1.723217487335205, "logps/chosen": -183.33270263671875, "logps/rejected": -126.64259338378906, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -1.7388579845428467, "rewards/margins": 5.75956916809082, "rewards/rejected": -7.498427391052246, "step": 10921 }, { "epoch": 1.7, "learning_rate": 6.136696924114322e-06, "logits/chosen": -1.7858299016952515, "logits/rejected": -2.7995455265045166, "logps/chosen": -149.8277130126953, "logps/rejected": -371.5078430175781, "loss": 1.2051, "rewards/accuracies": 0.5, "rewards/chosen": -3.786285400390625, "rewards/margins": 6.018552780151367, "rewards/rejected": -9.804838180541992, "step": 10922 }, { "epoch": 1.7, "learning_rate": 6.135963483583174e-06, "logits/chosen": -2.7964816093444824, "logits/rejected": -2.6872739791870117, "logps/chosen": -575.861328125, "logps/rejected": -491.96600341796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.6890158653259277, "rewards/margins": 7.428237438201904, "rewards/rejected": -10.117253303527832, "step": 10923 }, { "epoch": 1.7, "learning_rate": 6.1352300430520256e-06, "logits/chosen": -3.03652024269104, "logits/rejected": -2.9196386337280273, "logps/chosen": -307.8638916015625, "logps/rejected": -395.21624755859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.264387130737305, "rewards/margins": 9.981866836547852, "rewards/rejected": -14.246253967285156, "step": 10924 }, { "epoch": 1.7, "learning_rate": 6.1344966025208775e-06, "logits/chosen": -2.3344976902008057, "logits/rejected": -3.391746759414673, "logps/chosen": -173.51785278320312, "logps/rejected": -496.04559326171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.1002626419067383, "rewards/margins": 7.786747932434082, "rewards/rejected": -10.88701057434082, "step": 10925 }, { "epoch": 1.7, "learning_rate": 6.13376316198973e-06, "logits/chosen": -2.594193458557129, "logits/rejected": -3.05830979347229, "logps/chosen": -154.2548828125, "logps/rejected": -341.1014709472656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.7327423095703125, "rewards/margins": 7.4444684982299805, "rewards/rejected": -10.17721176147461, "step": 10926 }, { "epoch": 1.7, "learning_rate": 6.133029721458582e-06, "logits/chosen": -2.7185709476470947, "logits/rejected": -1.5054267644882202, "logps/chosen": -172.51019287109375, "logps/rejected": -205.91915893554688, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -5.921116828918457, "rewards/margins": 5.415155410766602, "rewards/rejected": -11.336273193359375, "step": 10927 }, { "epoch": 1.7, "learning_rate": 6.132296280927434e-06, "logits/chosen": -2.165832757949829, "logits/rejected": -3.009037971496582, "logps/chosen": -107.28400421142578, "logps/rejected": -313.393798828125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.513460159301758, "rewards/margins": 6.619307518005371, "rewards/rejected": -10.132767677307129, "step": 10928 }, { "epoch": 1.7, "learning_rate": 6.131562840396286e-06, "logits/chosen": -2.8354265689849854, "logits/rejected": -3.1352808475494385, "logps/chosen": -160.03082275390625, "logps/rejected": -347.6316833496094, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -1.5748192071914673, "rewards/margins": 6.961991786956787, "rewards/rejected": -8.536810874938965, "step": 10929 }, { "epoch": 1.7, "learning_rate": 6.1308293998651385e-06, "logits/chosen": -2.3509693145751953, "logits/rejected": -2.8826143741607666, "logps/chosen": -190.35076904296875, "logps/rejected": -446.7509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8538503646850586, "rewards/margins": 12.21821403503418, "rewards/rejected": -15.072064399719238, "step": 10930 }, { "epoch": 1.7, "learning_rate": 6.13009595933399e-06, "logits/chosen": -2.9408507347106934, "logits/rejected": -1.1454461812973022, "logps/chosen": -394.99627685546875, "logps/rejected": -214.10374450683594, "loss": 0.9012, "rewards/accuracies": 0.5, "rewards/chosen": -5.259922981262207, "rewards/margins": 1.4694490432739258, "rewards/rejected": -6.729372024536133, "step": 10931 }, { "epoch": 1.7, "learning_rate": 6.129362518802842e-06, "logits/chosen": -1.4525835514068604, "logits/rejected": -2.8808658123016357, "logps/chosen": -98.60875701904297, "logps/rejected": -253.35897827148438, "loss": 0.0728, "rewards/accuracies": 1.0, "rewards/chosen": -5.56839656829834, "rewards/margins": 2.7440028190612793, "rewards/rejected": -8.312398910522461, "step": 10932 }, { "epoch": 1.7, "learning_rate": 6.128629078271694e-06, "logits/chosen": -2.7469897270202637, "logits/rejected": -2.514923334121704, "logps/chosen": -307.8553161621094, "logps/rejected": -342.56085205078125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -2.6872856616973877, "rewards/margins": 5.383191108703613, "rewards/rejected": -8.070476531982422, "step": 10933 }, { "epoch": 1.7, "learning_rate": 6.127895637740546e-06, "logits/chosen": -2.6360671520233154, "logits/rejected": -2.78132700920105, "logps/chosen": -503.08154296875, "logps/rejected": -662.4248657226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.944112777709961, "rewards/margins": 13.00441837310791, "rewards/rejected": -18.948532104492188, "step": 10934 }, { "epoch": 1.7, "learning_rate": 6.127162197209399e-06, "logits/chosen": -2.7960643768310547, "logits/rejected": -2.9945738315582275, "logps/chosen": -111.24478149414062, "logps/rejected": -218.2608642578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.327566623687744, "rewards/margins": 6.687515735626221, "rewards/rejected": -11.015082359313965, "step": 10935 }, { "epoch": 1.7, "learning_rate": 6.126428756678251e-06, "logits/chosen": -2.922205924987793, "logits/rejected": -2.1700844764709473, "logps/chosen": -530.4531860351562, "logps/rejected": -413.5539245605469, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -5.359376907348633, "rewards/margins": 6.994908332824707, "rewards/rejected": -12.354284286499023, "step": 10936 }, { "epoch": 1.7, "learning_rate": 6.125695316147103e-06, "logits/chosen": -2.6306262016296387, "logits/rejected": -3.0186429023742676, "logps/chosen": -149.0059356689453, "logps/rejected": -354.47003173828125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -5.591963768005371, "rewards/margins": 9.23462200164795, "rewards/rejected": -14.82658576965332, "step": 10937 }, { "epoch": 1.7, "learning_rate": 6.124961875615955e-06, "logits/chosen": -2.0930166244506836, "logits/rejected": -3.0157742500305176, "logps/chosen": -113.52994537353516, "logps/rejected": -357.19146728515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.6904349327087402, "rewards/margins": 8.958298683166504, "rewards/rejected": -12.648733139038086, "step": 10938 }, { "epoch": 1.7, "learning_rate": 6.124228435084808e-06, "logits/chosen": -2.6818137168884277, "logits/rejected": -3.0166537761688232, "logps/chosen": -260.6462097167969, "logps/rejected": -419.13800048828125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.0189671516418457, "rewards/margins": 6.320680618286133, "rewards/rejected": -9.33964729309082, "step": 10939 }, { "epoch": 1.7, "learning_rate": 6.12349499455366e-06, "logits/chosen": -2.225130796432495, "logits/rejected": -2.7938756942749023, "logps/chosen": -700.695068359375, "logps/rejected": -646.0433349609375, "loss": 0.8006, "rewards/accuracies": 0.5, "rewards/chosen": -5.042733192443848, "rewards/margins": 6.849519729614258, "rewards/rejected": -11.892252922058105, "step": 10940 }, { "epoch": 1.7, "learning_rate": 6.122761554022512e-06, "logits/chosen": -2.481910228729248, "logits/rejected": -3.0057168006896973, "logps/chosen": -139.180908203125, "logps/rejected": -276.73358154296875, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -4.0818328857421875, "rewards/margins": 3.875244617462158, "rewards/rejected": -7.957077980041504, "step": 10941 }, { "epoch": 1.7, "learning_rate": 6.122028113491364e-06, "logits/chosen": -1.5858808755874634, "logits/rejected": -2.9774129390716553, "logps/chosen": -118.19328308105469, "logps/rejected": -313.8538818359375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.230564117431641, "rewards/margins": 8.260525703430176, "rewards/rejected": -12.491089820861816, "step": 10942 }, { "epoch": 1.7, "learning_rate": 6.121294672960216e-06, "logits/chosen": -2.160673141479492, "logits/rejected": -3.089506149291992, "logps/chosen": -255.59695434570312, "logps/rejected": -566.581787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8976173400878906, "rewards/margins": 10.443178176879883, "rewards/rejected": -12.340795516967773, "step": 10943 }, { "epoch": 1.7, "learning_rate": 6.120561232429068e-06, "logits/chosen": -2.7523581981658936, "logits/rejected": -3.0023016929626465, "logps/chosen": -165.2008819580078, "logps/rejected": -284.4678039550781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.083385467529297, "rewards/margins": 8.021424293518066, "rewards/rejected": -14.104809761047363, "step": 10944 }, { "epoch": 1.7, "learning_rate": 6.11982779189792e-06, "logits/chosen": -1.423344373703003, "logits/rejected": -2.7882790565490723, "logps/chosen": -339.77874755859375, "logps/rejected": -598.5692138671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.390570163726807, "rewards/margins": 10.303997993469238, "rewards/rejected": -14.694568634033203, "step": 10945 }, { "epoch": 1.7, "learning_rate": 6.119094351366772e-06, "logits/chosen": -2.7079246044158936, "logits/rejected": -2.8880093097686768, "logps/chosen": -387.65618896484375, "logps/rejected": -435.6060791015625, "loss": 2.1043, "rewards/accuracies": 0.5, "rewards/chosen": -5.7757439613342285, "rewards/margins": 8.154958724975586, "rewards/rejected": -13.930702209472656, "step": 10946 }, { "epoch": 1.7, "learning_rate": 6.118360910835624e-06, "logits/chosen": -2.363454818725586, "logits/rejected": -2.9499404430389404, "logps/chosen": -268.9253234863281, "logps/rejected": -351.836181640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.5276284217834473, "rewards/margins": 7.380640983581543, "rewards/rejected": -10.908268928527832, "step": 10947 }, { "epoch": 1.7, "learning_rate": 6.117627470304477e-06, "logits/chosen": -3.234933614730835, "logits/rejected": -2.9910154342651367, "logps/chosen": -212.45870971679688, "logps/rejected": -339.66558837890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.344305038452148, "rewards/margins": 8.993183135986328, "rewards/rejected": -14.337488174438477, "step": 10948 }, { "epoch": 1.7, "learning_rate": 6.1168940297733285e-06, "logits/chosen": -2.6810264587402344, "logits/rejected": -3.0483200550079346, "logps/chosen": -136.090576171875, "logps/rejected": -259.6257629394531, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.81962251663208, "rewards/margins": 5.987417221069336, "rewards/rejected": -9.807039260864258, "step": 10949 }, { "epoch": 1.7, "learning_rate": 6.11616058924218e-06, "logits/chosen": -2.7916054725646973, "logits/rejected": -2.8435888290405273, "logps/chosen": -691.6112670898438, "logps/rejected": -717.1603393554688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -8.218118667602539, "rewards/margins": 7.510884761810303, "rewards/rejected": -15.729002952575684, "step": 10950 }, { "epoch": 1.7, "learning_rate": 6.115427148711032e-06, "logits/chosen": -2.5767464637756348, "logits/rejected": -3.1031696796417236, "logps/chosen": -551.7918701171875, "logps/rejected": -688.6865234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.1985716819763184, "rewards/margins": 7.936453342437744, "rewards/rejected": -11.135025024414062, "step": 10951 }, { "epoch": 1.7, "learning_rate": 6.114693708179885e-06, "logits/chosen": -2.9966652393341064, "logits/rejected": -2.9698197841644287, "logps/chosen": -191.60211181640625, "logps/rejected": -296.88763427734375, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -2.635343074798584, "rewards/margins": 6.516551971435547, "rewards/rejected": -9.151895523071289, "step": 10952 }, { "epoch": 1.7, "learning_rate": 6.113960267648737e-06, "logits/chosen": -2.7168712615966797, "logits/rejected": -2.672907590866089, "logps/chosen": -512.0265502929688, "logps/rejected": -574.8327026367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9654699563980103, "rewards/margins": 14.55099105834961, "rewards/rejected": -16.516460418701172, "step": 10953 }, { "epoch": 1.7, "learning_rate": 6.1132268271175895e-06, "logits/chosen": -2.809847593307495, "logits/rejected": -3.2980895042419434, "logps/chosen": -163.87344360351562, "logps/rejected": -329.7705993652344, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.355927467346191, "rewards/margins": 7.5863471031188965, "rewards/rejected": -12.942275047302246, "step": 10954 }, { "epoch": 1.7, "learning_rate": 6.1124933865864414e-06, "logits/chosen": -2.389397621154785, "logits/rejected": -3.0326168537139893, "logps/chosen": -133.2559814453125, "logps/rejected": -272.70123291015625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.409895896911621, "rewards/margins": 6.556878566741943, "rewards/rejected": -9.966773986816406, "step": 10955 }, { "epoch": 1.7, "learning_rate": 6.111759946055293e-06, "logits/chosen": -1.3696517944335938, "logits/rejected": -2.902552843093872, "logps/chosen": -212.561767578125, "logps/rejected": -541.84619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.296861171722412, "rewards/margins": 10.340129852294922, "rewards/rejected": -13.636991500854492, "step": 10956 }, { "epoch": 1.7, "learning_rate": 6.111026505524146e-06, "logits/chosen": -2.982055902481079, "logits/rejected": -1.811853051185608, "logps/chosen": -415.6948547363281, "logps/rejected": -347.84918212890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7927002906799316, "rewards/margins": 9.09585952758789, "rewards/rejected": -11.888559341430664, "step": 10957 }, { "epoch": 1.7, "learning_rate": 6.110293064992998e-06, "logits/chosen": -3.0632028579711914, "logits/rejected": -3.211380958557129, "logps/chosen": -360.8173828125, "logps/rejected": -377.9848327636719, "loss": 2.0461, "rewards/accuracies": 0.5, "rewards/chosen": -10.432546615600586, "rewards/margins": 3.629981756210327, "rewards/rejected": -14.062528610229492, "step": 10958 }, { "epoch": 1.7, "learning_rate": 6.10955962446185e-06, "logits/chosen": -2.26464581489563, "logits/rejected": -2.9563212394714355, "logps/chosen": -197.142578125, "logps/rejected": -386.59051513671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.5168681144714355, "rewards/margins": 9.49542236328125, "rewards/rejected": -14.012290000915527, "step": 10959 }, { "epoch": 1.7, "learning_rate": 6.108826183930702e-06, "logits/chosen": -2.657184600830078, "logits/rejected": -2.774984836578369, "logps/chosen": -324.84375, "logps/rejected": -307.75506591796875, "loss": 1.2941, "rewards/accuracies": 0.5, "rewards/chosen": -5.895589351654053, "rewards/margins": 2.1790401935577393, "rewards/rejected": -8.074629783630371, "step": 10960 }, { "epoch": 1.7, "learning_rate": 6.108092743399554e-06, "logits/chosen": -2.720949411392212, "logits/rejected": -2.918191909790039, "logps/chosen": -62.3454704284668, "logps/rejected": -454.7704162597656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.193857431411743, "rewards/margins": 11.923887252807617, "rewards/rejected": -15.117744445800781, "step": 10961 }, { "epoch": 1.7, "learning_rate": 6.107359302868406e-06, "logits/chosen": -2.6809589862823486, "logits/rejected": -2.0818049907684326, "logps/chosen": -487.62774658203125, "logps/rejected": -420.2036437988281, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.0530953407287598, "rewards/margins": 10.78927993774414, "rewards/rejected": -13.842375755310059, "step": 10962 }, { "epoch": 1.7, "learning_rate": 6.106625862337258e-06, "logits/chosen": -3.170544147491455, "logits/rejected": -2.9123196601867676, "logps/chosen": -263.2330017089844, "logps/rejected": -279.9054260253906, "loss": 0.3622, "rewards/accuracies": 0.5, "rewards/chosen": -5.810959815979004, "rewards/margins": 2.089826822280884, "rewards/rejected": -7.900786399841309, "step": 10963 }, { "epoch": 1.71, "learning_rate": 6.10589242180611e-06, "logits/chosen": -2.8833022117614746, "logits/rejected": -2.1358749866485596, "logps/chosen": -322.26025390625, "logps/rejected": -128.55865478515625, "loss": 1.2931, "rewards/accuracies": 0.5, "rewards/chosen": -4.123497009277344, "rewards/margins": 0.8906573057174683, "rewards/rejected": -5.014154434204102, "step": 10964 }, { "epoch": 1.71, "learning_rate": 6.105158981274962e-06, "logits/chosen": -2.3416748046875, "logits/rejected": -2.985496997833252, "logps/chosen": -117.09070587158203, "logps/rejected": -290.2485046386719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.0182206630706787, "rewards/margins": 8.395975112915039, "rewards/rejected": -11.414196014404297, "step": 10965 }, { "epoch": 1.71, "learning_rate": 6.104425540743815e-06, "logits/chosen": -0.7787020206451416, "logits/rejected": -2.1537587642669678, "logps/chosen": -200.80335998535156, "logps/rejected": -463.5537414550781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.1465375423431396, "rewards/margins": 11.192915916442871, "rewards/rejected": -14.33945369720459, "step": 10966 }, { "epoch": 1.71, "learning_rate": 6.1036921002126665e-06, "logits/chosen": -1.3124669790267944, "logits/rejected": -2.9415552616119385, "logps/chosen": -141.70913696289062, "logps/rejected": -451.60101318359375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.72636604309082, "rewards/margins": 7.500611305236816, "rewards/rejected": -13.226977348327637, "step": 10967 }, { "epoch": 1.71, "learning_rate": 6.102958659681518e-06, "logits/chosen": -2.6931426525115967, "logits/rejected": -2.8230838775634766, "logps/chosen": -80.10305786132812, "logps/rejected": -152.89544677734375, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -4.942947864532471, "rewards/margins": 4.567076206207275, "rewards/rejected": -9.510024070739746, "step": 10968 }, { "epoch": 1.71, "learning_rate": 6.10222521915037e-06, "logits/chosen": -1.828445315361023, "logits/rejected": -2.739074945449829, "logps/chosen": -194.09814453125, "logps/rejected": -642.0591430664062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6142492294311523, "rewards/margins": 11.032515525817871, "rewards/rejected": -14.646764755249023, "step": 10969 }, { "epoch": 1.71, "learning_rate": 6.101491778619223e-06, "logits/chosen": -2.868006467819214, "logits/rejected": -3.1006507873535156, "logps/chosen": -70.91149139404297, "logps/rejected": -163.29241943359375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.2583305835723877, "rewards/margins": 5.763829708099365, "rewards/rejected": -9.022160530090332, "step": 10970 }, { "epoch": 1.71, "learning_rate": 6.100758338088076e-06, "logits/chosen": -2.5626096725463867, "logits/rejected": -2.9099466800689697, "logps/chosen": -362.7814636230469, "logps/rejected": -603.8045043945312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.209601879119873, "rewards/margins": 11.272157669067383, "rewards/rejected": -14.481760025024414, "step": 10971 }, { "epoch": 1.71, "learning_rate": 6.100024897556928e-06, "logits/chosen": -1.6131244897842407, "logits/rejected": -2.635284662246704, "logps/chosen": -101.1949462890625, "logps/rejected": -222.29119873046875, "loss": 0.2616, "rewards/accuracies": 1.0, "rewards/chosen": -6.110735893249512, "rewards/margins": 3.0394887924194336, "rewards/rejected": -9.150224685668945, "step": 10972 }, { "epoch": 1.71, "learning_rate": 6.0992914570257795e-06, "logits/chosen": -2.9512791633605957, "logits/rejected": -2.2870612144470215, "logps/chosen": -457.1077880859375, "logps/rejected": -468.6463928222656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.2465133666992188, "rewards/margins": 13.05640983581543, "rewards/rejected": -15.302923202514648, "step": 10973 }, { "epoch": 1.71, "learning_rate": 6.098558016494631e-06, "logits/chosen": -2.522596597671509, "logits/rejected": -3.1995246410369873, "logps/chosen": -126.66553497314453, "logps/rejected": -293.3460388183594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.987987518310547, "rewards/margins": 8.37226390838623, "rewards/rejected": -11.360250473022461, "step": 10974 }, { "epoch": 1.71, "learning_rate": 6.097824575963484e-06, "logits/chosen": -2.751239538192749, "logits/rejected": -2.703083038330078, "logps/chosen": -182.09295654296875, "logps/rejected": -235.6569366455078, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/chosen": -3.3432719707489014, "rewards/margins": 5.007969379425049, "rewards/rejected": -8.351241111755371, "step": 10975 }, { "epoch": 1.71, "learning_rate": 6.097091135432336e-06, "logits/chosen": -1.3518935441970825, "logits/rejected": -3.017425060272217, "logps/chosen": -219.17318725585938, "logps/rejected": -534.9486694335938, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.8120522499084473, "rewards/margins": 6.3016886711120605, "rewards/rejected": -10.113740921020508, "step": 10976 }, { "epoch": 1.71, "learning_rate": 6.096357694901188e-06, "logits/chosen": -2.839447259902954, "logits/rejected": -2.5526158809661865, "logps/chosen": -471.0147705078125, "logps/rejected": -444.3352355957031, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -3.7173619270324707, "rewards/margins": 6.786810874938965, "rewards/rejected": -10.504173278808594, "step": 10977 }, { "epoch": 1.71, "learning_rate": 6.09562425437004e-06, "logits/chosen": -1.7503390312194824, "logits/rejected": -2.964350938796997, "logps/chosen": -149.9035186767578, "logps/rejected": -383.9356689453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.1997642517089844, "rewards/margins": 9.322338104248047, "rewards/rejected": -12.522102355957031, "step": 10978 }, { "epoch": 1.71, "learning_rate": 6.0948908138388924e-06, "logits/chosen": -2.7492029666900635, "logits/rejected": -3.1120495796203613, "logps/chosen": -157.84840393066406, "logps/rejected": -351.2934875488281, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.063559055328369, "rewards/margins": 7.8280229568481445, "rewards/rejected": -11.891582489013672, "step": 10979 }, { "epoch": 1.71, "learning_rate": 6.094157373307744e-06, "logits/chosen": -2.8154289722442627, "logits/rejected": -1.9052845239639282, "logps/chosen": -356.60736083984375, "logps/rejected": -252.08480834960938, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -4.97943115234375, "rewards/margins": 5.555452346801758, "rewards/rejected": -10.534883499145508, "step": 10980 }, { "epoch": 1.71, "learning_rate": 6.093423932776596e-06, "logits/chosen": -1.8963661193847656, "logits/rejected": -2.8288941383361816, "logps/chosen": -113.82561492919922, "logps/rejected": -262.5818176269531, "loss": 1.9526, "rewards/accuracies": 0.5, "rewards/chosen": -7.120071887969971, "rewards/margins": 2.3807220458984375, "rewards/rejected": -9.50079345703125, "step": 10981 }, { "epoch": 1.71, "learning_rate": 6.092690492245448e-06, "logits/chosen": -2.0582306385040283, "logits/rejected": -2.8683853149414062, "logps/chosen": -145.82745361328125, "logps/rejected": -423.8689270019531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8105597496032715, "rewards/margins": 9.770843505859375, "rewards/rejected": -13.581403732299805, "step": 10982 }, { "epoch": 1.71, "learning_rate": 6.0919570517143e-06, "logits/chosen": -2.140366315841675, "logits/rejected": -2.656585454940796, "logps/chosen": -136.28111267089844, "logps/rejected": -314.95355224609375, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": -4.131062030792236, "rewards/margins": 6.4741129875183105, "rewards/rejected": -10.605175018310547, "step": 10983 }, { "epoch": 1.71, "learning_rate": 6.091223611183153e-06, "logits/chosen": -2.740265369415283, "logits/rejected": -2.3640670776367188, "logps/chosen": -347.51904296875, "logps/rejected": -500.5531921386719, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.6959939002990723, "rewards/margins": 8.437975883483887, "rewards/rejected": -11.1339693069458, "step": 10984 }, { "epoch": 1.71, "learning_rate": 6.0904901706520046e-06, "logits/chosen": -3.0776422023773193, "logits/rejected": -2.8335680961608887, "logps/chosen": -194.55087280273438, "logps/rejected": -169.60414123535156, "loss": 1.8866, "rewards/accuracies": 0.5, "rewards/chosen": -5.974445819854736, "rewards/margins": 3.0464999675750732, "rewards/rejected": -9.02094554901123, "step": 10985 }, { "epoch": 1.71, "learning_rate": 6.0897567301208564e-06, "logits/chosen": -2.441405773162842, "logits/rejected": -3.159466505050659, "logps/chosen": -90.948486328125, "logps/rejected": -267.29425048828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.222266674041748, "rewards/margins": 7.337853908538818, "rewards/rejected": -10.560120582580566, "step": 10986 }, { "epoch": 1.71, "learning_rate": 6.089023289589709e-06, "logits/chosen": -2.7002925872802734, "logits/rejected": -2.070507049560547, "logps/chosen": -827.2750244140625, "logps/rejected": -562.0834350585938, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -1.2335480451583862, "rewards/margins": 10.856954574584961, "rewards/rejected": -12.090502738952637, "step": 10987 }, { "epoch": 1.71, "learning_rate": 6.088289849058562e-06, "logits/chosen": -2.1558613777160645, "logits/rejected": -2.8178346157073975, "logps/chosen": -63.06596374511719, "logps/rejected": -280.20843505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.598757266998291, "rewards/margins": 10.118350982666016, "rewards/rejected": -14.717107772827148, "step": 10988 }, { "epoch": 1.71, "learning_rate": 6.087556408527414e-06, "logits/chosen": -2.6092731952667236, "logits/rejected": -2.8615550994873047, "logps/chosen": -184.63320922851562, "logps/rejected": -416.6905517578125, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -3.2499165534973145, "rewards/margins": 7.849645614624023, "rewards/rejected": -11.099562644958496, "step": 10989 }, { "epoch": 1.71, "learning_rate": 6.086822967996266e-06, "logits/chosen": -2.2008306980133057, "logits/rejected": -2.9343948364257812, "logps/chosen": -258.07513427734375, "logps/rejected": -471.13214111328125, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -4.4764404296875, "rewards/margins": 3.8598859310150146, "rewards/rejected": -8.336326599121094, "step": 10990 }, { "epoch": 1.71, "learning_rate": 6.0860895274651175e-06, "logits/chosen": -2.2882611751556396, "logits/rejected": -2.9100351333618164, "logps/chosen": -97.78244018554688, "logps/rejected": -364.0362243652344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8416824340820312, "rewards/margins": 11.165397644042969, "rewards/rejected": -15.007080078125, "step": 10991 }, { "epoch": 1.71, "learning_rate": 6.08535608693397e-06, "logits/chosen": -1.4716545343399048, "logits/rejected": -2.9898338317871094, "logps/chosen": -165.0004119873047, "logps/rejected": -441.67645263671875, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -5.502662658691406, "rewards/margins": 7.540621757507324, "rewards/rejected": -13.043285369873047, "step": 10992 }, { "epoch": 1.71, "learning_rate": 6.084622646402822e-06, "logits/chosen": -2.939716339111328, "logits/rejected": -2.304950475692749, "logps/chosen": -324.18408203125, "logps/rejected": -280.74334716796875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": 0.28389739990234375, "rewards/margins": 8.298315048217773, "rewards/rejected": -8.014416694641113, "step": 10993 }, { "epoch": 1.71, "learning_rate": 6.083889205871674e-06, "logits/chosen": -2.7123262882232666, "logits/rejected": -2.8821489810943604, "logps/chosen": -229.5664825439453, "logps/rejected": -347.29266357421875, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -3.6650171279907227, "rewards/margins": 9.727083206176758, "rewards/rejected": -13.39210033416748, "step": 10994 }, { "epoch": 1.71, "learning_rate": 6.083155765340526e-06, "logits/chosen": -3.033442497253418, "logits/rejected": -1.9028918743133545, "logps/chosen": -480.61041259765625, "logps/rejected": -240.96192932128906, "loss": 0.2194, "rewards/accuracies": 1.0, "rewards/chosen": -5.970436096191406, "rewards/margins": 3.4292969703674316, "rewards/rejected": -9.39973258972168, "step": 10995 }, { "epoch": 1.71, "learning_rate": 6.082422324809378e-06, "logits/chosen": -3.0979952812194824, "logits/rejected": -2.7069544792175293, "logps/chosen": -176.85678100585938, "logps/rejected": -266.57666015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9470772743225098, "rewards/margins": 8.661527633666992, "rewards/rejected": -12.608604431152344, "step": 10996 }, { "epoch": 1.71, "learning_rate": 6.0816888842782305e-06, "logits/chosen": -3.2350709438323975, "logits/rejected": -2.434713363647461, "logps/chosen": -252.18638610839844, "logps/rejected": -91.19630432128906, "loss": 1.8157, "rewards/accuracies": 0.5, "rewards/chosen": -4.066442966461182, "rewards/margins": 0.9467710256576538, "rewards/rejected": -5.013214111328125, "step": 10997 }, { "epoch": 1.71, "learning_rate": 6.080955443747082e-06, "logits/chosen": -1.6811561584472656, "logits/rejected": -2.772540330886841, "logps/chosen": -102.10667419433594, "logps/rejected": -201.61367797851562, "loss": 0.611, "rewards/accuracies": 0.5, "rewards/chosen": -7.154799461364746, "rewards/margins": 2.361818552017212, "rewards/rejected": -9.516618728637695, "step": 10998 }, { "epoch": 1.71, "learning_rate": 6.080222003215934e-06, "logits/chosen": -2.143587112426758, "logits/rejected": -2.9969964027404785, "logps/chosen": -113.82002258300781, "logps/rejected": -308.75543212890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.4110565185546875, "rewards/margins": 7.82670783996582, "rewards/rejected": -10.237764358520508, "step": 10999 }, { "epoch": 1.71, "learning_rate": 6.079488562684786e-06, "logits/chosen": -2.9903504848480225, "logits/rejected": -3.0270187854766846, "logps/chosen": -895.8714599609375, "logps/rejected": -777.458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4804810285568237, "rewards/margins": 10.88036823272705, "rewards/rejected": -12.360849380493164, "step": 11000 }, { "epoch": 1.71, "learning_rate": 6.078755122153639e-06, "logits/chosen": -2.86116623878479, "logits/rejected": -2.757472038269043, "logps/chosen": -196.28704833984375, "logps/rejected": -284.55059814453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.253398895263672, "rewards/margins": 7.359065532684326, "rewards/rejected": -12.612464904785156, "step": 11001 }, { "epoch": 1.71, "learning_rate": 6.078021681622491e-06, "logits/chosen": -3.071425437927246, "logits/rejected": -2.537919044494629, "logps/chosen": -224.63182067871094, "logps/rejected": -244.51065063476562, "loss": 0.1737, "rewards/accuracies": 1.0, "rewards/chosen": -4.8491997718811035, "rewards/margins": 5.194114685058594, "rewards/rejected": -10.043313980102539, "step": 11002 }, { "epoch": 1.71, "learning_rate": 6.077288241091343e-06, "logits/chosen": -2.7588677406311035, "logits/rejected": -2.5227200984954834, "logps/chosen": -289.0, "logps/rejected": -351.6270751953125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -4.563124179840088, "rewards/margins": 6.22622537612915, "rewards/rejected": -10.789349555969238, "step": 11003 }, { "epoch": 1.71, "learning_rate": 6.076554800560195e-06, "logits/chosen": -2.6362619400024414, "logits/rejected": -2.663670539855957, "logps/chosen": -63.75615692138672, "logps/rejected": -205.057373046875, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": -4.43283224105835, "rewards/margins": 4.293851852416992, "rewards/rejected": -8.7266845703125, "step": 11004 }, { "epoch": 1.71, "learning_rate": 6.075821360029047e-06, "logits/chosen": -2.1573641300201416, "logits/rejected": -3.047743082046509, "logps/chosen": -134.52005004882812, "logps/rejected": -237.604736328125, "loss": 3.0138, "rewards/accuracies": 0.5, "rewards/chosen": -7.374024868011475, "rewards/margins": -0.2068946361541748, "rewards/rejected": -7.167130470275879, "step": 11005 }, { "epoch": 1.71, "learning_rate": 6.0750879194979e-06, "logits/chosen": -1.1919673681259155, "logits/rejected": -2.7394230365753174, "logps/chosen": -227.40887451171875, "logps/rejected": -582.5550537109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.146571159362793, "rewards/margins": 12.06856918334961, "rewards/rejected": -15.215139389038086, "step": 11006 }, { "epoch": 1.71, "learning_rate": 6.074354478966752e-06, "logits/chosen": -3.118044853210449, "logits/rejected": -3.321784019470215, "logps/chosen": -681.674072265625, "logps/rejected": -680.3802490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.628808975219727, "rewards/margins": 9.823156356811523, "rewards/rejected": -14.45196533203125, "step": 11007 }, { "epoch": 1.71, "learning_rate": 6.073621038435604e-06, "logits/chosen": -1.9905617237091064, "logits/rejected": -2.9562768936157227, "logps/chosen": -223.51776123046875, "logps/rejected": -308.828125, "loss": 0.0735, "rewards/accuracies": 1.0, "rewards/chosen": -5.102134704589844, "rewards/margins": 3.367496967315674, "rewards/rejected": -8.46963119506836, "step": 11008 }, { "epoch": 1.71, "learning_rate": 6.0728875979044556e-06, "logits/chosen": -2.9425947666168213, "logits/rejected": -2.0563268661499023, "logps/chosen": -523.6484375, "logps/rejected": -188.64688110351562, "loss": 1.0172, "rewards/accuracies": 0.5, "rewards/chosen": -4.018721103668213, "rewards/margins": 0.14217984676361084, "rewards/rejected": -4.160901069641113, "step": 11009 }, { "epoch": 1.71, "learning_rate": 6.072154157373308e-06, "logits/chosen": -2.7797696590423584, "logits/rejected": -1.3547170162200928, "logps/chosen": -501.06390380859375, "logps/rejected": -354.6414489746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6383651494979858, "rewards/margins": 11.17619514465332, "rewards/rejected": -12.814560890197754, "step": 11010 }, { "epoch": 1.71, "learning_rate": 6.07142071684216e-06, "logits/chosen": -1.357801079750061, "logits/rejected": -2.886704921722412, "logps/chosen": -241.7644805908203, "logps/rejected": -339.73846435546875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.674630641937256, "rewards/margins": 6.799811363220215, "rewards/rejected": -9.474441528320312, "step": 11011 }, { "epoch": 1.71, "learning_rate": 6.070687276311012e-06, "logits/chosen": -2.314059019088745, "logits/rejected": -2.634962558746338, "logps/chosen": -75.13549041748047, "logps/rejected": -357.6670837402344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.8476004600524902, "rewards/margins": 10.567913055419922, "rewards/rejected": -14.41551399230957, "step": 11012 }, { "epoch": 1.71, "learning_rate": 6.069953835779864e-06, "logits/chosen": -2.5676705837249756, "logits/rejected": -2.2980947494506836, "logps/chosen": -397.77398681640625, "logps/rejected": -467.544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4193902015686035, "rewards/margins": 11.803407669067383, "rewards/rejected": -15.222797393798828, "step": 11013 }, { "epoch": 1.71, "learning_rate": 6.069220395248716e-06, "logits/chosen": -2.7860538959503174, "logits/rejected": -2.9397072792053223, "logps/chosen": -277.89642333984375, "logps/rejected": -384.7200927734375, "loss": 0.1795, "rewards/accuracies": 1.0, "rewards/chosen": -7.144339561462402, "rewards/margins": 2.207463264465332, "rewards/rejected": -9.351802825927734, "step": 11014 }, { "epoch": 1.71, "learning_rate": 6.0684869547175685e-06, "logits/chosen": -2.6748976707458496, "logits/rejected": -2.9139742851257324, "logps/chosen": -412.489013671875, "logps/rejected": -553.4424438476562, "loss": 1.1608, "rewards/accuracies": 0.5, "rewards/chosen": -6.740492343902588, "rewards/margins": 4.050197124481201, "rewards/rejected": -10.790689468383789, "step": 11015 }, { "epoch": 1.71, "learning_rate": 6.06775351418642e-06, "logits/chosen": -2.6896841526031494, "logits/rejected": -2.919295072555542, "logps/chosen": -481.3382568359375, "logps/rejected": -534.4353637695312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.371420383453369, "rewards/margins": 10.698474884033203, "rewards/rejected": -14.06989574432373, "step": 11016 }, { "epoch": 1.71, "learning_rate": 6.067020073655272e-06, "logits/chosen": -2.2550408840179443, "logits/rejected": -3.137479782104492, "logps/chosen": -109.54999542236328, "logps/rejected": -290.57373046875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -3.90505313873291, "rewards/margins": 7.039017200469971, "rewards/rejected": -10.944070816040039, "step": 11017 }, { "epoch": 1.71, "learning_rate": 6.066286633124124e-06, "logits/chosen": -2.7888565063476562, "logits/rejected": -2.094611883163452, "logps/chosen": -295.91326904296875, "logps/rejected": -247.16827392578125, "loss": 2.8624, "rewards/accuracies": 0.5, "rewards/chosen": -8.99811840057373, "rewards/margins": 1.70759916305542, "rewards/rejected": -10.705717086791992, "step": 11018 }, { "epoch": 1.71, "learning_rate": 6.065553192592977e-06, "logits/chosen": -2.2510769367218018, "logits/rejected": -2.8547110557556152, "logps/chosen": -309.3919982910156, "logps/rejected": -473.8000793457031, "loss": 0.3685, "rewards/accuracies": 0.5, "rewards/chosen": -6.635003089904785, "rewards/margins": 5.192295074462891, "rewards/rejected": -11.827298164367676, "step": 11019 }, { "epoch": 1.71, "learning_rate": 6.064819752061829e-06, "logits/chosen": -2.5211620330810547, "logits/rejected": -2.960104465484619, "logps/chosen": -80.70964050292969, "logps/rejected": -358.7444763183594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.617621421813965, "rewards/margins": 9.16087532043457, "rewards/rejected": -13.778497695922852, "step": 11020 }, { "epoch": 1.71, "learning_rate": 6.0640863115306815e-06, "logits/chosen": -1.7898379564285278, "logits/rejected": -2.870082378387451, "logps/chosen": -121.18218994140625, "logps/rejected": -310.51177978515625, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": -4.067230224609375, "rewards/margins": 4.321610450744629, "rewards/rejected": -8.388840675354004, "step": 11021 }, { "epoch": 1.71, "learning_rate": 6.063352870999533e-06, "logits/chosen": -2.629119396209717, "logits/rejected": -1.3892700672149658, "logps/chosen": -271.7126770019531, "logps/rejected": -185.63320922851562, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -1.1464561223983765, "rewards/margins": 9.688740730285645, "rewards/rejected": -10.835196495056152, "step": 11022 }, { "epoch": 1.71, "learning_rate": 6.062619430468385e-06, "logits/chosen": -2.952700138092041, "logits/rejected": -3.1801586151123047, "logps/chosen": -107.50909423828125, "logps/rejected": -186.93936157226562, "loss": 0.6568, "rewards/accuracies": 0.5, "rewards/chosen": -6.6187424659729, "rewards/margins": 1.9979040622711182, "rewards/rejected": -8.616646766662598, "step": 11023 }, { "epoch": 1.71, "learning_rate": 6.061885989937238e-06, "logits/chosen": -2.3050577640533447, "logits/rejected": -1.7445427179336548, "logps/chosen": -473.86968994140625, "logps/rejected": -436.572265625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -6.731772422790527, "rewards/margins": 9.103618621826172, "rewards/rejected": -15.835391998291016, "step": 11024 }, { "epoch": 1.71, "learning_rate": 6.06115254940609e-06, "logits/chosen": -2.900545358657837, "logits/rejected": -2.3260653018951416, "logps/chosen": -253.90951538085938, "logps/rejected": -265.3565673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.17392265796661377, "rewards/margins": 13.159675598144531, "rewards/rejected": -13.333597183227539, "step": 11025 }, { "epoch": 1.71, "learning_rate": 6.060419108874942e-06, "logits/chosen": -2.6813466548919678, "logits/rejected": -2.8047001361846924, "logps/chosen": -105.1226806640625, "logps/rejected": -241.5647735595703, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.454304814338684, "rewards/margins": 8.489727020263672, "rewards/rejected": -9.944032669067383, "step": 11026 }, { "epoch": 1.71, "learning_rate": 6.059685668343794e-06, "logits/chosen": -2.6959500312805176, "logits/rejected": -3.0176095962524414, "logps/chosen": -92.98646545410156, "logps/rejected": -256.27587890625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -4.507425308227539, "rewards/margins": 5.451505661010742, "rewards/rejected": -9.958930969238281, "step": 11027 }, { "epoch": 1.72, "learning_rate": 6.058952227812646e-06, "logits/chosen": -2.5665016174316406, "logits/rejected": -2.9941909313201904, "logps/chosen": -75.1723403930664, "logps/rejected": -290.14813232421875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -4.6684699058532715, "rewards/margins": 7.83914852142334, "rewards/rejected": -12.507617950439453, "step": 11028 }, { "epoch": 1.72, "learning_rate": 6.058218787281498e-06, "logits/chosen": -2.9954822063446045, "logits/rejected": -3.2244908809661865, "logps/chosen": -187.13699340820312, "logps/rejected": -218.97152709960938, "loss": 0.2553, "rewards/accuracies": 1.0, "rewards/chosen": -4.882760047912598, "rewards/margins": 3.459993839263916, "rewards/rejected": -8.342754364013672, "step": 11029 }, { "epoch": 1.72, "learning_rate": 6.05748534675035e-06, "logits/chosen": -2.756992816925049, "logits/rejected": -3.272252082824707, "logps/chosen": -126.48571014404297, "logps/rejected": -226.7490234375, "loss": 0.9501, "rewards/accuracies": 0.5, "rewards/chosen": -6.350297927856445, "rewards/margins": 2.9970831871032715, "rewards/rejected": -9.347380638122559, "step": 11030 }, { "epoch": 1.72, "learning_rate": 6.056751906219202e-06, "logits/chosen": -2.0829391479492188, "logits/rejected": -2.9169328212738037, "logps/chosen": -289.83197021484375, "logps/rejected": -677.1471557617188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.493093013763428, "rewards/margins": 10.89921760559082, "rewards/rejected": -15.39231014251709, "step": 11031 }, { "epoch": 1.72, "learning_rate": 6.056018465688055e-06, "logits/chosen": -2.784684181213379, "logits/rejected": -1.6622602939605713, "logps/chosen": -229.5605926513672, "logps/rejected": -236.55355834960938, "loss": 0.0274, "rewards/accuracies": 1.0, "rewards/chosen": -4.777872562408447, "rewards/margins": 6.271842956542969, "rewards/rejected": -11.049715042114258, "step": 11032 }, { "epoch": 1.72, "learning_rate": 6.0552850251569066e-06, "logits/chosen": -2.034761905670166, "logits/rejected": -2.8731653690338135, "logps/chosen": -110.1236343383789, "logps/rejected": -196.84393310546875, "loss": 0.9574, "rewards/accuracies": 0.5, "rewards/chosen": -6.311939716339111, "rewards/margins": 1.8612759113311768, "rewards/rejected": -8.173215866088867, "step": 11033 }, { "epoch": 1.72, "learning_rate": 6.0545515846257584e-06, "logits/chosen": -2.9201605319976807, "logits/rejected": -2.6090993881225586, "logps/chosen": -105.0107421875, "logps/rejected": -188.85275268554688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.71213436126709, "rewards/margins": 9.02237319946289, "rewards/rejected": -11.73450756072998, "step": 11034 }, { "epoch": 1.72, "learning_rate": 6.05381814409461e-06, "logits/chosen": -3.058971643447876, "logits/rejected": -3.3112854957580566, "logps/chosen": -589.5441284179688, "logps/rejected": -592.8323974609375, "loss": 0.0886, "rewards/accuracies": 1.0, "rewards/chosen": -6.994091033935547, "rewards/margins": 4.743273735046387, "rewards/rejected": -11.73736572265625, "step": 11035 }, { "epoch": 1.72, "learning_rate": 6.053084703563462e-06, "logits/chosen": -2.9339025020599365, "logits/rejected": -2.75140643119812, "logps/chosen": -448.6830749511719, "logps/rejected": -520.1196899414062, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -5.3936076164245605, "rewards/margins": 4.5786237716674805, "rewards/rejected": -9.9722318649292, "step": 11036 }, { "epoch": 1.72, "learning_rate": 6.052351263032315e-06, "logits/chosen": -2.0612857341766357, "logits/rejected": -2.7942028045654297, "logps/chosen": -305.69403076171875, "logps/rejected": -420.5667724609375, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -4.5683207511901855, "rewards/margins": 5.512239456176758, "rewards/rejected": -10.080560684204102, "step": 11037 }, { "epoch": 1.72, "learning_rate": 6.051617822501168e-06, "logits/chosen": -3.124359369277954, "logits/rejected": -2.6765286922454834, "logps/chosen": -339.5074462890625, "logps/rejected": -334.7921142578125, "loss": 3.5335, "rewards/accuracies": 0.0, "rewards/chosen": -9.165023803710938, "rewards/margins": -3.5007898807525635, "rewards/rejected": -5.664233207702637, "step": 11038 }, { "epoch": 1.72, "learning_rate": 6.0508843819700195e-06, "logits/chosen": -2.7262234687805176, "logits/rejected": -3.1592226028442383, "logps/chosen": -133.136962890625, "logps/rejected": -227.6234130859375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -4.774268627166748, "rewards/margins": 5.347356796264648, "rewards/rejected": -10.121624946594238, "step": 11039 }, { "epoch": 1.72, "learning_rate": 6.050150941438871e-06, "logits/chosen": -3.2992703914642334, "logits/rejected": -3.441547155380249, "logps/chosen": -124.7848892211914, "logps/rejected": -145.62362670898438, "loss": 3.3637, "rewards/accuracies": 0.5, "rewards/chosen": -6.804010391235352, "rewards/margins": 0.5102229118347168, "rewards/rejected": -7.31423282623291, "step": 11040 }, { "epoch": 1.72, "learning_rate": 6.049417500907724e-06, "logits/chosen": -2.6828925609588623, "logits/rejected": -2.270970582962036, "logps/chosen": -473.9833068847656, "logps/rejected": -404.8619689941406, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.556177139282227, "rewards/margins": 6.4606733322143555, "rewards/rejected": -12.016850471496582, "step": 11041 }, { "epoch": 1.72, "learning_rate": 6.048684060376576e-06, "logits/chosen": -3.2172961235046387, "logits/rejected": -3.2865731716156006, "logps/chosen": -650.9390869140625, "logps/rejected": -604.385498046875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.046983480453491, "rewards/margins": 7.673044204711914, "rewards/rejected": -10.720027923583984, "step": 11042 }, { "epoch": 1.72, "learning_rate": 6.047950619845428e-06, "logits/chosen": -2.3613247871398926, "logits/rejected": -3.1765153408050537, "logps/chosen": -214.82034301757812, "logps/rejected": -329.3315124511719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.2126641273498535, "rewards/margins": 7.914816856384277, "rewards/rejected": -10.127481460571289, "step": 11043 }, { "epoch": 1.72, "learning_rate": 6.04721717931428e-06, "logits/chosen": -2.9483282566070557, "logits/rejected": -2.586517572402954, "logps/chosen": -477.9801330566406, "logps/rejected": -355.8581237792969, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -3.345839500427246, "rewards/margins": 6.050158500671387, "rewards/rejected": -9.395998001098633, "step": 11044 }, { "epoch": 1.72, "learning_rate": 6.046483738783132e-06, "logits/chosen": -2.8291471004486084, "logits/rejected": -2.907217264175415, "logps/chosen": -310.17083740234375, "logps/rejected": -290.6748046875, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -5.1364946365356445, "rewards/margins": 5.846766471862793, "rewards/rejected": -10.983261108398438, "step": 11045 }, { "epoch": 1.72, "learning_rate": 6.045750298251984e-06, "logits/chosen": -2.846914529800415, "logits/rejected": -3.0019381046295166, "logps/chosen": -161.38307189941406, "logps/rejected": -273.8835754394531, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.0401511192321777, "rewards/margins": 6.257122039794922, "rewards/rejected": -8.297273635864258, "step": 11046 }, { "epoch": 1.72, "learning_rate": 6.045016857720836e-06, "logits/chosen": -1.1609553098678589, "logits/rejected": -3.0114083290100098, "logps/chosen": -131.28875732421875, "logps/rejected": -497.3675537109375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.009594440460205, "rewards/margins": 5.685262680053711, "rewards/rejected": -9.694856643676758, "step": 11047 }, { "epoch": 1.72, "learning_rate": 6.044283417189688e-06, "logits/chosen": -2.4433491230010986, "logits/rejected": -2.860775947570801, "logps/chosen": -192.86956787109375, "logps/rejected": -288.601806640625, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": -5.899890422821045, "rewards/margins": 5.050441741943359, "rewards/rejected": -10.950331687927246, "step": 11048 }, { "epoch": 1.72, "learning_rate": 6.04354997665854e-06, "logits/chosen": -2.087843418121338, "logits/rejected": -2.9047367572784424, "logps/chosen": -306.1454772949219, "logps/rejected": -461.1677551269531, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -3.9740543365478516, "rewards/margins": 6.220118522644043, "rewards/rejected": -10.194172859191895, "step": 11049 }, { "epoch": 1.72, "learning_rate": 6.042816536127393e-06, "logits/chosen": -3.06406307220459, "logits/rejected": -2.6676087379455566, "logps/chosen": -181.2762908935547, "logps/rejected": -298.3103942871094, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.9178147315979004, "rewards/margins": 8.019217491149902, "rewards/rejected": -9.937032699584961, "step": 11050 }, { "epoch": 1.72, "learning_rate": 6.042083095596245e-06, "logits/chosen": -3.2172393798828125, "logits/rejected": -3.0331950187683105, "logps/chosen": -141.5436553955078, "logps/rejected": -214.7881622314453, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -6.618645191192627, "rewards/margins": 6.909684181213379, "rewards/rejected": -13.528329849243164, "step": 11051 }, { "epoch": 1.72, "learning_rate": 6.0413496550650965e-06, "logits/chosen": -2.3552064895629883, "logits/rejected": -2.849792718887329, "logps/chosen": -88.51529693603516, "logps/rejected": -223.08358764648438, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -4.052701473236084, "rewards/margins": 5.263128280639648, "rewards/rejected": -9.31583023071289, "step": 11052 }, { "epoch": 1.72, "learning_rate": 6.040616214533948e-06, "logits/chosen": -2.291158676147461, "logits/rejected": -2.930544853210449, "logps/chosen": -76.20259094238281, "logps/rejected": -345.13446044921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.742374897003174, "rewards/margins": 7.9312591552734375, "rewards/rejected": -11.67363452911377, "step": 11053 }, { "epoch": 1.72, "learning_rate": 6.039882774002801e-06, "logits/chosen": -2.674935817718506, "logits/rejected": -2.760920524597168, "logps/chosen": -92.58282470703125, "logps/rejected": -178.09681701660156, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -5.903404235839844, "rewards/margins": 5.284887313842773, "rewards/rejected": -11.188291549682617, "step": 11054 }, { "epoch": 1.72, "learning_rate": 6.039149333471654e-06, "logits/chosen": -2.497481346130371, "logits/rejected": -3.0938849449157715, "logps/chosen": -214.05075073242188, "logps/rejected": -345.5284423828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.80021595954895, "rewards/margins": 8.998095512390137, "rewards/rejected": -12.798311233520508, "step": 11055 }, { "epoch": 1.72, "learning_rate": 6.038415892940506e-06, "logits/chosen": -2.570216178894043, "logits/rejected": -3.2736475467681885, "logps/chosen": -819.584716796875, "logps/rejected": -960.0350341796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.955388069152832, "rewards/margins": 11.216239929199219, "rewards/rejected": -14.17162799835205, "step": 11056 }, { "epoch": 1.72, "learning_rate": 6.0376824524093576e-06, "logits/chosen": -2.686415195465088, "logits/rejected": -2.412402391433716, "logps/chosen": -205.76710510253906, "logps/rejected": -292.9628601074219, "loss": 0.6581, "rewards/accuracies": 0.5, "rewards/chosen": -5.874136924743652, "rewards/margins": 3.475717306137085, "rewards/rejected": -9.349854469299316, "step": 11057 }, { "epoch": 1.72, "learning_rate": 6.0369490118782095e-06, "logits/chosen": -2.052917957305908, "logits/rejected": -2.7544639110565186, "logps/chosen": -95.65425109863281, "logps/rejected": -282.19036865234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.49342155456543, "rewards/margins": 7.970489025115967, "rewards/rejected": -12.463910102844238, "step": 11058 }, { "epoch": 1.72, "learning_rate": 6.036215571347062e-06, "logits/chosen": -2.626094102859497, "logits/rejected": -2.976792812347412, "logps/chosen": -533.458740234375, "logps/rejected": -649.8792724609375, "loss": 0.0755, "rewards/accuracies": 1.0, "rewards/chosen": -5.125879287719727, "rewards/margins": 5.6786651611328125, "rewards/rejected": -10.804544448852539, "step": 11059 }, { "epoch": 1.72, "learning_rate": 6.035482130815914e-06, "logits/chosen": -2.873351573944092, "logits/rejected": -2.971456289291382, "logps/chosen": -343.89422607421875, "logps/rejected": -400.274658203125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.237299919128418, "rewards/margins": 7.6207122802734375, "rewards/rejected": -11.858012199401855, "step": 11060 }, { "epoch": 1.72, "learning_rate": 6.034748690284766e-06, "logits/chosen": -3.0248560905456543, "logits/rejected": -2.79692006111145, "logps/chosen": -239.80380249023438, "logps/rejected": -280.39691162109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.300238847732544, "rewards/margins": 8.524837493896484, "rewards/rejected": -11.82507610321045, "step": 11061 }, { "epoch": 1.72, "learning_rate": 6.034015249753618e-06, "logits/chosen": -2.5113823413848877, "logits/rejected": -3.1087515354156494, "logps/chosen": -385.9491882324219, "logps/rejected": -477.60528564453125, "loss": 0.6152, "rewards/accuracies": 0.5, "rewards/chosen": -6.0524139404296875, "rewards/margins": 4.419058322906494, "rewards/rejected": -10.471471786499023, "step": 11062 }, { "epoch": 1.72, "learning_rate": 6.03328180922247e-06, "logits/chosen": -2.2826919555664062, "logits/rejected": -2.7718427181243896, "logps/chosen": -66.3563461303711, "logps/rejected": -285.2869873046875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -5.235550403594971, "rewards/margins": 6.068390846252441, "rewards/rejected": -11.30394172668457, "step": 11063 }, { "epoch": 1.72, "learning_rate": 6.032548368691322e-06, "logits/chosen": -1.4088891744613647, "logits/rejected": -2.703803777694702, "logps/chosen": -246.9154052734375, "logps/rejected": -276.8388977050781, "loss": 1.4307, "rewards/accuracies": 0.5, "rewards/chosen": -9.22337818145752, "rewards/margins": 0.3205559253692627, "rewards/rejected": -9.543933868408203, "step": 11064 }, { "epoch": 1.72, "learning_rate": 6.031814928160174e-06, "logits/chosen": -2.789339065551758, "logits/rejected": -3.2249653339385986, "logps/chosen": -451.39300537109375, "logps/rejected": -557.2985229492188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.187884330749512, "rewards/margins": 6.694183826446533, "rewards/rejected": -11.882068634033203, "step": 11065 }, { "epoch": 1.72, "learning_rate": 6.031081487629026e-06, "logits/chosen": -2.8490850925445557, "logits/rejected": -2.6485276222229004, "logps/chosen": -388.3542175292969, "logps/rejected": -448.92547607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.929098606109619, "rewards/margins": 9.491392135620117, "rewards/rejected": -13.420491218566895, "step": 11066 }, { "epoch": 1.72, "learning_rate": 6.030348047097878e-06, "logits/chosen": -2.788738489151001, "logits/rejected": -1.8212580680847168, "logps/chosen": -461.2448425292969, "logps/rejected": -258.5646057128906, "loss": 0.1681, "rewards/accuracies": 1.0, "rewards/chosen": -3.7513465881347656, "rewards/margins": 3.1218743324279785, "rewards/rejected": -6.873220920562744, "step": 11067 }, { "epoch": 1.72, "learning_rate": 6.029614606566731e-06, "logits/chosen": -2.885514736175537, "logits/rejected": -2.2500760555267334, "logps/chosen": -679.5521850585938, "logps/rejected": -566.7352905273438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.431918144226074, "rewards/margins": 7.959062576293945, "rewards/rejected": -12.39098072052002, "step": 11068 }, { "epoch": 1.72, "learning_rate": 6.028881166035583e-06, "logits/chosen": -1.7539323568344116, "logits/rejected": -2.9463231563568115, "logps/chosen": -260.17889404296875, "logps/rejected": -573.892578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9311928749084473, "rewards/margins": 8.282259941101074, "rewards/rejected": -12.213452339172363, "step": 11069 }, { "epoch": 1.72, "learning_rate": 6.0281477255044345e-06, "logits/chosen": -2.830843925476074, "logits/rejected": -2.910944700241089, "logps/chosen": -64.45809936523438, "logps/rejected": -318.21990966796875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.545627593994141, "rewards/margins": 6.402287483215332, "rewards/rejected": -10.947915077209473, "step": 11070 }, { "epoch": 1.72, "learning_rate": 6.027414284973287e-06, "logits/chosen": -2.481151819229126, "logits/rejected": -2.818161964416504, "logps/chosen": -75.95425415039062, "logps/rejected": -173.95684814453125, "loss": 0.3427, "rewards/accuracies": 1.0, "rewards/chosen": -6.14267635345459, "rewards/margins": 3.5865721702575684, "rewards/rejected": -9.729248046875, "step": 11071 }, { "epoch": 1.72, "learning_rate": 6.026680844442139e-06, "logits/chosen": -1.1754940748214722, "logits/rejected": -2.8925130367279053, "logps/chosen": -104.46057891845703, "logps/rejected": -419.30389404296875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.6934404373168945, "rewards/margins": 6.216952323913574, "rewards/rejected": -10.910392761230469, "step": 11072 }, { "epoch": 1.72, "learning_rate": 6.025947403910992e-06, "logits/chosen": -2.928506374359131, "logits/rejected": -2.8393542766571045, "logps/chosen": -251.70314025878906, "logps/rejected": -237.66439819335938, "loss": 1.6116, "rewards/accuracies": 0.5, "rewards/chosen": -5.472633361816406, "rewards/margins": 4.598689079284668, "rewards/rejected": -10.071322441101074, "step": 11073 }, { "epoch": 1.72, "learning_rate": 6.025213963379844e-06, "logits/chosen": -3.0643393993377686, "logits/rejected": -2.3117992877960205, "logps/chosen": -236.68023681640625, "logps/rejected": -185.12808227539062, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": -5.201918125152588, "rewards/margins": 5.591119766235352, "rewards/rejected": -10.793038368225098, "step": 11074 }, { "epoch": 1.72, "learning_rate": 6.024480522848696e-06, "logits/chosen": -2.7181456089019775, "logits/rejected": -3.051788806915283, "logps/chosen": -148.41720581054688, "logps/rejected": -461.44195556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.290231227874756, "rewards/margins": 11.733381271362305, "rewards/rejected": -16.02361297607422, "step": 11075 }, { "epoch": 1.72, "learning_rate": 6.0237470823175475e-06, "logits/chosen": -2.8198251724243164, "logits/rejected": -2.195862293243408, "logps/chosen": -286.83642578125, "logps/rejected": -293.9941711425781, "loss": 0.8717, "rewards/accuracies": 0.5, "rewards/chosen": -8.470369338989258, "rewards/margins": 0.5356054306030273, "rewards/rejected": -9.005973815917969, "step": 11076 }, { "epoch": 1.72, "learning_rate": 6.0230136417864e-06, "logits/chosen": -2.121995687484741, "logits/rejected": -2.842228651046753, "logps/chosen": -178.75096130371094, "logps/rejected": -455.58221435546875, "loss": 0.1012, "rewards/accuracies": 1.0, "rewards/chosen": -6.068861961364746, "rewards/margins": 2.6689932346343994, "rewards/rejected": -8.737854957580566, "step": 11077 }, { "epoch": 1.72, "learning_rate": 6.022280201255252e-06, "logits/chosen": -1.6392247676849365, "logits/rejected": -3.0541224479675293, "logps/chosen": -446.2394714355469, "logps/rejected": -640.0401611328125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -4.793337345123291, "rewards/margins": 8.656177520751953, "rewards/rejected": -13.449514389038086, "step": 11078 }, { "epoch": 1.72, "learning_rate": 6.021546760724104e-06, "logits/chosen": -2.2474746704101562, "logits/rejected": -2.803279399871826, "logps/chosen": -225.72653198242188, "logps/rejected": -434.18988037109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.0090179443359375, "rewards/margins": 8.799979209899902, "rewards/rejected": -13.808996200561523, "step": 11079 }, { "epoch": 1.72, "learning_rate": 6.020813320192956e-06, "logits/chosen": -2.472325563430786, "logits/rejected": -3.017634868621826, "logps/chosen": -138.46759033203125, "logps/rejected": -278.807373046875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.160041093826294, "rewards/margins": 6.749403953552246, "rewards/rejected": -9.909444808959961, "step": 11080 }, { "epoch": 1.72, "learning_rate": 6.020079879661809e-06, "logits/chosen": -2.3697357177734375, "logits/rejected": -2.842860221862793, "logps/chosen": -76.24637603759766, "logps/rejected": -299.09149169921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.039745807647705, "rewards/margins": 7.556301593780518, "rewards/rejected": -11.596047401428223, "step": 11081 }, { "epoch": 1.72, "learning_rate": 6.0193464391306605e-06, "logits/chosen": -3.10418438911438, "logits/rejected": -2.375548839569092, "logps/chosen": -187.2915496826172, "logps/rejected": -184.6563720703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.078233242034912, "rewards/margins": 8.287861824035645, "rewards/rejected": -10.366095542907715, "step": 11082 }, { "epoch": 1.72, "learning_rate": 6.018612998599512e-06, "logits/chosen": -2.7443714141845703, "logits/rejected": -1.6602308750152588, "logps/chosen": -187.86221313476562, "logps/rejected": -235.79368591308594, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.9530982971191406, "rewards/margins": 5.226151466369629, "rewards/rejected": -8.17924976348877, "step": 11083 }, { "epoch": 1.72, "learning_rate": 6.017879558068364e-06, "logits/chosen": -1.9236643314361572, "logits/rejected": -2.860133647918701, "logps/chosen": -258.7093200683594, "logps/rejected": -383.90252685546875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -3.7375893592834473, "rewards/margins": 5.901499271392822, "rewards/rejected": -9.63908863067627, "step": 11084 }, { "epoch": 1.72, "learning_rate": 6.017146117537216e-06, "logits/chosen": -2.360386371612549, "logits/rejected": -2.6126890182495117, "logps/chosen": -177.8185272216797, "logps/rejected": -258.6489562988281, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -1.0048004388809204, "rewards/margins": 9.27955436706543, "rewards/rejected": -10.284355163574219, "step": 11085 }, { "epoch": 1.72, "learning_rate": 6.016412677006069e-06, "logits/chosen": -1.8135361671447754, "logits/rejected": -2.751574754714966, "logps/chosen": -234.15853881835938, "logps/rejected": -453.72479248046875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.515538215637207, "rewards/margins": 6.375108242034912, "rewards/rejected": -9.890645980834961, "step": 11086 }, { "epoch": 1.72, "learning_rate": 6.015679236474921e-06, "logits/chosen": -2.9685893058776855, "logits/rejected": -2.9751851558685303, "logps/chosen": -201.03738403320312, "logps/rejected": -166.92324829101562, "loss": 0.1484, "rewards/accuracies": 1.0, "rewards/chosen": -4.117333889007568, "rewards/margins": 3.5694024562835693, "rewards/rejected": -7.686736106872559, "step": 11087 }, { "epoch": 1.72, "learning_rate": 6.0149457959437734e-06, "logits/chosen": -2.8804996013641357, "logits/rejected": -2.6783759593963623, "logps/chosen": -319.843017578125, "logps/rejected": -320.52716064453125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -3.3509979248046875, "rewards/margins": 6.095006942749023, "rewards/rejected": -9.446004867553711, "step": 11088 }, { "epoch": 1.72, "learning_rate": 6.014212355412625e-06, "logits/chosen": -2.6305737495422363, "logits/rejected": -3.047450065612793, "logps/chosen": -198.20730590820312, "logps/rejected": -339.0256652832031, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.1623971462249756, "rewards/margins": 7.773064613342285, "rewards/rejected": -10.93546199798584, "step": 11089 }, { "epoch": 1.72, "learning_rate": 6.013478914881478e-06, "logits/chosen": -2.718468189239502, "logits/rejected": -3.1819562911987305, "logps/chosen": -112.50634002685547, "logps/rejected": -280.6437683105469, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.7518749237060547, "rewards/margins": 6.334098815917969, "rewards/rejected": -10.085973739624023, "step": 11090 }, { "epoch": 1.72, "learning_rate": 6.01274547435033e-06, "logits/chosen": -2.978024959564209, "logits/rejected": -2.0141820907592773, "logps/chosen": -430.8778076171875, "logps/rejected": -267.7489318847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.133664131164551, "rewards/margins": 11.111384391784668, "rewards/rejected": -13.245048522949219, "step": 11091 }, { "epoch": 1.73, "learning_rate": 6.012012033819182e-06, "logits/chosen": -3.1118531227111816, "logits/rejected": -2.419382333755493, "logps/chosen": -593.1339111328125, "logps/rejected": -417.240478515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.286128282546997, "rewards/margins": 10.256458282470703, "rewards/rejected": -12.542586326599121, "step": 11092 }, { "epoch": 1.73, "learning_rate": 6.011278593288034e-06, "logits/chosen": -2.3702445030212402, "logits/rejected": -2.551020622253418, "logps/chosen": -89.3707275390625, "logps/rejected": -192.59469604492188, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -3.762721300125122, "rewards/margins": 5.684502601623535, "rewards/rejected": -9.447223663330078, "step": 11093 }, { "epoch": 1.73, "learning_rate": 6.0105451527568855e-06, "logits/chosen": -2.8034536838531494, "logits/rejected": -2.1434848308563232, "logps/chosen": -626.4139404296875, "logps/rejected": -309.77294921875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -6.384203910827637, "rewards/margins": 5.375129699707031, "rewards/rejected": -11.759334564208984, "step": 11094 }, { "epoch": 1.73, "learning_rate": 6.009811712225738e-06, "logits/chosen": -1.7895058393478394, "logits/rejected": -2.9517087936401367, "logps/chosen": -83.85519409179688, "logps/rejected": -439.9471435546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0485501289367676, "rewards/margins": 8.617420196533203, "rewards/rejected": -10.665969848632812, "step": 11095 }, { "epoch": 1.73, "learning_rate": 6.00907827169459e-06, "logits/chosen": -2.1839406490325928, "logits/rejected": -2.9185588359832764, "logps/chosen": -256.9875793457031, "logps/rejected": -351.63079833984375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -1.494349718093872, "rewards/margins": 7.5730977058410645, "rewards/rejected": -9.067447662353516, "step": 11096 }, { "epoch": 1.73, "learning_rate": 6.008344831163442e-06, "logits/chosen": -1.584976315498352, "logits/rejected": -2.6716582775115967, "logps/chosen": -98.54777526855469, "logps/rejected": -569.7255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.613921165466309, "rewards/margins": 14.527791976928711, "rewards/rejected": -19.141712188720703, "step": 11097 }, { "epoch": 1.73, "learning_rate": 6.007611390632294e-06, "logits/chosen": -2.565204381942749, "logits/rejected": -3.0724337100982666, "logps/chosen": -163.7349853515625, "logps/rejected": -319.2828369140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.2384743690490723, "rewards/margins": 7.147948265075684, "rewards/rejected": -10.386423110961914, "step": 11098 }, { "epoch": 1.73, "learning_rate": 6.006877950101147e-06, "logits/chosen": -3.1579701900482178, "logits/rejected": -2.4591896533966064, "logps/chosen": -232.6929168701172, "logps/rejected": -190.8722686767578, "loss": 0.4249, "rewards/accuracies": 1.0, "rewards/chosen": -6.8206048011779785, "rewards/margins": 0.7383713722229004, "rewards/rejected": -7.558976173400879, "step": 11099 }, { "epoch": 1.73, "learning_rate": 6.0061445095699985e-06, "logits/chosen": -2.7667887210845947, "logits/rejected": -2.192504405975342, "logps/chosen": -299.3587951660156, "logps/rejected": -248.1942138671875, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": -3.264556884765625, "rewards/margins": 3.7291359901428223, "rewards/rejected": -6.993692874908447, "step": 11100 }, { "epoch": 1.73, "learning_rate": 6.00541106903885e-06, "logits/chosen": -2.4712107181549072, "logits/rejected": -2.9658148288726807, "logps/chosen": -74.51620483398438, "logps/rejected": -258.5963134765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.721158981323242, "rewards/margins": 6.373912811279297, "rewards/rejected": -11.095071792602539, "step": 11101 }, { "epoch": 1.73, "learning_rate": 6.004677628507702e-06, "logits/chosen": -2.867600440979004, "logits/rejected": -2.3872663974761963, "logps/chosen": -868.1056518554688, "logps/rejected": -652.3807983398438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 1.9335753917694092, "rewards/margins": 13.459898948669434, "rewards/rejected": -11.526323318481445, "step": 11102 }, { "epoch": 1.73, "learning_rate": 6.003944187976554e-06, "logits/chosen": -2.725797653198242, "logits/rejected": -2.5486061573028564, "logps/chosen": -366.0666198730469, "logps/rejected": -266.5346374511719, "loss": 1.0018, "rewards/accuracies": 0.5, "rewards/chosen": -5.620044231414795, "rewards/margins": 4.6439409255981445, "rewards/rejected": -10.263984680175781, "step": 11103 }, { "epoch": 1.73, "learning_rate": 6.003210747445407e-06, "logits/chosen": -2.9131369590759277, "logits/rejected": -2.546241044998169, "logps/chosen": -200.36485290527344, "logps/rejected": -130.57733154296875, "loss": 1.8404, "rewards/accuracies": 0.5, "rewards/chosen": -6.5830512046813965, "rewards/margins": 0.420971155166626, "rewards/rejected": -7.004022598266602, "step": 11104 }, { "epoch": 1.73, "learning_rate": 6.00247730691426e-06, "logits/chosen": -2.826702356338501, "logits/rejected": -2.9477362632751465, "logps/chosen": -128.3198699951172, "logps/rejected": -368.32568359375, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -5.40340518951416, "rewards/margins": 6.65034294128418, "rewards/rejected": -12.05374813079834, "step": 11105 }, { "epoch": 1.73, "learning_rate": 6.0017438663831115e-06, "logits/chosen": -3.0013532638549805, "logits/rejected": -1.9886093139648438, "logps/chosen": -489.0272521972656, "logps/rejected": -323.9106750488281, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": -2.6487016677856445, "rewards/margins": 4.268065929412842, "rewards/rejected": -6.916767597198486, "step": 11106 }, { "epoch": 1.73, "learning_rate": 6.001010425851963e-06, "logits/chosen": -2.410886764526367, "logits/rejected": -2.494208812713623, "logps/chosen": -153.2860565185547, "logps/rejected": -443.29205322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.125425338745117, "rewards/margins": 13.75269603729248, "rewards/rejected": -16.878122329711914, "step": 11107 }, { "epoch": 1.73, "learning_rate": 6.000276985320816e-06, "logits/chosen": -1.9974350929260254, "logits/rejected": -2.680732250213623, "logps/chosen": -347.8995056152344, "logps/rejected": -372.1895446777344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.935817003250122, "rewards/margins": 9.656049728393555, "rewards/rejected": -11.591865539550781, "step": 11108 }, { "epoch": 1.73, "learning_rate": 5.999543544789668e-06, "logits/chosen": -2.6565966606140137, "logits/rejected": -2.9809505939483643, "logps/chosen": -309.1413879394531, "logps/rejected": -357.59442138671875, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -3.482447624206543, "rewards/margins": 6.461512565612793, "rewards/rejected": -9.943960189819336, "step": 11109 }, { "epoch": 1.73, "learning_rate": 5.99881010425852e-06, "logits/chosen": -1.6527713537216187, "logits/rejected": -2.792598247528076, "logps/chosen": -117.48541259765625, "logps/rejected": -349.462158203125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.838629722595215, "rewards/margins": 6.252692222595215, "rewards/rejected": -12.09132194519043, "step": 11110 }, { "epoch": 1.73, "learning_rate": 5.998076663727372e-06, "logits/chosen": -2.697474718093872, "logits/rejected": -3.0812127590179443, "logps/chosen": -230.86895751953125, "logps/rejected": -373.6263122558594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.158562660217285, "rewards/margins": 7.076051235198975, "rewards/rejected": -10.234613418579102, "step": 11111 }, { "epoch": 1.73, "learning_rate": 5.997343223196224e-06, "logits/chosen": -2.255204200744629, "logits/rejected": -2.534757137298584, "logps/chosen": -248.34751892089844, "logps/rejected": -393.1822509765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.8866615295410156, "rewards/margins": 9.898738861083984, "rewards/rejected": -13.785400390625, "step": 11112 }, { "epoch": 1.73, "learning_rate": 5.996609782665076e-06, "logits/chosen": -3.0818912982940674, "logits/rejected": -2.475879192352295, "logps/chosen": -145.7753448486328, "logps/rejected": -146.30892944335938, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/chosen": -3.619757890701294, "rewards/margins": 4.497039794921875, "rewards/rejected": -8.11679744720459, "step": 11113 }, { "epoch": 1.73, "learning_rate": 5.995876342133928e-06, "logits/chosen": -2.7797467708587646, "logits/rejected": -2.7921087741851807, "logps/chosen": -505.48297119140625, "logps/rejected": -764.5958251953125, "loss": 0.5366, "rewards/accuracies": 0.5, "rewards/chosen": -5.5328826904296875, "rewards/margins": 3.9112844467163086, "rewards/rejected": -9.444167137145996, "step": 11114 }, { "epoch": 1.73, "learning_rate": 5.99514290160278e-06, "logits/chosen": -0.6327337026596069, "logits/rejected": -2.14370059967041, "logps/chosen": -157.42005920410156, "logps/rejected": -663.8359375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.177490234375, "rewards/margins": 11.790000915527344, "rewards/rejected": -17.967491149902344, "step": 11115 }, { "epoch": 1.73, "learning_rate": 5.994409461071632e-06, "logits/chosen": -2.858829975128174, "logits/rejected": -1.716855525970459, "logps/chosen": -330.05224609375, "logps/rejected": -125.8213882446289, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": 1.6956802606582642, "rewards/margins": 11.136760711669922, "rewards/rejected": -9.441080093383789, "step": 11116 }, { "epoch": 1.73, "learning_rate": 5.993676020540485e-06, "logits/chosen": -2.825692892074585, "logits/rejected": -2.6452572345733643, "logps/chosen": -345.1287841796875, "logps/rejected": -528.6028442382812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.176936149597168, "rewards/margins": 11.68934440612793, "rewards/rejected": -16.86627960205078, "step": 11117 }, { "epoch": 1.73, "learning_rate": 5.9929425800093365e-06, "logits/chosen": -2.2446792125701904, "logits/rejected": -2.8712735176086426, "logps/chosen": -99.44615173339844, "logps/rejected": -367.97589111328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.303986072540283, "rewards/margins": 10.202895164489746, "rewards/rejected": -12.506881713867188, "step": 11118 }, { "epoch": 1.73, "learning_rate": 5.9922091394781884e-06, "logits/chosen": -1.4235674142837524, "logits/rejected": -2.970841646194458, "logps/chosen": -152.21572875976562, "logps/rejected": -344.6993103027344, "loss": 0.0856, "rewards/accuracies": 1.0, "rewards/chosen": -3.985443592071533, "rewards/margins": 4.061120986938477, "rewards/rejected": -8.046565055847168, "step": 11119 }, { "epoch": 1.73, "learning_rate": 5.99147569894704e-06, "logits/chosen": -1.7623087167739868, "logits/rejected": -2.60640549659729, "logps/chosen": -116.91826629638672, "logps/rejected": -455.0194396972656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5351290702819824, "rewards/margins": 11.134965896606445, "rewards/rejected": -12.67009449005127, "step": 11120 }, { "epoch": 1.73, "learning_rate": 5.990742258415893e-06, "logits/chosen": -2.9875221252441406, "logits/rejected": -2.3418264389038086, "logps/chosen": -246.6393585205078, "logps/rejected": -215.481201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4882745742797852, "rewards/margins": 10.50955581665039, "rewards/rejected": -11.997830390930176, "step": 11121 }, { "epoch": 1.73, "learning_rate": 5.990008817884746e-06, "logits/chosen": -2.7920100688934326, "logits/rejected": -3.036245107650757, "logps/chosen": -510.7625732421875, "logps/rejected": -562.6485595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.662017583847046, "rewards/margins": 11.312294006347656, "rewards/rejected": -14.974311828613281, "step": 11122 }, { "epoch": 1.73, "learning_rate": 5.989275377353598e-06, "logits/chosen": -3.230923652648926, "logits/rejected": -3.059051275253296, "logps/chosen": -132.0492401123047, "logps/rejected": -423.6864013671875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.6138195991516113, "rewards/margins": 12.243951797485352, "rewards/rejected": -15.857770919799805, "step": 11123 }, { "epoch": 1.73, "learning_rate": 5.9885419368224495e-06, "logits/chosen": -2.7257308959960938, "logits/rejected": -2.74635910987854, "logps/chosen": -471.4530029296875, "logps/rejected": -454.02142333984375, "loss": 0.0982, "rewards/accuracies": 1.0, "rewards/chosen": -5.368310451507568, "rewards/margins": 4.182197570800781, "rewards/rejected": -9.550508499145508, "step": 11124 }, { "epoch": 1.73, "learning_rate": 5.987808496291301e-06, "logits/chosen": -2.6841092109680176, "logits/rejected": -2.952219009399414, "logps/chosen": -278.1030578613281, "logps/rejected": -279.7497253417969, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.189589500427246, "rewards/margins": 5.964359283447266, "rewards/rejected": -10.153948783874512, "step": 11125 }, { "epoch": 1.73, "learning_rate": 5.987075055760154e-06, "logits/chosen": -2.2251622676849365, "logits/rejected": -3.002760171890259, "logps/chosen": -367.59490966796875, "logps/rejected": -501.6302795410156, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -6.135153770446777, "rewards/margins": 5.432893753051758, "rewards/rejected": -11.568048477172852, "step": 11126 }, { "epoch": 1.73, "learning_rate": 5.986341615229006e-06, "logits/chosen": -2.707167148590088, "logits/rejected": -3.0879788398742676, "logps/chosen": -137.50868225097656, "logps/rejected": -413.7693786621094, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.846437454223633, "rewards/margins": 10.191232681274414, "rewards/rejected": -14.037670135498047, "step": 11127 }, { "epoch": 1.73, "learning_rate": 5.985608174697858e-06, "logits/chosen": -2.6920018196105957, "logits/rejected": -3.059847593307495, "logps/chosen": -361.4090881347656, "logps/rejected": -446.62200927734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.950465202331543, "rewards/margins": 10.041135787963867, "rewards/rejected": -14.991601943969727, "step": 11128 }, { "epoch": 1.73, "learning_rate": 5.98487473416671e-06, "logits/chosen": -2.7230446338653564, "logits/rejected": -2.962052822113037, "logps/chosen": -228.14132690429688, "logps/rejected": -255.2264404296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.734249114990234, "rewards/margins": 7.657350063323975, "rewards/rejected": -13.39159870147705, "step": 11129 }, { "epoch": 1.73, "learning_rate": 5.9841412936355625e-06, "logits/chosen": -2.682727575302124, "logits/rejected": -2.294358253479004, "logps/chosen": -531.4818725585938, "logps/rejected": -527.90869140625, "loss": 0.0482, "rewards/accuracies": 1.0, "rewards/chosen": -4.306822776794434, "rewards/margins": 8.340574264526367, "rewards/rejected": -12.6473970413208, "step": 11130 }, { "epoch": 1.73, "learning_rate": 5.983407853104414e-06, "logits/chosen": -2.3911919593811035, "logits/rejected": -2.9200305938720703, "logps/chosen": -130.81814575195312, "logps/rejected": -299.3966064453125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.548046588897705, "rewards/margins": 5.410490989685059, "rewards/rejected": -8.958537101745605, "step": 11131 }, { "epoch": 1.73, "learning_rate": 5.982674412573266e-06, "logits/chosen": -2.927856683731079, "logits/rejected": -2.602715253829956, "logps/chosen": -127.82443237304688, "logps/rejected": -148.29550170898438, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": -5.850933074951172, "rewards/margins": 4.944718360900879, "rewards/rejected": -10.79565143585205, "step": 11132 }, { "epoch": 1.73, "learning_rate": 5.981940972042118e-06, "logits/chosen": -3.0110414028167725, "logits/rejected": -3.167221784591675, "logps/chosen": -88.3902816772461, "logps/rejected": -252.00079345703125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -4.25886344909668, "rewards/margins": 5.984057426452637, "rewards/rejected": -10.242921829223633, "step": 11133 }, { "epoch": 1.73, "learning_rate": 5.98120753151097e-06, "logits/chosen": -2.1206910610198975, "logits/rejected": -2.840196132659912, "logps/chosen": -458.449951171875, "logps/rejected": -505.81610107421875, "loss": 0.6277, "rewards/accuracies": 0.5, "rewards/chosen": -5.290350437164307, "rewards/margins": 4.84998893737793, "rewards/rejected": -10.140339851379395, "step": 11134 }, { "epoch": 1.73, "learning_rate": 5.980474090979823e-06, "logits/chosen": -0.6624182462692261, "logits/rejected": -2.5374948978424072, "logps/chosen": -136.3104705810547, "logps/rejected": -414.7236328125, "loss": 0.7043, "rewards/accuracies": 0.5, "rewards/chosen": -7.36963415145874, "rewards/margins": 2.407750129699707, "rewards/rejected": -9.777383804321289, "step": 11135 }, { "epoch": 1.73, "learning_rate": 5.979740650448675e-06, "logits/chosen": -2.3817877769470215, "logits/rejected": -3.066767692565918, "logps/chosen": -183.60507202148438, "logps/rejected": -290.6619873046875, "loss": 0.0408, "rewards/accuracies": 1.0, "rewards/chosen": -6.534670829772949, "rewards/margins": 3.3377230167388916, "rewards/rejected": -9.872394561767578, "step": 11136 }, { "epoch": 1.73, "learning_rate": 5.9790072099175265e-06, "logits/chosen": -2.611886739730835, "logits/rejected": -2.969367504119873, "logps/chosen": -283.8182067871094, "logps/rejected": -334.5555114746094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.5472564697265625, "rewards/margins": 10.424947738647461, "rewards/rejected": -17.972204208374023, "step": 11137 }, { "epoch": 1.73, "learning_rate": 5.978273769386379e-06, "logits/chosen": -2.4192066192626953, "logits/rejected": -2.694974184036255, "logps/chosen": -323.244873046875, "logps/rejected": -404.057861328125, "loss": 0.1776, "rewards/accuracies": 1.0, "rewards/chosen": -7.435667991638184, "rewards/margins": 6.9923996925354, "rewards/rejected": -14.428068161010742, "step": 11138 }, { "epoch": 1.73, "learning_rate": 5.977540328855232e-06, "logits/chosen": -2.7211294174194336, "logits/rejected": -2.77791428565979, "logps/chosen": -325.9337158203125, "logps/rejected": -317.4329528808594, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.731006145477295, "rewards/margins": 8.354835510253906, "rewards/rejected": -12.085841178894043, "step": 11139 }, { "epoch": 1.73, "learning_rate": 5.976806888324084e-06, "logits/chosen": -3.0664024353027344, "logits/rejected": -2.5082788467407227, "logps/chosen": -204.85888671875, "logps/rejected": -259.59307861328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.831979513168335, "rewards/margins": 9.334346771240234, "rewards/rejected": -11.166326522827148, "step": 11140 }, { "epoch": 1.73, "learning_rate": 5.976073447792936e-06, "logits/chosen": -0.9307085871696472, "logits/rejected": -2.6269137859344482, "logps/chosen": -134.287109375, "logps/rejected": -424.0956726074219, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/chosen": -6.05052375793457, "rewards/margins": 7.159228324890137, "rewards/rejected": -13.20975112915039, "step": 11141 }, { "epoch": 1.73, "learning_rate": 5.9753400072617876e-06, "logits/chosen": -2.2602124214172363, "logits/rejected": -2.887767791748047, "logps/chosen": -354.53546142578125, "logps/rejected": -397.6885681152344, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.0923662185668945, "rewards/margins": 7.704690933227539, "rewards/rejected": -10.79705810546875, "step": 11142 }, { "epoch": 1.73, "learning_rate": 5.9746065667306394e-06, "logits/chosen": -2.1961870193481445, "logits/rejected": -2.4463839530944824, "logps/chosen": -164.22760009765625, "logps/rejected": -277.96173095703125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -1.8058631420135498, "rewards/margins": 5.8756561279296875, "rewards/rejected": -7.681519508361816, "step": 11143 }, { "epoch": 1.73, "learning_rate": 5.973873126199492e-06, "logits/chosen": -2.4657962322235107, "logits/rejected": -2.802985191345215, "logps/chosen": -198.12786865234375, "logps/rejected": -210.95654296875, "loss": 0.2111, "rewards/accuracies": 1.0, "rewards/chosen": -5.17387580871582, "rewards/margins": 2.9112796783447266, "rewards/rejected": -8.085155487060547, "step": 11144 }, { "epoch": 1.73, "learning_rate": 5.973139685668344e-06, "logits/chosen": -2.836366653442383, "logits/rejected": -3.1085636615753174, "logps/chosen": -259.8857727050781, "logps/rejected": -439.28753662109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.136138916015625, "rewards/margins": 7.235899448394775, "rewards/rejected": -11.372037887573242, "step": 11145 }, { "epoch": 1.73, "learning_rate": 5.972406245137196e-06, "logits/chosen": -3.0141170024871826, "logits/rejected": -1.5436853170394897, "logps/chosen": -491.4291076660156, "logps/rejected": -332.39312744140625, "loss": 2.308, "rewards/accuracies": 0.5, "rewards/chosen": -7.302006721496582, "rewards/margins": 3.55232834815979, "rewards/rejected": -10.854334831237793, "step": 11146 }, { "epoch": 1.73, "learning_rate": 5.971672804606048e-06, "logits/chosen": -2.732574462890625, "logits/rejected": -2.9779343605041504, "logps/chosen": -440.8818664550781, "logps/rejected": -530.8687744140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.532418727874756, "rewards/margins": 9.045772552490234, "rewards/rejected": -11.578190803527832, "step": 11147 }, { "epoch": 1.73, "learning_rate": 5.9709393640749005e-06, "logits/chosen": -2.6982474327087402, "logits/rejected": -2.958225727081299, "logps/chosen": -163.00018310546875, "logps/rejected": -229.2046661376953, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -2.93898868560791, "rewards/margins": 5.442768573760986, "rewards/rejected": -8.381756782531738, "step": 11148 }, { "epoch": 1.73, "learning_rate": 5.970205923543752e-06, "logits/chosen": -1.2293798923492432, "logits/rejected": -1.4275716543197632, "logps/chosen": -260.6637268066406, "logps/rejected": -209.69898986816406, "loss": 0.1476, "rewards/accuracies": 1.0, "rewards/chosen": -4.6710100173950195, "rewards/margins": 3.1441986560821533, "rewards/rejected": -7.815208435058594, "step": 11149 }, { "epoch": 1.73, "learning_rate": 5.969472483012604e-06, "logits/chosen": -2.7281856536865234, "logits/rejected": -1.8600752353668213, "logps/chosen": -105.61186218261719, "logps/rejected": -143.42196655273438, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.521643877029419, "rewards/margins": 6.0902509689331055, "rewards/rejected": -9.611894607543945, "step": 11150 }, { "epoch": 1.73, "learning_rate": 5.968739042481456e-06, "logits/chosen": -1.377409815788269, "logits/rejected": -3.171868324279785, "logps/chosen": -81.76446533203125, "logps/rejected": -313.27880859375, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -4.5190629959106445, "rewards/margins": 4.634762763977051, "rewards/rejected": -9.153825759887695, "step": 11151 }, { "epoch": 1.73, "learning_rate": 5.968005601950308e-06, "logits/chosen": -1.5870579481124878, "logits/rejected": -2.7964136600494385, "logps/chosen": -176.5363006591797, "logps/rejected": -444.7124328613281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.306650161743164, "rewards/margins": 9.587139129638672, "rewards/rejected": -13.89378833770752, "step": 11152 }, { "epoch": 1.73, "learning_rate": 5.967272161419161e-06, "logits/chosen": -2.6356301307678223, "logits/rejected": -3.126230239868164, "logps/chosen": -130.4431610107422, "logps/rejected": -324.90216064453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.9423186779022217, "rewards/margins": 8.154569625854492, "rewards/rejected": -12.096887588500977, "step": 11153 }, { "epoch": 1.73, "learning_rate": 5.966538720888013e-06, "logits/chosen": -2.6707217693328857, "logits/rejected": -2.568682909011841, "logps/chosen": -91.86973571777344, "logps/rejected": -288.89178466796875, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -3.176607847213745, "rewards/margins": 8.402244567871094, "rewards/rejected": -11.578852653503418, "step": 11154 }, { "epoch": 1.73, "learning_rate": 5.965805280356865e-06, "logits/chosen": -2.7490193843841553, "logits/rejected": -2.712303400039673, "logps/chosen": -130.11585998535156, "logps/rejected": -168.6903076171875, "loss": 0.3474, "rewards/accuracies": 1.0, "rewards/chosen": -6.8768510818481445, "rewards/margins": 2.859654188156128, "rewards/rejected": -9.736505508422852, "step": 11155 }, { "epoch": 1.73, "learning_rate": 5.965071839825717e-06, "logits/chosen": -2.632866382598877, "logits/rejected": -2.2432384490966797, "logps/chosen": -588.540283203125, "logps/rejected": -485.579833984375, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -6.814077377319336, "rewards/margins": 4.020140647888184, "rewards/rejected": -10.83421802520752, "step": 11156 }, { "epoch": 1.74, "learning_rate": 5.96433839929457e-06, "logits/chosen": -1.7089498043060303, "logits/rejected": -2.7502987384796143, "logps/chosen": -136.59317016601562, "logps/rejected": -337.7257080078125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -5.229469299316406, "rewards/margins": 6.7218780517578125, "rewards/rejected": -11.951347351074219, "step": 11157 }, { "epoch": 1.74, "learning_rate": 5.963604958763422e-06, "logits/chosen": -2.6788179874420166, "logits/rejected": -2.6203348636627197, "logps/chosen": -428.4158630371094, "logps/rejected": -428.86566162109375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.674772262573242, "rewards/margins": 6.031245231628418, "rewards/rejected": -11.70601749420166, "step": 11158 }, { "epoch": 1.74, "learning_rate": 5.962871518232274e-06, "logits/chosen": -2.7179477214813232, "logits/rejected": -2.9617717266082764, "logps/chosen": -284.7196350097656, "logps/rejected": -434.9886474609375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -3.280954599380493, "rewards/margins": 7.61463737487793, "rewards/rejected": -10.895591735839844, "step": 11159 }, { "epoch": 1.74, "learning_rate": 5.962138077701126e-06, "logits/chosen": -1.9416775703430176, "logits/rejected": -3.1284821033477783, "logps/chosen": -172.06307983398438, "logps/rejected": -670.7654418945312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.230510234832764, "rewards/margins": 9.249650955200195, "rewards/rejected": -15.480161666870117, "step": 11160 }, { "epoch": 1.74, "learning_rate": 5.9614046371699775e-06, "logits/chosen": -2.890273094177246, "logits/rejected": -2.6580004692077637, "logps/chosen": -230.67691040039062, "logps/rejected": -302.9676208496094, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -3.946032762527466, "rewards/margins": 5.07358455657959, "rewards/rejected": -9.019617080688477, "step": 11161 }, { "epoch": 1.74, "learning_rate": 5.96067119663883e-06, "logits/chosen": -2.718984842300415, "logits/rejected": -2.7955322265625, "logps/chosen": -131.77322387695312, "logps/rejected": -216.59503173828125, "loss": 0.2142, "rewards/accuracies": 1.0, "rewards/chosen": -5.319559097290039, "rewards/margins": 4.463169097900391, "rewards/rejected": -9.78272819519043, "step": 11162 }, { "epoch": 1.74, "learning_rate": 5.959937756107682e-06, "logits/chosen": -2.924931526184082, "logits/rejected": -2.9450669288635254, "logps/chosen": -444.2192687988281, "logps/rejected": -482.4966125488281, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.966443061828613, "rewards/margins": 5.958292007446289, "rewards/rejected": -10.924735069274902, "step": 11163 }, { "epoch": 1.74, "learning_rate": 5.959204315576534e-06, "logits/chosen": -3.028488874435425, "logits/rejected": -2.86708927154541, "logps/chosen": -429.08856201171875, "logps/rejected": -401.0103454589844, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -3.783686637878418, "rewards/margins": 5.693791389465332, "rewards/rejected": -9.47747802734375, "step": 11164 }, { "epoch": 1.74, "learning_rate": 5.958470875045386e-06, "logits/chosen": -2.7795326709747314, "logits/rejected": -3.0078306198120117, "logps/chosen": -166.9109344482422, "logps/rejected": -254.40869140625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.436734199523926, "rewards/margins": 6.804436683654785, "rewards/rejected": -10.241170883178711, "step": 11165 }, { "epoch": 1.74, "learning_rate": 5.9577374345142386e-06, "logits/chosen": -2.8320155143737793, "logits/rejected": -2.8801722526550293, "logps/chosen": -150.67941284179688, "logps/rejected": -254.20474243164062, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -5.051197528839111, "rewards/margins": 5.331094741821289, "rewards/rejected": -10.382291793823242, "step": 11166 }, { "epoch": 1.74, "learning_rate": 5.9570039939830904e-06, "logits/chosen": -2.398871421813965, "logits/rejected": -2.676029682159424, "logps/chosen": -263.556884765625, "logps/rejected": -303.99517822265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.6362884044647217, "rewards/margins": 9.488948822021484, "rewards/rejected": -12.125238418579102, "step": 11167 }, { "epoch": 1.74, "learning_rate": 5.956270553451942e-06, "logits/chosen": -3.2156853675842285, "logits/rejected": -2.9205901622772217, "logps/chosen": -203.35311889648438, "logps/rejected": -193.3878173828125, "loss": 0.8319, "rewards/accuracies": 0.5, "rewards/chosen": -5.655214786529541, "rewards/margins": 3.448660373687744, "rewards/rejected": -9.103875160217285, "step": 11168 }, { "epoch": 1.74, "learning_rate": 5.955537112920794e-06, "logits/chosen": -2.9082441329956055, "logits/rejected": -1.5886080265045166, "logps/chosen": -389.1566162109375, "logps/rejected": -258.5775451660156, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -6.221540451049805, "rewards/margins": 4.9423651695251465, "rewards/rejected": -11.16390609741211, "step": 11169 }, { "epoch": 1.74, "learning_rate": 5.954803672389646e-06, "logits/chosen": -3.1612017154693604, "logits/rejected": -2.1320197582244873, "logps/chosen": -545.7014770507812, "logps/rejected": -337.7690124511719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9106645584106445, "rewards/margins": 9.168010711669922, "rewards/rejected": -12.07867431640625, "step": 11170 }, { "epoch": 1.74, "learning_rate": 5.954070231858499e-06, "logits/chosen": -2.7904295921325684, "logits/rejected": -2.9383745193481445, "logps/chosen": -484.0147399902344, "logps/rejected": -534.139404296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.5645341873168945, "rewards/margins": 8.471193313598633, "rewards/rejected": -12.035726547241211, "step": 11171 }, { "epoch": 1.74, "learning_rate": 5.953336791327351e-06, "logits/chosen": -2.923572301864624, "logits/rejected": -3.169647455215454, "logps/chosen": -229.7147979736328, "logps/rejected": -302.332763671875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -5.530816555023193, "rewards/margins": 5.490603923797607, "rewards/rejected": -11.0214204788208, "step": 11172 }, { "epoch": 1.74, "learning_rate": 5.952603350796203e-06, "logits/chosen": -1.7873973846435547, "logits/rejected": -2.9745771884918213, "logps/chosen": -129.00059509277344, "logps/rejected": -436.5646667480469, "loss": 0.5549, "rewards/accuracies": 0.5, "rewards/chosen": -5.642028331756592, "rewards/margins": 5.442882537841797, "rewards/rejected": -11.084911346435547, "step": 11173 }, { "epoch": 1.74, "learning_rate": 5.951869910265055e-06, "logits/chosen": -2.08280348777771, "logits/rejected": -2.89005970954895, "logps/chosen": -88.60298156738281, "logps/rejected": -375.3954162597656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.8228635787963867, "rewards/margins": 9.921676635742188, "rewards/rejected": -12.744539260864258, "step": 11174 }, { "epoch": 1.74, "learning_rate": 5.951136469733908e-06, "logits/chosen": -3.002591133117676, "logits/rejected": -3.198834180831909, "logps/chosen": -63.89823913574219, "logps/rejected": -182.87307739257812, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.423494338989258, "rewards/margins": 7.473807334899902, "rewards/rejected": -11.89730167388916, "step": 11175 }, { "epoch": 1.74, "learning_rate": 5.95040302920276e-06, "logits/chosen": -2.6835029125213623, "logits/rejected": -2.868664264678955, "logps/chosen": -380.4816589355469, "logps/rejected": -412.19744873046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.468782424926758, "rewards/margins": 6.6133623123168945, "rewards/rejected": -12.082143783569336, "step": 11176 }, { "epoch": 1.74, "learning_rate": 5.949669588671612e-06, "logits/chosen": -3.119453191757202, "logits/rejected": -2.934575319290161, "logps/chosen": -116.97297668457031, "logps/rejected": -159.17599487304688, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -5.873918056488037, "rewards/margins": 5.392632961273193, "rewards/rejected": -11.26655101776123, "step": 11177 }, { "epoch": 1.74, "learning_rate": 5.948936148140464e-06, "logits/chosen": -2.6243832111358643, "logits/rejected": -2.9825658798217773, "logps/chosen": -131.43328857421875, "logps/rejected": -268.75335693359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.588172912597656, "rewards/margins": 6.681195259094238, "rewards/rejected": -12.269368171691895, "step": 11178 }, { "epoch": 1.74, "learning_rate": 5.948202707609316e-06, "logits/chosen": -1.7875880002975464, "logits/rejected": -2.922081708908081, "logps/chosen": -247.783203125, "logps/rejected": -539.3701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6364712715148926, "rewards/margins": 9.220075607299805, "rewards/rejected": -12.856546401977539, "step": 11179 }, { "epoch": 1.74, "learning_rate": 5.947469267078168e-06, "logits/chosen": -2.7074925899505615, "logits/rejected": -2.6773054599761963, "logps/chosen": -189.96295166015625, "logps/rejected": -234.17684936523438, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.1631646156311035, "rewards/margins": 7.1656975746154785, "rewards/rejected": -9.328862190246582, "step": 11180 }, { "epoch": 1.74, "learning_rate": 5.94673582654702e-06, "logits/chosen": -2.4685866832733154, "logits/rejected": -2.6949853897094727, "logps/chosen": -113.10173034667969, "logps/rejected": -585.320068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.698166847229004, "rewards/margins": 13.318035125732422, "rewards/rejected": -19.01620101928711, "step": 11181 }, { "epoch": 1.74, "learning_rate": 5.946002386015872e-06, "logits/chosen": -2.603522300720215, "logits/rejected": -0.9473105072975159, "logps/chosen": -326.5080871582031, "logps/rejected": -246.68331909179688, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -7.076920509338379, "rewards/margins": 4.092371940612793, "rewards/rejected": -11.169292449951172, "step": 11182 }, { "epoch": 1.74, "learning_rate": 5.945268945484724e-06, "logits/chosen": -2.5949501991271973, "logits/rejected": -2.852323532104492, "logps/chosen": -339.5203552246094, "logps/rejected": -611.5140380859375, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -6.292259216308594, "rewards/margins": 4.762831211090088, "rewards/rejected": -11.05509090423584, "step": 11183 }, { "epoch": 1.74, "learning_rate": 5.944535504953577e-06, "logits/chosen": -3.173389434814453, "logits/rejected": -2.9531922340393066, "logps/chosen": -159.48031616210938, "logps/rejected": -153.5389404296875, "loss": 2.0326, "rewards/accuracies": 0.5, "rewards/chosen": -5.493025302886963, "rewards/margins": 1.6116082668304443, "rewards/rejected": -7.104633331298828, "step": 11184 }, { "epoch": 1.74, "learning_rate": 5.9438020644224285e-06, "logits/chosen": -2.838387966156006, "logits/rejected": -2.3282690048217773, "logps/chosen": -246.958984375, "logps/rejected": -229.1295166015625, "loss": 2.2547, "rewards/accuracies": 0.5, "rewards/chosen": -7.87486457824707, "rewards/margins": 1.9333105087280273, "rewards/rejected": -9.808175086975098, "step": 11185 }, { "epoch": 1.74, "learning_rate": 5.94306862389128e-06, "logits/chosen": -2.041609287261963, "logits/rejected": -3.0624003410339355, "logps/chosen": -151.12925720214844, "logps/rejected": -666.764892578125, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -3.459942102432251, "rewards/margins": 10.752521514892578, "rewards/rejected": -14.21246337890625, "step": 11186 }, { "epoch": 1.74, "learning_rate": 5.942335183360132e-06, "logits/chosen": -2.5319786071777344, "logits/rejected": -2.466716766357422, "logps/chosen": -269.196044921875, "logps/rejected": -415.9366455078125, "loss": 1.3989, "rewards/accuracies": 0.5, "rewards/chosen": -9.454737663269043, "rewards/margins": 2.1760168075561523, "rewards/rejected": -11.630754470825195, "step": 11187 }, { "epoch": 1.74, "learning_rate": 5.941601742828985e-06, "logits/chosen": -1.5806870460510254, "logits/rejected": -2.7642691135406494, "logps/chosen": -245.2584686279297, "logps/rejected": -388.0873107910156, "loss": 0.6502, "rewards/accuracies": 0.5, "rewards/chosen": -5.153132438659668, "rewards/margins": 2.95965576171875, "rewards/rejected": -8.112788200378418, "step": 11188 }, { "epoch": 1.74, "learning_rate": 5.940868302297837e-06, "logits/chosen": -2.736593246459961, "logits/rejected": -3.3103256225585938, "logps/chosen": -62.30466842651367, "logps/rejected": -210.135986328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.785158157348633, "rewards/margins": 7.087238788604736, "rewards/rejected": -10.872396469116211, "step": 11189 }, { "epoch": 1.74, "learning_rate": 5.9401348617666896e-06, "logits/chosen": -2.9739344120025635, "logits/rejected": -2.1752872467041016, "logps/chosen": -270.9700012207031, "logps/rejected": -182.2226104736328, "loss": 2.1012, "rewards/accuracies": 0.5, "rewards/chosen": -6.308088779449463, "rewards/margins": 0.880326509475708, "rewards/rejected": -7.18841552734375, "step": 11190 }, { "epoch": 1.74, "learning_rate": 5.9394014212355415e-06, "logits/chosen": -0.5914125442504883, "logits/rejected": -1.7819819450378418, "logps/chosen": -161.29246520996094, "logps/rejected": -320.451171875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -5.615484714508057, "rewards/margins": 5.715667724609375, "rewards/rejected": -11.331151962280273, "step": 11191 }, { "epoch": 1.74, "learning_rate": 5.938667980704393e-06, "logits/chosen": -2.420145034790039, "logits/rejected": -1.3929892778396606, "logps/chosen": -293.86297607421875, "logps/rejected": -283.7962646484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.619795799255371, "rewards/margins": 8.93244743347168, "rewards/rejected": -11.55224323272705, "step": 11192 }, { "epoch": 1.74, "learning_rate": 5.937934540173246e-06, "logits/chosen": -2.325161933898926, "logits/rejected": -2.8678460121154785, "logps/chosen": -121.6656494140625, "logps/rejected": -404.4981994628906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.984771490097046, "rewards/margins": 8.798580169677734, "rewards/rejected": -12.78335189819336, "step": 11193 }, { "epoch": 1.74, "learning_rate": 5.937201099642098e-06, "logits/chosen": -2.976571798324585, "logits/rejected": -2.902151584625244, "logps/chosen": -135.71853637695312, "logps/rejected": -162.91574096679688, "loss": 1.4074, "rewards/accuracies": 0.5, "rewards/chosen": -6.740741729736328, "rewards/margins": 2.0102555751800537, "rewards/rejected": -8.750997543334961, "step": 11194 }, { "epoch": 1.74, "learning_rate": 5.93646765911095e-06, "logits/chosen": -2.7996833324432373, "logits/rejected": -2.5960471630096436, "logps/chosen": -151.75030517578125, "logps/rejected": -216.03631591796875, "loss": 0.0974, "rewards/accuracies": 1.0, "rewards/chosen": -5.16533088684082, "rewards/margins": 4.150009632110596, "rewards/rejected": -9.315340042114258, "step": 11195 }, { "epoch": 1.74, "learning_rate": 5.935734218579802e-06, "logits/chosen": -2.609855890274048, "logits/rejected": -3.1897389888763428, "logps/chosen": -190.6865234375, "logps/rejected": -415.2440185546875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.7267351150512695, "rewards/margins": 6.547340393066406, "rewards/rejected": -10.274075508117676, "step": 11196 }, { "epoch": 1.74, "learning_rate": 5.935000778048654e-06, "logits/chosen": -2.354283571243286, "logits/rejected": -2.6932404041290283, "logps/chosen": -146.63949584960938, "logps/rejected": -311.95257568359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.641042232513428, "rewards/margins": 6.4588189125061035, "rewards/rejected": -12.099861145019531, "step": 11197 }, { "epoch": 1.74, "learning_rate": 5.934267337517506e-06, "logits/chosen": -2.4343338012695312, "logits/rejected": -3.2051963806152344, "logps/chosen": -145.8186798095703, "logps/rejected": -504.49395751953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.712499618530273, "rewards/margins": 8.408693313598633, "rewards/rejected": -13.121192932128906, "step": 11198 }, { "epoch": 1.74, "learning_rate": 5.933533896986358e-06, "logits/chosen": -3.192474126815796, "logits/rejected": -2.934373617172241, "logps/chosen": -595.152099609375, "logps/rejected": -321.3963623046875, "loss": 2.4563, "rewards/accuracies": 0.5, "rewards/chosen": -10.657814025878906, "rewards/margins": 0.6215431690216064, "rewards/rejected": -11.27935791015625, "step": 11199 }, { "epoch": 1.74, "learning_rate": 5.93280045645521e-06, "logits/chosen": -3.0080015659332275, "logits/rejected": -2.138076066970825, "logps/chosen": -398.19390869140625, "logps/rejected": -302.5782470703125, "loss": 1.3686, "rewards/accuracies": 0.5, "rewards/chosen": -8.4608154296875, "rewards/margins": -0.5729050636291504, "rewards/rejected": -7.887910842895508, "step": 11200 }, { "epoch": 1.74, "learning_rate": 5.932067015924062e-06, "logits/chosen": -2.811103582382202, "logits/rejected": -3.0863935947418213, "logps/chosen": -109.5934066772461, "logps/rejected": -268.0692443847656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9546422958374023, "rewards/margins": 8.384014129638672, "rewards/rejected": -10.338655471801758, "step": 11201 }, { "epoch": 1.74, "learning_rate": 5.931333575392915e-06, "logits/chosen": -3.1067018508911133, "logits/rejected": -3.3091089725494385, "logps/chosen": -164.01031494140625, "logps/rejected": -191.84457397460938, "loss": 2.5917, "rewards/accuracies": 0.5, "rewards/chosen": -8.93671989440918, "rewards/margins": -0.69765305519104, "rewards/rejected": -8.239067077636719, "step": 11202 }, { "epoch": 1.74, "learning_rate": 5.9306001348617665e-06, "logits/chosen": -2.347158432006836, "logits/rejected": -3.0817670822143555, "logps/chosen": -107.20612335205078, "logps/rejected": -282.03143310546875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.063957214355469, "rewards/margins": 7.547057628631592, "rewards/rejected": -11.611015319824219, "step": 11203 }, { "epoch": 1.74, "learning_rate": 5.929866694330618e-06, "logits/chosen": -2.468351125717163, "logits/rejected": -2.8509085178375244, "logps/chosen": -149.8425750732422, "logps/rejected": -344.8399353027344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.912535667419434, "rewards/margins": 8.91597843170166, "rewards/rejected": -15.828514099121094, "step": 11204 }, { "epoch": 1.74, "learning_rate": 5.92913325379947e-06, "logits/chosen": -1.576263189315796, "logits/rejected": -2.8201420307159424, "logps/chosen": -453.96502685546875, "logps/rejected": -684.8021850585938, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.203312873840332, "rewards/margins": 8.165279388427734, "rewards/rejected": -13.36859130859375, "step": 11205 }, { "epoch": 1.74, "learning_rate": 5.928399813268323e-06, "logits/chosen": -1.0060704946517944, "logits/rejected": -2.301313638687134, "logps/chosen": -349.4429931640625, "logps/rejected": -658.530029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3906846046447754, "rewards/margins": 11.22586441040039, "rewards/rejected": -14.61655044555664, "step": 11206 }, { "epoch": 1.74, "learning_rate": 5.927666372737176e-06, "logits/chosen": -2.8380370140075684, "logits/rejected": -1.9299135208129883, "logps/chosen": -355.3061218261719, "logps/rejected": -320.9067077636719, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.773515701293945, "rewards/margins": 7.982153415679932, "rewards/rejected": -14.755668640136719, "step": 11207 }, { "epoch": 1.74, "learning_rate": 5.926932932206028e-06, "logits/chosen": -2.9562950134277344, "logits/rejected": -3.2023427486419678, "logps/chosen": -163.97325134277344, "logps/rejected": -265.2336120605469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.992507219314575, "rewards/margins": 8.088619232177734, "rewards/rejected": -11.08112621307373, "step": 11208 }, { "epoch": 1.74, "learning_rate": 5.9261994916748795e-06, "logits/chosen": -3.051046371459961, "logits/rejected": -2.3236804008483887, "logps/chosen": -457.9408264160156, "logps/rejected": -353.167724609375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -5.6208696365356445, "rewards/margins": 6.988287925720215, "rewards/rejected": -12.60915756225586, "step": 11209 }, { "epoch": 1.74, "learning_rate": 5.925466051143731e-06, "logits/chosen": -2.743295907974243, "logits/rejected": -1.5314772129058838, "logps/chosen": -205.82826232910156, "logps/rejected": -252.14427185058594, "loss": 0.3766, "rewards/accuracies": 0.5, "rewards/chosen": -4.633482456207275, "rewards/margins": 5.790323734283447, "rewards/rejected": -10.423806190490723, "step": 11210 }, { "epoch": 1.74, "learning_rate": 5.924732610612584e-06, "logits/chosen": -2.64652943611145, "logits/rejected": -2.9122140407562256, "logps/chosen": -189.63551330566406, "logps/rejected": -349.3975830078125, "loss": 0.019, "rewards/accuracies": 1.0, "rewards/chosen": -3.7789955139160156, "rewards/margins": 6.037254333496094, "rewards/rejected": -9.81624984741211, "step": 11211 }, { "epoch": 1.74, "learning_rate": 5.923999170081436e-06, "logits/chosen": -3.3135249614715576, "logits/rejected": -3.2800309658050537, "logps/chosen": -148.61492919921875, "logps/rejected": -200.0517120361328, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.565530300140381, "rewards/margins": 6.271858215332031, "rewards/rejected": -11.83738899230957, "step": 11212 }, { "epoch": 1.74, "learning_rate": 5.923265729550288e-06, "logits/chosen": -3.0844438076019287, "logits/rejected": -2.599764347076416, "logps/chosen": -524.0538940429688, "logps/rejected": -272.96209716796875, "loss": 1.0825, "rewards/accuracies": 0.5, "rewards/chosen": -5.488612174987793, "rewards/margins": 5.2469587326049805, "rewards/rejected": -10.735570907592773, "step": 11213 }, { "epoch": 1.74, "learning_rate": 5.92253228901914e-06, "logits/chosen": -2.6436350345611572, "logits/rejected": -2.939784049987793, "logps/chosen": -489.6029052734375, "logps/rejected": -479.3900451660156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.073466777801514, "rewards/margins": 9.92335319519043, "rewards/rejected": -14.996820449829102, "step": 11214 }, { "epoch": 1.74, "learning_rate": 5.9217988484879925e-06, "logits/chosen": -2.976109027862549, "logits/rejected": -3.1539387702941895, "logps/chosen": -101.24723815917969, "logps/rejected": -190.35008239746094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.4501075744628906, "rewards/margins": 6.925616264343262, "rewards/rejected": -10.375723838806152, "step": 11215 }, { "epoch": 1.74, "learning_rate": 5.921065407956844e-06, "logits/chosen": -1.6531568765640259, "logits/rejected": -2.7781221866607666, "logps/chosen": -172.44822692871094, "logps/rejected": -309.4923400878906, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -4.234903812408447, "rewards/margins": 5.074685096740723, "rewards/rejected": -9.309589385986328, "step": 11216 }, { "epoch": 1.74, "learning_rate": 5.920331967425696e-06, "logits/chosen": -2.672665596008301, "logits/rejected": -2.0530471801757812, "logps/chosen": -246.39427185058594, "logps/rejected": -226.48190307617188, "loss": 0.9973, "rewards/accuracies": 0.5, "rewards/chosen": -4.331237316131592, "rewards/margins": 2.822326898574829, "rewards/rejected": -7.153564453125, "step": 11217 }, { "epoch": 1.74, "learning_rate": 5.919598526894548e-06, "logits/chosen": -0.9496180415153503, "logits/rejected": -1.5888713598251343, "logps/chosen": -340.2454528808594, "logps/rejected": -546.980224609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.891486167907715, "rewards/margins": 10.102424621582031, "rewards/rejected": -14.993910789489746, "step": 11218 }, { "epoch": 1.74, "learning_rate": 5.918865086363401e-06, "logits/chosen": -2.325152635574341, "logits/rejected": -3.0382912158966064, "logps/chosen": -148.1412353515625, "logps/rejected": -320.4081726074219, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -4.024153232574463, "rewards/margins": 5.692752838134766, "rewards/rejected": -9.716906547546387, "step": 11219 }, { "epoch": 1.74, "learning_rate": 5.918131645832253e-06, "logits/chosen": -2.7819392681121826, "logits/rejected": -2.8533151149749756, "logps/chosen": -267.9361267089844, "logps/rejected": -308.48028564453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.669846296310425, "rewards/margins": 6.725164890289307, "rewards/rejected": -10.395011901855469, "step": 11220 }, { "epoch": 1.75, "learning_rate": 5.917398205301105e-06, "logits/chosen": -2.202570915222168, "logits/rejected": -2.603628158569336, "logps/chosen": -409.4781494140625, "logps/rejected": -397.9320983886719, "loss": 0.1365, "rewards/accuracies": 1.0, "rewards/chosen": -5.360033988952637, "rewards/margins": 5.780287742614746, "rewards/rejected": -11.1403226852417, "step": 11221 }, { "epoch": 1.75, "learning_rate": 5.9166647647699565e-06, "logits/chosen": -2.774083375930786, "logits/rejected": -2.8481922149658203, "logps/chosen": -703.85498046875, "logps/rejected": -510.56683349609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.659109592437744, "rewards/margins": 7.613559722900391, "rewards/rejected": -11.272668838500977, "step": 11222 }, { "epoch": 1.75, "learning_rate": 5.915931324238809e-06, "logits/chosen": -1.8118765354156494, "logits/rejected": -2.597162961959839, "logps/chosen": -214.12339782714844, "logps/rejected": -249.951416015625, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -2.758833885192871, "rewards/margins": 3.4475488662719727, "rewards/rejected": -6.206382751464844, "step": 11223 }, { "epoch": 1.75, "learning_rate": 5.915197883707662e-06, "logits/chosen": -2.9801583290100098, "logits/rejected": -2.7912659645080566, "logps/chosen": -148.9708251953125, "logps/rejected": -219.5089111328125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.90627908706665, "rewards/margins": 6.472257137298584, "rewards/rejected": -11.378536224365234, "step": 11224 }, { "epoch": 1.75, "learning_rate": 5.914464443176514e-06, "logits/chosen": -3.0823581218719482, "logits/rejected": -2.705749034881592, "logps/chosen": -406.5462646484375, "logps/rejected": -378.85772705078125, "loss": 0.3321, "rewards/accuracies": 1.0, "rewards/chosen": -6.201849937438965, "rewards/margins": 5.3896284103393555, "rewards/rejected": -11.59147834777832, "step": 11225 }, { "epoch": 1.75, "learning_rate": 5.913731002645366e-06, "logits/chosen": -2.051513910293579, "logits/rejected": -2.832172393798828, "logps/chosen": -244.06753540039062, "logps/rejected": -404.89263916015625, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -5.480783462524414, "rewards/margins": 7.835710525512695, "rewards/rejected": -13.31649398803711, "step": 11226 }, { "epoch": 1.75, "learning_rate": 5.9129975621142175e-06, "logits/chosen": -2.798487663269043, "logits/rejected": -2.3296515941619873, "logps/chosen": -282.408447265625, "logps/rejected": -284.3017578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.7150559425354, "rewards/margins": 8.876749038696289, "rewards/rejected": -14.591804504394531, "step": 11227 }, { "epoch": 1.75, "learning_rate": 5.91226412158307e-06, "logits/chosen": -2.259178876876831, "logits/rejected": -3.0071442127227783, "logps/chosen": -168.27273559570312, "logps/rejected": -300.6059265136719, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -3.5173180103302, "rewards/margins": 6.050792694091797, "rewards/rejected": -9.568111419677734, "step": 11228 }, { "epoch": 1.75, "learning_rate": 5.911530681051922e-06, "logits/chosen": -3.0322985649108887, "logits/rejected": -2.764817237854004, "logps/chosen": -177.511474609375, "logps/rejected": -102.18096160888672, "loss": 1.4621, "rewards/accuracies": 0.5, "rewards/chosen": -4.80088996887207, "rewards/margins": 2.004732370376587, "rewards/rejected": -6.805622100830078, "step": 11229 }, { "epoch": 1.75, "learning_rate": 5.910797240520774e-06, "logits/chosen": -2.9890804290771484, "logits/rejected": -2.6977245807647705, "logps/chosen": -391.6842041015625, "logps/rejected": -329.0308532714844, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.4382753372192383, "rewards/margins": 6.322416305541992, "rewards/rejected": -8.76069164276123, "step": 11230 }, { "epoch": 1.75, "learning_rate": 5.910063799989626e-06, "logits/chosen": -2.4081990718841553, "logits/rejected": -2.9447555541992188, "logps/chosen": -163.76565551757812, "logps/rejected": -209.53363037109375, "loss": 1.5687, "rewards/accuracies": 0.5, "rewards/chosen": -6.805380821228027, "rewards/margins": 0.6253228187561035, "rewards/rejected": -7.430704116821289, "step": 11231 }, { "epoch": 1.75, "learning_rate": 5.909330359458478e-06, "logits/chosen": -2.8506147861480713, "logits/rejected": -3.118206024169922, "logps/chosen": -427.3799743652344, "logps/rejected": -457.01092529296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.423876762390137, "rewards/margins": 8.068390846252441, "rewards/rejected": -12.492267608642578, "step": 11232 }, { "epoch": 1.75, "learning_rate": 5.9085969189273305e-06, "logits/chosen": -2.2782833576202393, "logits/rejected": -2.802424192428589, "logps/chosen": -393.4614562988281, "logps/rejected": -470.85198974609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.491328239440918, "rewards/margins": 9.16654109954834, "rewards/rejected": -13.657869338989258, "step": 11233 }, { "epoch": 1.75, "learning_rate": 5.907863478396182e-06, "logits/chosen": -1.996154546737671, "logits/rejected": -2.7777464389801025, "logps/chosen": -104.37761688232422, "logps/rejected": -217.82936096191406, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.5264503955841064, "rewards/margins": 6.753668785095215, "rewards/rejected": -10.280118942260742, "step": 11234 }, { "epoch": 1.75, "learning_rate": 5.907130037865034e-06, "logits/chosen": -2.1494154930114746, "logits/rejected": -2.8354673385620117, "logps/chosen": -83.83293151855469, "logps/rejected": -209.14822387695312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.408536911010742, "rewards/margins": 7.5984954833984375, "rewards/rejected": -11.00703239440918, "step": 11235 }, { "epoch": 1.75, "learning_rate": 5.906396597333886e-06, "logits/chosen": -1.8268346786499023, "logits/rejected": -2.810171604156494, "logps/chosen": -147.48089599609375, "logps/rejected": -434.8677978515625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.6637091636657715, "rewards/margins": 7.386818885803223, "rewards/rejected": -12.050527572631836, "step": 11236 }, { "epoch": 1.75, "learning_rate": 5.905663156802739e-06, "logits/chosen": -2.4640262126922607, "logits/rejected": -2.9570014476776123, "logps/chosen": -150.13414001464844, "logps/rejected": -449.8765869140625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.261575698852539, "rewards/margins": 8.774664878845215, "rewards/rejected": -13.036239624023438, "step": 11237 }, { "epoch": 1.75, "learning_rate": 5.904929716271591e-06, "logits/chosen": -1.396960735321045, "logits/rejected": -2.770984649658203, "logps/chosen": -174.8424072265625, "logps/rejected": -372.66461181640625, "loss": 1.3147, "rewards/accuracies": 0.5, "rewards/chosen": -6.278067111968994, "rewards/margins": 4.335997581481934, "rewards/rejected": -10.614065170288086, "step": 11238 }, { "epoch": 1.75, "learning_rate": 5.904196275740443e-06, "logits/chosen": -2.9976634979248047, "logits/rejected": -3.2254905700683594, "logps/chosen": -64.34600830078125, "logps/rejected": -295.8963928222656, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.605865955352783, "rewards/margins": 7.894074440002441, "rewards/rejected": -11.499940872192383, "step": 11239 }, { "epoch": 1.75, "learning_rate": 5.903462835209295e-06, "logits/chosen": -2.0882749557495117, "logits/rejected": -2.9538650512695312, "logps/chosen": -97.317138671875, "logps/rejected": -336.0114440917969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.6190314292907715, "rewards/margins": 7.832014083862305, "rewards/rejected": -12.451045989990234, "step": 11240 }, { "epoch": 1.75, "learning_rate": 5.902729394678147e-06, "logits/chosen": -1.0694116353988647, "logits/rejected": -2.981924295425415, "logps/chosen": -105.58406066894531, "logps/rejected": -572.911865234375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.1096296310424805, "rewards/margins": 6.120417594909668, "rewards/rejected": -11.230047225952148, "step": 11241 }, { "epoch": 1.75, "learning_rate": 5.901995954147e-06, "logits/chosen": -1.7773778438568115, "logits/rejected": -3.4300484657287598, "logps/chosen": -170.06808471679688, "logps/rejected": -475.45672607421875, "loss": 0.074, "rewards/accuracies": 1.0, "rewards/chosen": -6.234477996826172, "rewards/margins": 3.825589418411255, "rewards/rejected": -10.060068130493164, "step": 11242 }, { "epoch": 1.75, "learning_rate": 5.901262513615852e-06, "logits/chosen": -2.4859609603881836, "logits/rejected": -2.9989168643951416, "logps/chosen": -112.58326721191406, "logps/rejected": -289.9232482910156, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -5.659817695617676, "rewards/margins": 4.493691444396973, "rewards/rejected": -10.153509140014648, "step": 11243 }, { "epoch": 1.75, "learning_rate": 5.900529073084704e-06, "logits/chosen": -2.9506163597106934, "logits/rejected": -2.926743745803833, "logps/chosen": -174.6919708251953, "logps/rejected": -246.07208251953125, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -3.8193230628967285, "rewards/margins": 6.628541946411133, "rewards/rejected": -10.447864532470703, "step": 11244 }, { "epoch": 1.75, "learning_rate": 5.899795632553556e-06, "logits/chosen": -0.8604289889335632, "logits/rejected": -2.194281578063965, "logps/chosen": -157.26820373535156, "logps/rejected": -514.09619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.106472969055176, "rewards/margins": 12.043066024780273, "rewards/rejected": -17.149539947509766, "step": 11245 }, { "epoch": 1.75, "learning_rate": 5.899062192022408e-06, "logits/chosen": -2.6594343185424805, "logits/rejected": -1.9081716537475586, "logps/chosen": -280.2625732421875, "logps/rejected": -198.56187438964844, "loss": 0.8859, "rewards/accuracies": 0.5, "rewards/chosen": -4.974757194519043, "rewards/margins": 3.1861660480499268, "rewards/rejected": -8.16092300415039, "step": 11246 }, { "epoch": 1.75, "learning_rate": 5.89832875149126e-06, "logits/chosen": -2.672576904296875, "logits/rejected": -2.8394951820373535, "logps/chosen": -183.11557006835938, "logps/rejected": -303.95849609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.455130100250244, "rewards/margins": 8.711308479309082, "rewards/rejected": -13.166439056396484, "step": 11247 }, { "epoch": 1.75, "learning_rate": 5.897595310960112e-06, "logits/chosen": -2.926496982574463, "logits/rejected": -2.26403546333313, "logps/chosen": -429.873779296875, "logps/rejected": -99.65966796875, "loss": 0.8351, "rewards/accuracies": 0.5, "rewards/chosen": -6.874933242797852, "rewards/margins": 0.01937127113342285, "rewards/rejected": -6.894304275512695, "step": 11248 }, { "epoch": 1.75, "learning_rate": 5.896861870428964e-06, "logits/chosen": -2.607832908630371, "logits/rejected": -2.3062450885772705, "logps/chosen": -227.18142700195312, "logps/rejected": -208.70901489257812, "loss": 0.0298, "rewards/accuracies": 1.0, "rewards/chosen": -4.57254695892334, "rewards/margins": 5.114364147186279, "rewards/rejected": -9.686911582946777, "step": 11249 }, { "epoch": 1.75, "learning_rate": 5.896128429897816e-06, "logits/chosen": -1.9008101224899292, "logits/rejected": -2.6232573986053467, "logps/chosen": -268.76483154296875, "logps/rejected": -179.88095092773438, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -4.168950080871582, "rewards/margins": 3.94244647026062, "rewards/rejected": -8.111396789550781, "step": 11250 }, { "epoch": 1.75, "learning_rate": 5.8953949893666685e-06, "logits/chosen": -2.7154784202575684, "logits/rejected": -3.1021618843078613, "logps/chosen": -209.57864379882812, "logps/rejected": -407.9906005859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.0720977783203125, "rewards/margins": 8.651177406311035, "rewards/rejected": -10.723274230957031, "step": 11251 }, { "epoch": 1.75, "learning_rate": 5.8946615488355204e-06, "logits/chosen": -2.7554116249084473, "logits/rejected": -2.9514641761779785, "logps/chosen": -148.26068115234375, "logps/rejected": -287.47515869140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.047665119171143, "rewards/margins": 7.6337175369262695, "rewards/rejected": -13.68138313293457, "step": 11252 }, { "epoch": 1.75, "learning_rate": 5.893928108304372e-06, "logits/chosen": -2.5125105381011963, "logits/rejected": -1.1578481197357178, "logps/chosen": -191.60545349121094, "logps/rejected": -264.2498474121094, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": -3.7466139793395996, "rewards/margins": 5.860617637634277, "rewards/rejected": -9.607231140136719, "step": 11253 }, { "epoch": 1.75, "learning_rate": 5.893194667773224e-06, "logits/chosen": -2.9205007553100586, "logits/rejected": -2.839076519012451, "logps/chosen": -215.80474853515625, "logps/rejected": -299.39642333984375, "loss": 0.074, "rewards/accuracies": 1.0, "rewards/chosen": -4.662754058837891, "rewards/margins": 5.175610542297363, "rewards/rejected": -9.83836555480957, "step": 11254 }, { "epoch": 1.75, "learning_rate": 5.892461227242077e-06, "logits/chosen": -1.762105107307434, "logits/rejected": -2.838805675506592, "logps/chosen": -109.81869506835938, "logps/rejected": -245.3300018310547, "loss": 0.2041, "rewards/accuracies": 1.0, "rewards/chosen": -4.393350601196289, "rewards/margins": 2.316070556640625, "rewards/rejected": -6.709421157836914, "step": 11255 }, { "epoch": 1.75, "learning_rate": 5.891727786710929e-06, "logits/chosen": -2.951104164123535, "logits/rejected": -3.1085660457611084, "logps/chosen": -369.0082092285156, "logps/rejected": -281.3404541015625, "loss": 0.0284, "rewards/accuracies": 1.0, "rewards/chosen": -4.063368797302246, "rewards/margins": 3.606719970703125, "rewards/rejected": -7.670088768005371, "step": 11256 }, { "epoch": 1.75, "learning_rate": 5.8909943461797815e-06, "logits/chosen": -1.292117953300476, "logits/rejected": -2.609578847885132, "logps/chosen": -95.51951599121094, "logps/rejected": -342.6653137207031, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -5.4737138748168945, "rewards/margins": 4.837026596069336, "rewards/rejected": -10.31074047088623, "step": 11257 }, { "epoch": 1.75, "learning_rate": 5.890260905648633e-06, "logits/chosen": -2.1743197441101074, "logits/rejected": -3.195977210998535, "logps/chosen": -112.19607543945312, "logps/rejected": -420.4189453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.615450859069824, "rewards/margins": 10.747568130493164, "rewards/rejected": -14.363019943237305, "step": 11258 }, { "epoch": 1.75, "learning_rate": 5.889527465117485e-06, "logits/chosen": -3.163393497467041, "logits/rejected": -3.4208223819732666, "logps/chosen": -65.53665924072266, "logps/rejected": -165.42849731445312, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -5.064882278442383, "rewards/margins": 5.420859336853027, "rewards/rejected": -10.48574161529541, "step": 11259 }, { "epoch": 1.75, "learning_rate": 5.888794024586338e-06, "logits/chosen": -2.994408130645752, "logits/rejected": -2.963252544403076, "logps/chosen": -367.58636474609375, "logps/rejected": -276.504638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.397128105163574, "rewards/margins": 10.04572582244873, "rewards/rejected": -14.442853927612305, "step": 11260 }, { "epoch": 1.75, "learning_rate": 5.88806058405519e-06, "logits/chosen": -1.4843437671661377, "logits/rejected": -2.9459874629974365, "logps/chosen": -258.391845703125, "logps/rejected": -330.55322265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.896511077880859, "rewards/margins": 7.279862403869629, "rewards/rejected": -12.176373481750488, "step": 11261 }, { "epoch": 1.75, "learning_rate": 5.887327143524042e-06, "logits/chosen": -2.768653154373169, "logits/rejected": -2.943826198577881, "logps/chosen": -189.0675811767578, "logps/rejected": -259.2289733886719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.806692600250244, "rewards/margins": 9.69348430633545, "rewards/rejected": -14.500177383422852, "step": 11262 }, { "epoch": 1.75, "learning_rate": 5.886593702992894e-06, "logits/chosen": -2.8734076023101807, "logits/rejected": -2.220203399658203, "logps/chosen": -265.96502685546875, "logps/rejected": -362.1219177246094, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -4.76276969909668, "rewards/margins": 5.763973236083984, "rewards/rejected": -10.526742935180664, "step": 11263 }, { "epoch": 1.75, "learning_rate": 5.885860262461746e-06, "logits/chosen": -2.6014091968536377, "logits/rejected": -2.9234652519226074, "logps/chosen": -475.72747802734375, "logps/rejected": -525.5714721679688, "loss": 0.0636, "rewards/accuracies": 1.0, "rewards/chosen": -6.097196578979492, "rewards/margins": 4.194302558898926, "rewards/rejected": -10.291499137878418, "step": 11264 }, { "epoch": 1.75, "learning_rate": 5.885126821930598e-06, "logits/chosen": -3.079794406890869, "logits/rejected": -2.8621859550476074, "logps/chosen": -406.7976379394531, "logps/rejected": -311.4732666015625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.845438003540039, "rewards/margins": 7.62184476852417, "rewards/rejected": -12.467283248901367, "step": 11265 }, { "epoch": 1.75, "learning_rate": 5.88439338139945e-06, "logits/chosen": -2.644052505493164, "logits/rejected": -3.2237908840179443, "logps/chosen": -136.30294799804688, "logps/rejected": -357.0843505859375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.0173795223236084, "rewards/margins": 5.927538871765137, "rewards/rejected": -8.944917678833008, "step": 11266 }, { "epoch": 1.75, "learning_rate": 5.883659940868302e-06, "logits/chosen": -2.6714353561401367, "logits/rejected": -3.1609842777252197, "logps/chosen": -90.4211654663086, "logps/rejected": -227.16693115234375, "loss": 0.138, "rewards/accuracies": 1.0, "rewards/chosen": -2.7763137817382812, "rewards/margins": 5.983166694641113, "rewards/rejected": -8.759480476379395, "step": 11267 }, { "epoch": 1.75, "learning_rate": 5.882926500337155e-06, "logits/chosen": -2.839108467102051, "logits/rejected": -2.003610372543335, "logps/chosen": -730.6627197265625, "logps/rejected": -414.4864501953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.11967658996582, "rewards/margins": 10.17233943939209, "rewards/rejected": -17.292015075683594, "step": 11268 }, { "epoch": 1.75, "learning_rate": 5.882193059806007e-06, "logits/chosen": -2.9809045791625977, "logits/rejected": -2.6613986492156982, "logps/chosen": -176.57949829101562, "logps/rejected": -201.71377563476562, "loss": 0.155, "rewards/accuracies": 1.0, "rewards/chosen": -5.794188499450684, "rewards/margins": 2.7880592346191406, "rewards/rejected": -8.582247734069824, "step": 11269 }, { "epoch": 1.75, "learning_rate": 5.8814596192748585e-06, "logits/chosen": -2.059291124343872, "logits/rejected": -1.7087271213531494, "logps/chosen": -425.0404357910156, "logps/rejected": -429.62188720703125, "loss": 0.0497, "rewards/accuracies": 1.0, "rewards/chosen": -4.43010139465332, "rewards/margins": 8.603947639465332, "rewards/rejected": -13.034049034118652, "step": 11270 }, { "epoch": 1.75, "learning_rate": 5.88072617874371e-06, "logits/chosen": -2.8280375003814697, "logits/rejected": -2.425365924835205, "logps/chosen": -622.2337646484375, "logps/rejected": -709.8333740234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.478522300720215, "rewards/margins": 9.311694145202637, "rewards/rejected": -13.790216445922852, "step": 11271 }, { "epoch": 1.75, "learning_rate": 5.879992738212562e-06, "logits/chosen": -2.8307571411132812, "logits/rejected": -1.835350751876831, "logps/chosen": -1006.0498657226562, "logps/rejected": -578.9657592773438, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -7.228695869445801, "rewards/margins": 4.90416145324707, "rewards/rejected": -12.132857322692871, "step": 11272 }, { "epoch": 1.75, "learning_rate": 5.879259297681415e-06, "logits/chosen": -1.4977331161499023, "logits/rejected": -2.5976431369781494, "logps/chosen": -147.4203643798828, "logps/rejected": -382.32794189453125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.77850341796875, "rewards/margins": 8.660186767578125, "rewards/rejected": -11.438690185546875, "step": 11273 }, { "epoch": 1.75, "learning_rate": 5.878525857150268e-06, "logits/chosen": -2.781991481781006, "logits/rejected": -2.8281607627868652, "logps/chosen": -354.03790283203125, "logps/rejected": -337.5480041503906, "loss": 0.5846, "rewards/accuracies": 0.5, "rewards/chosen": -5.978018283843994, "rewards/margins": 7.797107219696045, "rewards/rejected": -13.775125503540039, "step": 11274 }, { "epoch": 1.75, "learning_rate": 5.8777924166191196e-06, "logits/chosen": -2.6478724479675293, "logits/rejected": -2.719745397567749, "logps/chosen": -201.6702423095703, "logps/rejected": -377.32537841796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.184143543243408, "rewards/margins": 9.211450576782227, "rewards/rejected": -12.395593643188477, "step": 11275 }, { "epoch": 1.75, "learning_rate": 5.8770589760879714e-06, "logits/chosen": -2.6991782188415527, "logits/rejected": -2.8962841033935547, "logps/chosen": -154.2361602783203, "logps/rejected": -288.5738525390625, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -3.139763832092285, "rewards/margins": 5.756206035614014, "rewards/rejected": -8.89596939086914, "step": 11276 }, { "epoch": 1.75, "learning_rate": 5.876325535556824e-06, "logits/chosen": -3.0436744689941406, "logits/rejected": -2.1040449142456055, "logps/chosen": -1118.09423828125, "logps/rejected": -723.1548461914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4075837135314941, "rewards/margins": 11.670466423034668, "rewards/rejected": -13.07805061340332, "step": 11277 }, { "epoch": 1.75, "learning_rate": 5.875592095025676e-06, "logits/chosen": -1.935586929321289, "logits/rejected": -3.0476913452148438, "logps/chosen": -336.9298095703125, "logps/rejected": -506.9658203125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.991575717926025, "rewards/margins": 7.555848598480225, "rewards/rejected": -13.54742431640625, "step": 11278 }, { "epoch": 1.75, "learning_rate": 5.874858654494528e-06, "logits/chosen": -2.8355159759521484, "logits/rejected": -2.876675605773926, "logps/chosen": -341.4199523925781, "logps/rejected": -488.63671875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.162660598754883, "rewards/margins": 6.3837690353393555, "rewards/rejected": -11.546428680419922, "step": 11279 }, { "epoch": 1.75, "learning_rate": 5.87412521396338e-06, "logits/chosen": -2.6586952209472656, "logits/rejected": -3.033219575881958, "logps/chosen": -313.0803527832031, "logps/rejected": -538.8612060546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.2186479568481445, "rewards/margins": 10.22931957244873, "rewards/rejected": -17.447967529296875, "step": 11280 }, { "epoch": 1.75, "learning_rate": 5.873391773432232e-06, "logits/chosen": -2.286755323410034, "logits/rejected": -3.1042935848236084, "logps/chosen": -94.61263275146484, "logps/rejected": -374.5572814941406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.074978828430176, "rewards/margins": 9.653900146484375, "rewards/rejected": -13.728878021240234, "step": 11281 }, { "epoch": 1.75, "learning_rate": 5.872658332901084e-06, "logits/chosen": -1.7954493761062622, "logits/rejected": -2.9289870262145996, "logps/chosen": -172.832763671875, "logps/rejected": -345.18731689453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.843779563903809, "rewards/margins": 8.308048248291016, "rewards/rejected": -15.151827812194824, "step": 11282 }, { "epoch": 1.75, "learning_rate": 5.871924892369936e-06, "logits/chosen": -2.489799976348877, "logits/rejected": -3.058462619781494, "logps/chosen": -194.45001220703125, "logps/rejected": -366.4072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.745915412902832, "rewards/margins": 9.599039077758789, "rewards/rejected": -14.344955444335938, "step": 11283 }, { "epoch": 1.75, "learning_rate": 5.871191451838788e-06, "logits/chosen": -1.4636799097061157, "logits/rejected": -3.032923698425293, "logps/chosen": -114.56596374511719, "logps/rejected": -509.25567626953125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.987201690673828, "rewards/margins": 7.39991569519043, "rewards/rejected": -11.387117385864258, "step": 11284 }, { "epoch": 1.76, "learning_rate": 5.87045801130764e-06, "logits/chosen": -2.7173328399658203, "logits/rejected": -2.9212663173675537, "logps/chosen": -138.357666015625, "logps/rejected": -203.67230224609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.096916675567627, "rewards/margins": 9.955013275146484, "rewards/rejected": -11.051929473876953, "step": 11285 }, { "epoch": 1.76, "learning_rate": 5.869724570776493e-06, "logits/chosen": -2.895010471343994, "logits/rejected": -2.878016948699951, "logps/chosen": -188.57354736328125, "logps/rejected": -262.5905456542969, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -4.157340049743652, "rewards/margins": 5.363956451416016, "rewards/rejected": -9.521296501159668, "step": 11286 }, { "epoch": 1.76, "learning_rate": 5.868991130245345e-06, "logits/chosen": -2.8923118114471436, "logits/rejected": -3.033710479736328, "logps/chosen": -380.1154479980469, "logps/rejected": -404.28521728515625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.2766406536102295, "rewards/margins": 8.83449935913086, "rewards/rejected": -12.111139297485352, "step": 11287 }, { "epoch": 1.76, "learning_rate": 5.8682576897141965e-06, "logits/chosen": -2.5373001098632812, "logits/rejected": -2.9472806453704834, "logps/chosen": -137.1305389404297, "logps/rejected": -276.82421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.7548460960388184, "rewards/margins": 8.851672172546387, "rewards/rejected": -11.606517791748047, "step": 11288 }, { "epoch": 1.76, "learning_rate": 5.867524249183048e-06, "logits/chosen": -2.8746132850646973, "logits/rejected": -2.7984206676483154, "logps/chosen": -341.3739929199219, "logps/rejected": -329.0713806152344, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": -3.8342208862304688, "rewards/margins": 2.6956422328948975, "rewards/rejected": -6.529863357543945, "step": 11289 }, { "epoch": 1.76, "learning_rate": 5.866790808651901e-06, "logits/chosen": -2.594987630844116, "logits/rejected": -3.005014181137085, "logps/chosen": -272.80780029296875, "logps/rejected": -497.5342102050781, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.55642032623291, "rewards/margins": 7.450998783111572, "rewards/rejected": -11.00741958618164, "step": 11290 }, { "epoch": 1.76, "learning_rate": 5.866057368120754e-06, "logits/chosen": -1.5750914812088013, "logits/rejected": -2.793549060821533, "logps/chosen": -96.4620361328125, "logps/rejected": -600.7913208007812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.672582149505615, "rewards/margins": 8.958864212036133, "rewards/rejected": -14.63144588470459, "step": 11291 }, { "epoch": 1.76, "learning_rate": 5.865323927589606e-06, "logits/chosen": -1.8008393049240112, "logits/rejected": -3.0497963428497314, "logps/chosen": -338.8232421875, "logps/rejected": -623.7603759765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.838961601257324, "rewards/margins": 8.469949722290039, "rewards/rejected": -14.308911323547363, "step": 11292 }, { "epoch": 1.76, "learning_rate": 5.864590487058458e-06, "logits/chosen": -1.8733521699905396, "logits/rejected": -3.3141841888427734, "logps/chosen": -75.8231201171875, "logps/rejected": -352.69024658203125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -4.3078107833862305, "rewards/margins": 7.413098335266113, "rewards/rejected": -11.720909118652344, "step": 11293 }, { "epoch": 1.76, "learning_rate": 5.8638570465273095e-06, "logits/chosen": -3.008718490600586, "logits/rejected": -3.2847900390625, "logps/chosen": -76.3163833618164, "logps/rejected": -259.97528076171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.096041202545166, "rewards/margins": 6.944900989532471, "rewards/rejected": -13.040942192077637, "step": 11294 }, { "epoch": 1.76, "learning_rate": 5.863123605996162e-06, "logits/chosen": -2.2921857833862305, "logits/rejected": -3.0336077213287354, "logps/chosen": -347.166748046875, "logps/rejected": -439.9886474609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.9879136085510254, "rewards/margins": 8.119241714477539, "rewards/rejected": -12.107154846191406, "step": 11295 }, { "epoch": 1.76, "learning_rate": 5.862390165465014e-06, "logits/chosen": -2.3190224170684814, "logits/rejected": -2.605175256729126, "logps/chosen": -176.20065307617188, "logps/rejected": -258.7235412597656, "loss": 0.1061, "rewards/accuracies": 1.0, "rewards/chosen": -4.069392681121826, "rewards/margins": 6.578182220458984, "rewards/rejected": -10.647575378417969, "step": 11296 }, { "epoch": 1.76, "learning_rate": 5.861656724933866e-06, "logits/chosen": -2.73545503616333, "logits/rejected": -2.7502458095550537, "logps/chosen": -289.26806640625, "logps/rejected": -294.2852478027344, "loss": 0.0917, "rewards/accuracies": 1.0, "rewards/chosen": -5.736804008483887, "rewards/margins": 7.3380126953125, "rewards/rejected": -13.074816703796387, "step": 11297 }, { "epoch": 1.76, "learning_rate": 5.860923284402718e-06, "logits/chosen": -2.6272149085998535, "logits/rejected": -2.7883126735687256, "logps/chosen": -264.36053466796875, "logps/rejected": -404.82806396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.725679397583008, "rewards/margins": 10.05559253692627, "rewards/rejected": -12.781271934509277, "step": 11298 }, { "epoch": 1.76, "learning_rate": 5.86018984387157e-06, "logits/chosen": -2.3103957176208496, "logits/rejected": -2.9268147945404053, "logps/chosen": -136.96876525878906, "logps/rejected": -562.341064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.160184621810913, "rewards/margins": 9.573875427246094, "rewards/rejected": -12.73405933380127, "step": 11299 }, { "epoch": 1.76, "learning_rate": 5.8594564033404224e-06, "logits/chosen": -2.806492805480957, "logits/rejected": -2.5260446071624756, "logps/chosen": -474.8423156738281, "logps/rejected": -428.8028259277344, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.554623603820801, "rewards/margins": 7.417784690856934, "rewards/rejected": -12.972408294677734, "step": 11300 }, { "epoch": 1.76, "learning_rate": 5.858722962809274e-06, "logits/chosen": -1.7512706518173218, "logits/rejected": -2.312394857406616, "logps/chosen": -52.8050422668457, "logps/rejected": -301.54498291015625, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -3.777337074279785, "rewards/margins": 6.869292259216309, "rewards/rejected": -10.646629333496094, "step": 11301 }, { "epoch": 1.76, "learning_rate": 5.857989522278126e-06, "logits/chosen": -2.9552078247070312, "logits/rejected": -2.972795009613037, "logps/chosen": -162.259033203125, "logps/rejected": -325.6487121582031, "loss": 0.3018, "rewards/accuracies": 1.0, "rewards/chosen": -7.204249382019043, "rewards/margins": 6.867001533508301, "rewards/rejected": -14.071250915527344, "step": 11302 }, { "epoch": 1.76, "learning_rate": 5.857256081746978e-06, "logits/chosen": -3.0126278400421143, "logits/rejected": -1.8880091905593872, "logps/chosen": -436.6946105957031, "logps/rejected": -228.96652221679688, "loss": 0.7929, "rewards/accuracies": 0.5, "rewards/chosen": -3.531897783279419, "rewards/margins": 2.5780189037323, "rewards/rejected": -6.109916687011719, "step": 11303 }, { "epoch": 1.76, "learning_rate": 5.856522641215831e-06, "logits/chosen": -2.355466842651367, "logits/rejected": -2.9185290336608887, "logps/chosen": -264.7992858886719, "logps/rejected": -368.2159118652344, "loss": 2.0743, "rewards/accuracies": 0.5, "rewards/chosen": -6.009115695953369, "rewards/margins": 4.816892623901367, "rewards/rejected": -10.826007843017578, "step": 11304 }, { "epoch": 1.76, "learning_rate": 5.855789200684683e-06, "logits/chosen": -3.0638139247894287, "logits/rejected": -2.9979212284088135, "logps/chosen": -368.90283203125, "logps/rejected": -473.9285888671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.113022804260254, "rewards/margins": 11.267436027526855, "rewards/rejected": -15.38045883178711, "step": 11305 }, { "epoch": 1.76, "learning_rate": 5.8550557601535346e-06, "logits/chosen": -2.2081212997436523, "logits/rejected": -2.6672592163085938, "logps/chosen": -90.1090087890625, "logps/rejected": -257.6348876953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.498035430908203, "rewards/margins": 9.416078567504883, "rewards/rejected": -12.914113998413086, "step": 11306 }, { "epoch": 1.76, "learning_rate": 5.854322319622387e-06, "logits/chosen": -2.5978264808654785, "logits/rejected": -3.001086950302124, "logps/chosen": -91.0009765625, "logps/rejected": -315.3651123046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.9854044914245605, "rewards/margins": 8.541521072387695, "rewards/rejected": -11.526926040649414, "step": 11307 }, { "epoch": 1.76, "learning_rate": 5.853588879091239e-06, "logits/chosen": -1.5839686393737793, "logits/rejected": -2.5008935928344727, "logps/chosen": -171.97509765625, "logps/rejected": -531.483642578125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -5.34959602355957, "rewards/margins": 9.818586349487305, "rewards/rejected": -15.168182373046875, "step": 11308 }, { "epoch": 1.76, "learning_rate": 5.852855438560092e-06, "logits/chosen": -2.1846728324890137, "logits/rejected": -2.787297248840332, "logps/chosen": -301.8634948730469, "logps/rejected": -391.7528381347656, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -4.2584638595581055, "rewards/margins": 7.506044387817383, "rewards/rejected": -11.764508247375488, "step": 11309 }, { "epoch": 1.76, "learning_rate": 5.852121998028944e-06, "logits/chosen": -1.9430500268936157, "logits/rejected": -2.7781982421875, "logps/chosen": -215.3040771484375, "logps/rejected": -467.5364990234375, "loss": 0.0752, "rewards/accuracies": 1.0, "rewards/chosen": -8.648275375366211, "rewards/margins": 4.61784029006958, "rewards/rejected": -13.266115188598633, "step": 11310 }, { "epoch": 1.76, "learning_rate": 5.851388557497796e-06, "logits/chosen": -1.47823965549469, "logits/rejected": -2.975679874420166, "logps/chosen": -73.09423065185547, "logps/rejected": -355.50830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8975768089294434, "rewards/margins": 8.910826683044434, "rewards/rejected": -12.808403015136719, "step": 11311 }, { "epoch": 1.76, "learning_rate": 5.8506551169666475e-06, "logits/chosen": -2.6724326610565186, "logits/rejected": -2.424393653869629, "logps/chosen": -100.05943298339844, "logps/rejected": -205.83360290527344, "loss": 1.4469, "rewards/accuracies": 0.5, "rewards/chosen": -4.206394672393799, "rewards/margins": 5.018825531005859, "rewards/rejected": -9.2252197265625, "step": 11312 }, { "epoch": 1.76, "learning_rate": 5.8499216764355e-06, "logits/chosen": -2.916912078857422, "logits/rejected": -2.7681353092193604, "logps/chosen": -156.23045349121094, "logps/rejected": -333.13616943359375, "loss": 0.6524, "rewards/accuracies": 0.5, "rewards/chosen": -5.743552207946777, "rewards/margins": 5.137548446655273, "rewards/rejected": -10.88110065460205, "step": 11313 }, { "epoch": 1.76, "learning_rate": 5.849188235904352e-06, "logits/chosen": -2.914011240005493, "logits/rejected": -2.9981701374053955, "logps/chosen": -325.6258239746094, "logps/rejected": -486.7690124511719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.2972235679626465, "rewards/margins": 7.905290603637695, "rewards/rejected": -12.2025146484375, "step": 11314 }, { "epoch": 1.76, "learning_rate": 5.848454795373204e-06, "logits/chosen": -1.1438695192337036, "logits/rejected": -2.7083425521850586, "logps/chosen": -151.50828552246094, "logps/rejected": -746.9052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.687544345855713, "rewards/margins": 12.725798606872559, "rewards/rejected": -16.41334342956543, "step": 11315 }, { "epoch": 1.76, "learning_rate": 5.847721354842056e-06, "logits/chosen": -2.9841182231903076, "logits/rejected": -2.2076058387756348, "logps/chosen": -411.78857421875, "logps/rejected": -354.6171875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -3.184445381164551, "rewards/margins": 4.644853591918945, "rewards/rejected": -7.829298973083496, "step": 11316 }, { "epoch": 1.76, "learning_rate": 5.846987914310909e-06, "logits/chosen": -2.5843019485473633, "logits/rejected": -3.109856367111206, "logps/chosen": -176.2434539794922, "logps/rejected": -205.13876342773438, "loss": 0.3664, "rewards/accuracies": 0.5, "rewards/chosen": -3.6114845275878906, "rewards/margins": 4.107409477233887, "rewards/rejected": -7.718893527984619, "step": 11317 }, { "epoch": 1.76, "learning_rate": 5.8462544737797605e-06, "logits/chosen": -3.1200127601623535, "logits/rejected": -2.692988634109497, "logps/chosen": -380.66607666015625, "logps/rejected": -373.54217529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4945083856582642, "rewards/margins": 11.024870872497559, "rewards/rejected": -12.519378662109375, "step": 11318 }, { "epoch": 1.76, "learning_rate": 5.845521033248612e-06, "logits/chosen": -1.7785876989364624, "logits/rejected": -2.808169364929199, "logps/chosen": -109.027587890625, "logps/rejected": -381.19940185546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.880119800567627, "rewards/margins": 9.394290924072266, "rewards/rejected": -12.27441120147705, "step": 11319 }, { "epoch": 1.76, "learning_rate": 5.844787592717464e-06, "logits/chosen": -2.496647596359253, "logits/rejected": -3.172211170196533, "logps/chosen": -250.61508178710938, "logps/rejected": -315.3907165527344, "loss": 2.1792, "rewards/accuracies": 0.5, "rewards/chosen": -5.946009159088135, "rewards/margins": 0.12476158142089844, "rewards/rejected": -6.070770740509033, "step": 11320 }, { "epoch": 1.76, "learning_rate": 5.844054152186316e-06, "logits/chosen": -1.648611068725586, "logits/rejected": -2.836759090423584, "logps/chosen": -95.02822875976562, "logps/rejected": -356.2715148925781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.385359048843384, "rewards/margins": 8.886420249938965, "rewards/rejected": -12.27177906036377, "step": 11321 }, { "epoch": 1.76, "learning_rate": 5.843320711655169e-06, "logits/chosen": -2.5260798931121826, "logits/rejected": -3.196053981781006, "logps/chosen": -88.0159912109375, "logps/rejected": -308.06658935546875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -4.205224990844727, "rewards/margins": 5.345160484313965, "rewards/rejected": -9.550385475158691, "step": 11322 }, { "epoch": 1.76, "learning_rate": 5.842587271124021e-06, "logits/chosen": -0.894108235836029, "logits/rejected": -2.628958225250244, "logps/chosen": -131.88235473632812, "logps/rejected": -433.89154052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8477444648742676, "rewards/margins": 11.330873489379883, "rewards/rejected": -15.178617477416992, "step": 11323 }, { "epoch": 1.76, "learning_rate": 5.8418538305928735e-06, "logits/chosen": -1.3837848901748657, "logits/rejected": -2.927354097366333, "logps/chosen": -103.79032897949219, "logps/rejected": -346.3258056640625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.7996726036071777, "rewards/margins": 8.69112777709961, "rewards/rejected": -11.490800857543945, "step": 11324 }, { "epoch": 1.76, "learning_rate": 5.841120390061725e-06, "logits/chosen": -2.63871431350708, "logits/rejected": -2.858013391494751, "logps/chosen": -142.06707763671875, "logps/rejected": -305.3229064941406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.3541648387908936, "rewards/margins": 8.439709663391113, "rewards/rejected": -9.793874740600586, "step": 11325 }, { "epoch": 1.76, "learning_rate": 5.840386949530578e-06, "logits/chosen": -2.5839102268218994, "logits/rejected": -2.9222829341888428, "logps/chosen": -59.17463684082031, "logps/rejected": -193.9337615966797, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.1913299560546875, "rewards/margins": 6.625084400177002, "rewards/rejected": -10.816413879394531, "step": 11326 }, { "epoch": 1.76, "learning_rate": 5.83965350899943e-06, "logits/chosen": -2.925039291381836, "logits/rejected": -2.92240834236145, "logps/chosen": -212.63970947265625, "logps/rejected": -266.5316162109375, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -5.381076812744141, "rewards/margins": 3.6017656326293945, "rewards/rejected": -8.982842445373535, "step": 11327 }, { "epoch": 1.76, "learning_rate": 5.838920068468282e-06, "logits/chosen": -2.7631967067718506, "logits/rejected": -2.5420384407043457, "logps/chosen": -128.03402709960938, "logps/rejected": -275.2611999511719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7013072967529297, "rewards/margins": 9.1220121383667, "rewards/rejected": -10.823319435119629, "step": 11328 }, { "epoch": 1.76, "learning_rate": 5.838186627937134e-06, "logits/chosen": -2.436373233795166, "logits/rejected": -3.0582070350646973, "logps/chosen": -402.4942321777344, "logps/rejected": -513.4212646484375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.993613243103027, "rewards/margins": 6.1725263595581055, "rewards/rejected": -11.166139602661133, "step": 11329 }, { "epoch": 1.76, "learning_rate": 5.8374531874059856e-06, "logits/chosen": -2.5936479568481445, "logits/rejected": -2.8550963401794434, "logps/chosen": -128.2506866455078, "logps/rejected": -236.47702026367188, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -3.714797019958496, "rewards/margins": 5.967455863952637, "rewards/rejected": -9.682252883911133, "step": 11330 }, { "epoch": 1.76, "learning_rate": 5.836719746874838e-06, "logits/chosen": -2.8092143535614014, "logits/rejected": -2.716015577316284, "logps/chosen": -559.628173828125, "logps/rejected": -504.9950256347656, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.288357734680176, "rewards/margins": 8.679198265075684, "rewards/rejected": -10.96755599975586, "step": 11331 }, { "epoch": 1.76, "learning_rate": 5.83598630634369e-06, "logits/chosen": -3.0379927158355713, "logits/rejected": -3.2358644008636475, "logps/chosen": -115.68775939941406, "logps/rejected": -270.5492248535156, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.7919209003448486, "rewards/margins": 7.75557279586792, "rewards/rejected": -10.547493934631348, "step": 11332 }, { "epoch": 1.76, "learning_rate": 5.835252865812542e-06, "logits/chosen": -1.7019047737121582, "logits/rejected": -2.836885929107666, "logps/chosen": -107.59484100341797, "logps/rejected": -215.521728515625, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -2.8727619647979736, "rewards/margins": 8.215490341186523, "rewards/rejected": -11.088253021240234, "step": 11333 }, { "epoch": 1.76, "learning_rate": 5.834519425281394e-06, "logits/chosen": -2.7375004291534424, "logits/rejected": -2.996319055557251, "logps/chosen": -101.90536499023438, "logps/rejected": -150.25347900390625, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -2.0472335815429688, "rewards/margins": 6.122175216674805, "rewards/rejected": -8.169408798217773, "step": 11334 }, { "epoch": 1.76, "learning_rate": 5.833785984750247e-06, "logits/chosen": -2.444422721862793, "logits/rejected": -2.811563491821289, "logps/chosen": -99.46510314941406, "logps/rejected": -188.2107696533203, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -3.700637102127075, "rewards/margins": 5.500823020935059, "rewards/rejected": -9.201459884643555, "step": 11335 }, { "epoch": 1.76, "learning_rate": 5.8330525442190985e-06, "logits/chosen": -2.700587749481201, "logits/rejected": -3.0613820552825928, "logps/chosen": -165.1673583984375, "logps/rejected": -293.873046875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.8557653427124023, "rewards/margins": 6.784256935119629, "rewards/rejected": -9.640022277832031, "step": 11336 }, { "epoch": 1.76, "learning_rate": 5.83231910368795e-06, "logits/chosen": -2.1791176795959473, "logits/rejected": -2.7061703205108643, "logps/chosen": -663.5828247070312, "logps/rejected": -631.41748046875, "loss": 1.0862, "rewards/accuracies": 0.5, "rewards/chosen": -5.506727695465088, "rewards/margins": 5.883437633514404, "rewards/rejected": -11.390165328979492, "step": 11337 }, { "epoch": 1.76, "learning_rate": 5.831585663156802e-06, "logits/chosen": -2.31508731842041, "logits/rejected": -2.800232172012329, "logps/chosen": -344.8753967285156, "logps/rejected": -498.24334716796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.855203628540039, "rewards/margins": 11.429939270019531, "rewards/rejected": -14.28514289855957, "step": 11338 }, { "epoch": 1.76, "learning_rate": 5.830852222625654e-06, "logits/chosen": -1.9003344774246216, "logits/rejected": -3.017646551132202, "logps/chosen": -139.09120178222656, "logps/rejected": -438.6362609863281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.212123394012451, "rewards/margins": 10.239641189575195, "rewards/rejected": -13.451764106750488, "step": 11339 }, { "epoch": 1.76, "learning_rate": 5.830118782094507e-06, "logits/chosen": -2.7644476890563965, "logits/rejected": -2.7511165142059326, "logps/chosen": -135.31675720214844, "logps/rejected": -224.51907348632812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.0199432373046875, "rewards/margins": 8.03307819366455, "rewards/rejected": -13.053021430969238, "step": 11340 }, { "epoch": 1.76, "learning_rate": 5.82938534156336e-06, "logits/chosen": -2.1763734817504883, "logits/rejected": -3.3536975383758545, "logps/chosen": -114.34049987792969, "logps/rejected": -353.01434326171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.434326410293579, "rewards/margins": 8.271626472473145, "rewards/rejected": -10.705952644348145, "step": 11341 }, { "epoch": 1.76, "learning_rate": 5.8286519010322115e-06, "logits/chosen": -2.9180052280426025, "logits/rejected": -3.117910623550415, "logps/chosen": -131.71389770507812, "logps/rejected": -374.34814453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.409069538116455, "rewards/margins": 10.656597137451172, "rewards/rejected": -14.065666198730469, "step": 11342 }, { "epoch": 1.76, "learning_rate": 5.827918460501063e-06, "logits/chosen": -2.8683924674987793, "logits/rejected": -1.9716840982437134, "logps/chosen": -214.2257843017578, "logps/rejected": -246.33416748046875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -2.849679470062256, "rewards/margins": 10.07752799987793, "rewards/rejected": -12.927207946777344, "step": 11343 }, { "epoch": 1.76, "learning_rate": 5.827185019969916e-06, "logits/chosen": -2.162370204925537, "logits/rejected": -3.1201791763305664, "logps/chosen": -287.2677917480469, "logps/rejected": -426.9234313964844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.3380303382873535, "rewards/margins": 6.746677875518799, "rewards/rejected": -9.084708213806152, "step": 11344 }, { "epoch": 1.76, "learning_rate": 5.826451579438768e-06, "logits/chosen": -2.883413553237915, "logits/rejected": -2.4916417598724365, "logps/chosen": -126.91326904296875, "logps/rejected": -343.729248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1213936805725098, "rewards/margins": 10.296560287475586, "rewards/rejected": -12.417953491210938, "step": 11345 }, { "epoch": 1.76, "learning_rate": 5.82571813890762e-06, "logits/chosen": -2.678755044937134, "logits/rejected": -2.8957393169403076, "logps/chosen": -744.3078002929688, "logps/rejected": -501.12762451171875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.762831926345825, "rewards/margins": 6.072514533996582, "rewards/rejected": -9.835346221923828, "step": 11346 }, { "epoch": 1.76, "learning_rate": 5.824984698376472e-06, "logits/chosen": -1.2375110387802124, "logits/rejected": -2.8384013175964355, "logps/chosen": -70.20028686523438, "logps/rejected": -279.36761474609375, "loss": 0.1926, "rewards/accuracies": 1.0, "rewards/chosen": -5.115179061889648, "rewards/margins": 2.3114888668060303, "rewards/rejected": -7.426668167114258, "step": 11347 }, { "epoch": 1.76, "learning_rate": 5.824251257845324e-06, "logits/chosen": -1.2613166570663452, "logits/rejected": -2.432584524154663, "logps/chosen": -140.97792053222656, "logps/rejected": -497.4391784667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7085680961608887, "rewards/margins": 11.40854263305664, "rewards/rejected": -14.117110252380371, "step": 11348 }, { "epoch": 1.77, "learning_rate": 5.823517817314176e-06, "logits/chosen": -2.3326754570007324, "logits/rejected": -2.8877205848693848, "logps/chosen": -240.2484130859375, "logps/rejected": -379.88873291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7208375930786133, "rewards/margins": 9.737993240356445, "rewards/rejected": -13.458829879760742, "step": 11349 }, { "epoch": 1.77, "learning_rate": 5.822784376783028e-06, "logits/chosen": -2.893305540084839, "logits/rejected": -2.478945732116699, "logps/chosen": -171.05609130859375, "logps/rejected": -188.39952087402344, "loss": 0.5091, "rewards/accuracies": 0.5, "rewards/chosen": -4.588338851928711, "rewards/margins": 4.054648399353027, "rewards/rejected": -8.642988204956055, "step": 11350 }, { "epoch": 1.77, "learning_rate": 5.82205093625188e-06, "logits/chosen": -2.730468988418579, "logits/rejected": -3.065129041671753, "logps/chosen": -112.03141021728516, "logps/rejected": -222.4063720703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.8869152069091797, "rewards/margins": 7.216963768005371, "rewards/rejected": -10.103879928588867, "step": 11351 }, { "epoch": 1.77, "learning_rate": 5.821317495720732e-06, "logits/chosen": -2.5929996967315674, "logits/rejected": -2.6628339290618896, "logps/chosen": -301.324462890625, "logps/rejected": -518.7931518554688, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -6.088658332824707, "rewards/margins": 7.373800277709961, "rewards/rejected": -13.462458610534668, "step": 11352 }, { "epoch": 1.77, "learning_rate": 5.820584055189585e-06, "logits/chosen": -3.1314263343811035, "logits/rejected": -2.5544400215148926, "logps/chosen": -409.39129638671875, "logps/rejected": -360.73974609375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.374722480773926, "rewards/margins": 7.171392440795898, "rewards/rejected": -11.546114921569824, "step": 11353 }, { "epoch": 1.77, "learning_rate": 5.819850614658437e-06, "logits/chosen": -2.4898838996887207, "logits/rejected": -2.9762566089630127, "logps/chosen": -235.22024536132812, "logps/rejected": -255.4300537109375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -4.459724426269531, "rewards/margins": 4.604861736297607, "rewards/rejected": -9.064586639404297, "step": 11354 }, { "epoch": 1.77, "learning_rate": 5.8191171741272885e-06, "logits/chosen": -2.0156166553497314, "logits/rejected": -3.0605504512786865, "logps/chosen": -144.53924560546875, "logps/rejected": -222.86935424804688, "loss": 1.0293, "rewards/accuracies": 0.5, "rewards/chosen": -4.037215709686279, "rewards/margins": 3.9865288734436035, "rewards/rejected": -8.023744583129883, "step": 11355 }, { "epoch": 1.77, "learning_rate": 5.81838373359614e-06, "logits/chosen": -2.2748661041259766, "logits/rejected": -2.7901108264923096, "logps/chosen": -108.44390869140625, "logps/rejected": -313.54754638671875, "loss": 0.0258, "rewards/accuracies": 1.0, "rewards/chosen": -5.307884216308594, "rewards/margins": 6.843626976013184, "rewards/rejected": -12.151512145996094, "step": 11356 }, { "epoch": 1.77, "learning_rate": 5.817650293064993e-06, "logits/chosen": -1.7793748378753662, "logits/rejected": -3.1521568298339844, "logps/chosen": -91.75320434570312, "logps/rejected": -433.9144287109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.9504170417785645, "rewards/margins": 9.79626750946045, "rewards/rejected": -12.746685028076172, "step": 11357 }, { "epoch": 1.77, "learning_rate": 5.816916852533846e-06, "logits/chosen": -1.5825589895248413, "logits/rejected": -2.830368757247925, "logps/chosen": -212.3403778076172, "logps/rejected": -525.10302734375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.378614902496338, "rewards/margins": 10.31744384765625, "rewards/rejected": -13.69605827331543, "step": 11358 }, { "epoch": 1.77, "learning_rate": 5.816183412002698e-06, "logits/chosen": -1.6908133029937744, "logits/rejected": -2.962050676345825, "logps/chosen": -57.278480529785156, "logps/rejected": -376.24468994140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.618640184402466, "rewards/margins": 7.988665580749512, "rewards/rejected": -10.607305526733398, "step": 11359 }, { "epoch": 1.77, "learning_rate": 5.8154499714715495e-06, "logits/chosen": -2.8278021812438965, "logits/rejected": -2.817835807800293, "logps/chosen": -198.73468017578125, "logps/rejected": -344.48236083984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.134847164154053, "rewards/margins": 7.642928600311279, "rewards/rejected": -12.777775764465332, "step": 11360 }, { "epoch": 1.77, "learning_rate": 5.814716530940401e-06, "logits/chosen": -2.8559627532958984, "logits/rejected": -1.8884477615356445, "logps/chosen": -399.126220703125, "logps/rejected": -469.22723388671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.080347537994385, "rewards/margins": 10.581550598144531, "rewards/rejected": -16.66189956665039, "step": 11361 }, { "epoch": 1.77, "learning_rate": 5.813983090409254e-06, "logits/chosen": -3.1187806129455566, "logits/rejected": -2.9943594932556152, "logps/chosen": -316.33734130859375, "logps/rejected": -254.18426513671875, "loss": 0.8992, "rewards/accuracies": 0.5, "rewards/chosen": -5.229622840881348, "rewards/margins": 2.8707728385925293, "rewards/rejected": -8.100395202636719, "step": 11362 }, { "epoch": 1.77, "learning_rate": 5.813249649878106e-06, "logits/chosen": -2.8020036220550537, "logits/rejected": -2.656669855117798, "logps/chosen": -330.19842529296875, "logps/rejected": -534.5350341796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.9421943426132202, "rewards/margins": 11.49624252319336, "rewards/rejected": -13.438436508178711, "step": 11363 }, { "epoch": 1.77, "learning_rate": 5.812516209346958e-06, "logits/chosen": -2.6993367671966553, "logits/rejected": -2.3367886543273926, "logps/chosen": -190.09014892578125, "logps/rejected": -214.839599609375, "loss": 0.3885, "rewards/accuracies": 0.5, "rewards/chosen": -3.334360122680664, "rewards/margins": 5.72109317779541, "rewards/rejected": -9.055453300476074, "step": 11364 }, { "epoch": 1.77, "learning_rate": 5.81178276881581e-06, "logits/chosen": -1.9884546995162964, "logits/rejected": -2.8656487464904785, "logps/chosen": -186.03826904296875, "logps/rejected": -362.55853271484375, "loss": 0.453, "rewards/accuracies": 0.5, "rewards/chosen": -5.020947456359863, "rewards/margins": 5.918979644775391, "rewards/rejected": -10.93992805480957, "step": 11365 }, { "epoch": 1.77, "learning_rate": 5.8110493282846625e-06, "logits/chosen": -1.7845370769500732, "logits/rejected": -2.7081642150878906, "logps/chosen": -125.36019134521484, "logps/rejected": -270.0096435546875, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": -3.928001880645752, "rewards/margins": 6.121102809906006, "rewards/rejected": -10.049104690551758, "step": 11366 }, { "epoch": 1.77, "learning_rate": 5.810315887753514e-06, "logits/chosen": -2.0944924354553223, "logits/rejected": -2.859060287475586, "logps/chosen": -199.9868927001953, "logps/rejected": -467.65618896484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.772870063781738, "rewards/margins": 9.999846458435059, "rewards/rejected": -16.772716522216797, "step": 11367 }, { "epoch": 1.77, "learning_rate": 5.809582447222366e-06, "logits/chosen": -1.4924474954605103, "logits/rejected": -3.026388645172119, "logps/chosen": -67.78932189941406, "logps/rejected": -606.339111328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.7721753120422363, "rewards/margins": 11.659089088439941, "rewards/rejected": -15.431264877319336, "step": 11368 }, { "epoch": 1.77, "learning_rate": 5.808849006691218e-06, "logits/chosen": -2.4498298168182373, "logits/rejected": -2.225642681121826, "logps/chosen": -433.0815124511719, "logps/rejected": -480.6027526855469, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -5.120999336242676, "rewards/margins": 6.999281406402588, "rewards/rejected": -12.120281219482422, "step": 11369 }, { "epoch": 1.77, "learning_rate": 5.80811556616007e-06, "logits/chosen": -2.9717719554901123, "logits/rejected": -2.4335756301879883, "logps/chosen": -134.8561248779297, "logps/rejected": -103.09249877929688, "loss": 0.9856, "rewards/accuracies": 0.5, "rewards/chosen": -5.026344299316406, "rewards/margins": 1.2695302963256836, "rewards/rejected": -6.29587459564209, "step": 11370 }, { "epoch": 1.77, "learning_rate": 5.807382125628923e-06, "logits/chosen": -1.9250226020812988, "logits/rejected": -2.817742109298706, "logps/chosen": -102.62744140625, "logps/rejected": -189.07211303710938, "loss": 0.4935, "rewards/accuracies": 0.5, "rewards/chosen": -3.9504916667938232, "rewards/margins": 1.5620627403259277, "rewards/rejected": -5.512554168701172, "step": 11371 }, { "epoch": 1.77, "learning_rate": 5.806648685097775e-06, "logits/chosen": -2.1517221927642822, "logits/rejected": -2.6847643852233887, "logps/chosen": -170.53750610351562, "logps/rejected": -314.7628479003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.141283988952637, "rewards/margins": 10.759173393249512, "rewards/rejected": -14.900457382202148, "step": 11372 }, { "epoch": 1.77, "learning_rate": 5.8059152445666265e-06, "logits/chosen": -3.088352918624878, "logits/rejected": -2.881826400756836, "logps/chosen": -170.6785888671875, "logps/rejected": -148.2155303955078, "loss": 0.6233, "rewards/accuracies": 0.5, "rewards/chosen": -4.317721366882324, "rewards/margins": 4.046621322631836, "rewards/rejected": -8.36434268951416, "step": 11373 }, { "epoch": 1.77, "learning_rate": 5.805181804035479e-06, "logits/chosen": -0.671068012714386, "logits/rejected": -2.4368247985839844, "logps/chosen": -104.95637512207031, "logps/rejected": -361.4827575683594, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -7.392343997955322, "rewards/margins": 4.8071393966674805, "rewards/rejected": -12.199483871459961, "step": 11374 }, { "epoch": 1.77, "learning_rate": 5.804448363504332e-06, "logits/chosen": -2.9440574645996094, "logits/rejected": -2.7881274223327637, "logps/chosen": -272.8245544433594, "logps/rejected": -161.0700225830078, "loss": 0.3367, "rewards/accuracies": 1.0, "rewards/chosen": -2.4740371704101562, "rewards/margins": 3.5234453678131104, "rewards/rejected": -5.997482776641846, "step": 11375 }, { "epoch": 1.77, "learning_rate": 5.803714922973184e-06, "logits/chosen": -1.2323944568634033, "logits/rejected": -2.873969793319702, "logps/chosen": -43.991600036621094, "logps/rejected": -502.68804931640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.6185457706451416, "rewards/margins": 11.779033660888672, "rewards/rejected": -14.397579193115234, "step": 11376 }, { "epoch": 1.77, "learning_rate": 5.802981482442036e-06, "logits/chosen": -2.328862428665161, "logits/rejected": -3.117168664932251, "logps/chosen": -87.60810852050781, "logps/rejected": -335.9432678222656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.7735848426818848, "rewards/margins": 8.339670181274414, "rewards/rejected": -11.11325454711914, "step": 11377 }, { "epoch": 1.77, "learning_rate": 5.802248041910888e-06, "logits/chosen": -2.0707483291625977, "logits/rejected": -2.6757326126098633, "logps/chosen": -620.0281982421875, "logps/rejected": -624.254150390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.0827765464782715, "rewards/margins": 7.510048866271973, "rewards/rejected": -14.592824935913086, "step": 11378 }, { "epoch": 1.77, "learning_rate": 5.8015146013797395e-06, "logits/chosen": -2.540428400039673, "logits/rejected": -2.8250670433044434, "logps/chosen": -145.303466796875, "logps/rejected": -386.9871520996094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.682121515274048, "rewards/margins": 8.670173645019531, "rewards/rejected": -12.352294921875, "step": 11379 }, { "epoch": 1.77, "learning_rate": 5.800781160848592e-06, "logits/chosen": -1.6052495241165161, "logits/rejected": -2.890831232070923, "logps/chosen": -315.2255859375, "logps/rejected": -750.4716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3418678045272827, "rewards/margins": 12.313671112060547, "rewards/rejected": -13.655538558959961, "step": 11380 }, { "epoch": 1.77, "learning_rate": 5.800047720317444e-06, "logits/chosen": -2.8001534938812256, "logits/rejected": -2.9735617637634277, "logps/chosen": -470.99493408203125, "logps/rejected": -455.5689697265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.171368598937988, "rewards/margins": 7.806698799133301, "rewards/rejected": -12.978067398071289, "step": 11381 }, { "epoch": 1.77, "learning_rate": 5.799314279786296e-06, "logits/chosen": -1.2315675020217896, "logits/rejected": -3.002878189086914, "logps/chosen": -114.38948822021484, "logps/rejected": -388.4852294921875, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -2.584470510482788, "rewards/margins": 7.288796424865723, "rewards/rejected": -9.87326717376709, "step": 11382 }, { "epoch": 1.77, "learning_rate": 5.798580839255148e-06, "logits/chosen": -2.5984561443328857, "logits/rejected": -2.8195786476135254, "logps/chosen": -269.273681640625, "logps/rejected": -253.60658264160156, "loss": 1.6995, "rewards/accuracies": 0.5, "rewards/chosen": -5.5824456214904785, "rewards/margins": 1.2110902070999146, "rewards/rejected": -6.7935357093811035, "step": 11383 }, { "epoch": 1.77, "learning_rate": 5.7978473987240005e-06, "logits/chosen": -1.8238729238510132, "logits/rejected": -2.8423171043395996, "logps/chosen": -106.59661865234375, "logps/rejected": -208.31800842285156, "loss": 0.1879, "rewards/accuracies": 1.0, "rewards/chosen": -8.056962966918945, "rewards/margins": 1.6120436191558838, "rewards/rejected": -9.66900634765625, "step": 11384 }, { "epoch": 1.77, "learning_rate": 5.7971139581928524e-06, "logits/chosen": -2.9749364852905273, "logits/rejected": -3.26117205619812, "logps/chosen": -151.94395446777344, "logps/rejected": -320.94842529296875, "loss": 0.1275, "rewards/accuracies": 1.0, "rewards/chosen": -3.2642087936401367, "rewards/margins": 4.7761383056640625, "rewards/rejected": -8.0403470993042, "step": 11385 }, { "epoch": 1.77, "learning_rate": 5.796380517661704e-06, "logits/chosen": -2.579413652420044, "logits/rejected": -2.8872573375701904, "logps/chosen": -510.45050048828125, "logps/rejected": -357.25299072265625, "loss": 0.6291, "rewards/accuracies": 0.5, "rewards/chosen": -4.241413116455078, "rewards/margins": 3.872316598892212, "rewards/rejected": -8.113729476928711, "step": 11386 }, { "epoch": 1.77, "learning_rate": 5.795647077130556e-06, "logits/chosen": -1.7797142267227173, "logits/rejected": -2.47806715965271, "logps/chosen": -396.3700866699219, "logps/rejected": -585.0718994140625, "loss": 0.158, "rewards/accuracies": 1.0, "rewards/chosen": -2.398188829421997, "rewards/margins": 11.74828052520752, "rewards/rejected": -14.146469116210938, "step": 11387 }, { "epoch": 1.77, "learning_rate": 5.794913636599408e-06, "logits/chosen": -2.6383373737335205, "logits/rejected": -2.458462953567505, "logps/chosen": -275.2565612792969, "logps/rejected": -539.32666015625, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -5.8599629402160645, "rewards/margins": 8.152177810668945, "rewards/rejected": -14.012140274047852, "step": 11388 }, { "epoch": 1.77, "learning_rate": 5.794180196068261e-06, "logits/chosen": -2.8531312942504883, "logits/rejected": -2.6758222579956055, "logps/chosen": -190.0885009765625, "logps/rejected": -236.7279510498047, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -3.0608763694763184, "rewards/margins": 5.54124116897583, "rewards/rejected": -8.602117538452148, "step": 11389 }, { "epoch": 1.77, "learning_rate": 5.793446755537113e-06, "logits/chosen": -0.9194186329841614, "logits/rejected": -2.128028631210327, "logps/chosen": -213.2576446533203, "logps/rejected": -663.44384765625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -3.206843852996826, "rewards/margins": 10.257224082946777, "rewards/rejected": -13.464067459106445, "step": 11390 }, { "epoch": 1.77, "learning_rate": 5.792713315005965e-06, "logits/chosen": -2.8879764080047607, "logits/rejected": -2.555300235748291, "logps/chosen": -462.2826843261719, "logps/rejected": -384.1266784667969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.038264274597168, "rewards/margins": 8.813735961914062, "rewards/rejected": -12.85200023651123, "step": 11391 }, { "epoch": 1.77, "learning_rate": 5.791979874474817e-06, "logits/chosen": -2.8804805278778076, "logits/rejected": -2.2346580028533936, "logps/chosen": -665.9754028320312, "logps/rejected": -470.8717041015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.72945499420166, "rewards/margins": 6.942649841308594, "rewards/rejected": -12.672104835510254, "step": 11392 }, { "epoch": 1.77, "learning_rate": 5.79124643394367e-06, "logits/chosen": -2.991739273071289, "logits/rejected": -2.6719322204589844, "logps/chosen": -1320.4825439453125, "logps/rejected": -932.796630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.010088920593262, "rewards/margins": 9.902578353881836, "rewards/rejected": -15.912668228149414, "step": 11393 }, { "epoch": 1.77, "learning_rate": 5.790512993412522e-06, "logits/chosen": -3.085858106613159, "logits/rejected": -2.5566823482513428, "logps/chosen": -284.9714660644531, "logps/rejected": -256.67767333984375, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -3.528341770172119, "rewards/margins": 5.05974817276001, "rewards/rejected": -8.588089942932129, "step": 11394 }, { "epoch": 1.77, "learning_rate": 5.789779552881374e-06, "logits/chosen": -3.2049148082733154, "logits/rejected": -2.4867324829101562, "logps/chosen": -490.5994873046875, "logps/rejected": -375.6477355957031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.3626909255981445, "rewards/margins": 8.129627227783203, "rewards/rejected": -12.492317199707031, "step": 11395 }, { "epoch": 1.77, "learning_rate": 5.789046112350226e-06, "logits/chosen": -2.8758585453033447, "logits/rejected": -2.716696262359619, "logps/chosen": -390.83660888671875, "logps/rejected": -435.94622802734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.6124215126037598, "rewards/margins": 7.636179447174072, "rewards/rejected": -11.248600959777832, "step": 11396 }, { "epoch": 1.77, "learning_rate": 5.7883126718190775e-06, "logits/chosen": -2.8380753993988037, "logits/rejected": -2.9981093406677246, "logps/chosen": -170.24996948242188, "logps/rejected": -312.7944030761719, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.4245407581329346, "rewards/margins": 6.985363006591797, "rewards/rejected": -9.409904479980469, "step": 11397 }, { "epoch": 1.77, "learning_rate": 5.78757923128793e-06, "logits/chosen": -2.6570322513580322, "logits/rejected": -2.9210269451141357, "logps/chosen": -129.6724395751953, "logps/rejected": -148.93206787109375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.8826335668563843, "rewards/margins": 6.795360565185547, "rewards/rejected": -8.677993774414062, "step": 11398 }, { "epoch": 1.77, "learning_rate": 5.786845790756782e-06, "logits/chosen": -2.8042962551116943, "logits/rejected": -2.5999033451080322, "logps/chosen": -199.753662109375, "logps/rejected": -218.14962768554688, "loss": 1.2327, "rewards/accuracies": 0.5, "rewards/chosen": -5.049989700317383, "rewards/margins": 1.473968267440796, "rewards/rejected": -6.523958206176758, "step": 11399 }, { "epoch": 1.77, "learning_rate": 5.786112350225634e-06, "logits/chosen": -2.774639368057251, "logits/rejected": -2.1988492012023926, "logps/chosen": -266.3507080078125, "logps/rejected": -389.363525390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.6550915241241455, "rewards/margins": 10.116183280944824, "rewards/rejected": -12.77127456665039, "step": 11400 }, { "epoch": 1.77, "learning_rate": 5.785378909694486e-06, "logits/chosen": -1.9469774961471558, "logits/rejected": -2.444289445877075, "logps/chosen": -249.95770263671875, "logps/rejected": -339.0873107910156, "loss": 0.1282, "rewards/accuracies": 1.0, "rewards/chosen": -4.665770530700684, "rewards/margins": 4.172607421875, "rewards/rejected": -8.838377952575684, "step": 11401 }, { "epoch": 1.77, "learning_rate": 5.784645469163339e-06, "logits/chosen": -2.5720183849334717, "logits/rejected": -3.1664323806762695, "logps/chosen": -60.20842742919922, "logps/rejected": -226.09246826171875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -3.7571420669555664, "rewards/margins": 4.6288604736328125, "rewards/rejected": -8.386002540588379, "step": 11402 }, { "epoch": 1.77, "learning_rate": 5.7839120286321905e-06, "logits/chosen": -2.230807065963745, "logits/rejected": -3.272854804992676, "logps/chosen": -224.80418395996094, "logps/rejected": -490.7192077636719, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.8728439807891846, "rewards/margins": 8.542537689208984, "rewards/rejected": -12.415380477905273, "step": 11403 }, { "epoch": 1.77, "learning_rate": 5.783178588101042e-06, "logits/chosen": -2.6780693531036377, "logits/rejected": -2.8488008975982666, "logps/chosen": -209.4457550048828, "logps/rejected": -371.6466064453125, "loss": 0.7636, "rewards/accuracies": 0.5, "rewards/chosen": -5.684293270111084, "rewards/margins": 4.6415557861328125, "rewards/rejected": -10.325849533081055, "step": 11404 }, { "epoch": 1.77, "learning_rate": 5.782445147569894e-06, "logits/chosen": -3.0708084106445312, "logits/rejected": -2.451738119125366, "logps/chosen": -298.04876708984375, "logps/rejected": -268.73443603515625, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -5.155016899108887, "rewards/margins": 4.89613676071167, "rewards/rejected": -10.051153182983398, "step": 11405 }, { "epoch": 1.77, "learning_rate": 5.781711707038747e-06, "logits/chosen": -2.307685375213623, "logits/rejected": -2.8051888942718506, "logps/chosen": -135.29067993164062, "logps/rejected": -176.80917358398438, "loss": 0.1424, "rewards/accuracies": 1.0, "rewards/chosen": -4.925812721252441, "rewards/margins": 2.926506519317627, "rewards/rejected": -7.85231876373291, "step": 11406 }, { "epoch": 1.77, "learning_rate": 5.780978266507599e-06, "logits/chosen": -2.474424362182617, "logits/rejected": -2.5509238243103027, "logps/chosen": -244.57882690429688, "logps/rejected": -482.40777587890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.926915168762207, "rewards/margins": 10.795087814331055, "rewards/rejected": -13.722002983093262, "step": 11407 }, { "epoch": 1.77, "learning_rate": 5.7802448259764516e-06, "logits/chosen": -2.4845917224884033, "logits/rejected": -2.922166347503662, "logps/chosen": -53.01837158203125, "logps/rejected": -295.0384521484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.7597838640213013, "rewards/margins": 8.138829231262207, "rewards/rejected": -9.898612976074219, "step": 11408 }, { "epoch": 1.77, "learning_rate": 5.7795113854453034e-06, "logits/chosen": -2.3604488372802734, "logits/rejected": -2.9147024154663086, "logps/chosen": -96.85688018798828, "logps/rejected": -333.9394836425781, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -4.988613605499268, "rewards/margins": 7.3461408615112305, "rewards/rejected": -12.33475399017334, "step": 11409 }, { "epoch": 1.77, "learning_rate": 5.778777944914155e-06, "logits/chosen": -2.525972843170166, "logits/rejected": -2.352708101272583, "logps/chosen": -571.125, "logps/rejected": -509.9764404296875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.3484482765197754, "rewards/margins": 7.8753838539123535, "rewards/rejected": -11.223832130432129, "step": 11410 }, { "epoch": 1.77, "learning_rate": 5.778044504383008e-06, "logits/chosen": -2.6282174587249756, "logits/rejected": -2.8402793407440186, "logps/chosen": -78.99967956542969, "logps/rejected": -196.76785278320312, "loss": 0.2385, "rewards/accuracies": 1.0, "rewards/chosen": -3.71724796295166, "rewards/margins": 1.9613471031188965, "rewards/rejected": -5.678595066070557, "step": 11411 }, { "epoch": 1.77, "learning_rate": 5.77731106385186e-06, "logits/chosen": -2.975578784942627, "logits/rejected": -3.034196376800537, "logps/chosen": -586.5999755859375, "logps/rejected": -524.78564453125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -4.208003044128418, "rewards/margins": 6.169031143188477, "rewards/rejected": -10.377034187316895, "step": 11412 }, { "epoch": 1.77, "learning_rate": 5.776577623320712e-06, "logits/chosen": -2.1861934661865234, "logits/rejected": -2.9528372287750244, "logps/chosen": -204.72560119628906, "logps/rejected": -379.49658203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.593785285949707, "rewards/margins": 7.8097662925720215, "rewards/rejected": -13.40355110168457, "step": 11413 }, { "epoch": 1.78, "learning_rate": 5.775844182789564e-06, "logits/chosen": -3.080440044403076, "logits/rejected": -3.211923599243164, "logps/chosen": -246.56216430664062, "logps/rejected": -272.52142333984375, "loss": 1.0494, "rewards/accuracies": 0.5, "rewards/chosen": -3.953263282775879, "rewards/margins": 3.0887207984924316, "rewards/rejected": -7.0419840812683105, "step": 11414 }, { "epoch": 1.78, "learning_rate": 5.775110742258416e-06, "logits/chosen": -2.9287524223327637, "logits/rejected": -2.3745687007904053, "logps/chosen": -344.18780517578125, "logps/rejected": -178.193115234375, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": -3.4671630859375, "rewards/margins": 3.4004898071289062, "rewards/rejected": -6.867652893066406, "step": 11415 }, { "epoch": 1.78, "learning_rate": 5.774377301727268e-06, "logits/chosen": -2.701476573944092, "logits/rejected": -2.8864481449127197, "logps/chosen": -92.19132995605469, "logps/rejected": -202.0464324951172, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -2.8103535175323486, "rewards/margins": 6.808066368103027, "rewards/rejected": -9.618419647216797, "step": 11416 }, { "epoch": 1.78, "learning_rate": 5.77364386119612e-06, "logits/chosen": -2.7865583896636963, "logits/rejected": -2.8575940132141113, "logps/chosen": -149.3597412109375, "logps/rejected": -162.02450561523438, "loss": 0.1636, "rewards/accuracies": 1.0, "rewards/chosen": -4.8498125076293945, "rewards/margins": 2.080986499786377, "rewards/rejected": -6.930798530578613, "step": 11417 }, { "epoch": 1.78, "learning_rate": 5.772910420664972e-06, "logits/chosen": -2.9975414276123047, "logits/rejected": -2.759300470352173, "logps/chosen": -375.70745849609375, "logps/rejected": -310.6614990234375, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -3.1872682571411133, "rewards/margins": 4.283945560455322, "rewards/rejected": -7.4712138175964355, "step": 11418 }, { "epoch": 1.78, "learning_rate": 5.772176980133824e-06, "logits/chosen": -3.0300633907318115, "logits/rejected": -2.8486289978027344, "logps/chosen": -294.8463134765625, "logps/rejected": -334.3978271484375, "loss": 0.1655, "rewards/accuracies": 1.0, "rewards/chosen": -5.821704387664795, "rewards/margins": 4.82401180267334, "rewards/rejected": -10.645715713500977, "step": 11419 }, { "epoch": 1.78, "learning_rate": 5.771443539602677e-06, "logits/chosen": -1.956496238708496, "logits/rejected": -3.089740753173828, "logps/chosen": -179.92747497558594, "logps/rejected": -413.44158935546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.1812257766723633, "rewards/margins": 8.245532989501953, "rewards/rejected": -11.4267578125, "step": 11420 }, { "epoch": 1.78, "learning_rate": 5.7707100990715285e-06, "logits/chosen": -2.074617624282837, "logits/rejected": -3.114323616027832, "logps/chosen": -187.95068359375, "logps/rejected": -380.43267822265625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.881277084350586, "rewards/margins": 6.53562068939209, "rewards/rejected": -10.416897773742676, "step": 11421 }, { "epoch": 1.78, "learning_rate": 5.76997665854038e-06, "logits/chosen": -2.4782652854919434, "logits/rejected": -3.022322177886963, "logps/chosen": -123.87728881835938, "logps/rejected": -452.14666748046875, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -5.5275678634643555, "rewards/margins": 6.573388576507568, "rewards/rejected": -12.100955963134766, "step": 11422 }, { "epoch": 1.78, "learning_rate": 5.769243218009232e-06, "logits/chosen": -3.2682738304138184, "logits/rejected": -2.0182764530181885, "logps/chosen": -463.1018981933594, "logps/rejected": -238.3236846923828, "loss": 3.0659, "rewards/accuracies": 0.5, "rewards/chosen": -8.031318664550781, "rewards/margins": 1.9036152362823486, "rewards/rejected": -9.93493366241455, "step": 11423 }, { "epoch": 1.78, "learning_rate": 5.768509777478085e-06, "logits/chosen": -2.852306842803955, "logits/rejected": -2.921797513961792, "logps/chosen": -319.71734619140625, "logps/rejected": -323.87884521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0172669887542725, "rewards/margins": 10.267921447753906, "rewards/rejected": -13.285188674926758, "step": 11424 }, { "epoch": 1.78, "learning_rate": 5.767776336946938e-06, "logits/chosen": -2.462345838546753, "logits/rejected": -2.886427164077759, "logps/chosen": -461.61114501953125, "logps/rejected": -525.3602294921875, "loss": 1.6124, "rewards/accuracies": 0.5, "rewards/chosen": -6.593559265136719, "rewards/margins": 3.0266847610473633, "rewards/rejected": -9.620244026184082, "step": 11425 }, { "epoch": 1.78, "learning_rate": 5.76704289641579e-06, "logits/chosen": -3.042217969894409, "logits/rejected": -3.0637307167053223, "logps/chosen": -163.1471405029297, "logps/rejected": -263.87030029296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.212479114532471, "rewards/margins": 8.278051376342773, "rewards/rejected": -12.490530014038086, "step": 11426 }, { "epoch": 1.78, "learning_rate": 5.7663094558846415e-06, "logits/chosen": -2.7017388343811035, "logits/rejected": -3.0054802894592285, "logps/chosen": -421.8932189941406, "logps/rejected": -505.6068115234375, "loss": 0.5121, "rewards/accuracies": 0.5, "rewards/chosen": -5.07866096496582, "rewards/margins": 3.1704585552215576, "rewards/rejected": -8.249119758605957, "step": 11427 }, { "epoch": 1.78, "learning_rate": 5.765576015353493e-06, "logits/chosen": -2.425368547439575, "logits/rejected": -2.0944371223449707, "logps/chosen": -192.88973999023438, "logps/rejected": -271.71759033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9084348678588867, "rewards/margins": 10.217489242553711, "rewards/rejected": -14.125925064086914, "step": 11428 }, { "epoch": 1.78, "learning_rate": 5.764842574822346e-06, "logits/chosen": -3.098855972290039, "logits/rejected": -2.4876136779785156, "logps/chosen": -324.0233459472656, "logps/rejected": -157.08642578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 0.1144920289516449, "rewards/margins": 8.77963924407959, "rewards/rejected": -8.665146827697754, "step": 11429 }, { "epoch": 1.78, "learning_rate": 5.764109134291198e-06, "logits/chosen": -2.655458450317383, "logits/rejected": -2.887812614440918, "logps/chosen": -329.03973388671875, "logps/rejected": -399.0761413574219, "loss": 0.6642, "rewards/accuracies": 0.5, "rewards/chosen": -6.882181644439697, "rewards/margins": 1.7416977882385254, "rewards/rejected": -8.623879432678223, "step": 11430 }, { "epoch": 1.78, "learning_rate": 5.76337569376005e-06, "logits/chosen": -2.59995436668396, "logits/rejected": -1.9384374618530273, "logps/chosen": -520.2391357421875, "logps/rejected": -387.2292175292969, "loss": 0.1167, "rewards/accuracies": 1.0, "rewards/chosen": -3.4771742820739746, "rewards/margins": 5.397665500640869, "rewards/rejected": -8.874839782714844, "step": 11431 }, { "epoch": 1.78, "learning_rate": 5.762642253228902e-06, "logits/chosen": -2.248884439468384, "logits/rejected": -2.9113590717315674, "logps/chosen": -130.9530029296875, "logps/rejected": -171.02476501464844, "loss": 2.1702, "rewards/accuracies": 0.5, "rewards/chosen": -6.435123443603516, "rewards/margins": 0.6916882991790771, "rewards/rejected": -7.126811981201172, "step": 11432 }, { "epoch": 1.78, "learning_rate": 5.7619088126977544e-06, "logits/chosen": -2.8288910388946533, "logits/rejected": -2.93723201751709, "logps/chosen": -400.3084716796875, "logps/rejected": -420.25421142578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.2294514179229736, "rewards/margins": 8.138535499572754, "rewards/rejected": -10.367986679077148, "step": 11433 }, { "epoch": 1.78, "learning_rate": 5.761175372166606e-06, "logits/chosen": -2.758406162261963, "logits/rejected": -3.3503427505493164, "logps/chosen": -171.90528869628906, "logps/rejected": -526.2027587890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.7446746826171875, "rewards/margins": 12.152461051940918, "rewards/rejected": -16.897136688232422, "step": 11434 }, { "epoch": 1.78, "learning_rate": 5.760441931635458e-06, "logits/chosen": -2.2128889560699463, "logits/rejected": -2.8646249771118164, "logps/chosen": -154.89358520507812, "logps/rejected": -231.99595642089844, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": -5.152698516845703, "rewards/margins": 4.202939987182617, "rewards/rejected": -9.35563850402832, "step": 11435 }, { "epoch": 1.78, "learning_rate": 5.75970849110431e-06, "logits/chosen": -2.8665244579315186, "logits/rejected": -2.670137643814087, "logps/chosen": -192.49903869628906, "logps/rejected": -225.34608459472656, "loss": 3.5233, "rewards/accuracies": 0.5, "rewards/chosen": -6.248951435089111, "rewards/margins": 2.1168041229248047, "rewards/rejected": -8.365755081176758, "step": 11436 }, { "epoch": 1.78, "learning_rate": 5.758975050573162e-06, "logits/chosen": -2.7094149589538574, "logits/rejected": -2.0560009479522705, "logps/chosen": -521.2339477539062, "logps/rejected": -522.6723022460938, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.1031389236450195, "rewards/margins": 11.22158432006836, "rewards/rejected": -15.324722290039062, "step": 11437 }, { "epoch": 1.78, "learning_rate": 5.758241610042015e-06, "logits/chosen": -2.788449287414551, "logits/rejected": -2.4966588020324707, "logps/chosen": -422.11541748046875, "logps/rejected": -424.212890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.3182923793792725, "rewards/margins": 7.591160774230957, "rewards/rejected": -9.909452438354492, "step": 11438 }, { "epoch": 1.78, "learning_rate": 5.7575081695108666e-06, "logits/chosen": -2.2242915630340576, "logits/rejected": -2.4887185096740723, "logps/chosen": -134.52574157714844, "logps/rejected": -240.59185791015625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.912832736968994, "rewards/margins": 5.485299110412598, "rewards/rejected": -9.39813232421875, "step": 11439 }, { "epoch": 1.78, "learning_rate": 5.7567747289797184e-06, "logits/chosen": -1.677475094795227, "logits/rejected": -2.6869802474975586, "logps/chosen": -105.08590698242188, "logps/rejected": -362.1508483886719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.4978652000427246, "rewards/margins": 7.9045844078063965, "rewards/rejected": -11.402449607849121, "step": 11440 }, { "epoch": 1.78, "learning_rate": 5.756041288448571e-06, "logits/chosen": -2.4951934814453125, "logits/rejected": -2.764566659927368, "logps/chosen": -242.0497589111328, "logps/rejected": -217.57518005371094, "loss": 0.8909, "rewards/accuracies": 0.5, "rewards/chosen": -3.9681596755981445, "rewards/margins": 5.784181594848633, "rewards/rejected": -9.752341270446777, "step": 11441 }, { "epoch": 1.78, "learning_rate": 5.755307847917424e-06, "logits/chosen": -1.561147928237915, "logits/rejected": -2.7314021587371826, "logps/chosen": -73.73021697998047, "logps/rejected": -416.90460205078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.2377243041992188, "rewards/margins": 9.07677173614502, "rewards/rejected": -12.314496040344238, "step": 11442 }, { "epoch": 1.78, "learning_rate": 5.754574407386276e-06, "logits/chosen": -2.6849567890167236, "logits/rejected": -3.0728402137756348, "logps/chosen": -324.3011169433594, "logps/rejected": -448.2032775878906, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -3.6422996520996094, "rewards/margins": 5.036344051361084, "rewards/rejected": -8.678644180297852, "step": 11443 }, { "epoch": 1.78, "learning_rate": 5.753840966855128e-06, "logits/chosen": -3.3652091026306152, "logits/rejected": -3.120114803314209, "logps/chosen": -544.5232543945312, "logps/rejected": -459.6258544921875, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -1.4931293725967407, "rewards/margins": 6.292288303375244, "rewards/rejected": -7.785417556762695, "step": 11444 }, { "epoch": 1.78, "learning_rate": 5.7531075263239795e-06, "logits/chosen": -1.8623183965682983, "logits/rejected": -2.5575947761535645, "logps/chosen": -133.75584411621094, "logps/rejected": -341.8109436035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0241971015930176, "rewards/margins": 15.343877792358398, "rewards/rejected": -17.368074417114258, "step": 11445 }, { "epoch": 1.78, "learning_rate": 5.752374085792831e-06, "logits/chosen": -2.0603573322296143, "logits/rejected": -2.6359665393829346, "logps/chosen": -111.12026977539062, "logps/rejected": -243.97520446777344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.7508834600448608, "rewards/margins": 9.079351425170898, "rewards/rejected": -10.830235481262207, "step": 11446 }, { "epoch": 1.78, "learning_rate": 5.751640645261684e-06, "logits/chosen": -2.2738747596740723, "logits/rejected": -3.2094271183013916, "logps/chosen": -148.84046936035156, "logps/rejected": -229.3236846923828, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.068176746368408, "rewards/margins": 5.461474418640137, "rewards/rejected": -8.529650688171387, "step": 11447 }, { "epoch": 1.78, "learning_rate": 5.750907204730536e-06, "logits/chosen": -1.6637332439422607, "logits/rejected": -2.899005651473999, "logps/chosen": -81.86509704589844, "logps/rejected": -290.5986022949219, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -5.559868812561035, "rewards/margins": 6.541194915771484, "rewards/rejected": -12.101064682006836, "step": 11448 }, { "epoch": 1.78, "learning_rate": 5.750173764199388e-06, "logits/chosen": -2.7479407787323, "logits/rejected": -3.0267815589904785, "logps/chosen": -134.02774047851562, "logps/rejected": -293.784912109375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -1.7993228435516357, "rewards/margins": 7.510893821716309, "rewards/rejected": -9.310216903686523, "step": 11449 }, { "epoch": 1.78, "learning_rate": 5.74944032366824e-06, "logits/chosen": -2.1715621948242188, "logits/rejected": -2.5152175426483154, "logps/chosen": -277.52978515625, "logps/rejected": -359.59893798828125, "loss": 0.0699, "rewards/accuracies": 1.0, "rewards/chosen": -5.502203464508057, "rewards/margins": 5.713983535766602, "rewards/rejected": -11.216187477111816, "step": 11450 }, { "epoch": 1.78, "learning_rate": 5.7487068831370925e-06, "logits/chosen": -2.685379981994629, "logits/rejected": -1.8001459836959839, "logps/chosen": -192.63082885742188, "logps/rejected": -172.69021606445312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.0087578296661377, "rewards/margins": 9.52144718170166, "rewards/rejected": -11.530204772949219, "step": 11451 }, { "epoch": 1.78, "learning_rate": 5.747973442605944e-06, "logits/chosen": -2.461454153060913, "logits/rejected": -2.867293119430542, "logps/chosen": -47.39222717285156, "logps/rejected": -325.840576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7974112033843994, "rewards/margins": 12.269173622131348, "rewards/rejected": -14.066585540771484, "step": 11452 }, { "epoch": 1.78, "learning_rate": 5.747240002074796e-06, "logits/chosen": -1.17422354221344, "logits/rejected": -2.8219611644744873, "logps/chosen": -201.48516845703125, "logps/rejected": -602.8162841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.116901397705078, "rewards/margins": 11.641450881958008, "rewards/rejected": -15.758353233337402, "step": 11453 }, { "epoch": 1.78, "learning_rate": 5.746506561543648e-06, "logits/chosen": -2.7616076469421387, "logits/rejected": -3.075529098510742, "logps/chosen": -80.10806274414062, "logps/rejected": -275.20574951171875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.27978253364563, "rewards/margins": 5.742620944976807, "rewards/rejected": -9.022403717041016, "step": 11454 }, { "epoch": 1.78, "learning_rate": 5.745773121012501e-06, "logits/chosen": -1.4868748188018799, "logits/rejected": -2.295102596282959, "logps/chosen": -187.6950225830078, "logps/rejected": -372.86199951171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7120654582977295, "rewards/margins": 10.093485832214355, "rewards/rejected": -12.805551528930664, "step": 11455 }, { "epoch": 1.78, "learning_rate": 5.745039680481353e-06, "logits/chosen": -3.06017804145813, "logits/rejected": -2.951648235321045, "logps/chosen": -150.38113403320312, "logps/rejected": -284.2463684082031, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -3.4976208209991455, "rewards/margins": 6.9808268547058105, "rewards/rejected": -10.478447914123535, "step": 11456 }, { "epoch": 1.78, "learning_rate": 5.744306239950205e-06, "logits/chosen": -1.339310884475708, "logits/rejected": -2.470531463623047, "logps/chosen": -597.006103515625, "logps/rejected": -654.4270629882812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8755135536193848, "rewards/margins": 9.596734046936035, "rewards/rejected": -13.472248077392578, "step": 11457 }, { "epoch": 1.78, "learning_rate": 5.743572799419057e-06, "logits/chosen": -2.8449289798736572, "logits/rejected": -2.7845823764801025, "logps/chosen": -104.99549865722656, "logps/rejected": -297.3853759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6602582931518555, "rewards/margins": 11.840581893920898, "rewards/rejected": -14.500840187072754, "step": 11458 }, { "epoch": 1.78, "learning_rate": 5.742839358887909e-06, "logits/chosen": -2.7075562477111816, "logits/rejected": -3.0110530853271484, "logps/chosen": -346.97540283203125, "logps/rejected": -542.6431274414062, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.930433750152588, "rewards/margins": 9.263174057006836, "rewards/rejected": -13.193608283996582, "step": 11459 }, { "epoch": 1.78, "learning_rate": 5.742105918356762e-06, "logits/chosen": -2.924511432647705, "logits/rejected": -2.704380750656128, "logps/chosen": -372.26226806640625, "logps/rejected": -255.23898315429688, "loss": 0.1224, "rewards/accuracies": 1.0, "rewards/chosen": -4.187413215637207, "rewards/margins": 3.7998461723327637, "rewards/rejected": -7.9872589111328125, "step": 11460 }, { "epoch": 1.78, "learning_rate": 5.741372477825614e-06, "logits/chosen": -2.8803908824920654, "logits/rejected": -3.0042121410369873, "logps/chosen": -271.3551940917969, "logps/rejected": -254.33151245117188, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.289927959442139, "rewards/margins": 7.420769691467285, "rewards/rejected": -12.710698127746582, "step": 11461 }, { "epoch": 1.78, "learning_rate": 5.740639037294466e-06, "logits/chosen": -2.894693374633789, "logits/rejected": -2.843130588531494, "logps/chosen": -557.518798828125, "logps/rejected": -463.759033203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.8299311399459839, "rewards/margins": 7.441643238067627, "rewards/rejected": -9.271574020385742, "step": 11462 }, { "epoch": 1.78, "learning_rate": 5.7399055967633176e-06, "logits/chosen": -2.958801031112671, "logits/rejected": -2.5863234996795654, "logps/chosen": -220.26864624023438, "logps/rejected": -251.59515380859375, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -3.281278610229492, "rewards/margins": 6.260897636413574, "rewards/rejected": -9.542176246643066, "step": 11463 }, { "epoch": 1.78, "learning_rate": 5.73917215623217e-06, "logits/chosen": -2.3640084266662598, "logits/rejected": -2.8076162338256836, "logps/chosen": -621.81005859375, "logps/rejected": -652.7570190429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.676400899887085, "rewards/margins": 13.26590347290039, "rewards/rejected": -16.942304611206055, "step": 11464 }, { "epoch": 1.78, "learning_rate": 5.738438715701022e-06, "logits/chosen": -2.2511749267578125, "logits/rejected": -3.0771849155426025, "logps/chosen": -263.3230285644531, "logps/rejected": -453.9615478515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.833651542663574, "rewards/margins": 9.666440963745117, "rewards/rejected": -15.500093460083008, "step": 11465 }, { "epoch": 1.78, "learning_rate": 5.737705275169874e-06, "logits/chosen": -3.1200311183929443, "logits/rejected": -1.7921637296676636, "logps/chosen": -357.8056640625, "logps/rejected": -132.04043579101562, "loss": 1.5449, "rewards/accuracies": 0.5, "rewards/chosen": -7.301429271697998, "rewards/margins": -0.05340909957885742, "rewards/rejected": -7.248020172119141, "step": 11466 }, { "epoch": 1.78, "learning_rate": 5.736971834638726e-06, "logits/chosen": -0.8567434549331665, "logits/rejected": -3.040018081665039, "logps/chosen": -63.15131378173828, "logps/rejected": -391.51507568359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.0211329460144043, "rewards/margins": 9.036528587341309, "rewards/rejected": -12.057661056518555, "step": 11467 }, { "epoch": 1.78, "learning_rate": 5.736238394107578e-06, "logits/chosen": -1.6428804397583008, "logits/rejected": -2.199570655822754, "logps/chosen": -224.34881591796875, "logps/rejected": -428.80731201171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.551199436187744, "rewards/margins": 10.898157119750977, "rewards/rejected": -14.449356079101562, "step": 11468 }, { "epoch": 1.78, "learning_rate": 5.7355049535764305e-06, "logits/chosen": -2.6670408248901367, "logits/rejected": -1.6446552276611328, "logps/chosen": -471.38336181640625, "logps/rejected": -298.6719055175781, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.1606155633926392, "rewards/margins": 10.093424797058105, "rewards/rejected": -11.254039764404297, "step": 11469 }, { "epoch": 1.78, "learning_rate": 5.734771513045282e-06, "logits/chosen": -1.8990904092788696, "logits/rejected": -3.1147186756134033, "logps/chosen": -440.53240966796875, "logps/rejected": -593.7728881835938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7749660015106201, "rewards/margins": 11.705572128295898, "rewards/rejected": -13.480538368225098, "step": 11470 }, { "epoch": 1.78, "learning_rate": 5.734038072514134e-06, "logits/chosen": -2.9031331539154053, "logits/rejected": -2.221992254257202, "logps/chosen": -335.3594970703125, "logps/rejected": -338.51727294921875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -2.483536720275879, "rewards/margins": 5.3167924880981445, "rewards/rejected": -7.800329208374023, "step": 11471 }, { "epoch": 1.78, "learning_rate": 5.733304631982986e-06, "logits/chosen": -3.011028289794922, "logits/rejected": -3.3301398754119873, "logps/chosen": -101.94825744628906, "logps/rejected": -291.47052001953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.233616828918457, "rewards/margins": 7.992578506469727, "rewards/rejected": -11.226195335388184, "step": 11472 }, { "epoch": 1.78, "learning_rate": 5.732571191451839e-06, "logits/chosen": -1.9490162134170532, "logits/rejected": -2.927971601486206, "logps/chosen": -247.009521484375, "logps/rejected": -654.6923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.325821399688721, "rewards/margins": 11.718542098999023, "rewards/rejected": -16.044363021850586, "step": 11473 }, { "epoch": 1.78, "learning_rate": 5.731837750920691e-06, "logits/chosen": -3.1092987060546875, "logits/rejected": -2.962437629699707, "logps/chosen": -283.3736572265625, "logps/rejected": -343.61920166015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.8808741569519043, "rewards/margins": 7.974033355712891, "rewards/rejected": -11.854907989501953, "step": 11474 }, { "epoch": 1.78, "learning_rate": 5.7311043103895435e-06, "logits/chosen": -1.9549944400787354, "logits/rejected": -2.971609115600586, "logps/chosen": -303.6597900390625, "logps/rejected": -527.4742431640625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.0915915966033936, "rewards/margins": 7.587885856628418, "rewards/rejected": -10.67947769165039, "step": 11475 }, { "epoch": 1.78, "learning_rate": 5.730370869858395e-06, "logits/chosen": -2.549522876739502, "logits/rejected": -2.9253594875335693, "logps/chosen": -50.04786682128906, "logps/rejected": -229.73931884765625, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -2.696028709411621, "rewards/margins": 7.007534027099609, "rewards/rejected": -9.70356273651123, "step": 11476 }, { "epoch": 1.78, "learning_rate": 5.729637429327247e-06, "logits/chosen": -1.941094994544983, "logits/rejected": -2.741365909576416, "logps/chosen": -175.3629150390625, "logps/rejected": -247.7728271484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.257656097412109, "rewards/margins": 8.37368106842041, "rewards/rejected": -12.631336212158203, "step": 11477 }, { "epoch": 1.79, "learning_rate": 5.7289039887961e-06, "logits/chosen": -1.3386653661727905, "logits/rejected": -2.811502695083618, "logps/chosen": -95.75009155273438, "logps/rejected": -729.379638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6642203330993652, "rewards/margins": 12.726407051086426, "rewards/rejected": -15.390626907348633, "step": 11478 }, { "epoch": 1.79, "learning_rate": 5.728170548264952e-06, "logits/chosen": -3.0078866481781006, "logits/rejected": -2.991530179977417, "logps/chosen": -163.49822998046875, "logps/rejected": -260.9549865722656, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.335038423538208, "rewards/margins": 7.165210247039795, "rewards/rejected": -9.500248908996582, "step": 11479 }, { "epoch": 1.79, "learning_rate": 5.727437107733804e-06, "logits/chosen": -2.1175119876861572, "logits/rejected": -2.9734015464782715, "logps/chosen": -156.03245544433594, "logps/rejected": -323.0295715332031, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -3.4634885787963867, "rewards/margins": 6.8532514572143555, "rewards/rejected": -10.316740036010742, "step": 11480 }, { "epoch": 1.79, "learning_rate": 5.726703667202656e-06, "logits/chosen": -1.653005599975586, "logits/rejected": -2.724332809448242, "logps/chosen": -273.1629333496094, "logps/rejected": -756.2877197265625, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -6.148337364196777, "rewards/margins": 6.824727535247803, "rewards/rejected": -12.973064422607422, "step": 11481 }, { "epoch": 1.79, "learning_rate": 5.725970226671508e-06, "logits/chosen": -1.856618881225586, "logits/rejected": -2.684528350830078, "logps/chosen": -161.36077880859375, "logps/rejected": -316.08905029296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.247166633605957, "rewards/margins": 7.650731086730957, "rewards/rejected": -11.897897720336914, "step": 11482 }, { "epoch": 1.79, "learning_rate": 5.72523678614036e-06, "logits/chosen": -2.4112839698791504, "logits/rejected": -3.349782943725586, "logps/chosen": -209.269287109375, "logps/rejected": -572.1915893554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4521851539611816, "rewards/margins": 11.404903411865234, "rewards/rejected": -12.857089042663574, "step": 11483 }, { "epoch": 1.79, "learning_rate": 5.724503345609212e-06, "logits/chosen": -2.565650463104248, "logits/rejected": -2.843212842941284, "logps/chosen": -329.8132629394531, "logps/rejected": -453.00811767578125, "loss": 0.092, "rewards/accuracies": 1.0, "rewards/chosen": -4.60915994644165, "rewards/margins": 4.871940612792969, "rewards/rejected": -9.481100082397461, "step": 11484 }, { "epoch": 1.79, "learning_rate": 5.723769905078064e-06, "logits/chosen": -2.986521005630493, "logits/rejected": -2.2943503856658936, "logps/chosen": -263.96600341796875, "logps/rejected": -137.0385284423828, "loss": 0.2275, "rewards/accuracies": 1.0, "rewards/chosen": -5.208033561706543, "rewards/margins": 1.568274736404419, "rewards/rejected": -6.776308536529541, "step": 11485 }, { "epoch": 1.79, "learning_rate": 5.723036464546916e-06, "logits/chosen": -2.320016860961914, "logits/rejected": -2.79133939743042, "logps/chosen": -645.221435546875, "logps/rejected": -512.1607666015625, "loss": 4.1033, "rewards/accuracies": 0.5, "rewards/chosen": -8.5228271484375, "rewards/margins": 3.3892717361450195, "rewards/rejected": -11.91209888458252, "step": 11486 }, { "epoch": 1.79, "learning_rate": 5.7223030240157686e-06, "logits/chosen": -2.7358124256134033, "logits/rejected": -2.5131912231445312, "logps/chosen": -127.23689270019531, "logps/rejected": -314.951904296875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.4916253089904785, "rewards/margins": 8.425878524780273, "rewards/rejected": -10.91750431060791, "step": 11487 }, { "epoch": 1.79, "learning_rate": 5.7215695834846205e-06, "logits/chosen": -2.283540964126587, "logits/rejected": -2.8599281311035156, "logps/chosen": -321.16912841796875, "logps/rejected": -372.8933410644531, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -3.9862442016601562, "rewards/margins": 4.775577068328857, "rewards/rejected": -8.761821746826172, "step": 11488 }, { "epoch": 1.79, "learning_rate": 5.720836142953472e-06, "logits/chosen": -2.280761241912842, "logits/rejected": -2.5121307373046875, "logps/chosen": -252.29168701171875, "logps/rejected": -320.7857666015625, "loss": 0.6573, "rewards/accuracies": 0.5, "rewards/chosen": -6.162498474121094, "rewards/margins": 2.5927913188934326, "rewards/rejected": -8.755290031433105, "step": 11489 }, { "epoch": 1.79, "learning_rate": 5.720102702422324e-06, "logits/chosen": -2.7055773735046387, "logits/rejected": -2.9751226902008057, "logps/chosen": -480.52447509765625, "logps/rejected": -562.7408447265625, "loss": 0.7357, "rewards/accuracies": 0.5, "rewards/chosen": -6.901185512542725, "rewards/margins": 3.793837070465088, "rewards/rejected": -10.695022583007812, "step": 11490 }, { "epoch": 1.79, "learning_rate": 5.719369261891177e-06, "logits/chosen": -2.7003743648529053, "logits/rejected": -2.451202392578125, "logps/chosen": -720.808837890625, "logps/rejected": -630.2924194335938, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -6.058921813964844, "rewards/margins": 7.629359245300293, "rewards/rejected": -13.688282012939453, "step": 11491 }, { "epoch": 1.79, "learning_rate": 5.71863582136003e-06, "logits/chosen": -2.881181478500366, "logits/rejected": -2.970162868499756, "logps/chosen": -190.3935546875, "logps/rejected": -267.3936767578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.5376458168029785, "rewards/margins": 7.634075164794922, "rewards/rejected": -13.171720504760742, "step": 11492 }, { "epoch": 1.79, "learning_rate": 5.7179023808288815e-06, "logits/chosen": -2.6931726932525635, "logits/rejected": -2.285647392272949, "logps/chosen": -377.61090087890625, "logps/rejected": -334.91754150390625, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/chosen": -4.534435272216797, "rewards/margins": 5.8281168937683105, "rewards/rejected": -10.362552642822266, "step": 11493 }, { "epoch": 1.79, "learning_rate": 5.717168940297733e-06, "logits/chosen": -3.0399787425994873, "logits/rejected": -2.872746229171753, "logps/chosen": -151.963623046875, "logps/rejected": -292.21185302734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0733180046081543, "rewards/margins": 6.27590274810791, "rewards/rejected": -8.349221229553223, "step": 11494 }, { "epoch": 1.79, "learning_rate": 5.716435499766585e-06, "logits/chosen": -2.6178534030914307, "logits/rejected": -3.0052921772003174, "logps/chosen": -135.93675231933594, "logps/rejected": -296.89312744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8121914863586426, "rewards/margins": 9.614104270935059, "rewards/rejected": -11.42629623413086, "step": 11495 }, { "epoch": 1.79, "learning_rate": 5.715702059235438e-06, "logits/chosen": -1.5707364082336426, "logits/rejected": -2.3614935874938965, "logps/chosen": -330.63580322265625, "logps/rejected": -352.46124267578125, "loss": 0.2824, "rewards/accuracies": 1.0, "rewards/chosen": -5.88557243347168, "rewards/margins": 4.884870529174805, "rewards/rejected": -10.770442962646484, "step": 11496 }, { "epoch": 1.79, "learning_rate": 5.71496861870429e-06, "logits/chosen": -2.9036481380462646, "logits/rejected": -1.6857638359069824, "logps/chosen": -423.6767272949219, "logps/rejected": -298.4810791015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.271597146987915, "rewards/margins": 8.704618453979492, "rewards/rejected": -11.976216316223145, "step": 11497 }, { "epoch": 1.79, "learning_rate": 5.714235178173142e-06, "logits/chosen": -2.4737496376037598, "logits/rejected": -3.2217299938201904, "logps/chosen": -193.8724822998047, "logps/rejected": -376.91058349609375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.032933712005615, "rewards/margins": 6.951531410217285, "rewards/rejected": -10.984465599060059, "step": 11498 }, { "epoch": 1.79, "learning_rate": 5.713501737641994e-06, "logits/chosen": -2.625396251678467, "logits/rejected": -2.76118540763855, "logps/chosen": -459.22064208984375, "logps/rejected": -603.3048095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.798261642456055, "rewards/margins": 10.551076889038086, "rewards/rejected": -15.34933853149414, "step": 11499 }, { "epoch": 1.79, "learning_rate": 5.712768297110846e-06, "logits/chosen": -2.8491017818450928, "logits/rejected": -2.9429659843444824, "logps/chosen": -74.78669738769531, "logps/rejected": -227.86282348632812, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.328143358230591, "rewards/margins": 7.842329978942871, "rewards/rejected": -10.170473098754883, "step": 11500 }, { "epoch": 1.79, "learning_rate": 5.712034856579698e-06, "logits/chosen": -2.5610499382019043, "logits/rejected": -3.0120160579681396, "logps/chosen": -430.7401123046875, "logps/rejected": -420.6022644042969, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -6.728196144104004, "rewards/margins": 5.000080108642578, "rewards/rejected": -11.728276252746582, "step": 11501 }, { "epoch": 1.79, "learning_rate": 5.71130141604855e-06, "logits/chosen": -2.369750499725342, "logits/rejected": -2.764808177947998, "logps/chosen": -164.28659057617188, "logps/rejected": -278.96002197265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.286940813064575, "rewards/margins": 7.7003374099731445, "rewards/rejected": -10.98727798461914, "step": 11502 }, { "epoch": 1.79, "learning_rate": 5.710567975517402e-06, "logits/chosen": -2.1162872314453125, "logits/rejected": -2.892718553543091, "logps/chosen": -145.6580810546875, "logps/rejected": -301.2466125488281, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -6.109975814819336, "rewards/margins": 6.079656600952148, "rewards/rejected": -12.189632415771484, "step": 11503 }, { "epoch": 1.79, "learning_rate": 5.709834534986255e-06, "logits/chosen": -2.5500152111053467, "logits/rejected": -2.94710111618042, "logps/chosen": -143.75283813476562, "logps/rejected": -320.39862060546875, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": -5.45712947845459, "rewards/margins": 5.712270736694336, "rewards/rejected": -11.169400215148926, "step": 11504 }, { "epoch": 1.79, "learning_rate": 5.709101094455107e-06, "logits/chosen": -2.7285196781158447, "logits/rejected": -2.86533260345459, "logps/chosen": -143.13011169433594, "logps/rejected": -205.67898559570312, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.28135347366333, "rewards/margins": 7.552323818206787, "rewards/rejected": -9.833677291870117, "step": 11505 }, { "epoch": 1.79, "learning_rate": 5.7083676539239585e-06, "logits/chosen": -2.569328784942627, "logits/rejected": -3.117485761642456, "logps/chosen": -52.607154846191406, "logps/rejected": -189.97573852539062, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.9284610748291016, "rewards/margins": 6.6682586669921875, "rewards/rejected": -9.596719741821289, "step": 11506 }, { "epoch": 1.79, "learning_rate": 5.70763421339281e-06, "logits/chosen": -2.6378235816955566, "logits/rejected": -1.4930559396743774, "logps/chosen": -445.1891174316406, "logps/rejected": -477.44207763671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.051120758056641, "rewards/margins": 12.109506607055664, "rewards/rejected": -16.160627365112305, "step": 11507 }, { "epoch": 1.79, "learning_rate": 5.706900772861663e-06, "logits/chosen": -2.365757465362549, "logits/rejected": -2.802396059036255, "logps/chosen": -284.229248046875, "logps/rejected": -373.95135498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.56908655166626, "rewards/margins": 10.765037536621094, "rewards/rejected": -15.334124565124512, "step": 11508 }, { "epoch": 1.79, "learning_rate": 5.706167332330516e-06, "logits/chosen": -2.663550615310669, "logits/rejected": -2.9556925296783447, "logps/chosen": -76.65658569335938, "logps/rejected": -208.53948974609375, "loss": 0.6391, "rewards/accuracies": 0.5, "rewards/chosen": -3.685708999633789, "rewards/margins": 5.556283950805664, "rewards/rejected": -9.241992950439453, "step": 11509 }, { "epoch": 1.79, "learning_rate": 5.705433891799368e-06, "logits/chosen": -2.7250618934631348, "logits/rejected": -3.132946491241455, "logps/chosen": -244.38192749023438, "logps/rejected": -386.58038330078125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -4.340867519378662, "rewards/margins": 9.755199432373047, "rewards/rejected": -14.09606647491455, "step": 11510 }, { "epoch": 1.79, "learning_rate": 5.70470045126822e-06, "logits/chosen": -2.865025281906128, "logits/rejected": -3.1193501949310303, "logps/chosen": -58.96299743652344, "logps/rejected": -184.25823974609375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.191655158996582, "rewards/margins": 6.809466361999512, "rewards/rejected": -11.001121520996094, "step": 11511 }, { "epoch": 1.79, "learning_rate": 5.7039670107370715e-06, "logits/chosen": -0.7300654053688049, "logits/rejected": -2.0424604415893555, "logps/chosen": -121.04176330566406, "logps/rejected": -304.8265686035156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.6575055122375488, "rewards/margins": 9.040881156921387, "rewards/rejected": -10.698387145996094, "step": 11512 }, { "epoch": 1.79, "learning_rate": 5.703233570205924e-06, "logits/chosen": -2.453267812728882, "logits/rejected": -2.817739725112915, "logps/chosen": -325.71539306640625, "logps/rejected": -330.38031005859375, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -4.258762359619141, "rewards/margins": 3.998296022415161, "rewards/rejected": -8.257058143615723, "step": 11513 }, { "epoch": 1.79, "learning_rate": 5.702500129674776e-06, "logits/chosen": -2.5093605518341064, "logits/rejected": -3.0583832263946533, "logps/chosen": -355.62762451171875, "logps/rejected": -392.4942626953125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -3.3109142780303955, "rewards/margins": 7.524448394775391, "rewards/rejected": -10.835362434387207, "step": 11514 }, { "epoch": 1.79, "learning_rate": 5.701766689143628e-06, "logits/chosen": -2.6124870777130127, "logits/rejected": -2.8469772338867188, "logps/chosen": -316.39263916015625, "logps/rejected": -268.40582275390625, "loss": 1.5519, "rewards/accuracies": 0.5, "rewards/chosen": -5.509456634521484, "rewards/margins": 2.8791275024414062, "rewards/rejected": -8.38858413696289, "step": 11515 }, { "epoch": 1.79, "learning_rate": 5.70103324861248e-06, "logits/chosen": -2.484727382659912, "logits/rejected": -2.7773916721343994, "logps/chosen": -132.794677734375, "logps/rejected": -247.56362915039062, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -3.534177780151367, "rewards/margins": 7.7245941162109375, "rewards/rejected": -11.258771896362305, "step": 11516 }, { "epoch": 1.79, "learning_rate": 5.700299808081332e-06, "logits/chosen": -1.9712979793548584, "logits/rejected": -2.883923053741455, "logps/chosen": -360.3036193847656, "logps/rejected": -466.1774597167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1710824966430664, "rewards/margins": 11.016927719116211, "rewards/rejected": -14.188009262084961, "step": 11517 }, { "epoch": 1.79, "learning_rate": 5.6995663675501844e-06, "logits/chosen": -2.1021108627319336, "logits/rejected": -2.6991560459136963, "logps/chosen": -386.6166687011719, "logps/rejected": -525.128173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.446346759796143, "rewards/margins": 11.55090045928955, "rewards/rejected": -16.99724769592285, "step": 11518 }, { "epoch": 1.79, "learning_rate": 5.698832927019036e-06, "logits/chosen": -2.7696499824523926, "logits/rejected": -2.0235965251922607, "logps/chosen": -248.048828125, "logps/rejected": -201.37222290039062, "loss": 0.2966, "rewards/accuracies": 1.0, "rewards/chosen": -5.1363019943237305, "rewards/margins": 3.9447078704833984, "rewards/rejected": -9.081009864807129, "step": 11519 }, { "epoch": 1.79, "learning_rate": 5.698099486487888e-06, "logits/chosen": -2.51705265045166, "logits/rejected": -2.5765175819396973, "logps/chosen": -150.27679443359375, "logps/rejected": -306.3522033691406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.539607048034668, "rewards/margins": 8.250142097473145, "rewards/rejected": -11.789749145507812, "step": 11520 }, { "epoch": 1.79, "learning_rate": 5.69736604595674e-06, "logits/chosen": -2.7905092239379883, "logits/rejected": -1.6718860864639282, "logps/chosen": -468.051513671875, "logps/rejected": -306.3960876464844, "loss": 1.148, "rewards/accuracies": 0.5, "rewards/chosen": -6.819876670837402, "rewards/margins": 0.6908187866210938, "rewards/rejected": -7.510695457458496, "step": 11521 }, { "epoch": 1.79, "learning_rate": 5.696632605425593e-06, "logits/chosen": -2.551826000213623, "logits/rejected": -3.276813268661499, "logps/chosen": -132.37530517578125, "logps/rejected": -339.0504150390625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.169815540313721, "rewards/margins": 6.905603885650635, "rewards/rejected": -12.075419425964355, "step": 11522 }, { "epoch": 1.79, "learning_rate": 5.695899164894445e-06, "logits/chosen": -3.0746634006500244, "logits/rejected": -2.6426868438720703, "logps/chosen": -183.16592407226562, "logps/rejected": -181.30172729492188, "loss": 2.8565, "rewards/accuracies": 0.5, "rewards/chosen": -9.333841323852539, "rewards/margins": -1.5580377578735352, "rewards/rejected": -7.775804042816162, "step": 11523 }, { "epoch": 1.79, "learning_rate": 5.6951657243632965e-06, "logits/chosen": -2.5207836627960205, "logits/rejected": -2.784627676010132, "logps/chosen": -105.62092590332031, "logps/rejected": -131.85816955566406, "loss": 1.2368, "rewards/accuracies": 0.5, "rewards/chosen": -6.353449821472168, "rewards/margins": 1.202538251876831, "rewards/rejected": -7.55598783493042, "step": 11524 }, { "epoch": 1.79, "learning_rate": 5.694432283832149e-06, "logits/chosen": -2.889394760131836, "logits/rejected": -2.872422456741333, "logps/chosen": -488.83251953125, "logps/rejected": -505.0216064453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.867132663726807, "rewards/margins": 9.82303237915039, "rewards/rejected": -14.690165519714355, "step": 11525 }, { "epoch": 1.79, "learning_rate": 5.693698843301001e-06, "logits/chosen": -2.3556056022644043, "logits/rejected": -3.012671709060669, "logps/chosen": -189.36575317382812, "logps/rejected": -390.89105224609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.098446846008301, "rewards/margins": 8.345680236816406, "rewards/rejected": -13.44412612915039, "step": 11526 }, { "epoch": 1.79, "learning_rate": 5.692965402769854e-06, "logits/chosen": -2.108630657196045, "logits/rejected": -2.9475088119506836, "logps/chosen": -77.28277587890625, "logps/rejected": -421.3518371582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6165504455566406, "rewards/margins": 10.45006275177002, "rewards/rejected": -14.06661319732666, "step": 11527 }, { "epoch": 1.79, "learning_rate": 5.692231962238706e-06, "logits/chosen": -1.65478515625, "logits/rejected": -2.6537392139434814, "logps/chosen": -203.41766357421875, "logps/rejected": -424.7360534667969, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -5.804131507873535, "rewards/margins": 5.381165504455566, "rewards/rejected": -11.185297012329102, "step": 11528 }, { "epoch": 1.79, "learning_rate": 5.691498521707558e-06, "logits/chosen": -2.7161130905151367, "logits/rejected": -1.8710970878601074, "logps/chosen": -227.59547424316406, "logps/rejected": -270.09844970703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.8214614391326904, "rewards/margins": 7.928980350494385, "rewards/rejected": -10.750442504882812, "step": 11529 }, { "epoch": 1.79, "learning_rate": 5.6907650811764095e-06, "logits/chosen": -2.915158987045288, "logits/rejected": -2.6126933097839355, "logps/chosen": -193.3489227294922, "logps/rejected": -208.787353515625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -6.714659690856934, "rewards/margins": 5.841647148132324, "rewards/rejected": -12.556306838989258, "step": 11530 }, { "epoch": 1.79, "learning_rate": 5.690031640645262e-06, "logits/chosen": -3.173285722732544, "logits/rejected": -2.550041913986206, "logps/chosen": -341.51629638671875, "logps/rejected": -118.78607177734375, "loss": 2.5395, "rewards/accuracies": 0.0, "rewards/chosen": -7.7314958572387695, "rewards/margins": -2.456787586212158, "rewards/rejected": -5.274707794189453, "step": 11531 }, { "epoch": 1.79, "learning_rate": 5.689298200114114e-06, "logits/chosen": -1.8480417728424072, "logits/rejected": -2.665473461151123, "logps/chosen": -160.56556701660156, "logps/rejected": -353.42901611328125, "loss": 0.1159, "rewards/accuracies": 1.0, "rewards/chosen": -6.643258571624756, "rewards/margins": 7.381166934967041, "rewards/rejected": -14.024425506591797, "step": 11532 }, { "epoch": 1.79, "learning_rate": 5.688564759582966e-06, "logits/chosen": -2.7071890830993652, "logits/rejected": -1.906073808670044, "logps/chosen": -286.3812561035156, "logps/rejected": -287.2739562988281, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.799169063568115, "rewards/margins": 6.161922931671143, "rewards/rejected": -11.961091995239258, "step": 11533 }, { "epoch": 1.79, "learning_rate": 5.687831319051818e-06, "logits/chosen": -2.93107533454895, "logits/rejected": -3.0164620876312256, "logps/chosen": -119.40741729736328, "logps/rejected": -292.38397216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.504481792449951, "rewards/margins": 10.31201171875, "rewards/rejected": -14.816493034362793, "step": 11534 }, { "epoch": 1.79, "learning_rate": 5.68709787852067e-06, "logits/chosen": -2.9160404205322266, "logits/rejected": -2.4532127380371094, "logps/chosen": -281.7256774902344, "logps/rejected": -135.6245574951172, "loss": 1.1536, "rewards/accuracies": 0.0, "rewards/chosen": -6.729606628417969, "rewards/margins": -0.7011692523956299, "rewards/rejected": -6.028437614440918, "step": 11535 }, { "epoch": 1.79, "learning_rate": 5.6863644379895225e-06, "logits/chosen": -2.6666011810302734, "logits/rejected": -1.8316715955734253, "logps/chosen": -254.09498596191406, "logps/rejected": -304.7738037109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.9529175758361816, "rewards/margins": 10.193975448608398, "rewards/rejected": -12.146893501281738, "step": 11536 }, { "epoch": 1.79, "learning_rate": 5.685630997458374e-06, "logits/chosen": -2.9163389205932617, "logits/rejected": -3.022547960281372, "logps/chosen": -65.14335632324219, "logps/rejected": -167.80917358398438, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -1.7578620910644531, "rewards/margins": 6.556923866271973, "rewards/rejected": -8.314785957336426, "step": 11537 }, { "epoch": 1.79, "learning_rate": 5.684897556927226e-06, "logits/chosen": -2.443073034286499, "logits/rejected": -2.7956490516662598, "logps/chosen": -124.17674255371094, "logps/rejected": -315.642822265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.48437237739563, "rewards/margins": 9.4925537109375, "rewards/rejected": -12.97692584991455, "step": 11538 }, { "epoch": 1.79, "learning_rate": 5.684164116396078e-06, "logits/chosen": -2.8386147022247314, "logits/rejected": -2.3188412189483643, "logps/chosen": -257.41949462890625, "logps/rejected": -202.88633728027344, "loss": 0.4003, "rewards/accuracies": 0.5, "rewards/chosen": -4.262730598449707, "rewards/margins": 3.5052783489227295, "rewards/rejected": -7.768008708953857, "step": 11539 }, { "epoch": 1.79, "learning_rate": 5.683430675864931e-06, "logits/chosen": -2.9569284915924072, "logits/rejected": -2.9852912425994873, "logps/chosen": -130.08604431152344, "logps/rejected": -219.50991821289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8497891426086426, "rewards/margins": 10.426284790039062, "rewards/rejected": -12.276074409484863, "step": 11540 }, { "epoch": 1.79, "learning_rate": 5.682697235333783e-06, "logits/chosen": -1.9208698272705078, "logits/rejected": -3.0081546306610107, "logps/chosen": -240.86334228515625, "logps/rejected": -589.8012084960938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.74235463142395, "rewards/margins": 9.431632995605469, "rewards/rejected": -12.17398738861084, "step": 11541 }, { "epoch": 1.8, "learning_rate": 5.681963794802635e-06, "logits/chosen": -3.328941583633423, "logits/rejected": -3.1624045372009277, "logps/chosen": -250.19534301757812, "logps/rejected": -205.04586791992188, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -4.777190685272217, "rewards/margins": 5.529053688049316, "rewards/rejected": -10.306243896484375, "step": 11542 }, { "epoch": 1.8, "learning_rate": 5.681230354271487e-06, "logits/chosen": -2.7502524852752686, "logits/rejected": -2.9153831005096436, "logps/chosen": -127.7249984741211, "logps/rejected": -222.51329040527344, "loss": 0.0772, "rewards/accuracies": 1.0, "rewards/chosen": -4.188042640686035, "rewards/margins": 3.303837299346924, "rewards/rejected": -7.491880416870117, "step": 11543 }, { "epoch": 1.8, "learning_rate": 5.680496913740339e-06, "logits/chosen": -2.7658631801605225, "logits/rejected": -2.7241291999816895, "logps/chosen": -431.455322265625, "logps/rejected": -362.04345703125, "loss": 0.1001, "rewards/accuracies": 1.0, "rewards/chosen": -5.6553850173950195, "rewards/margins": 4.273006916046143, "rewards/rejected": -9.92839241027832, "step": 11544 }, { "epoch": 1.8, "learning_rate": 5.679763473209192e-06, "logits/chosen": -2.9068427085876465, "logits/rejected": -2.358537197113037, "logps/chosen": -190.14520263671875, "logps/rejected": -161.57904052734375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -4.02638053894043, "rewards/margins": 4.7081193923950195, "rewards/rejected": -8.73449993133545, "step": 11545 }, { "epoch": 1.8, "learning_rate": 5.679030032678044e-06, "logits/chosen": -1.2756010293960571, "logits/rejected": -2.840122699737549, "logps/chosen": -132.32327270507812, "logps/rejected": -461.68701171875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.3017075061798096, "rewards/margins": 8.350963592529297, "rewards/rejected": -11.652671813964844, "step": 11546 }, { "epoch": 1.8, "learning_rate": 5.678296592146896e-06, "logits/chosen": -2.6982429027557373, "logits/rejected": -2.856896162033081, "logps/chosen": -99.28736114501953, "logps/rejected": -211.65907287597656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.843198776245117, "rewards/margins": 8.770156860351562, "rewards/rejected": -12.61335563659668, "step": 11547 }, { "epoch": 1.8, "learning_rate": 5.6775631516157476e-06, "logits/chosen": -3.046210289001465, "logits/rejected": -3.091834545135498, "logps/chosen": -373.13555908203125, "logps/rejected": -518.9852294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.326410293579102, "rewards/margins": 11.276983261108398, "rewards/rejected": -15.6033935546875, "step": 11548 }, { "epoch": 1.8, "learning_rate": 5.6768297110846e-06, "logits/chosen": -1.8050976991653442, "logits/rejected": -2.9535329341888428, "logps/chosen": -116.04966735839844, "logps/rejected": -403.3563537597656, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.597709655761719, "rewards/margins": 8.847128868103027, "rewards/rejected": -15.444838523864746, "step": 11549 }, { "epoch": 1.8, "learning_rate": 5.676096270553452e-06, "logits/chosen": -1.826883316040039, "logits/rejected": -2.9743144512176514, "logps/chosen": -100.9044418334961, "logps/rejected": -432.5826721191406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.775852680206299, "rewards/margins": 9.704146385192871, "rewards/rejected": -14.479999542236328, "step": 11550 }, { "epoch": 1.8, "learning_rate": 5.675362830022304e-06, "logits/chosen": -2.0511574745178223, "logits/rejected": -1.8216990232467651, "logps/chosen": -277.7375183105469, "logps/rejected": -148.28521728515625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.52205228805542, "rewards/margins": 7.2505340576171875, "rewards/rejected": -9.772586822509766, "step": 11551 }, { "epoch": 1.8, "learning_rate": 5.674629389491156e-06, "logits/chosen": -3.0140938758850098, "logits/rejected": -2.9013726711273193, "logps/chosen": -234.79090881347656, "logps/rejected": -281.42462158203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.304624557495117, "rewards/margins": 9.765786170959473, "rewards/rejected": -13.070409774780273, "step": 11552 }, { "epoch": 1.8, "learning_rate": 5.673895948960009e-06, "logits/chosen": -1.845245599746704, "logits/rejected": -3.145616292953491, "logps/chosen": -159.32852172851562, "logps/rejected": -474.6112365722656, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -5.352903366088867, "rewards/margins": 8.283080101013184, "rewards/rejected": -13.635982513427734, "step": 11553 }, { "epoch": 1.8, "learning_rate": 5.6731625084288605e-06, "logits/chosen": -1.9763325452804565, "logits/rejected": -2.93855881690979, "logps/chosen": -67.00650024414062, "logps/rejected": -445.0361633300781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.655303478240967, "rewards/margins": 8.639519691467285, "rewards/rejected": -13.294822692871094, "step": 11554 }, { "epoch": 1.8, "learning_rate": 5.672429067897712e-06, "logits/chosen": -2.967428684234619, "logits/rejected": -3.196415424346924, "logps/chosen": -143.7453155517578, "logps/rejected": -237.45669555664062, "loss": 0.089, "rewards/accuracies": 1.0, "rewards/chosen": -3.0624191761016846, "rewards/margins": 4.1933698654174805, "rewards/rejected": -7.255788803100586, "step": 11555 }, { "epoch": 1.8, "learning_rate": 5.671695627366564e-06, "logits/chosen": -2.198899984359741, "logits/rejected": -2.899874210357666, "logps/chosen": -263.1655578613281, "logps/rejected": -361.37139892578125, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -4.72330379486084, "rewards/margins": 4.970066070556641, "rewards/rejected": -9.69336986541748, "step": 11556 }, { "epoch": 1.8, "learning_rate": 5.670962186835416e-06, "logits/chosen": -1.5977498292922974, "logits/rejected": -2.8571724891662598, "logps/chosen": -78.52265930175781, "logps/rejected": -455.897216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6428399085998535, "rewards/margins": 14.635761260986328, "rewards/rejected": -19.278602600097656, "step": 11557 }, { "epoch": 1.8, "learning_rate": 5.670228746304269e-06, "logits/chosen": -2.5234382152557373, "logits/rejected": -3.0771124362945557, "logps/chosen": -200.5631103515625, "logps/rejected": -287.4953918457031, "loss": 0.9179, "rewards/accuracies": 0.5, "rewards/chosen": -6.642551422119141, "rewards/margins": 4.532829284667969, "rewards/rejected": -11.17538070678711, "step": 11558 }, { "epoch": 1.8, "learning_rate": 5.669495305773121e-06, "logits/chosen": -1.935491919517517, "logits/rejected": -3.005481243133545, "logps/chosen": -98.96113586425781, "logps/rejected": -276.7957458496094, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.6514503955841064, "rewards/margins": 7.884875297546387, "rewards/rejected": -11.536325454711914, "step": 11559 }, { "epoch": 1.8, "learning_rate": 5.6687618652419735e-06, "logits/chosen": -1.9946013689041138, "logits/rejected": -2.9803853034973145, "logps/chosen": -185.35101318359375, "logps/rejected": -576.9859619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.53998064994812, "rewards/margins": 10.98802375793457, "rewards/rejected": -14.52800464630127, "step": 11560 }, { "epoch": 1.8, "learning_rate": 5.668028424710825e-06, "logits/chosen": -2.381058692932129, "logits/rejected": -2.6976945400238037, "logps/chosen": -467.2861022949219, "logps/rejected": -511.6330871582031, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.555478811264038, "rewards/margins": 6.854223251342773, "rewards/rejected": -10.40970230102539, "step": 11561 }, { "epoch": 1.8, "learning_rate": 5.667294984179678e-06, "logits/chosen": -2.4192733764648438, "logits/rejected": -3.018096685409546, "logps/chosen": -144.75723266601562, "logps/rejected": -211.1764373779297, "loss": 1.2523, "rewards/accuracies": 0.5, "rewards/chosen": -4.598329544067383, "rewards/margins": 2.5904316902160645, "rewards/rejected": -7.188761234283447, "step": 11562 }, { "epoch": 1.8, "learning_rate": 5.66656154364853e-06, "logits/chosen": -3.1458325386047363, "logits/rejected": -3.235666275024414, "logps/chosen": -109.43244934082031, "logps/rejected": -164.21746826171875, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -4.440987586975098, "rewards/margins": 4.403200626373291, "rewards/rejected": -8.844188690185547, "step": 11563 }, { "epoch": 1.8, "learning_rate": 5.665828103117382e-06, "logits/chosen": -2.671123504638672, "logits/rejected": -3.0319132804870605, "logps/chosen": -72.35722351074219, "logps/rejected": -212.288330078125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.477595329284668, "rewards/margins": 7.945096969604492, "rewards/rejected": -12.42269229888916, "step": 11564 }, { "epoch": 1.8, "learning_rate": 5.665094662586234e-06, "logits/chosen": -2.009524345397949, "logits/rejected": -2.6431689262390137, "logps/chosen": -174.69947814941406, "logps/rejected": -307.7191467285156, "loss": 0.0854, "rewards/accuracies": 1.0, "rewards/chosen": -6.232316493988037, "rewards/margins": 5.613241195678711, "rewards/rejected": -11.845558166503906, "step": 11565 }, { "epoch": 1.8, "learning_rate": 5.664361222055086e-06, "logits/chosen": -2.4900176525115967, "logits/rejected": -2.483295440673828, "logps/chosen": -164.59580993652344, "logps/rejected": -256.98529052734375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -4.310027122497559, "rewards/margins": 6.66651725769043, "rewards/rejected": -10.976544380187988, "step": 11566 }, { "epoch": 1.8, "learning_rate": 5.663627781523938e-06, "logits/chosen": -2.7808985710144043, "logits/rejected": -2.8007442951202393, "logps/chosen": -68.90454864501953, "logps/rejected": -189.32325744628906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2086057662963867, "rewards/margins": 10.778427124023438, "rewards/rejected": -12.987032890319824, "step": 11567 }, { "epoch": 1.8, "learning_rate": 5.66289434099279e-06, "logits/chosen": -3.1534550189971924, "logits/rejected": -3.191453456878662, "logps/chosen": -281.18780517578125, "logps/rejected": -332.07110595703125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -4.8188323974609375, "rewards/margins": 7.750199317932129, "rewards/rejected": -12.569031715393066, "step": 11568 }, { "epoch": 1.8, "learning_rate": 5.662160900461642e-06, "logits/chosen": -1.3042207956314087, "logits/rejected": -2.156599283218384, "logps/chosen": -243.664794921875, "logps/rejected": -335.8297119140625, "loss": 1.2331, "rewards/accuracies": 0.5, "rewards/chosen": -7.263195037841797, "rewards/margins": 3.7892544269561768, "rewards/rejected": -11.052449226379395, "step": 11569 }, { "epoch": 1.8, "learning_rate": 5.661427459930494e-06, "logits/chosen": -2.036301612854004, "logits/rejected": -2.557304620742798, "logps/chosen": -171.37759399414062, "logps/rejected": -299.73358154296875, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -5.454015254974365, "rewards/margins": 6.65452766418457, "rewards/rejected": -12.108543395996094, "step": 11570 }, { "epoch": 1.8, "learning_rate": 5.660694019399347e-06, "logits/chosen": -2.4438610076904297, "logits/rejected": -2.98964786529541, "logps/chosen": -75.89017486572266, "logps/rejected": -294.98162841796875, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -4.458008766174316, "rewards/margins": 6.793952941894531, "rewards/rejected": -11.251961708068848, "step": 11571 }, { "epoch": 1.8, "learning_rate": 5.6599605788681986e-06, "logits/chosen": -2.7401773929595947, "logits/rejected": -2.696767807006836, "logps/chosen": -230.27777099609375, "logps/rejected": -203.90472412109375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -6.479241371154785, "rewards/margins": 6.285187721252441, "rewards/rejected": -12.764429092407227, "step": 11572 }, { "epoch": 1.8, "learning_rate": 5.6592271383370504e-06, "logits/chosen": -2.6957390308380127, "logits/rejected": -3.096327781677246, "logps/chosen": -107.44638061523438, "logps/rejected": -392.8421325683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.817847728729248, "rewards/margins": 12.485523223876953, "rewards/rejected": -15.30337142944336, "step": 11573 }, { "epoch": 1.8, "learning_rate": 5.658493697805902e-06, "logits/chosen": -1.9079056978225708, "logits/rejected": -2.894627809524536, "logps/chosen": -208.74212646484375, "logps/rejected": -327.0299072265625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -4.028217315673828, "rewards/margins": 6.887005805969238, "rewards/rejected": -10.915223121643066, "step": 11574 }, { "epoch": 1.8, "learning_rate": 5.657760257274754e-06, "logits/chosen": -2.3101789951324463, "logits/rejected": -3.1892364025115967, "logps/chosen": -68.38662719726562, "logps/rejected": -609.2864990234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.181852340698242, "rewards/margins": 8.550630569458008, "rewards/rejected": -13.73248291015625, "step": 11575 }, { "epoch": 1.8, "learning_rate": 5.657026816743607e-06, "logits/chosen": -1.8123764991760254, "logits/rejected": -2.8301820755004883, "logps/chosen": -97.12615966796875, "logps/rejected": -400.51605224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5308942794799805, "rewards/margins": 11.106940269470215, "rewards/rejected": -15.637834548950195, "step": 11576 }, { "epoch": 1.8, "learning_rate": 5.65629337621246e-06, "logits/chosen": -2.269473075866699, "logits/rejected": -2.7446367740631104, "logps/chosen": -206.7235107421875, "logps/rejected": -362.83758544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2628087997436523, "rewards/margins": 9.248601913452148, "rewards/rejected": -11.511411666870117, "step": 11577 }, { "epoch": 1.8, "learning_rate": 5.6555599356813115e-06, "logits/chosen": -2.15356183052063, "logits/rejected": -1.8201195001602173, "logps/chosen": -298.49810791015625, "logps/rejected": -355.9958801269531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.977029800415039, "rewards/margins": 7.7871246337890625, "rewards/rejected": -13.764154434204102, "step": 11578 }, { "epoch": 1.8, "learning_rate": 5.654826495150163e-06, "logits/chosen": -1.9742763042449951, "logits/rejected": -2.3537795543670654, "logps/chosen": -300.9381408691406, "logps/rejected": -330.00811767578125, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -5.3619065284729, "rewards/margins": 6.013124942779541, "rewards/rejected": -11.375031471252441, "step": 11579 }, { "epoch": 1.8, "learning_rate": 5.654093054619016e-06, "logits/chosen": -2.8781774044036865, "logits/rejected": -2.8966073989868164, "logps/chosen": -188.1141815185547, "logps/rejected": -197.3844757080078, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -4.924814701080322, "rewards/margins": 3.4523751735687256, "rewards/rejected": -8.377189636230469, "step": 11580 }, { "epoch": 1.8, "learning_rate": 5.653359614087868e-06, "logits/chosen": -2.3278257846832275, "logits/rejected": -2.888411521911621, "logps/chosen": -183.96051025390625, "logps/rejected": -395.30548095703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.9601783752441406, "rewards/margins": 8.641639709472656, "rewards/rejected": -11.601818084716797, "step": 11581 }, { "epoch": 1.8, "learning_rate": 5.65262617355672e-06, "logits/chosen": -1.07985258102417, "logits/rejected": -2.306627035140991, "logps/chosen": -210.38853454589844, "logps/rejected": -612.75830078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.247580528259277, "rewards/margins": 11.828999519348145, "rewards/rejected": -17.076580047607422, "step": 11582 }, { "epoch": 1.8, "learning_rate": 5.651892733025572e-06, "logits/chosen": -2.893404006958008, "logits/rejected": -2.3542535305023193, "logps/chosen": -136.10240173339844, "logps/rejected": -150.18557739257812, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": -4.04591178894043, "rewards/margins": 4.014966011047363, "rewards/rejected": -8.060877799987793, "step": 11583 }, { "epoch": 1.8, "learning_rate": 5.651159292494424e-06, "logits/chosen": -2.2890267372131348, "logits/rejected": -1.9761486053466797, "logps/chosen": -151.1370849609375, "logps/rejected": -230.10736083984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.7263565063476562, "rewards/margins": 8.193717956542969, "rewards/rejected": -10.920074462890625, "step": 11584 }, { "epoch": 1.8, "learning_rate": 5.650425851963276e-06, "logits/chosen": -1.8162174224853516, "logits/rejected": -3.1924777030944824, "logps/chosen": -191.89511108398438, "logps/rejected": -265.94049072265625, "loss": 3.209, "rewards/accuracies": 0.5, "rewards/chosen": -7.723104000091553, "rewards/margins": -1.8858017921447754, "rewards/rejected": -5.837302207946777, "step": 11585 }, { "epoch": 1.8, "learning_rate": 5.649692411432128e-06, "logits/chosen": -1.6162185668945312, "logits/rejected": -2.687267541885376, "logps/chosen": -105.08462524414062, "logps/rejected": -387.48358154296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.9415438175201416, "rewards/margins": 7.683917999267578, "rewards/rejected": -11.62546157836914, "step": 11586 }, { "epoch": 1.8, "learning_rate": 5.64895897090098e-06, "logits/chosen": -2.2433385848999023, "logits/rejected": -3.0641062259674072, "logps/chosen": -194.89686584472656, "logps/rejected": -494.16973876953125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.7031660079956055, "rewards/margins": 12.383556365966797, "rewards/rejected": -17.086721420288086, "step": 11587 }, { "epoch": 1.8, "learning_rate": 5.648225530369832e-06, "logits/chosen": -2.250223159790039, "logits/rejected": -2.7546074390411377, "logps/chosen": -643.3514404296875, "logps/rejected": -710.4125366210938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.239080905914307, "rewards/margins": 8.845785140991211, "rewards/rejected": -14.08486557006836, "step": 11588 }, { "epoch": 1.8, "learning_rate": 5.647492089838685e-06, "logits/chosen": -2.7067651748657227, "logits/rejected": -2.9025187492370605, "logps/chosen": -250.1119384765625, "logps/rejected": -537.599365234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.725710868835449, "rewards/margins": 8.85875415802002, "rewards/rejected": -12.584465026855469, "step": 11589 }, { "epoch": 1.8, "learning_rate": 5.646758649307537e-06, "logits/chosen": -2.910569429397583, "logits/rejected": -1.9198778867721558, "logps/chosen": -275.76556396484375, "logps/rejected": -170.45867919921875, "loss": 0.8779, "rewards/accuracies": 0.0, "rewards/chosen": -5.635992527008057, "rewards/margins": -0.3402400016784668, "rewards/rejected": -5.29575252532959, "step": 11590 }, { "epoch": 1.8, "learning_rate": 5.6460252087763885e-06, "logits/chosen": -0.8570852875709534, "logits/rejected": -2.5181057453155518, "logps/chosen": -235.2328338623047, "logps/rejected": -554.84619140625, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": -5.550586223602295, "rewards/margins": 5.770847320556641, "rewards/rejected": -11.321434020996094, "step": 11591 }, { "epoch": 1.8, "learning_rate": 5.64529176824524e-06, "logits/chosen": -1.7375229597091675, "logits/rejected": -2.7903971672058105, "logps/chosen": -129.4737091064453, "logps/rejected": -280.0093994140625, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -5.265717506408691, "rewards/margins": 4.6199750900268555, "rewards/rejected": -9.885692596435547, "step": 11592 }, { "epoch": 1.8, "learning_rate": 5.644558327714093e-06, "logits/chosen": -1.2799588441848755, "logits/rejected": -2.9250380992889404, "logps/chosen": -83.6544418334961, "logps/rejected": -343.6020202636719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.0751562118530273, "rewards/margins": 8.061922073364258, "rewards/rejected": -11.137077331542969, "step": 11593 }, { "epoch": 1.8, "learning_rate": 5.643824887182946e-06, "logits/chosen": -1.858089804649353, "logits/rejected": -2.6669921875, "logps/chosen": -439.3360290527344, "logps/rejected": -728.960205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.474920749664307, "rewards/margins": 13.410057067871094, "rewards/rejected": -18.884979248046875, "step": 11594 }, { "epoch": 1.8, "learning_rate": 5.643091446651798e-06, "logits/chosen": -2.7529640197753906, "logits/rejected": -1.3658183813095093, "logps/chosen": -540.1568603515625, "logps/rejected": -280.7098693847656, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.160813331604004, "rewards/margins": 6.383800983428955, "rewards/rejected": -12.544614791870117, "step": 11595 }, { "epoch": 1.8, "learning_rate": 5.6423580061206496e-06, "logits/chosen": -1.6805733442306519, "logits/rejected": -1.9971320629119873, "logps/chosen": -204.17745971679688, "logps/rejected": -456.90594482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.519692897796631, "rewards/margins": 13.11655044555664, "rewards/rejected": -15.636242866516113, "step": 11596 }, { "epoch": 1.8, "learning_rate": 5.6416245655895014e-06, "logits/chosen": -2.6963937282562256, "logits/rejected": -1.9356367588043213, "logps/chosen": -469.67498779296875, "logps/rejected": -491.4970397949219, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -4.742392063140869, "rewards/margins": 6.930351257324219, "rewards/rejected": -11.67274284362793, "step": 11597 }, { "epoch": 1.8, "learning_rate": 5.640891125058354e-06, "logits/chosen": -2.5127484798431396, "logits/rejected": -2.8920443058013916, "logps/chosen": -178.26710510253906, "logps/rejected": -229.00057983398438, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.7391974925994873, "rewards/margins": 6.011855602264404, "rewards/rejected": -8.751052856445312, "step": 11598 }, { "epoch": 1.8, "learning_rate": 5.640157684527206e-06, "logits/chosen": -1.4356929063796997, "logits/rejected": -2.654463768005371, "logps/chosen": -115.97496032714844, "logps/rejected": -374.43450927734375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.07128381729126, "rewards/margins": 7.893445014953613, "rewards/rejected": -11.964728355407715, "step": 11599 }, { "epoch": 1.8, "learning_rate": 5.639424243996058e-06, "logits/chosen": -2.630704164505005, "logits/rejected": -2.7871243953704834, "logps/chosen": -320.2816467285156, "logps/rejected": -304.477783203125, "loss": 0.7372, "rewards/accuracies": 0.5, "rewards/chosen": -4.648155212402344, "rewards/margins": 5.6975297927856445, "rewards/rejected": -10.345685005187988, "step": 11600 }, { "epoch": 1.8, "learning_rate": 5.63869080346491e-06, "logits/chosen": -3.0924429893493652, "logits/rejected": -3.0001235008239746, "logps/chosen": -132.28875732421875, "logps/rejected": -145.0218505859375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -2.874319553375244, "rewards/margins": 6.34316349029541, "rewards/rejected": -9.217482566833496, "step": 11601 }, { "epoch": 1.8, "learning_rate": 5.6379573629337625e-06, "logits/chosen": -2.600113868713379, "logits/rejected": -2.708827495574951, "logps/chosen": -662.8560791015625, "logps/rejected": -511.4048156738281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.071415901184082, "rewards/margins": 10.407390594482422, "rewards/rejected": -14.47880744934082, "step": 11602 }, { "epoch": 1.8, "learning_rate": 5.637223922402614e-06, "logits/chosen": -2.812032699584961, "logits/rejected": -1.967093825340271, "logps/chosen": -120.80296325683594, "logps/rejected": -225.65142822265625, "loss": 0.1086, "rewards/accuracies": 1.0, "rewards/chosen": -3.654358386993408, "rewards/margins": 4.983819007873535, "rewards/rejected": -8.638176918029785, "step": 11603 }, { "epoch": 1.8, "learning_rate": 5.636490481871466e-06, "logits/chosen": -1.6149340867996216, "logits/rejected": -2.891981601715088, "logps/chosen": -201.28665161132812, "logps/rejected": -525.8822021484375, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -3.6256728172302246, "rewards/margins": 6.888392448425293, "rewards/rejected": -10.514065742492676, "step": 11604 }, { "epoch": 1.8, "learning_rate": 5.635757041340318e-06, "logits/chosen": -2.5452475547790527, "logits/rejected": -1.5265926122665405, "logps/chosen": -434.4937438964844, "logps/rejected": -429.7921142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.6974204182624817, "rewards/margins": 13.20453929901123, "rewards/rejected": -13.901960372924805, "step": 11605 }, { "epoch": 1.8, "learning_rate": 5.63502360080917e-06, "logits/chosen": -2.8815765380859375, "logits/rejected": -3.1391103267669678, "logps/chosen": -633.26025390625, "logps/rejected": -489.7034912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4791862964630127, "rewards/margins": 10.200927734375, "rewards/rejected": -13.68011474609375, "step": 11606 }, { "epoch": 1.81, "learning_rate": 5.634290160278023e-06, "logits/chosen": -2.4438929557800293, "logits/rejected": -3.123849868774414, "logps/chosen": -200.24343872070312, "logps/rejected": -346.87890625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -4.231040954589844, "rewards/margins": 7.597052574157715, "rewards/rejected": -11.828094482421875, "step": 11607 }, { "epoch": 1.81, "learning_rate": 5.633556719746875e-06, "logits/chosen": -2.6091468334198, "logits/rejected": -2.9834113121032715, "logps/chosen": -108.5678939819336, "logps/rejected": -387.88201904296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.502396106719971, "rewards/margins": 7.908905029296875, "rewards/rejected": -12.411301612854004, "step": 11608 }, { "epoch": 1.81, "learning_rate": 5.6328232792157265e-06, "logits/chosen": -3.213411569595337, "logits/rejected": -3.045098304748535, "logps/chosen": -279.38433837890625, "logps/rejected": -271.69927978515625, "loss": 0.133, "rewards/accuracies": 1.0, "rewards/chosen": -5.6873779296875, "rewards/margins": 2.900080442428589, "rewards/rejected": -8.587458610534668, "step": 11609 }, { "epoch": 1.81, "learning_rate": 5.632089838684579e-06, "logits/chosen": -1.3085488080978394, "logits/rejected": -2.888047695159912, "logps/chosen": -122.95526123046875, "logps/rejected": -315.0491638183594, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": -3.750033378601074, "rewards/margins": 7.5448102951049805, "rewards/rejected": -11.294843673706055, "step": 11610 }, { "epoch": 1.81, "learning_rate": 5.631356398153432e-06, "logits/chosen": -2.8441293239593506, "logits/rejected": -2.809877395629883, "logps/chosen": -318.3687744140625, "logps/rejected": -170.50631713867188, "loss": 1.4783, "rewards/accuracies": 0.5, "rewards/chosen": -6.220457077026367, "rewards/margins": 0.458868145942688, "rewards/rejected": -6.679325103759766, "step": 11611 }, { "epoch": 1.81, "learning_rate": 5.630622957622284e-06, "logits/chosen": -2.981564521789551, "logits/rejected": -3.2836387157440186, "logps/chosen": -74.85360717773438, "logps/rejected": -256.51287841796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.1677682399749756, "rewards/margins": 6.714935302734375, "rewards/rejected": -9.88270378112793, "step": 11612 }, { "epoch": 1.81, "learning_rate": 5.629889517091136e-06, "logits/chosen": -2.2571041584014893, "logits/rejected": -2.935918092727661, "logps/chosen": -194.15203857421875, "logps/rejected": -426.08685302734375, "loss": 0.1134, "rewards/accuracies": 1.0, "rewards/chosen": -4.503964424133301, "rewards/margins": 3.311800956726074, "rewards/rejected": -7.815765380859375, "step": 11613 }, { "epoch": 1.81, "learning_rate": 5.629156076559988e-06, "logits/chosen": -2.267108917236328, "logits/rejected": -2.8494150638580322, "logps/chosen": -154.19454956054688, "logps/rejected": -248.26417541503906, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -5.118507385253906, "rewards/margins": 4.9383931159973145, "rewards/rejected": -10.056900024414062, "step": 11614 }, { "epoch": 1.81, "learning_rate": 5.6284226360288395e-06, "logits/chosen": -2.0796194076538086, "logits/rejected": -3.119030475616455, "logps/chosen": -81.19761657714844, "logps/rejected": -423.998779296875, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -3.916659355163574, "rewards/margins": 7.915238380432129, "rewards/rejected": -11.831897735595703, "step": 11615 }, { "epoch": 1.81, "learning_rate": 5.627689195497692e-06, "logits/chosen": -2.4474170207977295, "logits/rejected": -2.932594060897827, "logps/chosen": -216.08975219726562, "logps/rejected": -493.02362060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9491143226623535, "rewards/margins": 12.987662315368652, "rewards/rejected": -15.936777114868164, "step": 11616 }, { "epoch": 1.81, "learning_rate": 5.626955754966544e-06, "logits/chosen": -2.763340473175049, "logits/rejected": -3.3174245357513428, "logps/chosen": -286.3387451171875, "logps/rejected": -496.642822265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.78507137298584, "rewards/margins": 9.788559913635254, "rewards/rejected": -16.573631286621094, "step": 11617 }, { "epoch": 1.81, "learning_rate": 5.626222314435396e-06, "logits/chosen": -1.7912120819091797, "logits/rejected": -2.8500890731811523, "logps/chosen": -128.6883087158203, "logps/rejected": -314.40191650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.0656068325042725, "rewards/margins": 10.052196502685547, "rewards/rejected": -11.117803573608398, "step": 11618 }, { "epoch": 1.81, "learning_rate": 5.625488873904248e-06, "logits/chosen": -2.466181516647339, "logits/rejected": -3.144496202468872, "logps/chosen": -138.4867401123047, "logps/rejected": -377.6153564453125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -4.363689422607422, "rewards/margins": 6.3483381271362305, "rewards/rejected": -10.712027549743652, "step": 11619 }, { "epoch": 1.81, "learning_rate": 5.6247554333731006e-06, "logits/chosen": -2.646594524383545, "logits/rejected": -1.3985220193862915, "logps/chosen": -172.49598693847656, "logps/rejected": -261.8420715332031, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.783900022506714, "rewards/margins": 7.351892471313477, "rewards/rejected": -11.13579273223877, "step": 11620 }, { "epoch": 1.81, "learning_rate": 5.6240219928419525e-06, "logits/chosen": -2.7679691314697266, "logits/rejected": -2.9344778060913086, "logps/chosen": -439.9209899902344, "logps/rejected": -624.8584594726562, "loss": 0.0454, "rewards/accuracies": 1.0, "rewards/chosen": -6.395223617553711, "rewards/margins": 4.680651664733887, "rewards/rejected": -11.075874328613281, "step": 11621 }, { "epoch": 1.81, "learning_rate": 5.623288552310804e-06, "logits/chosen": -2.3605456352233887, "logits/rejected": -2.569164991378784, "logps/chosen": -108.39462280273438, "logps/rejected": -191.101806640625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.319997787475586, "rewards/margins": 6.010605812072754, "rewards/rejected": -9.33060359954834, "step": 11622 }, { "epoch": 1.81, "learning_rate": 5.622555111779656e-06, "logits/chosen": -2.0962488651275635, "logits/rejected": -2.82383131980896, "logps/chosen": -188.9720458984375, "logps/rejected": -529.4453125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -2.954446315765381, "rewards/margins": 6.62271785736084, "rewards/rejected": -9.577163696289062, "step": 11623 }, { "epoch": 1.81, "learning_rate": 5.621821671248508e-06, "logits/chosen": -2.0771775245666504, "logits/rejected": -3.3406388759613037, "logps/chosen": -164.89920043945312, "logps/rejected": -476.8291015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8338189125061035, "rewards/margins": 8.558069229125977, "rewards/rejected": -11.391888618469238, "step": 11624 }, { "epoch": 1.81, "learning_rate": 5.621088230717361e-06, "logits/chosen": -1.4346612691879272, "logits/rejected": -2.258056402206421, "logps/chosen": -213.35891723632812, "logps/rejected": -522.2887573242188, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -4.091378211975098, "rewards/margins": 10.778295516967773, "rewards/rejected": -14.869673728942871, "step": 11625 }, { "epoch": 1.81, "learning_rate": 5.620354790186213e-06, "logits/chosen": -2.8393595218658447, "logits/rejected": -1.6779797077178955, "logps/chosen": -311.0841979980469, "logps/rejected": -133.10089111328125, "loss": 0.2443, "rewards/accuracies": 1.0, "rewards/chosen": -1.892591953277588, "rewards/margins": 5.417039394378662, "rewards/rejected": -7.30963134765625, "step": 11626 }, { "epoch": 1.81, "learning_rate": 5.619621349655065e-06, "logits/chosen": -2.2568700313568115, "logits/rejected": -3.0766570568084717, "logps/chosen": -186.012451171875, "logps/rejected": -319.6395568847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.207337379455566, "rewards/margins": 10.172072410583496, "rewards/rejected": -14.379409790039062, "step": 11627 }, { "epoch": 1.81, "learning_rate": 5.618887909123917e-06, "logits/chosen": -1.9810476303100586, "logits/rejected": -2.8712069988250732, "logps/chosen": -141.01187133789062, "logps/rejected": -394.9952392578125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -3.2146682739257812, "rewards/margins": 6.0775346755981445, "rewards/rejected": -9.292202949523926, "step": 11628 }, { "epoch": 1.81, "learning_rate": 5.61815446859277e-06, "logits/chosen": -2.712407112121582, "logits/rejected": -2.543795585632324, "logps/chosen": -266.66717529296875, "logps/rejected": -374.2042236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2552971839904785, "rewards/margins": 12.105724334716797, "rewards/rejected": -15.361021995544434, "step": 11629 }, { "epoch": 1.81, "learning_rate": 5.617421028061622e-06, "logits/chosen": -2.7730937004089355, "logits/rejected": -2.8122191429138184, "logps/chosen": -302.5354309082031, "logps/rejected": -621.8468017578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.518088340759277, "rewards/margins": 12.682503700256348, "rewards/rejected": -18.200592041015625, "step": 11630 }, { "epoch": 1.81, "learning_rate": 5.616687587530474e-06, "logits/chosen": -1.2572275400161743, "logits/rejected": -2.5769948959350586, "logps/chosen": -177.83856201171875, "logps/rejected": -486.01727294921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8579840660095215, "rewards/margins": 11.068605422973633, "rewards/rejected": -14.926589965820312, "step": 11631 }, { "epoch": 1.81, "learning_rate": 5.615954146999326e-06, "logits/chosen": -1.4767930507659912, "logits/rejected": -2.6140334606170654, "logps/chosen": -226.57742309570312, "logps/rejected": -581.8117065429688, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -3.3021912574768066, "rewards/margins": 8.991451263427734, "rewards/rejected": -12.293642044067383, "step": 11632 }, { "epoch": 1.81, "learning_rate": 5.6152207064681775e-06, "logits/chosen": -2.5872864723205566, "logits/rejected": -2.6424851417541504, "logps/chosen": -63.491424560546875, "logps/rejected": -409.49395751953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.6246416568756104, "rewards/margins": 13.007083892822266, "rewards/rejected": -16.631725311279297, "step": 11633 }, { "epoch": 1.81, "learning_rate": 5.61448726593703e-06, "logits/chosen": -2.64987850189209, "logits/rejected": -1.871802806854248, "logps/chosen": -266.18707275390625, "logps/rejected": -389.34368896484375, "loss": 0.2977, "rewards/accuracies": 1.0, "rewards/chosen": -7.429844856262207, "rewards/margins": 4.170419692993164, "rewards/rejected": -11.600263595581055, "step": 11634 }, { "epoch": 1.81, "learning_rate": 5.613753825405882e-06, "logits/chosen": -1.6310566663742065, "logits/rejected": -3.051694393157959, "logps/chosen": -384.44854736328125, "logps/rejected": -549.5850219726562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5660347938537598, "rewards/margins": 9.971765518188477, "rewards/rejected": -13.537800788879395, "step": 11635 }, { "epoch": 1.81, "learning_rate": 5.613020384874734e-06, "logits/chosen": -2.2455904483795166, "logits/rejected": -3.056684732437134, "logps/chosen": -366.689208984375, "logps/rejected": -565.9180908203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.829871654510498, "rewards/margins": 11.329584121704102, "rewards/rejected": -15.159456253051758, "step": 11636 }, { "epoch": 1.81, "learning_rate": 5.612286944343586e-06, "logits/chosen": -2.645435094833374, "logits/rejected": -1.7859480381011963, "logps/chosen": -719.4495849609375, "logps/rejected": -475.4253234863281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.422601342201233, "rewards/margins": 9.949450492858887, "rewards/rejected": -11.372052192687988, "step": 11637 }, { "epoch": 1.81, "learning_rate": 5.611553503812439e-06, "logits/chosen": -2.6907718181610107, "logits/rejected": -2.2380239963531494, "logps/chosen": -303.9389343261719, "logps/rejected": -333.9931945800781, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -5.181138038635254, "rewards/margins": 4.140670299530029, "rewards/rejected": -9.321807861328125, "step": 11638 }, { "epoch": 1.81, "learning_rate": 5.6108200632812905e-06, "logits/chosen": -1.2031066417694092, "logits/rejected": -2.8525044918060303, "logps/chosen": -180.60653686523438, "logps/rejected": -379.92132568359375, "loss": 0.2702, "rewards/accuracies": 1.0, "rewards/chosen": -5.324742794036865, "rewards/margins": 5.682767868041992, "rewards/rejected": -11.007511138916016, "step": 11639 }, { "epoch": 1.81, "learning_rate": 5.610086622750142e-06, "logits/chosen": -3.0175256729125977, "logits/rejected": -2.3250155448913574, "logps/chosen": -238.7490234375, "logps/rejected": -221.939453125, "loss": 0.1128, "rewards/accuracies": 1.0, "rewards/chosen": -5.2041168212890625, "rewards/margins": 3.8100266456604004, "rewards/rejected": -9.014142990112305, "step": 11640 }, { "epoch": 1.81, "learning_rate": 5.609353182218994e-06, "logits/chosen": -2.157844066619873, "logits/rejected": -2.5934975147247314, "logps/chosen": -119.88971710205078, "logps/rejected": -260.8753967285156, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": -6.302370548248291, "rewards/margins": 5.100874423980713, "rewards/rejected": -11.403244972229004, "step": 11641 }, { "epoch": 1.81, "learning_rate": 5.608619741687847e-06, "logits/chosen": -2.943147897720337, "logits/rejected": -3.092824935913086, "logps/chosen": -211.20040893554688, "logps/rejected": -330.279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1079154014587402, "rewards/margins": 10.906488418579102, "rewards/rejected": -14.014404296875, "step": 11642 }, { "epoch": 1.81, "learning_rate": 5.607886301156699e-06, "logits/chosen": -3.2360494136810303, "logits/rejected": -3.1055338382720947, "logps/chosen": -102.24528503417969, "logps/rejected": -164.07656860351562, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/chosen": -4.4073004722595215, "rewards/margins": 4.456726551055908, "rewards/rejected": -8.86402702331543, "step": 11643 }, { "epoch": 1.81, "learning_rate": 5.607152860625552e-06, "logits/chosen": -2.6512646675109863, "logits/rejected": -2.794412612915039, "logps/chosen": -312.1679992675781, "logps/rejected": -368.2938232421875, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -5.6680588722229, "rewards/margins": 6.651271820068359, "rewards/rejected": -12.319330215454102, "step": 11644 }, { "epoch": 1.81, "learning_rate": 5.6064194200944035e-06, "logits/chosen": -2.864624500274658, "logits/rejected": -1.0233012437820435, "logps/chosen": -343.8697509765625, "logps/rejected": -73.05734252929688, "loss": 1.9278, "rewards/accuracies": 0.0, "rewards/chosen": -6.504770755767822, "rewards/margins": -1.6645231246948242, "rewards/rejected": -4.840247631072998, "step": 11645 }, { "epoch": 1.81, "learning_rate": 5.605685979563255e-06, "logits/chosen": -3.156092405319214, "logits/rejected": -3.4003043174743652, "logps/chosen": -131.48544311523438, "logps/rejected": -305.5410461425781, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.636974811553955, "rewards/margins": 6.150424003601074, "rewards/rejected": -9.787399291992188, "step": 11646 }, { "epoch": 1.81, "learning_rate": 5.604952539032108e-06, "logits/chosen": -1.8971238136291504, "logits/rejected": -2.740459442138672, "logps/chosen": -99.43154907226562, "logps/rejected": -330.9700622558594, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.6410319805145264, "rewards/margins": 6.868930816650391, "rewards/rejected": -10.509963035583496, "step": 11647 }, { "epoch": 1.81, "learning_rate": 5.60421909850096e-06, "logits/chosen": -2.4115610122680664, "logits/rejected": -2.8362271785736084, "logps/chosen": -241.7330322265625, "logps/rejected": -299.6813659667969, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": -4.259157657623291, "rewards/margins": 4.358855247497559, "rewards/rejected": -8.618013381958008, "step": 11648 }, { "epoch": 1.81, "learning_rate": 5.603485657969812e-06, "logits/chosen": -2.266040086746216, "logits/rejected": -3.206616163253784, "logps/chosen": -161.78494262695312, "logps/rejected": -525.3455810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6433897018432617, "rewards/margins": 13.550675392150879, "rewards/rejected": -15.19406509399414, "step": 11649 }, { "epoch": 1.81, "learning_rate": 5.602752217438664e-06, "logits/chosen": -1.1471900939941406, "logits/rejected": -2.4749317169189453, "logps/chosen": -82.1495132446289, "logps/rejected": -319.7630920410156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.272477149963379, "rewards/margins": 9.825457572937012, "rewards/rejected": -12.09793472290039, "step": 11650 }, { "epoch": 1.81, "learning_rate": 5.6020187769075164e-06, "logits/chosen": -2.429885149002075, "logits/rejected": -3.0539019107818604, "logps/chosen": -78.27010345458984, "logps/rejected": -242.25189208984375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.8958382606506348, "rewards/margins": 6.667525768280029, "rewards/rejected": -9.563364028930664, "step": 11651 }, { "epoch": 1.81, "learning_rate": 5.601285336376368e-06, "logits/chosen": -2.8067736625671387, "logits/rejected": -2.1613926887512207, "logps/chosen": -191.9121856689453, "logps/rejected": -164.43453979492188, "loss": 1.7577, "rewards/accuracies": 0.5, "rewards/chosen": -4.166046142578125, "rewards/margins": 1.3942502737045288, "rewards/rejected": -5.560296535491943, "step": 11652 }, { "epoch": 1.81, "learning_rate": 5.60055189584522e-06, "logits/chosen": -1.6305476427078247, "logits/rejected": -2.3710432052612305, "logps/chosen": -120.45264434814453, "logps/rejected": -440.61981201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8938965797424316, "rewards/margins": 12.019027709960938, "rewards/rejected": -13.912924766540527, "step": 11653 }, { "epoch": 1.81, "learning_rate": 5.599818455314072e-06, "logits/chosen": -1.6579657793045044, "logits/rejected": -2.874138832092285, "logps/chosen": -138.9714813232422, "logps/rejected": -459.0126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.853546619415283, "rewards/margins": 9.221711158752441, "rewards/rejected": -13.075258255004883, "step": 11654 }, { "epoch": 1.81, "learning_rate": 5.599085014782924e-06, "logits/chosen": -2.144197463989258, "logits/rejected": -2.994769334793091, "logps/chosen": -285.6673889160156, "logps/rejected": -647.7548217773438, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.342080593109131, "rewards/margins": 6.500617027282715, "rewards/rejected": -10.842697143554688, "step": 11655 }, { "epoch": 1.81, "learning_rate": 5.598351574251777e-06, "logits/chosen": -1.23674738407135, "logits/rejected": -2.3259596824645996, "logps/chosen": -206.897705078125, "logps/rejected": -557.0682373046875, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -4.865154266357422, "rewards/margins": 10.84139633178711, "rewards/rejected": -15.706550598144531, "step": 11656 }, { "epoch": 1.81, "learning_rate": 5.5976181337206285e-06, "logits/chosen": -3.320265531539917, "logits/rejected": -2.9149580001831055, "logps/chosen": -871.291748046875, "logps/rejected": -591.877197265625, "loss": 1.5489, "rewards/accuracies": 0.5, "rewards/chosen": -5.416441440582275, "rewards/margins": 3.0055954456329346, "rewards/rejected": -8.422037124633789, "step": 11657 }, { "epoch": 1.81, "learning_rate": 5.59688469318948e-06, "logits/chosen": -2.8219354152679443, "logits/rejected": -3.0733091831207275, "logps/chosen": -86.56175231933594, "logps/rejected": -253.06114196777344, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.0561017990112305, "rewards/margins": 6.814242362976074, "rewards/rejected": -11.870344161987305, "step": 11658 }, { "epoch": 1.81, "learning_rate": 5.596151252658332e-06, "logits/chosen": -2.7691357135772705, "logits/rejected": -2.8637044429779053, "logps/chosen": -204.7647705078125, "logps/rejected": -200.87770080566406, "loss": 1.6811, "rewards/accuracies": 0.5, "rewards/chosen": -6.998867988586426, "rewards/margins": 3.2401554584503174, "rewards/rejected": -10.23902416229248, "step": 11659 }, { "epoch": 1.81, "learning_rate": 5.595417812127185e-06, "logits/chosen": -2.7186319828033447, "logits/rejected": -3.348346471786499, "logps/chosen": -133.98446655273438, "logps/rejected": -316.5213928222656, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -4.02864933013916, "rewards/margins": 8.311382293701172, "rewards/rejected": -12.340031623840332, "step": 11660 }, { "epoch": 1.81, "learning_rate": 5.594684371596038e-06, "logits/chosen": -2.529404640197754, "logits/rejected": -2.8136472702026367, "logps/chosen": -110.77386474609375, "logps/rejected": -280.02838134765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9011802673339844, "rewards/margins": 8.991978645324707, "rewards/rejected": -12.893158912658691, "step": 11661 }, { "epoch": 1.81, "learning_rate": 5.59395093106489e-06, "logits/chosen": -2.311591386795044, "logits/rejected": -2.946878433227539, "logps/chosen": -167.892822265625, "logps/rejected": -390.7354736328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.0548322200775146, "rewards/margins": 11.049178123474121, "rewards/rejected": -14.104010581970215, "step": 11662 }, { "epoch": 1.81, "learning_rate": 5.5932174905337415e-06, "logits/chosen": -1.664294719696045, "logits/rejected": -2.9163966178894043, "logps/chosen": -77.18502807617188, "logps/rejected": -264.15789794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.9222432971000671, "rewards/margins": 9.441645622253418, "rewards/rejected": -10.36388874053955, "step": 11663 }, { "epoch": 1.81, "learning_rate": 5.592484050002593e-06, "logits/chosen": -1.7077877521514893, "logits/rejected": -2.598191261291504, "logps/chosen": -194.11569213867188, "logps/rejected": -309.85467529296875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.5946099758148193, "rewards/margins": 5.927594184875488, "rewards/rejected": -9.52220344543457, "step": 11664 }, { "epoch": 1.81, "learning_rate": 5.591750609471446e-06, "logits/chosen": -2.88179349899292, "logits/rejected": -2.8626201152801514, "logps/chosen": -129.1638946533203, "logps/rejected": -234.8150634765625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.712287187576294, "rewards/margins": 6.522984027862549, "rewards/rejected": -9.235271453857422, "step": 11665 }, { "epoch": 1.81, "learning_rate": 5.591017168940298e-06, "logits/chosen": -2.7152535915374756, "logits/rejected": -2.9015042781829834, "logps/chosen": -128.51048278808594, "logps/rejected": -286.6729736328125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.527101516723633, "rewards/margins": 10.245941162109375, "rewards/rejected": -12.773042678833008, "step": 11666 }, { "epoch": 1.81, "learning_rate": 5.59028372840915e-06, "logits/chosen": -2.5270118713378906, "logits/rejected": -2.823256731033325, "logps/chosen": -226.0162811279297, "logps/rejected": -226.08694458007812, "loss": 0.7183, "rewards/accuracies": 0.5, "rewards/chosen": -4.890872955322266, "rewards/margins": 4.143808364868164, "rewards/rejected": -9.03468132019043, "step": 11667 }, { "epoch": 1.81, "learning_rate": 5.589550287878002e-06, "logits/chosen": -1.5424050092697144, "logits/rejected": -2.4677720069885254, "logps/chosen": -144.32699584960938, "logps/rejected": -304.1826171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.219765663146973, "rewards/margins": 6.939414978027344, "rewards/rejected": -11.159180641174316, "step": 11668 }, { "epoch": 1.81, "learning_rate": 5.5888168473468545e-06, "logits/chosen": -1.759259581565857, "logits/rejected": -2.390432596206665, "logps/chosen": -125.76535034179688, "logps/rejected": -400.006103515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.6838431358337402, "rewards/margins": 11.083789825439453, "rewards/rejected": -14.767633438110352, "step": 11669 }, { "epoch": 1.81, "learning_rate": 5.588083406815706e-06, "logits/chosen": -1.0719445943832397, "logits/rejected": -2.7386934757232666, "logps/chosen": -151.6607666015625, "logps/rejected": -631.1874389648438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.358920574188232, "rewards/margins": 10.468677520751953, "rewards/rejected": -14.827597618103027, "step": 11670 }, { "epoch": 1.82, "learning_rate": 5.587349966284558e-06, "logits/chosen": -1.5228888988494873, "logits/rejected": -2.827939987182617, "logps/chosen": -175.3022003173828, "logps/rejected": -540.9136352539062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.0969648361206055, "rewards/margins": 9.370959281921387, "rewards/rejected": -13.467924118041992, "step": 11671 }, { "epoch": 1.82, "learning_rate": 5.58661652575341e-06, "logits/chosen": -2.770777702331543, "logits/rejected": -3.0188565254211426, "logps/chosen": -108.50477600097656, "logps/rejected": -267.1227111816406, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -5.029677391052246, "rewards/margins": 5.346505165100098, "rewards/rejected": -10.376182556152344, "step": 11672 }, { "epoch": 1.82, "learning_rate": 5.585883085222262e-06, "logits/chosen": -2.4281442165374756, "logits/rejected": -2.779806137084961, "logps/chosen": -428.7397766113281, "logps/rejected": -449.4024658203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.7347898483276367, "rewards/margins": 8.123397827148438, "rewards/rejected": -11.85818862915039, "step": 11673 }, { "epoch": 1.82, "learning_rate": 5.585149644691115e-06, "logits/chosen": -1.5393449068069458, "logits/rejected": -2.657003402709961, "logps/chosen": -242.6663360595703, "logps/rejected": -404.9898986816406, "loss": 0.4625, "rewards/accuracies": 0.5, "rewards/chosen": -6.896674633026123, "rewards/margins": 4.762839317321777, "rewards/rejected": -11.659513473510742, "step": 11674 }, { "epoch": 1.82, "learning_rate": 5.584416204159967e-06, "logits/chosen": -2.7816805839538574, "logits/rejected": -2.9882454872131348, "logps/chosen": -212.1273193359375, "logps/rejected": -365.01080322265625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -5.13628625869751, "rewards/margins": 6.258244514465332, "rewards/rejected": -11.39453125, "step": 11675 }, { "epoch": 1.82, "learning_rate": 5.5836827636288185e-06, "logits/chosen": -1.0679975748062134, "logits/rejected": -2.7014193534851074, "logps/chosen": -120.75810241699219, "logps/rejected": -457.3797912597656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.7303953170776367, "rewards/margins": 10.228520393371582, "rewards/rejected": -13.958915710449219, "step": 11676 }, { "epoch": 1.82, "learning_rate": 5.582949323097671e-06, "logits/chosen": -2.778275728225708, "logits/rejected": -3.1178481578826904, "logps/chosen": -218.37380981445312, "logps/rejected": -419.76416015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.8206377029418945, "rewards/margins": 7.889521598815918, "rewards/rejected": -11.710159301757812, "step": 11677 }, { "epoch": 1.82, "learning_rate": 5.582215882566524e-06, "logits/chosen": -2.4643492698669434, "logits/rejected": -2.805196523666382, "logps/chosen": -214.05801391601562, "logps/rejected": -531.38525390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.4499385356903076, "rewards/margins": 7.266647815704346, "rewards/rejected": -9.71658706665039, "step": 11678 }, { "epoch": 1.82, "learning_rate": 5.581482442035376e-06, "logits/chosen": -2.7500534057617188, "logits/rejected": -2.883190631866455, "logps/chosen": -292.4418640136719, "logps/rejected": -282.0796203613281, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": -2.8144073486328125, "rewards/margins": 3.064749240875244, "rewards/rejected": -5.879156589508057, "step": 11679 }, { "epoch": 1.82, "learning_rate": 5.580749001504228e-06, "logits/chosen": -1.329247236251831, "logits/rejected": -3.0582003593444824, "logps/chosen": -90.98615264892578, "logps/rejected": -394.33673095703125, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -3.231072425842285, "rewards/margins": 4.495558738708496, "rewards/rejected": -7.726631164550781, "step": 11680 }, { "epoch": 1.82, "learning_rate": 5.5800155609730796e-06, "logits/chosen": -2.1432156562805176, "logits/rejected": -2.933077335357666, "logps/chosen": -199.07273864746094, "logps/rejected": -506.001220703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.161256790161133, "rewards/margins": 6.694234848022461, "rewards/rejected": -11.855491638183594, "step": 11681 }, { "epoch": 1.82, "learning_rate": 5.5792821204419314e-06, "logits/chosen": -2.7580461502075195, "logits/rejected": -2.7085094451904297, "logps/chosen": -134.63229370117188, "logps/rejected": -224.90972900390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.272881984710693, "rewards/margins": 7.005597114562988, "rewards/rejected": -11.278478622436523, "step": 11682 }, { "epoch": 1.82, "learning_rate": 5.578548679910784e-06, "logits/chosen": -1.9897128343582153, "logits/rejected": -3.147289514541626, "logps/chosen": -72.15861511230469, "logps/rejected": -235.29751586914062, "loss": 0.233, "rewards/accuracies": 1.0, "rewards/chosen": -3.1448750495910645, "rewards/margins": 2.28873872756958, "rewards/rejected": -5.4336137771606445, "step": 11683 }, { "epoch": 1.82, "learning_rate": 5.577815239379636e-06, "logits/chosen": -2.0709173679351807, "logits/rejected": -2.4776039123535156, "logps/chosen": -202.80604553222656, "logps/rejected": -284.12469482421875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.415534496307373, "rewards/margins": 6.083963871002197, "rewards/rejected": -10.49949836730957, "step": 11684 }, { "epoch": 1.82, "learning_rate": 5.577081798848488e-06, "logits/chosen": -1.8641277551651, "logits/rejected": -2.9135289192199707, "logps/chosen": -257.07232666015625, "logps/rejected": -346.62939453125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -2.568345308303833, "rewards/margins": 7.909084320068359, "rewards/rejected": -10.477429389953613, "step": 11685 }, { "epoch": 1.82, "learning_rate": 5.57634835831734e-06, "logits/chosen": -2.66396427154541, "logits/rejected": -2.937345027923584, "logps/chosen": -178.83404541015625, "logps/rejected": -303.9316711425781, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -4.311180591583252, "rewards/margins": 6.3650407791137695, "rewards/rejected": -10.676220893859863, "step": 11686 }, { "epoch": 1.82, "learning_rate": 5.5756149177861925e-06, "logits/chosen": -2.7716400623321533, "logits/rejected": -1.1998827457427979, "logps/chosen": -218.9440460205078, "logps/rejected": -177.98667907714844, "loss": 0.2087, "rewards/accuracies": 1.0, "rewards/chosen": -4.824404239654541, "rewards/margins": 3.5995774269104004, "rewards/rejected": -8.423981666564941, "step": 11687 }, { "epoch": 1.82, "learning_rate": 5.574881477255044e-06, "logits/chosen": -1.3592348098754883, "logits/rejected": -2.61136531829834, "logps/chosen": -137.22564697265625, "logps/rejected": -275.477294921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.296187400817871, "rewards/margins": 7.615816116333008, "rewards/rejected": -11.912004470825195, "step": 11688 }, { "epoch": 1.82, "learning_rate": 5.574148036723896e-06, "logits/chosen": -1.31516432762146, "logits/rejected": -2.3255553245544434, "logps/chosen": -130.8506622314453, "logps/rejected": -417.0196533203125, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": -4.011482238769531, "rewards/margins": 5.103446960449219, "rewards/rejected": -9.11492919921875, "step": 11689 }, { "epoch": 1.82, "learning_rate": 5.573414596192748e-06, "logits/chosen": -2.1484262943267822, "logits/rejected": -2.710071563720703, "logps/chosen": -269.9517517089844, "logps/rejected": -409.76043701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4617714881896973, "rewards/margins": 9.963275909423828, "rewards/rejected": -12.425046920776367, "step": 11690 }, { "epoch": 1.82, "learning_rate": 5.572681155661601e-06, "logits/chosen": -2.3769328594207764, "logits/rejected": -2.653369665145874, "logps/chosen": -88.16378021240234, "logps/rejected": -128.81056213378906, "loss": 0.42, "rewards/accuracies": 0.5, "rewards/chosen": -5.285879135131836, "rewards/margins": 2.5147454738616943, "rewards/rejected": -7.800624370574951, "step": 11691 }, { "epoch": 1.82, "learning_rate": 5.571947715130453e-06, "logits/chosen": -2.8520638942718506, "logits/rejected": -2.496772289276123, "logps/chosen": -447.58197021484375, "logps/rejected": -369.148193359375, "loss": 1.046, "rewards/accuracies": 0.5, "rewards/chosen": -6.931884765625, "rewards/margins": 2.281179428100586, "rewards/rejected": -9.213064193725586, "step": 11692 }, { "epoch": 1.82, "learning_rate": 5.571214274599305e-06, "logits/chosen": -2.0255484580993652, "logits/rejected": -2.808945417404175, "logps/chosen": -156.6344451904297, "logps/rejected": -344.80743408203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.060173749923706, "rewards/margins": 10.291141510009766, "rewards/rejected": -12.35131549835205, "step": 11693 }, { "epoch": 1.82, "learning_rate": 5.570480834068157e-06, "logits/chosen": -1.986736536026001, "logits/rejected": -2.8879611492156982, "logps/chosen": -143.0876007080078, "logps/rejected": -491.06182861328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.4702978134155273, "rewards/margins": 8.730301856994629, "rewards/rejected": -12.200599670410156, "step": 11694 }, { "epoch": 1.82, "learning_rate": 5.569747393537009e-06, "logits/chosen": -2.963533639907837, "logits/rejected": -2.3482272624969482, "logps/chosen": -628.76318359375, "logps/rejected": -719.1231689453125, "loss": 0.618, "rewards/accuracies": 0.5, "rewards/chosen": -6.765390396118164, "rewards/margins": 1.628418207168579, "rewards/rejected": -8.393808364868164, "step": 11695 }, { "epoch": 1.82, "learning_rate": 5.569013953005862e-06, "logits/chosen": -2.4374914169311523, "logits/rejected": -2.7230122089385986, "logps/chosen": -257.29058837890625, "logps/rejected": -389.6842041015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.2977802753448486, "rewards/margins": 8.67277717590332, "rewards/rejected": -11.970558166503906, "step": 11696 }, { "epoch": 1.82, "learning_rate": 5.568280512474714e-06, "logits/chosen": -1.9303321838378906, "logits/rejected": -2.9260168075561523, "logps/chosen": -153.56971740722656, "logps/rejected": -573.2603759765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.201965808868408, "rewards/margins": 12.316808700561523, "rewards/rejected": -14.518774032592773, "step": 11697 }, { "epoch": 1.82, "learning_rate": 5.567547071943566e-06, "logits/chosen": -2.4858572483062744, "logits/rejected": -3.1017444133758545, "logps/chosen": -182.40504455566406, "logps/rejected": -253.6169891357422, "loss": 1.0634, "rewards/accuracies": 0.5, "rewards/chosen": -5.190980434417725, "rewards/margins": 4.155787944793701, "rewards/rejected": -9.346768379211426, "step": 11698 }, { "epoch": 1.82, "learning_rate": 5.566813631412418e-06, "logits/chosen": -2.8067033290863037, "logits/rejected": -1.8969149589538574, "logps/chosen": -177.3314666748047, "logps/rejected": -287.17425537109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.2933921813964844, "rewards/margins": 8.401346206665039, "rewards/rejected": -11.694738388061523, "step": 11699 }, { "epoch": 1.82, "learning_rate": 5.56608019088127e-06, "logits/chosen": -2.828045606613159, "logits/rejected": -2.7059195041656494, "logps/chosen": -293.561767578125, "logps/rejected": -284.54364013671875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.125726699829102, "rewards/margins": 6.55450963973999, "rewards/rejected": -10.680235862731934, "step": 11700 }, { "epoch": 1.82, "learning_rate": 5.565346750350122e-06, "logits/chosen": -2.8985936641693115, "logits/rejected": -2.4262287616729736, "logps/chosen": -331.3656311035156, "logps/rejected": -278.5243835449219, "loss": 0.0975, "rewards/accuracies": 1.0, "rewards/chosen": -2.5074753761291504, "rewards/margins": 2.351405382156372, "rewards/rejected": -4.858880996704102, "step": 11701 }, { "epoch": 1.82, "learning_rate": 5.564613309818974e-06, "logits/chosen": -3.004423141479492, "logits/rejected": -2.4427337646484375, "logps/chosen": -201.91737365722656, "logps/rejected": -207.40208435058594, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -2.369138479232788, "rewards/margins": 6.252958297729492, "rewards/rejected": -8.62209701538086, "step": 11702 }, { "epoch": 1.82, "learning_rate": 5.563879869287826e-06, "logits/chosen": -2.02549147605896, "logits/rejected": -3.0580484867095947, "logps/chosen": -320.226806640625, "logps/rejected": -554.8292236328125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.853708267211914, "rewards/margins": 6.19305944442749, "rewards/rejected": -11.046768188476562, "step": 11703 }, { "epoch": 1.82, "learning_rate": 5.563146428756678e-06, "logits/chosen": -3.133366584777832, "logits/rejected": -2.6980488300323486, "logps/chosen": -470.2455139160156, "logps/rejected": -336.5697021484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.6708786487579346, "rewards/margins": 8.422563552856445, "rewards/rejected": -11.093442916870117, "step": 11704 }, { "epoch": 1.82, "learning_rate": 5.5624129882255306e-06, "logits/chosen": -2.740493059158325, "logits/rejected": -3.046074151992798, "logps/chosen": -231.38258361816406, "logps/rejected": -381.856201171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.0646049976348877, "rewards/margins": 7.22097635269165, "rewards/rejected": -9.285581588745117, "step": 11705 }, { "epoch": 1.82, "learning_rate": 5.5616795476943824e-06, "logits/chosen": -2.4283905029296875, "logits/rejected": -2.9057507514953613, "logps/chosen": -684.6680297851562, "logps/rejected": -769.4052734375, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": -4.190179347991943, "rewards/margins": 5.503697395324707, "rewards/rejected": -9.693877220153809, "step": 11706 }, { "epoch": 1.82, "learning_rate": 5.560946107163234e-06, "logits/chosen": -1.9051826000213623, "logits/rejected": -2.675381660461426, "logps/chosen": -106.08778381347656, "logps/rejected": -176.09942626953125, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.718076229095459, "rewards/margins": 5.652943134307861, "rewards/rejected": -8.37101936340332, "step": 11707 }, { "epoch": 1.82, "learning_rate": 5.560212666632086e-06, "logits/chosen": -2.8748958110809326, "logits/rejected": -2.176595449447632, "logps/chosen": -334.1199645996094, "logps/rejected": -218.7742919921875, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -3.849299430847168, "rewards/margins": 6.916106224060059, "rewards/rejected": -10.765405654907227, "step": 11708 }, { "epoch": 1.82, "learning_rate": 5.559479226100939e-06, "logits/chosen": -2.889704465866089, "logits/rejected": -2.1306374073028564, "logps/chosen": -457.82598876953125, "logps/rejected": -535.3889770507812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.9071712493896484, "rewards/margins": 7.101239204406738, "rewards/rejected": -11.008410453796387, "step": 11709 }, { "epoch": 1.82, "learning_rate": 5.558745785569791e-06, "logits/chosen": -2.8150768280029297, "logits/rejected": -2.4823663234710693, "logps/chosen": -170.48873901367188, "logps/rejected": -297.0216369628906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.246288299560547, "rewards/margins": 8.21924114227295, "rewards/rejected": -10.46552848815918, "step": 11710 }, { "epoch": 1.82, "learning_rate": 5.5580123450386435e-06, "logits/chosen": -1.5561317205429077, "logits/rejected": -2.9220991134643555, "logps/chosen": -158.87318420410156, "logps/rejected": -623.8541259765625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -5.286797523498535, "rewards/margins": 8.117030143737793, "rewards/rejected": -13.403827667236328, "step": 11711 }, { "epoch": 1.82, "learning_rate": 5.557278904507495e-06, "logits/chosen": -2.9097001552581787, "logits/rejected": -2.9112350940704346, "logps/chosen": -262.5761413574219, "logps/rejected": -302.9897155761719, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.32560658454895, "rewards/margins": 4.916391849517822, "rewards/rejected": -8.241998672485352, "step": 11712 }, { "epoch": 1.82, "learning_rate": 5.556545463976347e-06, "logits/chosen": -1.77167809009552, "logits/rejected": -3.0164101123809814, "logps/chosen": -359.49652099609375, "logps/rejected": -724.5015869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.875013828277588, "rewards/margins": 11.90519905090332, "rewards/rejected": -14.78021240234375, "step": 11713 }, { "epoch": 1.82, "learning_rate": 5.5558120234452e-06, "logits/chosen": -3.2311112880706787, "logits/rejected": -3.094566822052002, "logps/chosen": -52.97955322265625, "logps/rejected": -170.2099609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.8014976978302002, "rewards/margins": 9.716371536254883, "rewards/rejected": -11.517868995666504, "step": 11714 }, { "epoch": 1.82, "learning_rate": 5.555078582914052e-06, "logits/chosen": -1.9652583599090576, "logits/rejected": -3.005471706390381, "logps/chosen": -567.9498901367188, "logps/rejected": -672.593505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.212060928344727, "rewards/margins": 11.311139106750488, "rewards/rejected": -16.52320098876953, "step": 11715 }, { "epoch": 1.82, "learning_rate": 5.554345142382904e-06, "logits/chosen": -2.7243053913116455, "logits/rejected": -3.1635947227478027, "logps/chosen": -254.0996551513672, "logps/rejected": -328.8894958496094, "loss": 0.5838, "rewards/accuracies": 0.5, "rewards/chosen": -6.571829319000244, "rewards/margins": 2.8928380012512207, "rewards/rejected": -9.464667320251465, "step": 11716 }, { "epoch": 1.82, "learning_rate": 5.553611701851756e-06, "logits/chosen": -2.995612382888794, "logits/rejected": -2.8336501121520996, "logps/chosen": -508.8222961425781, "logps/rejected": -321.44903564453125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.9652748107910156, "rewards/margins": 6.992095470428467, "rewards/rejected": -10.95737075805664, "step": 11717 }, { "epoch": 1.82, "learning_rate": 5.552878261320608e-06, "logits/chosen": -2.938781976699829, "logits/rejected": -2.4096014499664307, "logps/chosen": -565.87158203125, "logps/rejected": -688.1735229492188, "loss": 0.0366, "rewards/accuracies": 1.0, "rewards/chosen": -3.730886936187744, "rewards/margins": 7.179194450378418, "rewards/rejected": -10.91008186340332, "step": 11718 }, { "epoch": 1.82, "learning_rate": 5.55214482078946e-06, "logits/chosen": -1.2712167501449585, "logits/rejected": -2.3099520206451416, "logps/chosen": -201.74990844726562, "logps/rejected": -432.5662536621094, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -3.566530227661133, "rewards/margins": 7.581051349639893, "rewards/rejected": -11.147581100463867, "step": 11719 }, { "epoch": 1.82, "learning_rate": 5.551411380258312e-06, "logits/chosen": -2.1879160404205322, "logits/rejected": -2.3461365699768066, "logps/chosen": -234.0801239013672, "logps/rejected": -276.7159423828125, "loss": 0.4528, "rewards/accuracies": 0.5, "rewards/chosen": -3.983018398284912, "rewards/margins": 4.952933311462402, "rewards/rejected": -8.935952186584473, "step": 11720 }, { "epoch": 1.82, "learning_rate": 5.550677939727164e-06, "logits/chosen": -2.666511058807373, "logits/rejected": -3.1725335121154785, "logps/chosen": -176.0084228515625, "logps/rejected": -329.7867126464844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9068291187286377, "rewards/margins": 10.346012115478516, "rewards/rejected": -12.252840995788574, "step": 11721 }, { "epoch": 1.82, "learning_rate": 5.549944499196016e-06, "logits/chosen": -1.8637220859527588, "logits/rejected": -3.259666681289673, "logps/chosen": -173.91607666015625, "logps/rejected": -536.2593994140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.648705005645752, "rewards/margins": 11.985994338989258, "rewards/rejected": -14.634698867797852, "step": 11722 }, { "epoch": 1.82, "learning_rate": 5.549211058664869e-06, "logits/chosen": -2.928053379058838, "logits/rejected": -2.1771583557128906, "logps/chosen": -921.4888305664062, "logps/rejected": -587.383544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.299233913421631, "rewards/margins": 9.129812240600586, "rewards/rejected": -14.429046630859375, "step": 11723 }, { "epoch": 1.82, "learning_rate": 5.5484776181337205e-06, "logits/chosen": -3.115776538848877, "logits/rejected": -2.297563314437866, "logps/chosen": -206.91900634765625, "logps/rejected": -200.80101013183594, "loss": 1.6021, "rewards/accuracies": 0.5, "rewards/chosen": -5.348626136779785, "rewards/margins": -0.19515156745910645, "rewards/rejected": -5.1534743309021, "step": 11724 }, { "epoch": 1.82, "learning_rate": 5.547744177602572e-06, "logits/chosen": -3.2317581176757812, "logits/rejected": -3.2757985591888428, "logps/chosen": -289.51220703125, "logps/rejected": -363.86614990234375, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.202446460723877, "rewards/margins": 6.9161553382873535, "rewards/rejected": -10.11860179901123, "step": 11725 }, { "epoch": 1.82, "learning_rate": 5.547010737071424e-06, "logits/chosen": -2.694995641708374, "logits/rejected": -2.95535945892334, "logps/chosen": -574.4332275390625, "logps/rejected": -601.14990234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.038633823394775, "rewards/margins": 7.381915092468262, "rewards/rejected": -11.420548439025879, "step": 11726 }, { "epoch": 1.82, "learning_rate": 5.546277296540277e-06, "logits/chosen": -3.102632761001587, "logits/rejected": -2.897404432296753, "logps/chosen": -396.0099792480469, "logps/rejected": -352.7477722167969, "loss": 0.1532, "rewards/accuracies": 1.0, "rewards/chosen": -1.7886557579040527, "rewards/margins": 7.128893852233887, "rewards/rejected": -8.917549133300781, "step": 11727 }, { "epoch": 1.82, "learning_rate": 5.54554385600913e-06, "logits/chosen": -2.8981740474700928, "logits/rejected": -3.016542673110962, "logps/chosen": -147.8478240966797, "logps/rejected": -392.7118835449219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9325122833251953, "rewards/margins": 11.671043395996094, "rewards/rejected": -14.603555679321289, "step": 11728 }, { "epoch": 1.82, "learning_rate": 5.5448104154779816e-06, "logits/chosen": -2.768127679824829, "logits/rejected": -3.1608986854553223, "logps/chosen": -142.79298400878906, "logps/rejected": -242.05068969726562, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -3.560828447341919, "rewards/margins": 4.05047082901001, "rewards/rejected": -7.611299514770508, "step": 11729 }, { "epoch": 1.82, "learning_rate": 5.5440769749468334e-06, "logits/chosen": -2.7085788249969482, "logits/rejected": -2.906953811645508, "logps/chosen": -102.58495330810547, "logps/rejected": -268.00048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3326683044433594, "rewards/margins": 9.51292610168457, "rewards/rejected": -10.84559440612793, "step": 11730 }, { "epoch": 1.82, "learning_rate": 5.543343534415685e-06, "logits/chosen": -0.6675801277160645, "logits/rejected": -2.474616050720215, "logps/chosen": -95.61427307128906, "logps/rejected": -273.7655029296875, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -3.142042875289917, "rewards/margins": 5.683417797088623, "rewards/rejected": -8.825460433959961, "step": 11731 }, { "epoch": 1.82, "learning_rate": 5.542610093884538e-06, "logits/chosen": -2.849186658859253, "logits/rejected": -2.060788869857788, "logps/chosen": -155.23919677734375, "logps/rejected": -184.48284912109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.1165966987609863, "rewards/margins": 8.22329330444336, "rewards/rejected": -10.339890480041504, "step": 11732 }, { "epoch": 1.82, "learning_rate": 5.54187665335339e-06, "logits/chosen": -2.743307590484619, "logits/rejected": -1.5316623449325562, "logps/chosen": -440.39727783203125, "logps/rejected": -286.38458251953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.478375434875488, "rewards/margins": 8.823700904846191, "rewards/rejected": -13.30207633972168, "step": 11733 }, { "epoch": 1.82, "learning_rate": 5.541143212822242e-06, "logits/chosen": -2.883744478225708, "logits/rejected": -2.917097568511963, "logps/chosen": -141.98521423339844, "logps/rejected": -218.67306518554688, "loss": 0.2983, "rewards/accuracies": 1.0, "rewards/chosen": -3.3168225288391113, "rewards/margins": 3.4913933277130127, "rewards/rejected": -6.808216094970703, "step": 11734 }, { "epoch": 1.83, "learning_rate": 5.540409772291094e-06, "logits/chosen": -2.5564677715301514, "logits/rejected": -3.0056936740875244, "logps/chosen": -316.1280212402344, "logps/rejected": -429.1356201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7849974632263184, "rewards/margins": 12.997602462768555, "rewards/rejected": -15.782600402832031, "step": 11735 }, { "epoch": 1.83, "learning_rate": 5.539676331759946e-06, "logits/chosen": -3.094399929046631, "logits/rejected": -2.2293219566345215, "logps/chosen": -313.32110595703125, "logps/rejected": -213.55482482910156, "loss": 1.0409, "rewards/accuracies": 0.5, "rewards/chosen": -4.403286933898926, "rewards/margins": 0.7246361970901489, "rewards/rejected": -5.127923011779785, "step": 11736 }, { "epoch": 1.83, "learning_rate": 5.538942891228798e-06, "logits/chosen": -3.0283095836639404, "logits/rejected": -3.2879891395568848, "logps/chosen": -179.94589233398438, "logps/rejected": -347.74853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.1158549785614014, "rewards/margins": 11.92635726928711, "rewards/rejected": -13.042211532592773, "step": 11737 }, { "epoch": 1.83, "learning_rate": 5.53820945069765e-06, "logits/chosen": -2.0981216430664062, "logits/rejected": -3.0645415782928467, "logps/chosen": -173.1298370361328, "logps/rejected": -459.9117431640625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.45177960395813, "rewards/margins": 6.526642799377441, "rewards/rejected": -8.978422164916992, "step": 11738 }, { "epoch": 1.83, "learning_rate": 5.537476010166502e-06, "logits/chosen": -1.9745076894760132, "logits/rejected": -3.1437199115753174, "logps/chosen": -198.34677124023438, "logps/rejected": -402.5220947265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.784205913543701, "rewards/margins": 8.339492797851562, "rewards/rejected": -11.123699188232422, "step": 11739 }, { "epoch": 1.83, "learning_rate": 5.536742569635355e-06, "logits/chosen": -2.591566562652588, "logits/rejected": -2.6958601474761963, "logps/chosen": -313.72808837890625, "logps/rejected": -402.0732421875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.428445339202881, "rewards/margins": 7.252232551574707, "rewards/rejected": -9.68067741394043, "step": 11740 }, { "epoch": 1.83, "learning_rate": 5.536009129104207e-06, "logits/chosen": -2.5303947925567627, "logits/rejected": -3.0093955993652344, "logps/chosen": -192.03903198242188, "logps/rejected": -336.586181640625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -4.082926273345947, "rewards/margins": 5.94615364074707, "rewards/rejected": -10.029080390930176, "step": 11741 }, { "epoch": 1.83, "learning_rate": 5.5352756885730585e-06, "logits/chosen": -1.5815637111663818, "logits/rejected": -2.9585554599761963, "logps/chosen": -155.5902099609375, "logps/rejected": -359.40155029296875, "loss": 0.3486, "rewards/accuracies": 0.5, "rewards/chosen": -5.445323467254639, "rewards/margins": 3.7240958213806152, "rewards/rejected": -9.169419288635254, "step": 11742 }, { "epoch": 1.83, "learning_rate": 5.53454224804191e-06, "logits/chosen": -2.770479440689087, "logits/rejected": -2.834047555923462, "logps/chosen": -83.25708770751953, "logps/rejected": -160.41534423828125, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": -5.704576015472412, "rewards/margins": 4.462111949920654, "rewards/rejected": -10.166687965393066, "step": 11743 }, { "epoch": 1.83, "learning_rate": 5.533808807510763e-06, "logits/chosen": -2.586765766143799, "logits/rejected": -2.760455369949341, "logps/chosen": -118.34999084472656, "logps/rejected": -391.2367858886719, "loss": 1.4308, "rewards/accuracies": 0.5, "rewards/chosen": -5.6811113357543945, "rewards/margins": 7.309309959411621, "rewards/rejected": -12.990421295166016, "step": 11744 }, { "epoch": 1.83, "learning_rate": 5.533075366979616e-06, "logits/chosen": -1.9692319631576538, "logits/rejected": -2.8162240982055664, "logps/chosen": -318.7822265625, "logps/rejected": -507.9229736328125, "loss": 1.6374, "rewards/accuracies": 0.5, "rewards/chosen": -2.976710557937622, "rewards/margins": 5.206766128540039, "rewards/rejected": -8.183476448059082, "step": 11745 }, { "epoch": 1.83, "learning_rate": 5.532341926448468e-06, "logits/chosen": -1.9590952396392822, "logits/rejected": -2.8801400661468506, "logps/chosen": -131.0333251953125, "logps/rejected": -278.3373107910156, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -5.595958232879639, "rewards/margins": 6.8105788230896, "rewards/rejected": -12.406537055969238, "step": 11746 }, { "epoch": 1.83, "learning_rate": 5.53160848591732e-06, "logits/chosen": -2.86517596244812, "logits/rejected": -2.059170722961426, "logps/chosen": -528.6787109375, "logps/rejected": -381.23779296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.856461763381958, "rewards/margins": 7.650158882141113, "rewards/rejected": -10.506620407104492, "step": 11747 }, { "epoch": 1.83, "learning_rate": 5.5308750453861715e-06, "logits/chosen": -2.617804765701294, "logits/rejected": -2.780550003051758, "logps/chosen": -229.37234497070312, "logps/rejected": -253.8184814453125, "loss": 3.331, "rewards/accuracies": 0.5, "rewards/chosen": -7.575831413269043, "rewards/margins": -1.2637016773223877, "rewards/rejected": -6.312129974365234, "step": 11748 }, { "epoch": 1.83, "learning_rate": 5.530141604855024e-06, "logits/chosen": -3.2666492462158203, "logits/rejected": -2.304124593734741, "logps/chosen": -327.2975158691406, "logps/rejected": -199.86094665527344, "loss": 1.0768, "rewards/accuracies": 0.5, "rewards/chosen": -4.273240566253662, "rewards/margins": 6.601066589355469, "rewards/rejected": -10.874307632446289, "step": 11749 }, { "epoch": 1.83, "learning_rate": 5.529408164323876e-06, "logits/chosen": -2.5636284351348877, "logits/rejected": -2.941075086593628, "logps/chosen": -211.98513793945312, "logps/rejected": -479.9935302734375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -6.400700092315674, "rewards/margins": 7.220003128051758, "rewards/rejected": -13.62070369720459, "step": 11750 }, { "epoch": 1.83, "learning_rate": 5.528674723792728e-06, "logits/chosen": -2.4733524322509766, "logits/rejected": -3.0591111183166504, "logps/chosen": -61.68221664428711, "logps/rejected": -196.70172119140625, "loss": 0.0871, "rewards/accuracies": 1.0, "rewards/chosen": -3.854487180709839, "rewards/margins": 4.909934043884277, "rewards/rejected": -8.764421463012695, "step": 11751 }, { "epoch": 1.83, "learning_rate": 5.52794128326158e-06, "logits/chosen": -2.5448217391967773, "logits/rejected": -2.717942476272583, "logps/chosen": -135.37974548339844, "logps/rejected": -298.513671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.203772068023682, "rewards/margins": 6.854754447937012, "rewards/rejected": -12.058526992797852, "step": 11752 }, { "epoch": 1.83, "learning_rate": 5.527207842730432e-06, "logits/chosen": -2.624647855758667, "logits/rejected": -3.0753707885742188, "logps/chosen": -227.71592712402344, "logps/rejected": -339.8079833984375, "loss": 0.9329, "rewards/accuracies": 0.5, "rewards/chosen": -7.209297180175781, "rewards/margins": 2.0027382373809814, "rewards/rejected": -9.212035179138184, "step": 11753 }, { "epoch": 1.83, "learning_rate": 5.5264744021992845e-06, "logits/chosen": -2.528226137161255, "logits/rejected": -2.990407705307007, "logps/chosen": -56.71866226196289, "logps/rejected": -155.60858154296875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -3.591984748840332, "rewards/margins": 4.867737293243408, "rewards/rejected": -8.459722518920898, "step": 11754 }, { "epoch": 1.83, "learning_rate": 5.525740961668136e-06, "logits/chosen": -2.327096700668335, "logits/rejected": -3.2662363052368164, "logps/chosen": -91.52223205566406, "logps/rejected": -501.45263671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7463111877441406, "rewards/margins": 9.933181762695312, "rewards/rejected": -12.679492950439453, "step": 11755 }, { "epoch": 1.83, "learning_rate": 5.525007521136988e-06, "logits/chosen": -2.9115941524505615, "logits/rejected": -3.049391508102417, "logps/chosen": -82.77030944824219, "logps/rejected": -212.3820037841797, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1131374835968018, "rewards/margins": 6.929954528808594, "rewards/rejected": -10.043092727661133, "step": 11756 }, { "epoch": 1.83, "learning_rate": 5.52427408060584e-06, "logits/chosen": -3.1790428161621094, "logits/rejected": -3.342135429382324, "logps/chosen": -369.52252197265625, "logps/rejected": -385.06646728515625, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -5.448125839233398, "rewards/margins": 5.266045570373535, "rewards/rejected": -10.71417236328125, "step": 11757 }, { "epoch": 1.83, "learning_rate": 5.523540640074693e-06, "logits/chosen": -2.815553665161133, "logits/rejected": -2.8775460720062256, "logps/chosen": -772.42138671875, "logps/rejected": -717.3941650390625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.477151393890381, "rewards/margins": 5.874795913696289, "rewards/rejected": -8.351947784423828, "step": 11758 }, { "epoch": 1.83, "learning_rate": 5.522807199543545e-06, "logits/chosen": -2.5074970722198486, "logits/rejected": -2.9767005443573, "logps/chosen": -242.13070678710938, "logps/rejected": -469.4482421875, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -5.5392303466796875, "rewards/margins": 5.869999885559082, "rewards/rejected": -11.40923023223877, "step": 11759 }, { "epoch": 1.83, "learning_rate": 5.5220737590123966e-06, "logits/chosen": -3.010006904602051, "logits/rejected": -2.8042714595794678, "logps/chosen": -129.7008056640625, "logps/rejected": -237.03533935546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8058513402938843, "rewards/margins": 9.67547607421875, "rewards/rejected": -11.481327056884766, "step": 11760 }, { "epoch": 1.83, "learning_rate": 5.521340318481249e-06, "logits/chosen": -2.7786974906921387, "logits/rejected": -2.1910903453826904, "logps/chosen": -572.6223754882812, "logps/rejected": -340.275390625, "loss": 2.6316, "rewards/accuracies": 0.5, "rewards/chosen": -5.209709167480469, "rewards/margins": 0.5911159515380859, "rewards/rejected": -5.800825595855713, "step": 11761 }, { "epoch": 1.83, "learning_rate": 5.520606877950101e-06, "logits/chosen": -2.6774628162384033, "logits/rejected": -2.7587509155273438, "logps/chosen": -234.74038696289062, "logps/rejected": -380.62945556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.833292007446289, "rewards/margins": 10.01806640625, "rewards/rejected": -12.851358413696289, "step": 11762 }, { "epoch": 1.83, "learning_rate": 5.519873437418954e-06, "logits/chosen": -2.808359384536743, "logits/rejected": -3.2354657649993896, "logps/chosen": -229.60879516601562, "logps/rejected": -408.9935302734375, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/chosen": -5.275091648101807, "rewards/margins": 2.1946868896484375, "rewards/rejected": -7.469778537750244, "step": 11763 }, { "epoch": 1.83, "learning_rate": 5.519139996887806e-06, "logits/chosen": -3.0132486820220947, "logits/rejected": -3.044647693634033, "logps/chosen": -455.94061279296875, "logps/rejected": -468.8665466308594, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -0.234405517578125, "rewards/margins": 9.220438003540039, "rewards/rejected": -9.454843521118164, "step": 11764 }, { "epoch": 1.83, "learning_rate": 5.518406556356658e-06, "logits/chosen": -2.3478827476501465, "logits/rejected": -2.9947214126586914, "logps/chosen": -106.94929504394531, "logps/rejected": -332.8922424316406, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -4.477644443511963, "rewards/margins": 7.431095600128174, "rewards/rejected": -11.908740043640137, "step": 11765 }, { "epoch": 1.83, "learning_rate": 5.5176731158255095e-06, "logits/chosen": -2.3584768772125244, "logits/rejected": -2.767791271209717, "logps/chosen": -177.48455810546875, "logps/rejected": -164.47482299804688, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": -5.385354995727539, "rewards/margins": 3.9497203826904297, "rewards/rejected": -9.335075378417969, "step": 11766 }, { "epoch": 1.83, "learning_rate": 5.516939675294362e-06, "logits/chosen": -2.7256665229797363, "logits/rejected": -3.131225347518921, "logps/chosen": -631.0594482421875, "logps/rejected": -609.5928955078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.169281244277954, "rewards/margins": 7.67802619934082, "rewards/rejected": -9.847307205200195, "step": 11767 }, { "epoch": 1.83, "learning_rate": 5.516206234763214e-06, "logits/chosen": -2.879685163497925, "logits/rejected": -2.84350848197937, "logps/chosen": -225.14373779296875, "logps/rejected": -237.177734375, "loss": 1.4161, "rewards/accuracies": 0.5, "rewards/chosen": -4.7440690994262695, "rewards/margins": 1.5058860778808594, "rewards/rejected": -6.249955177307129, "step": 11768 }, { "epoch": 1.83, "learning_rate": 5.515472794232066e-06, "logits/chosen": -2.6135640144348145, "logits/rejected": -2.9464757442474365, "logps/chosen": -161.9176025390625, "logps/rejected": -331.35357666015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.706974983215332, "rewards/margins": 8.170814514160156, "rewards/rejected": -12.877790451049805, "step": 11769 }, { "epoch": 1.83, "learning_rate": 5.514739353700918e-06, "logits/chosen": -1.8030085563659668, "logits/rejected": -2.7679290771484375, "logps/chosen": -344.3617858886719, "logps/rejected": -493.6779479980469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.6404879093170166, "rewards/margins": 8.420478820800781, "rewards/rejected": -12.060967445373535, "step": 11770 }, { "epoch": 1.83, "learning_rate": 5.51400591316977e-06, "logits/chosen": -2.293635368347168, "logits/rejected": -3.1674180030822754, "logps/chosen": -193.30674743652344, "logps/rejected": -383.6426696777344, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -2.6915721893310547, "rewards/margins": 6.545379161834717, "rewards/rejected": -9.236950874328613, "step": 11771 }, { "epoch": 1.83, "learning_rate": 5.5132724726386225e-06, "logits/chosen": -1.2487937211990356, "logits/rejected": -2.6279382705688477, "logps/chosen": -83.01351928710938, "logps/rejected": -377.20989990234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.033585548400879, "rewards/margins": 7.059196472167969, "rewards/rejected": -11.092781066894531, "step": 11772 }, { "epoch": 1.83, "learning_rate": 5.512539032107474e-06, "logits/chosen": -1.0666239261627197, "logits/rejected": -2.6765847206115723, "logps/chosen": -123.56399536132812, "logps/rejected": -247.39898681640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.674574136734009, "rewards/margins": 9.257908821105957, "rewards/rejected": -11.932483673095703, "step": 11773 }, { "epoch": 1.83, "learning_rate": 5.511805591576326e-06, "logits/chosen": -2.729142665863037, "logits/rejected": -2.930582284927368, "logps/chosen": -48.80543899536133, "logps/rejected": -223.61746215820312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.48844575881958, "rewards/margins": 7.509305953979492, "rewards/rejected": -10.997751235961914, "step": 11774 }, { "epoch": 1.83, "learning_rate": 5.511072151045178e-06, "logits/chosen": -2.8220319747924805, "logits/rejected": -1.9876551628112793, "logps/chosen": -371.52294921875, "logps/rejected": -364.914794921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.918787002563477, "rewards/margins": 8.57021713256836, "rewards/rejected": -13.489004135131836, "step": 11775 }, { "epoch": 1.83, "learning_rate": 5.510338710514031e-06, "logits/chosen": -2.971642255783081, "logits/rejected": -2.9554316997528076, "logps/chosen": -398.915283203125, "logps/rejected": -434.812744140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.7084076404571533, "rewards/margins": 8.279743194580078, "rewards/rejected": -9.988149642944336, "step": 11776 }, { "epoch": 1.83, "learning_rate": 5.509605269982883e-06, "logits/chosen": -2.980940103530884, "logits/rejected": -3.0980026721954346, "logps/chosen": -156.05975341796875, "logps/rejected": -280.1786193847656, "loss": 0.995, "rewards/accuracies": 0.5, "rewards/chosen": -8.687356948852539, "rewards/margins": 1.3688344955444336, "rewards/rejected": -10.056191444396973, "step": 11777 }, { "epoch": 1.83, "learning_rate": 5.5088718294517355e-06, "logits/chosen": -2.803441047668457, "logits/rejected": -2.9827823638916016, "logps/chosen": -172.1234130859375, "logps/rejected": -365.09344482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.414611339569092, "rewards/margins": 12.030557632446289, "rewards/rejected": -14.445168495178223, "step": 11778 }, { "epoch": 1.83, "learning_rate": 5.508138388920587e-06, "logits/chosen": -0.7327229380607605, "logits/rejected": -2.8214704990386963, "logps/chosen": -121.61080932617188, "logps/rejected": -489.7601013183594, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -7.4703898429870605, "rewards/margins": 4.128006458282471, "rewards/rejected": -11.598396301269531, "step": 11779 }, { "epoch": 1.83, "learning_rate": 5.50740494838944e-06, "logits/chosen": -1.3704355955123901, "logits/rejected": -2.725572347640991, "logps/chosen": -112.56195068359375, "logps/rejected": -419.4432373046875, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": -6.553686141967773, "rewards/margins": 6.1923933029174805, "rewards/rejected": -12.746079444885254, "step": 11780 }, { "epoch": 1.83, "learning_rate": 5.506671507858292e-06, "logits/chosen": -3.0769948959350586, "logits/rejected": -2.9122769832611084, "logps/chosen": -341.4811096191406, "logps/rejected": -464.91656494140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.3335862159729, "rewards/margins": 9.23392391204834, "rewards/rejected": -13.567509651184082, "step": 11781 }, { "epoch": 1.83, "learning_rate": 5.505938067327144e-06, "logits/chosen": -2.1887059211730957, "logits/rejected": -2.808708667755127, "logps/chosen": -128.70863342285156, "logps/rejected": -238.9396514892578, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.151144504547119, "rewards/margins": 8.693963050842285, "rewards/rejected": -11.845108032226562, "step": 11782 }, { "epoch": 1.83, "learning_rate": 5.505204626795996e-06, "logits/chosen": -1.7703155279159546, "logits/rejected": -2.6300342082977295, "logps/chosen": -110.45524597167969, "logps/rejected": -252.162841796875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -5.797408103942871, "rewards/margins": 5.137434959411621, "rewards/rejected": -10.934843063354492, "step": 11783 }, { "epoch": 1.83, "learning_rate": 5.504471186264848e-06, "logits/chosen": -2.1777896881103516, "logits/rejected": -2.712009906768799, "logps/chosen": -275.2094421386719, "logps/rejected": -325.0331115722656, "loss": 0.2398, "rewards/accuracies": 1.0, "rewards/chosen": -4.152268409729004, "rewards/margins": 4.095796585083008, "rewards/rejected": -8.248064994812012, "step": 11784 }, { "epoch": 1.83, "learning_rate": 5.5037377457337e-06, "logits/chosen": -1.9196847677230835, "logits/rejected": -2.895596981048584, "logps/chosen": -180.73361206054688, "logps/rejected": -427.3028259277344, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -4.319967746734619, "rewards/margins": 7.583521366119385, "rewards/rejected": -11.903489112854004, "step": 11785 }, { "epoch": 1.83, "learning_rate": 5.503004305202552e-06, "logits/chosen": -2.724186658859253, "logits/rejected": -2.746551036834717, "logps/chosen": -116.23938751220703, "logps/rejected": -288.2857666015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.58492374420166, "rewards/margins": 9.087699890136719, "rewards/rejected": -13.672624588012695, "step": 11786 }, { "epoch": 1.83, "learning_rate": 5.502270864671404e-06, "logits/chosen": -2.8705828189849854, "logits/rejected": -2.6337125301361084, "logps/chosen": -326.95684814453125, "logps/rejected": -306.8842468261719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.6705827713012695, "rewards/margins": 10.754122734069824, "rewards/rejected": -15.424705505371094, "step": 11787 }, { "epoch": 1.83, "learning_rate": 5.501537424140256e-06, "logits/chosen": -2.9256207942962646, "logits/rejected": -3.181830644607544, "logps/chosen": -347.7751159667969, "logps/rejected": -467.0212707519531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.1966092586517334, "rewards/margins": 8.258535385131836, "rewards/rejected": -10.455144882202148, "step": 11788 }, { "epoch": 1.83, "learning_rate": 5.500803983609109e-06, "logits/chosen": -2.145437717437744, "logits/rejected": -2.965221643447876, "logps/chosen": -55.8320198059082, "logps/rejected": -448.0030517578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.565995216369629, "rewards/margins": 10.579238891601562, "rewards/rejected": -14.145235061645508, "step": 11789 }, { "epoch": 1.83, "learning_rate": 5.5000705430779605e-06, "logits/chosen": -1.7540395259857178, "logits/rejected": -2.8514339923858643, "logps/chosen": -117.180908203125, "logps/rejected": -317.606201171875, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -3.6365439891815186, "rewards/margins": 6.364350318908691, "rewards/rejected": -10.000894546508789, "step": 11790 }, { "epoch": 1.83, "learning_rate": 5.499337102546812e-06, "logits/chosen": -1.6553386449813843, "logits/rejected": -2.9014127254486084, "logps/chosen": -201.03529357910156, "logps/rejected": -645.8065795898438, "loss": 0.1062, "rewards/accuracies": 1.0, "rewards/chosen": -6.315171241760254, "rewards/margins": 6.367290496826172, "rewards/rejected": -12.682461738586426, "step": 11791 }, { "epoch": 1.83, "learning_rate": 5.498603662015664e-06, "logits/chosen": -2.9902920722961426, "logits/rejected": -2.077677011489868, "logps/chosen": -375.30303955078125, "logps/rejected": -116.37059020996094, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": -4.140002250671387, "rewards/margins": 3.9192848205566406, "rewards/rejected": -8.059287071228027, "step": 11792 }, { "epoch": 1.83, "learning_rate": 5.497870221484516e-06, "logits/chosen": -1.8556140661239624, "logits/rejected": -2.865125894546509, "logps/chosen": -111.98884582519531, "logps/rejected": -466.7142028808594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.924887180328369, "rewards/margins": 8.391974449157715, "rewards/rejected": -12.316862106323242, "step": 11793 }, { "epoch": 1.83, "learning_rate": 5.497136780953369e-06, "logits/chosen": -2.622089147567749, "logits/rejected": -2.9269964694976807, "logps/chosen": -115.3092041015625, "logps/rejected": -348.83721923828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.618452787399292, "rewards/margins": 10.14045238494873, "rewards/rejected": -12.758905410766602, "step": 11794 }, { "epoch": 1.83, "learning_rate": 5.496403340422222e-06, "logits/chosen": -2.206864595413208, "logits/rejected": -2.802175283432007, "logps/chosen": -181.47305297851562, "logps/rejected": -334.0386047363281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.341280221939087, "rewards/margins": 8.704176902770996, "rewards/rejected": -12.045456886291504, "step": 11795 }, { "epoch": 1.83, "learning_rate": 5.4956698998910735e-06, "logits/chosen": -2.1844613552093506, "logits/rejected": -2.646970272064209, "logps/chosen": -261.6236572265625, "logps/rejected": -434.2900390625, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -5.1091718673706055, "rewards/margins": 6.580897808074951, "rewards/rejected": -11.690069198608398, "step": 11796 }, { "epoch": 1.83, "learning_rate": 5.494936459359925e-06, "logits/chosen": -2.628066301345825, "logits/rejected": -2.6301443576812744, "logps/chosen": -622.2059936523438, "logps/rejected": -606.3253173828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.968696594238281, "rewards/margins": 7.627479553222656, "rewards/rejected": -12.596176147460938, "step": 11797 }, { "epoch": 1.83, "learning_rate": 5.494203018828778e-06, "logits/chosen": -2.4253976345062256, "logits/rejected": -2.860593318939209, "logps/chosen": -182.97972106933594, "logps/rejected": -342.21160888671875, "loss": 0.5663, "rewards/accuracies": 0.5, "rewards/chosen": -5.119677543640137, "rewards/margins": 3.4169788360595703, "rewards/rejected": -8.536656379699707, "step": 11798 }, { "epoch": 1.83, "learning_rate": 5.49346957829763e-06, "logits/chosen": -2.848505735397339, "logits/rejected": -3.162095069885254, "logps/chosen": -137.474853515625, "logps/rejected": -317.10015869140625, "loss": 0.0663, "rewards/accuracies": 1.0, "rewards/chosen": -3.0135979652404785, "rewards/margins": 5.199676036834717, "rewards/rejected": -8.213274002075195, "step": 11799 }, { "epoch": 1.84, "learning_rate": 5.492736137766482e-06, "logits/chosen": -1.6370446681976318, "logits/rejected": -2.8095357418060303, "logps/chosen": -86.15159606933594, "logps/rejected": -358.3581237792969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.132689952850342, "rewards/margins": 10.98283576965332, "rewards/rejected": -15.115525245666504, "step": 11800 }, { "epoch": 1.84, "learning_rate": 5.492002697235334e-06, "logits/chosen": -2.857518434524536, "logits/rejected": -1.9764171838760376, "logps/chosen": -740.7494506835938, "logps/rejected": -748.441650390625, "loss": 0.0439, "rewards/accuracies": 1.0, "rewards/chosen": -7.03529691696167, "rewards/margins": 4.664068698883057, "rewards/rejected": -11.699365615844727, "step": 11801 }, { "epoch": 1.84, "learning_rate": 5.491269256704186e-06, "logits/chosen": -2.981752634048462, "logits/rejected": -3.0045268535614014, "logps/chosen": -313.2034606933594, "logps/rejected": -475.0689697265625, "loss": 0.5958, "rewards/accuracies": 0.5, "rewards/chosen": -4.821580410003662, "rewards/margins": 4.75422477722168, "rewards/rejected": -9.5758056640625, "step": 11802 }, { "epoch": 1.84, "learning_rate": 5.490535816173038e-06, "logits/chosen": -1.4724558591842651, "logits/rejected": -2.6219394207000732, "logps/chosen": -215.1510009765625, "logps/rejected": -510.131103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.639047145843506, "rewards/margins": 14.16813850402832, "rewards/rejected": -16.807186126708984, "step": 11803 }, { "epoch": 1.84, "learning_rate": 5.48980237564189e-06, "logits/chosen": -2.928574323654175, "logits/rejected": -3.1301605701446533, "logps/chosen": -115.22774505615234, "logps/rejected": -306.166259765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8221030235290527, "rewards/margins": 9.74116325378418, "rewards/rejected": -12.56326675415039, "step": 11804 }, { "epoch": 1.84, "learning_rate": 5.489068935110742e-06, "logits/chosen": -2.6887004375457764, "logits/rejected": -2.752572774887085, "logps/chosen": -58.90178680419922, "logps/rejected": -157.5997314453125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.110405921936035, "rewards/margins": 6.704037666320801, "rewards/rejected": -10.814443588256836, "step": 11805 }, { "epoch": 1.84, "learning_rate": 5.488335494579594e-06, "logits/chosen": -3.039731740951538, "logits/rejected": -2.011504650115967, "logps/chosen": -242.29595947265625, "logps/rejected": -341.71343994140625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -4.144794464111328, "rewards/margins": 6.735944747924805, "rewards/rejected": -10.880739212036133, "step": 11806 }, { "epoch": 1.84, "learning_rate": 5.487602054048447e-06, "logits/chosen": -3.056821584701538, "logits/rejected": -3.1115825176239014, "logps/chosen": -573.1508178710938, "logps/rejected": -545.9263305664062, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.9306535720825195, "rewards/margins": 6.916029930114746, "rewards/rejected": -11.846683502197266, "step": 11807 }, { "epoch": 1.84, "learning_rate": 5.486868613517299e-06, "logits/chosen": -2.706399440765381, "logits/rejected": -3.1369550228118896, "logps/chosen": -97.19721984863281, "logps/rejected": -229.6887969970703, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -3.6908793449401855, "rewards/margins": 6.692421913146973, "rewards/rejected": -10.38330078125, "step": 11808 }, { "epoch": 1.84, "learning_rate": 5.4861351729861505e-06, "logits/chosen": -1.7933183908462524, "logits/rejected": -2.854037046432495, "logps/chosen": -91.50283813476562, "logps/rejected": -442.200927734375, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -4.250856399536133, "rewards/margins": 7.827314853668213, "rewards/rejected": -12.078170776367188, "step": 11809 }, { "epoch": 1.84, "learning_rate": 5.485401732455002e-06, "logits/chosen": -2.604954719543457, "logits/rejected": -1.723646879196167, "logps/chosen": -611.474609375, "logps/rejected": -436.11578369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2480525970458984, "rewards/margins": 9.455694198608398, "rewards/rejected": -11.703746795654297, "step": 11810 }, { "epoch": 1.84, "learning_rate": 5.484668291923855e-06, "logits/chosen": -2.1227989196777344, "logits/rejected": -3.144667863845825, "logps/chosen": -123.61337280273438, "logps/rejected": -362.240478515625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.474286079406738, "rewards/margins": 8.41981315612793, "rewards/rejected": -12.894100189208984, "step": 11811 }, { "epoch": 1.84, "learning_rate": 5.483934851392708e-06, "logits/chosen": -2.7425637245178223, "logits/rejected": -1.2270879745483398, "logps/chosen": -306.76275634765625, "logps/rejected": -190.99139404296875, "loss": 1.2379, "rewards/accuracies": 0.5, "rewards/chosen": -4.97982931137085, "rewards/margins": 2.4235119819641113, "rewards/rejected": -7.403341293334961, "step": 11812 }, { "epoch": 1.84, "learning_rate": 5.48320141086156e-06, "logits/chosen": -1.6399825811386108, "logits/rejected": -1.7586426734924316, "logps/chosen": -943.5032958984375, "logps/rejected": -513.337646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.4877532720565796, "rewards/margins": 16.838611602783203, "rewards/rejected": -17.326364517211914, "step": 11813 }, { "epoch": 1.84, "learning_rate": 5.4824679703304116e-06, "logits/chosen": -2.8479437828063965, "logits/rejected": -2.928299903869629, "logps/chosen": -157.6824493408203, "logps/rejected": -327.70123291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3428754806518555, "rewards/margins": 11.663755416870117, "rewards/rejected": -14.006629943847656, "step": 11814 }, { "epoch": 1.84, "learning_rate": 5.4817345297992634e-06, "logits/chosen": -3.315122365951538, "logits/rejected": -3.2310826778411865, "logps/chosen": -523.8192138671875, "logps/rejected": -290.7764587402344, "loss": 1.8891, "rewards/accuracies": 0.5, "rewards/chosen": -7.016927719116211, "rewards/margins": -0.13856911659240723, "rewards/rejected": -6.878358364105225, "step": 11815 }, { "epoch": 1.84, "learning_rate": 5.481001089268116e-06, "logits/chosen": -2.2466773986816406, "logits/rejected": -2.9485533237457275, "logps/chosen": -79.12191009521484, "logps/rejected": -377.50909423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.039959669113159, "rewards/margins": 10.957194328308105, "rewards/rejected": -13.997154235839844, "step": 11816 }, { "epoch": 1.84, "learning_rate": 5.480267648736968e-06, "logits/chosen": -2.513638973236084, "logits/rejected": -2.785531759262085, "logps/chosen": -85.04669189453125, "logps/rejected": -510.67779541015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.525486946105957, "rewards/margins": 11.587806701660156, "rewards/rejected": -15.113292694091797, "step": 11817 }, { "epoch": 1.84, "learning_rate": 5.47953420820582e-06, "logits/chosen": -2.9271137714385986, "logits/rejected": -2.7568023204803467, "logps/chosen": -214.6522216796875, "logps/rejected": -266.70562744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.116869926452637, "rewards/margins": 8.951423645019531, "rewards/rejected": -14.068292617797852, "step": 11818 }, { "epoch": 1.84, "learning_rate": 5.478800767674672e-06, "logits/chosen": -1.5606372356414795, "logits/rejected": -2.7538034915924072, "logps/chosen": -416.4100341796875, "logps/rejected": -627.0831298828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.5089285373687744, "rewards/margins": 10.174604415893555, "rewards/rejected": -12.68353271484375, "step": 11819 }, { "epoch": 1.84, "learning_rate": 5.478067327143524e-06, "logits/chosen": -1.7246967554092407, "logits/rejected": -2.6764938831329346, "logps/chosen": -130.43408203125, "logps/rejected": -402.76202392578125, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -3.137378692626953, "rewards/margins": 7.429816722869873, "rewards/rejected": -10.567195892333984, "step": 11820 }, { "epoch": 1.84, "learning_rate": 5.477333886612376e-06, "logits/chosen": -1.6559303998947144, "logits/rejected": -2.581831455230713, "logps/chosen": -320.2806701660156, "logps/rejected": -644.8533325195312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.713189125061035, "rewards/margins": 8.67582893371582, "rewards/rejected": -14.389019012451172, "step": 11821 }, { "epoch": 1.84, "learning_rate": 5.476600446081228e-06, "logits/chosen": -3.011575222015381, "logits/rejected": -1.8554311990737915, "logps/chosen": -274.8215637207031, "logps/rejected": -270.04876708984375, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -6.168399333953857, "rewards/margins": 7.808920383453369, "rewards/rejected": -13.977319717407227, "step": 11822 }, { "epoch": 1.84, "learning_rate": 5.47586700555008e-06, "logits/chosen": -2.7449986934661865, "logits/rejected": -3.110581159591675, "logps/chosen": -140.68748474121094, "logps/rejected": -257.5312805175781, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.5064003467559814, "rewards/margins": 5.765609264373779, "rewards/rejected": -9.27200984954834, "step": 11823 }, { "epoch": 1.84, "learning_rate": 5.475133565018932e-06, "logits/chosen": -2.783513307571411, "logits/rejected": -2.394094228744507, "logps/chosen": -216.61209106445312, "logps/rejected": -293.4577941894531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.411782741546631, "rewards/margins": 10.737152099609375, "rewards/rejected": -13.148934364318848, "step": 11824 }, { "epoch": 1.84, "learning_rate": 5.474400124487785e-06, "logits/chosen": -2.5684080123901367, "logits/rejected": -2.8942911624908447, "logps/chosen": -97.9867172241211, "logps/rejected": -287.500732421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.786107063293457, "rewards/margins": 9.581110000610352, "rewards/rejected": -14.367218017578125, "step": 11825 }, { "epoch": 1.84, "learning_rate": 5.473666683956637e-06, "logits/chosen": -2.6648097038269043, "logits/rejected": -2.9439210891723633, "logps/chosen": -257.4004821777344, "logps/rejected": -368.4864196777344, "loss": 0.4592, "rewards/accuracies": 0.5, "rewards/chosen": -3.0196824073791504, "rewards/margins": 2.7662296295166016, "rewards/rejected": -5.785912036895752, "step": 11826 }, { "epoch": 1.84, "learning_rate": 5.4729332434254885e-06, "logits/chosen": -2.0070011615753174, "logits/rejected": -2.7430524826049805, "logps/chosen": -137.59234619140625, "logps/rejected": -393.6328125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.671823501586914, "rewards/margins": 8.174678802490234, "rewards/rejected": -12.846502304077148, "step": 11827 }, { "epoch": 1.84, "learning_rate": 5.472199802894341e-06, "logits/chosen": -3.271209239959717, "logits/rejected": -3.295900821685791, "logps/chosen": -151.6334228515625, "logps/rejected": -225.19119262695312, "loss": 0.7065, "rewards/accuracies": 0.5, "rewards/chosen": -3.4386675357818604, "rewards/margins": 5.98016357421875, "rewards/rejected": -9.418830871582031, "step": 11828 }, { "epoch": 1.84, "learning_rate": 5.471466362363194e-06, "logits/chosen": -2.519334316253662, "logits/rejected": -3.032437801361084, "logps/chosen": -250.4498291015625, "logps/rejected": -382.3204345703125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -2.406959056854248, "rewards/margins": 7.487057685852051, "rewards/rejected": -9.89401626586914, "step": 11829 }, { "epoch": 1.84, "learning_rate": 5.470732921832046e-06, "logits/chosen": -2.1414570808410645, "logits/rejected": -2.9874799251556396, "logps/chosen": -128.42996215820312, "logps/rejected": -171.19081115722656, "loss": 0.2449, "rewards/accuracies": 1.0, "rewards/chosen": -5.202432632446289, "rewards/margins": 2.9029932022094727, "rewards/rejected": -8.105425834655762, "step": 11830 }, { "epoch": 1.84, "learning_rate": 5.469999481300898e-06, "logits/chosen": -2.2097811698913574, "logits/rejected": -1.567954659461975, "logps/chosen": -227.47433471679688, "logps/rejected": -279.456787109375, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -4.38831901550293, "rewards/margins": 4.672140121459961, "rewards/rejected": -9.06045913696289, "step": 11831 }, { "epoch": 1.84, "learning_rate": 5.46926604076975e-06, "logits/chosen": -2.073073625564575, "logits/rejected": -2.8325307369232178, "logps/chosen": -195.75694274902344, "logps/rejected": -271.208740234375, "loss": 1.4102, "rewards/accuracies": 0.5, "rewards/chosen": -5.872276306152344, "rewards/margins": 1.7039883136749268, "rewards/rejected": -7.576264381408691, "step": 11832 }, { "epoch": 1.84, "learning_rate": 5.4685326002386015e-06, "logits/chosen": -2.279942512512207, "logits/rejected": -2.818603992462158, "logps/chosen": -55.37968444824219, "logps/rejected": -194.3686065673828, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -3.5507636070251465, "rewards/margins": 6.876293659210205, "rewards/rejected": -10.427057266235352, "step": 11833 }, { "epoch": 1.84, "learning_rate": 5.467799159707454e-06, "logits/chosen": -1.1775078773498535, "logits/rejected": -2.8608572483062744, "logps/chosen": -203.68600463867188, "logps/rejected": -342.30108642578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.5822930335998535, "rewards/margins": 9.306463241577148, "rewards/rejected": -12.888755798339844, "step": 11834 }, { "epoch": 1.84, "learning_rate": 5.467065719176306e-06, "logits/chosen": -2.2535953521728516, "logits/rejected": -2.9261529445648193, "logps/chosen": -597.812255859375, "logps/rejected": -607.89990234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.996837615966797, "rewards/margins": 7.5092620849609375, "rewards/rejected": -11.506099700927734, "step": 11835 }, { "epoch": 1.84, "learning_rate": 5.466332278645158e-06, "logits/chosen": -2.7850470542907715, "logits/rejected": -3.051071882247925, "logps/chosen": -152.6282958984375, "logps/rejected": -307.72222900390625, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/chosen": -7.211804389953613, "rewards/margins": 5.850761890411377, "rewards/rejected": -13.062566757202148, "step": 11836 }, { "epoch": 1.84, "learning_rate": 5.46559883811401e-06, "logits/chosen": -2.043596029281616, "logits/rejected": -3.1017067432403564, "logps/chosen": -122.39057922363281, "logps/rejected": -334.54290771484375, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/chosen": -4.642760753631592, "rewards/margins": 5.286937713623047, "rewards/rejected": -9.929698944091797, "step": 11837 }, { "epoch": 1.84, "learning_rate": 5.4648653975828626e-06, "logits/chosen": -2.998277425765991, "logits/rejected": -2.064486265182495, "logps/chosen": -519.1021728515625, "logps/rejected": -382.7713623046875, "loss": 0.3532, "rewards/accuracies": 0.5, "rewards/chosen": -5.2489519119262695, "rewards/margins": 3.156510829925537, "rewards/rejected": -8.405462265014648, "step": 11838 }, { "epoch": 1.84, "learning_rate": 5.4641319570517144e-06, "logits/chosen": -1.3847812414169312, "logits/rejected": -1.925232172012329, "logps/chosen": -117.16192626953125, "logps/rejected": -371.4971923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.775256395339966, "rewards/margins": 10.503764152526855, "rewards/rejected": -13.279020309448242, "step": 11839 }, { "epoch": 1.84, "learning_rate": 5.463398516520566e-06, "logits/chosen": -2.539703130722046, "logits/rejected": -2.7452080249786377, "logps/chosen": -86.83902740478516, "logps/rejected": -319.1059265136719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2879767417907715, "rewards/margins": 10.487236022949219, "rewards/rejected": -13.775213241577148, "step": 11840 }, { "epoch": 1.84, "learning_rate": 5.462665075989418e-06, "logits/chosen": -2.821596622467041, "logits/rejected": -2.7889933586120605, "logps/chosen": -187.2264404296875, "logps/rejected": -131.55804443359375, "loss": 0.1098, "rewards/accuracies": 1.0, "rewards/chosen": -3.6359896659851074, "rewards/margins": 5.040825843811035, "rewards/rejected": -8.6768159866333, "step": 11841 }, { "epoch": 1.84, "learning_rate": 5.46193163545827e-06, "logits/chosen": -1.2993851900100708, "logits/rejected": -2.7944605350494385, "logps/chosen": -295.8647155761719, "logps/rejected": -474.9604187011719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.8752715587615967, "rewards/margins": 11.204106330871582, "rewards/rejected": -14.079378128051758, "step": 11842 }, { "epoch": 1.84, "learning_rate": 5.461198194927123e-06, "logits/chosen": -2.372631549835205, "logits/rejected": -3.2883288860321045, "logps/chosen": -113.48301696777344, "logps/rejected": -636.22314453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.176106929779053, "rewards/margins": 9.823033332824707, "rewards/rejected": -14.999139785766602, "step": 11843 }, { "epoch": 1.84, "learning_rate": 5.460464754395975e-06, "logits/chosen": -2.1501564979553223, "logits/rejected": -2.9826595783233643, "logps/chosen": -238.65396118164062, "logps/rejected": -499.1558837890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8139939308166504, "rewards/margins": 9.543720245361328, "rewards/rejected": -11.35771369934082, "step": 11844 }, { "epoch": 1.84, "learning_rate": 5.459731313864827e-06, "logits/chosen": -2.6592254638671875, "logits/rejected": -2.8841357231140137, "logps/chosen": -163.6904296875, "logps/rejected": -183.079345703125, "loss": 1.4588, "rewards/accuracies": 0.5, "rewards/chosen": -4.246818542480469, "rewards/margins": 2.6435070037841797, "rewards/rejected": -6.890325546264648, "step": 11845 }, { "epoch": 1.84, "learning_rate": 5.458997873333679e-06, "logits/chosen": -1.9640870094299316, "logits/rejected": -2.7737770080566406, "logps/chosen": -310.8531799316406, "logps/rejected": -415.57562255859375, "loss": 1.0226, "rewards/accuracies": 0.5, "rewards/chosen": -4.460939407348633, "rewards/margins": 4.408832550048828, "rewards/rejected": -8.869771957397461, "step": 11846 }, { "epoch": 1.84, "learning_rate": 5.458264432802532e-06, "logits/chosen": -2.5673723220825195, "logits/rejected": -3.2203209400177, "logps/chosen": -66.99097442626953, "logps/rejected": -368.07354736328125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.086938858032227, "rewards/margins": 6.556706428527832, "rewards/rejected": -10.643646240234375, "step": 11847 }, { "epoch": 1.84, "learning_rate": 5.457530992271384e-06, "logits/chosen": -2.181936502456665, "logits/rejected": -2.768662214279175, "logps/chosen": -111.13066101074219, "logps/rejected": -396.6794128417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.718759536743164, "rewards/margins": 10.645681381225586, "rewards/rejected": -14.36444091796875, "step": 11848 }, { "epoch": 1.84, "learning_rate": 5.456797551740236e-06, "logits/chosen": -2.458740234375, "logits/rejected": -2.916344165802002, "logps/chosen": -126.25901794433594, "logps/rejected": -382.726806640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.8382904529571533, "rewards/margins": 7.1501054763793945, "rewards/rejected": -9.988395690917969, "step": 11849 }, { "epoch": 1.84, "learning_rate": 5.456064111209088e-06, "logits/chosen": -2.926231861114502, "logits/rejected": -2.8996102809906006, "logps/chosen": -174.4233856201172, "logps/rejected": -224.05209350585938, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -3.893289089202881, "rewards/margins": 7.04378604888916, "rewards/rejected": -10.937074661254883, "step": 11850 }, { "epoch": 1.84, "learning_rate": 5.4553306706779395e-06, "logits/chosen": -2.735307455062866, "logits/rejected": -1.4716923236846924, "logps/chosen": -219.7164764404297, "logps/rejected": -145.35977172851562, "loss": 1.2645, "rewards/accuracies": 0.5, "rewards/chosen": -5.992337226867676, "rewards/margins": 1.2237625122070312, "rewards/rejected": -7.216099739074707, "step": 11851 }, { "epoch": 1.84, "learning_rate": 5.454597230146792e-06, "logits/chosen": -1.502331018447876, "logits/rejected": -2.8739845752716064, "logps/chosen": -181.6517333984375, "logps/rejected": -558.724609375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.174373626708984, "rewards/margins": 8.382365226745605, "rewards/rejected": -14.55673885345459, "step": 11852 }, { "epoch": 1.84, "learning_rate": 5.453863789615644e-06, "logits/chosen": -2.826450824737549, "logits/rejected": -2.977471351623535, "logps/chosen": -222.2403106689453, "logps/rejected": -448.0685729980469, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.212883949279785, "rewards/margins": 8.553998947143555, "rewards/rejected": -12.766881942749023, "step": 11853 }, { "epoch": 1.84, "learning_rate": 5.453130349084496e-06, "logits/chosen": -2.754746437072754, "logits/rejected": -3.0693743228912354, "logps/chosen": -87.52811431884766, "logps/rejected": -371.8580627441406, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.170154094696045, "rewards/margins": 8.9013032913208, "rewards/rejected": -13.071456909179688, "step": 11854 }, { "epoch": 1.84, "learning_rate": 5.452396908553348e-06, "logits/chosen": -2.0287086963653564, "logits/rejected": -3.1108169555664062, "logps/chosen": -237.41839599609375, "logps/rejected": -437.99871826171875, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -6.067287445068359, "rewards/margins": 5.282223224639893, "rewards/rejected": -11.349510192871094, "step": 11855 }, { "epoch": 1.84, "learning_rate": 5.451663468022201e-06, "logits/chosen": -2.0726547241210938, "logits/rejected": -2.9623680114746094, "logps/chosen": -251.6998748779297, "logps/rejected": -296.30859375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.5918242931365967, "rewards/margins": 6.279226303100586, "rewards/rejected": -8.871050834655762, "step": 11856 }, { "epoch": 1.84, "learning_rate": 5.4509300274910525e-06, "logits/chosen": -2.9701554775238037, "logits/rejected": -2.5227997303009033, "logps/chosen": -312.9942626953125, "logps/rejected": -336.53277587890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2920894622802734, "rewards/margins": 9.283414840698242, "rewards/rejected": -10.575504302978516, "step": 11857 }, { "epoch": 1.84, "learning_rate": 5.450196586959904e-06, "logits/chosen": -2.9188425540924072, "logits/rejected": -2.9726197719573975, "logps/chosen": -361.26434326171875, "logps/rejected": -223.0372314453125, "loss": 2.2775, "rewards/accuracies": 0.0, "rewards/chosen": -7.727964878082275, "rewards/margins": -2.169283151626587, "rewards/rejected": -5.558681488037109, "step": 11858 }, { "epoch": 1.84, "learning_rate": 5.449463146428756e-06, "logits/chosen": -2.9551236629486084, "logits/rejected": -2.8777990341186523, "logps/chosen": -179.98602294921875, "logps/rejected": -121.24777221679688, "loss": 0.292, "rewards/accuracies": 1.0, "rewards/chosen": -3.723748207092285, "rewards/margins": 2.5217981338500977, "rewards/rejected": -6.245546340942383, "step": 11859 }, { "epoch": 1.84, "learning_rate": 5.448729705897608e-06, "logits/chosen": -3.0016942024230957, "logits/rejected": -2.969820499420166, "logps/chosen": -125.35047912597656, "logps/rejected": -175.41452026367188, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -2.782827377319336, "rewards/margins": 7.563796043395996, "rewards/rejected": -10.346623420715332, "step": 11860 }, { "epoch": 1.84, "learning_rate": 5.447996265366461e-06, "logits/chosen": -2.3741567134857178, "logits/rejected": -3.147015333175659, "logps/chosen": -125.55519104003906, "logps/rejected": -250.0314483642578, "loss": 0.0303, "rewards/accuracies": 1.0, "rewards/chosen": -6.243927955627441, "rewards/margins": 3.5444693565368652, "rewards/rejected": -9.788396835327148, "step": 11861 }, { "epoch": 1.84, "learning_rate": 5.4472628248353136e-06, "logits/chosen": -2.8054022789001465, "logits/rejected": -2.793980360031128, "logps/chosen": -152.42149353027344, "logps/rejected": -271.8118896484375, "loss": 0.9391, "rewards/accuracies": 0.5, "rewards/chosen": -7.390462875366211, "rewards/margins": 2.6533994674682617, "rewards/rejected": -10.043862342834473, "step": 11862 }, { "epoch": 1.84, "learning_rate": 5.4465293843041654e-06, "logits/chosen": -2.1904072761535645, "logits/rejected": -2.8821284770965576, "logps/chosen": -279.82122802734375, "logps/rejected": -208.177978515625, "loss": 0.5353, "rewards/accuracies": 1.0, "rewards/chosen": -7.912856101989746, "rewards/margins": 0.34649133682250977, "rewards/rejected": -8.259347915649414, "step": 11863 }, { "epoch": 1.85, "learning_rate": 5.445795943773017e-06, "logits/chosen": -2.214676856994629, "logits/rejected": -2.430783271789551, "logps/chosen": -141.54995727539062, "logps/rejected": -235.434326171875, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -3.1831607818603516, "rewards/margins": 3.9543538093566895, "rewards/rejected": -7.137514591217041, "step": 11864 }, { "epoch": 1.85, "learning_rate": 5.44506250324187e-06, "logits/chosen": -1.6018396615982056, "logits/rejected": -2.7039546966552734, "logps/chosen": -148.51132202148438, "logps/rejected": -304.43609619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8584699630737305, "rewards/margins": 11.155220031738281, "rewards/rejected": -14.013689041137695, "step": 11865 }, { "epoch": 1.85, "learning_rate": 5.444329062710722e-06, "logits/chosen": -2.955249547958374, "logits/rejected": -2.936896324157715, "logps/chosen": -126.60364532470703, "logps/rejected": -159.0124053955078, "loss": 0.2556, "rewards/accuracies": 1.0, "rewards/chosen": -6.32392692565918, "rewards/margins": 2.157665491104126, "rewards/rejected": -8.481592178344727, "step": 11866 }, { "epoch": 1.85, "learning_rate": 5.443595622179574e-06, "logits/chosen": -1.978082299232483, "logits/rejected": -2.826261043548584, "logps/chosen": -207.07481384277344, "logps/rejected": -219.29226684570312, "loss": 2.0232, "rewards/accuracies": 0.5, "rewards/chosen": -6.704710960388184, "rewards/margins": 0.28244662284851074, "rewards/rejected": -6.987157821655273, "step": 11867 }, { "epoch": 1.85, "learning_rate": 5.442862181648426e-06, "logits/chosen": -2.9789018630981445, "logits/rejected": -2.213615894317627, "logps/chosen": -295.2010192871094, "logps/rejected": -440.146240234375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -4.394989967346191, "rewards/margins": 8.632116317749023, "rewards/rejected": -13.027105331420898, "step": 11868 }, { "epoch": 1.85, "learning_rate": 5.4421287411172776e-06, "logits/chosen": -1.5642681121826172, "logits/rejected": -2.456789493560791, "logps/chosen": -288.60601806640625, "logps/rejected": -541.385009765625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -3.4656496047973633, "rewards/margins": 5.619906425476074, "rewards/rejected": -9.085556030273438, "step": 11869 }, { "epoch": 1.85, "learning_rate": 5.44139530058613e-06, "logits/chosen": -2.6324713230133057, "logits/rejected": -2.785991668701172, "logps/chosen": -434.3706359863281, "logps/rejected": -517.6793212890625, "loss": 2.2742, "rewards/accuracies": 0.5, "rewards/chosen": -7.963016510009766, "rewards/margins": 2.4965689182281494, "rewards/rejected": -10.459585189819336, "step": 11870 }, { "epoch": 1.85, "learning_rate": 5.440661860054982e-06, "logits/chosen": -2.8115127086639404, "logits/rejected": -3.334625482559204, "logps/chosen": -73.7740478515625, "logps/rejected": -236.2769775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9382073879241943, "rewards/margins": 9.872664451599121, "rewards/rejected": -11.810872077941895, "step": 11871 }, { "epoch": 1.85, "learning_rate": 5.439928419523834e-06, "logits/chosen": -3.1244053840637207, "logits/rejected": -2.6287858486175537, "logps/chosen": -294.5054016113281, "logps/rejected": -197.8744659423828, "loss": 3.1564, "rewards/accuracies": 0.5, "rewards/chosen": -3.200094699859619, "rewards/margins": 0.9830479621887207, "rewards/rejected": -4.18314266204834, "step": 11872 }, { "epoch": 1.85, "learning_rate": 5.439194978992686e-06, "logits/chosen": -2.598729372024536, "logits/rejected": -2.2508342266082764, "logps/chosen": -300.51422119140625, "logps/rejected": -291.69683837890625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.478250026702881, "rewards/margins": 6.2586870193481445, "rewards/rejected": -10.736936569213867, "step": 11873 }, { "epoch": 1.85, "learning_rate": 5.438461538461539e-06, "logits/chosen": -1.785459280014038, "logits/rejected": -2.385446548461914, "logps/chosen": -84.04756164550781, "logps/rejected": -335.864990234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8181800842285156, "rewards/margins": 9.746856689453125, "rewards/rejected": -13.56503677368164, "step": 11874 }, { "epoch": 1.85, "learning_rate": 5.4377280979303905e-06, "logits/chosen": -2.231292486190796, "logits/rejected": -2.9780964851379395, "logps/chosen": -187.45301818847656, "logps/rejected": -368.05828857421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.202942848205566, "rewards/margins": 7.741835594177246, "rewards/rejected": -11.944778442382812, "step": 11875 }, { "epoch": 1.85, "learning_rate": 5.436994657399242e-06, "logits/chosen": -2.7028706073760986, "logits/rejected": -1.3050832748413086, "logps/chosen": -951.4739990234375, "logps/rejected": -399.79962158203125, "loss": 0.3354, "rewards/accuracies": 1.0, "rewards/chosen": -6.1694817543029785, "rewards/margins": 4.140167713165283, "rewards/rejected": -10.309649467468262, "step": 11876 }, { "epoch": 1.85, "learning_rate": 5.436261216868094e-06, "logits/chosen": -1.9251970052719116, "logits/rejected": -3.1704275608062744, "logps/chosen": -277.146240234375, "logps/rejected": -512.4696044921875, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -5.312357425689697, "rewards/margins": 3.5046348571777344, "rewards/rejected": -8.816991806030273, "step": 11877 }, { "epoch": 1.85, "learning_rate": 5.435527776336947e-06, "logits/chosen": -2.7379531860351562, "logits/rejected": -1.9760847091674805, "logps/chosen": -107.88487243652344, "logps/rejected": -315.73736572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.508103370666504, "rewards/margins": 11.401424407958984, "rewards/rejected": -15.909527778625488, "step": 11878 }, { "epoch": 1.85, "learning_rate": 5.4347943358058e-06, "logits/chosen": -2.345097303390503, "logits/rejected": -3.129124879837036, "logps/chosen": -63.5364990234375, "logps/rejected": -302.1809997558594, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -4.480361461639404, "rewards/margins": 5.70889139175415, "rewards/rejected": -10.189252853393555, "step": 11879 }, { "epoch": 1.85, "learning_rate": 5.434060895274652e-06, "logits/chosen": -2.3246989250183105, "logits/rejected": -2.872572422027588, "logps/chosen": -209.36407470703125, "logps/rejected": -278.5283203125, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": -5.723021507263184, "rewards/margins": 2.894272565841675, "rewards/rejected": -8.617294311523438, "step": 11880 }, { "epoch": 1.85, "learning_rate": 5.4333274547435035e-06, "logits/chosen": -3.1567399501800537, "logits/rejected": -2.491936206817627, "logps/chosen": -466.6667175292969, "logps/rejected": -332.5226745605469, "loss": 0.8769, "rewards/accuracies": 0.5, "rewards/chosen": -3.964521884918213, "rewards/margins": 3.934347152709961, "rewards/rejected": -7.898868560791016, "step": 11881 }, { "epoch": 1.85, "learning_rate": 5.432594014212355e-06, "logits/chosen": -2.8019144535064697, "logits/rejected": -2.227351427078247, "logps/chosen": -402.97076416015625, "logps/rejected": -419.20819091796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.61423659324646, "rewards/margins": 8.955164909362793, "rewards/rejected": -12.569401741027832, "step": 11882 }, { "epoch": 1.85, "learning_rate": 5.431860573681208e-06, "logits/chosen": -2.722245931625366, "logits/rejected": -1.5470991134643555, "logps/chosen": -442.0758972167969, "logps/rejected": -411.386474609375, "loss": 2.0814, "rewards/accuracies": 0.5, "rewards/chosen": -5.267507553100586, "rewards/margins": 3.629566192626953, "rewards/rejected": -8.897073745727539, "step": 11883 }, { "epoch": 1.85, "learning_rate": 5.43112713315006e-06, "logits/chosen": -2.8845553398132324, "logits/rejected": -2.8638105392456055, "logps/chosen": -198.5314178466797, "logps/rejected": -205.64395141601562, "loss": 0.2318, "rewards/accuracies": 1.0, "rewards/chosen": -4.733288764953613, "rewards/margins": 3.0598974227905273, "rewards/rejected": -7.793186187744141, "step": 11884 }, { "epoch": 1.85, "learning_rate": 5.430393692618912e-06, "logits/chosen": -3.289076805114746, "logits/rejected": -3.1033809185028076, "logps/chosen": -501.3210754394531, "logps/rejected": -453.3060607910156, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -3.1472625732421875, "rewards/margins": 6.736006736755371, "rewards/rejected": -9.883269309997559, "step": 11885 }, { "epoch": 1.85, "learning_rate": 5.429660252087764e-06, "logits/chosen": -2.5751540660858154, "logits/rejected": -2.1081624031066895, "logps/chosen": -156.97763061523438, "logps/rejected": -155.67213439941406, "loss": 0.5321, "rewards/accuracies": 0.5, "rewards/chosen": -6.501473426818848, "rewards/margins": 1.1035542488098145, "rewards/rejected": -7.60502815246582, "step": 11886 }, { "epoch": 1.85, "learning_rate": 5.4289268115566165e-06, "logits/chosen": -2.888152837753296, "logits/rejected": -1.0281425714492798, "logps/chosen": -261.8231201171875, "logps/rejected": -85.41808319091797, "loss": 1.7344, "rewards/accuracies": 0.5, "rewards/chosen": -5.228611946105957, "rewards/margins": 1.1088643074035645, "rewards/rejected": -6.337475776672363, "step": 11887 }, { "epoch": 1.85, "learning_rate": 5.428193371025468e-06, "logits/chosen": -2.9830081462860107, "logits/rejected": -2.6759703159332275, "logps/chosen": -321.538818359375, "logps/rejected": -236.28692626953125, "loss": 0.0826, "rewards/accuracies": 1.0, "rewards/chosen": -4.827486991882324, "rewards/margins": 4.028635025024414, "rewards/rejected": -8.856122016906738, "step": 11888 }, { "epoch": 1.85, "learning_rate": 5.42745993049432e-06, "logits/chosen": -1.895978331565857, "logits/rejected": -2.9839344024658203, "logps/chosen": -180.75103759765625, "logps/rejected": -432.44293212890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.9323811531066895, "rewards/margins": 8.28166389465332, "rewards/rejected": -12.214044570922852, "step": 11889 }, { "epoch": 1.85, "learning_rate": 5.426726489963172e-06, "logits/chosen": -2.1594648361206055, "logits/rejected": -2.3004040718078613, "logps/chosen": -295.6724853515625, "logps/rejected": -341.65704345703125, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": -6.290521144866943, "rewards/margins": 2.9914026260375977, "rewards/rejected": -9.281923294067383, "step": 11890 }, { "epoch": 1.85, "learning_rate": 5.425993049432024e-06, "logits/chosen": -2.2582764625549316, "logits/rejected": -2.963848352432251, "logps/chosen": -284.4913635253906, "logps/rejected": -403.92498779296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.4978413581848145, "rewards/margins": 8.1542387008667, "rewards/rejected": -11.652080535888672, "step": 11891 }, { "epoch": 1.85, "learning_rate": 5.425259608900877e-06, "logits/chosen": -2.4500625133514404, "logits/rejected": -2.836186647415161, "logps/chosen": -109.37687683105469, "logps/rejected": -246.11459350585938, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -3.4542016983032227, "rewards/margins": 4.720459938049316, "rewards/rejected": -8.174661636352539, "step": 11892 }, { "epoch": 1.85, "learning_rate": 5.4245261683697286e-06, "logits/chosen": -2.937422037124634, "logits/rejected": -2.4790215492248535, "logps/chosen": -231.13665771484375, "logps/rejected": -278.2424621582031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.587480306625366, "rewards/margins": 8.329398155212402, "rewards/rejected": -10.916877746582031, "step": 11893 }, { "epoch": 1.85, "learning_rate": 5.4237927278385804e-06, "logits/chosen": -2.7737412452697754, "logits/rejected": -3.021287441253662, "logps/chosen": -709.0390625, "logps/rejected": -726.3756103515625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -2.852064609527588, "rewards/margins": 8.010307312011719, "rewards/rejected": -10.862371444702148, "step": 11894 }, { "epoch": 1.85, "learning_rate": 5.423059287307433e-06, "logits/chosen": -2.5575151443481445, "logits/rejected": -3.1703989505767822, "logps/chosen": -110.88487243652344, "logps/rejected": -275.0386962890625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.7278833389282227, "rewards/margins": 6.3569135665893555, "rewards/rejected": -10.084796905517578, "step": 11895 }, { "epoch": 1.85, "learning_rate": 5.422325846776286e-06, "logits/chosen": -2.7819101810455322, "logits/rejected": -1.951596975326538, "logps/chosen": -149.32693481445312, "logps/rejected": -288.20587158203125, "loss": 0.0198, "rewards/accuracies": 1.0, "rewards/chosen": -3.9237961769104004, "rewards/margins": 7.670449256896973, "rewards/rejected": -11.594245910644531, "step": 11896 }, { "epoch": 1.85, "learning_rate": 5.421592406245138e-06, "logits/chosen": -2.776127815246582, "logits/rejected": -1.792881965637207, "logps/chosen": -282.5617980957031, "logps/rejected": -187.9159698486328, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8101539611816406, "rewards/margins": 10.675275802612305, "rewards/rejected": -11.485429763793945, "step": 11897 }, { "epoch": 1.85, "learning_rate": 5.42085896571399e-06, "logits/chosen": -1.5051764249801636, "logits/rejected": -2.9226698875427246, "logps/chosen": -128.71435546875, "logps/rejected": -480.50909423828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.474894046783447, "rewards/margins": 9.06551742553711, "rewards/rejected": -14.540410995483398, "step": 11898 }, { "epoch": 1.85, "learning_rate": 5.4201255251828415e-06, "logits/chosen": -2.249115228652954, "logits/rejected": -2.8339247703552246, "logps/chosen": -110.90472412109375, "logps/rejected": -289.5500793457031, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.14481782913208, "rewards/margins": 6.667901515960693, "rewards/rejected": -11.812719345092773, "step": 11899 }, { "epoch": 1.85, "learning_rate": 5.419392084651693e-06, "logits/chosen": -2.687714099884033, "logits/rejected": -2.803351879119873, "logps/chosen": -91.37174987792969, "logps/rejected": -255.37042236328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.112160682678223, "rewards/margins": 6.574960708618164, "rewards/rejected": -11.687122344970703, "step": 11900 }, { "epoch": 1.85, "learning_rate": 5.418658644120546e-06, "logits/chosen": -2.8182082176208496, "logits/rejected": -1.9200984239578247, "logps/chosen": -614.7955322265625, "logps/rejected": -370.7984619140625, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": -5.352635860443115, "rewards/margins": 3.8259880542755127, "rewards/rejected": -9.178624153137207, "step": 11901 }, { "epoch": 1.85, "learning_rate": 5.417925203589398e-06, "logits/chosen": -2.1189215183258057, "logits/rejected": -2.889437198638916, "logps/chosen": -185.52194213867188, "logps/rejected": -372.599365234375, "loss": 0.522, "rewards/accuracies": 0.5, "rewards/chosen": -6.541413307189941, "rewards/margins": 0.9370748996734619, "rewards/rejected": -7.478488445281982, "step": 11902 }, { "epoch": 1.85, "learning_rate": 5.41719176305825e-06, "logits/chosen": -3.1356329917907715, "logits/rejected": -2.7941808700561523, "logps/chosen": -146.84677124023438, "logps/rejected": -229.54940795898438, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.454505205154419, "rewards/margins": 6.410447120666504, "rewards/rejected": -9.864952087402344, "step": 11903 }, { "epoch": 1.85, "learning_rate": 5.416458322527102e-06, "logits/chosen": -3.0990171432495117, "logits/rejected": -3.4026501178741455, "logps/chosen": -66.7618408203125, "logps/rejected": -191.63818359375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.6850216388702393, "rewards/margins": 5.614988327026367, "rewards/rejected": -9.300010681152344, "step": 11904 }, { "epoch": 1.85, "learning_rate": 5.4157248819959545e-06, "logits/chosen": -2.861971139907837, "logits/rejected": -2.5836198329925537, "logps/chosen": -134.18038940429688, "logps/rejected": -262.7725524902344, "loss": 1.0337, "rewards/accuracies": 0.5, "rewards/chosen": -5.253451347351074, "rewards/margins": 5.8359293937683105, "rewards/rejected": -11.089381217956543, "step": 11905 }, { "epoch": 1.85, "learning_rate": 5.414991441464806e-06, "logits/chosen": -2.3285510540008545, "logits/rejected": -2.8675360679626465, "logps/chosen": -245.90362548828125, "logps/rejected": -315.521240234375, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -5.068327903747559, "rewards/margins": 6.6986541748046875, "rewards/rejected": -11.766982078552246, "step": 11906 }, { "epoch": 1.85, "learning_rate": 5.414258000933658e-06, "logits/chosen": -2.2379064559936523, "logits/rejected": -3.0141329765319824, "logps/chosen": -140.12753295898438, "logps/rejected": -328.556396484375, "loss": 0.0479, "rewards/accuracies": 1.0, "rewards/chosen": -5.501615524291992, "rewards/margins": 3.6189630031585693, "rewards/rejected": -9.12057876586914, "step": 11907 }, { "epoch": 1.85, "learning_rate": 5.41352456040251e-06, "logits/chosen": -1.7759435176849365, "logits/rejected": -2.86869478225708, "logps/chosen": -112.3221206665039, "logps/rejected": -243.3103790283203, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.8119120597839355, "rewards/margins": 6.925157070159912, "rewards/rejected": -10.737069129943848, "step": 11908 }, { "epoch": 1.85, "learning_rate": 5.412791119871362e-06, "logits/chosen": -1.7161911725997925, "logits/rejected": -2.764932155609131, "logps/chosen": -115.9804458618164, "logps/rejected": -376.7351379394531, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.443586349487305, "rewards/margins": 8.541851043701172, "rewards/rejected": -12.985437393188477, "step": 11909 }, { "epoch": 1.85, "learning_rate": 5.412057679340215e-06, "logits/chosen": -3.02104115486145, "logits/rejected": -3.2178025245666504, "logps/chosen": -413.2236633300781, "logps/rejected": -479.91448974609375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": -6.315559387207031, "rewards/margins": 2.9635331630706787, "rewards/rejected": -9.279092788696289, "step": 11910 }, { "epoch": 1.85, "learning_rate": 5.411324238809067e-06, "logits/chosen": -2.6516597270965576, "logits/rejected": -2.9206717014312744, "logps/chosen": -135.2592010498047, "logps/rejected": -284.58197021484375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -2.9385123252868652, "rewards/margins": 7.667360782623291, "rewards/rejected": -10.605873107910156, "step": 11911 }, { "epoch": 1.85, "learning_rate": 5.410590798277919e-06, "logits/chosen": -2.791172742843628, "logits/rejected": -2.4220919609069824, "logps/chosen": -248.42938232421875, "logps/rejected": -380.192626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7555947303771973, "rewards/margins": 9.015442848205566, "rewards/rejected": -11.771037101745605, "step": 11912 }, { "epoch": 1.85, "learning_rate": 5.409857357746771e-06, "logits/chosen": -1.8321220874786377, "logits/rejected": -3.1467251777648926, "logps/chosen": -172.47061157226562, "logps/rejected": -576.1282348632812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.354036808013916, "rewards/margins": 8.804340362548828, "rewards/rejected": -11.158376693725586, "step": 11913 }, { "epoch": 1.85, "learning_rate": 5.409123917215624e-06, "logits/chosen": -2.2448699474334717, "logits/rejected": -3.035294771194458, "logps/chosen": -191.4294891357422, "logps/rejected": -353.5000305175781, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.341989755630493, "rewards/margins": 6.033958435058594, "rewards/rejected": -8.375948905944824, "step": 11914 }, { "epoch": 1.85, "learning_rate": 5.408390476684476e-06, "logits/chosen": -1.2787247896194458, "logits/rejected": -2.5322461128234863, "logps/chosen": -97.63882446289062, "logps/rejected": -374.82550048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.81738018989563, "rewards/margins": 10.501785278320312, "rewards/rejected": -14.319165229797363, "step": 11915 }, { "epoch": 1.85, "learning_rate": 5.407657036153328e-06, "logits/chosen": -1.5948394536972046, "logits/rejected": -2.7160236835479736, "logps/chosen": -168.07296752929688, "logps/rejected": -349.9712829589844, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.1570515632629395, "rewards/margins": 8.089181900024414, "rewards/rejected": -14.246233940124512, "step": 11916 }, { "epoch": 1.85, "learning_rate": 5.40692359562218e-06, "logits/chosen": -2.495349884033203, "logits/rejected": -2.757845163345337, "logps/chosen": -265.3519287109375, "logps/rejected": -423.69036865234375, "loss": 0.2679, "rewards/accuracies": 1.0, "rewards/chosen": -4.189720153808594, "rewards/margins": 4.561723232269287, "rewards/rejected": -8.751443862915039, "step": 11917 }, { "epoch": 1.85, "learning_rate": 5.4061901550910315e-06, "logits/chosen": -1.8245834112167358, "logits/rejected": -3.1245317459106445, "logps/chosen": -298.38818359375, "logps/rejected": -614.228759765625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -4.528955459594727, "rewards/margins": 7.510586738586426, "rewards/rejected": -12.039542198181152, "step": 11918 }, { "epoch": 1.85, "learning_rate": 5.405456714559884e-06, "logits/chosen": -2.9144020080566406, "logits/rejected": -1.9456632137298584, "logps/chosen": -203.66273498535156, "logps/rejected": -298.1640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -1.5926789045333862, "rewards/margins": 10.596941947937012, "rewards/rejected": -12.189620971679688, "step": 11919 }, { "epoch": 1.85, "learning_rate": 5.404723274028736e-06, "logits/chosen": -3.047159433364868, "logits/rejected": -2.726144313812256, "logps/chosen": -353.13006591796875, "logps/rejected": -500.4900817871094, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": -4.135622024536133, "rewards/margins": 4.483415603637695, "rewards/rejected": -8.619037628173828, "step": 11920 }, { "epoch": 1.85, "learning_rate": 5.403989833497588e-06, "logits/chosen": -2.899975538253784, "logits/rejected": -2.559678077697754, "logps/chosen": -196.4932861328125, "logps/rejected": -285.3775634765625, "loss": 0.6027, "rewards/accuracies": 0.5, "rewards/chosen": -5.458330154418945, "rewards/margins": 4.993135929107666, "rewards/rejected": -10.451465606689453, "step": 11921 }, { "epoch": 1.85, "learning_rate": 5.40325639296644e-06, "logits/chosen": -2.90053653717041, "logits/rejected": -2.1429834365844727, "logps/chosen": -603.7763061523438, "logps/rejected": -326.91790771484375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -6.087501525878906, "rewards/margins": 6.350541591644287, "rewards/rejected": -12.438043594360352, "step": 11922 }, { "epoch": 1.85, "learning_rate": 5.4025229524352925e-06, "logits/chosen": -2.534782648086548, "logits/rejected": -1.706586480140686, "logps/chosen": -531.3619384765625, "logps/rejected": -464.0312194824219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.471493244171143, "rewards/margins": 9.913841247558594, "rewards/rejected": -14.385334968566895, "step": 11923 }, { "epoch": 1.85, "learning_rate": 5.401789511904144e-06, "logits/chosen": -2.840562582015991, "logits/rejected": -2.144282817840576, "logps/chosen": -368.86090087890625, "logps/rejected": -270.0517578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.4623138904571533, "rewards/margins": 8.16778564453125, "rewards/rejected": -11.630099296569824, "step": 11924 }, { "epoch": 1.85, "learning_rate": 5.401056071372996e-06, "logits/chosen": -2.6635594367980957, "logits/rejected": -3.2311630249023438, "logps/chosen": -239.88690185546875, "logps/rejected": -370.54180908203125, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -4.773041725158691, "rewards/margins": 6.068387031555176, "rewards/rejected": -10.841428756713867, "step": 11925 }, { "epoch": 1.85, "learning_rate": 5.400322630841848e-06, "logits/chosen": -2.264092206954956, "logits/rejected": -1.6412551403045654, "logps/chosen": -185.20909118652344, "logps/rejected": -374.4093322753906, "loss": 0.0958, "rewards/accuracies": 1.0, "rewards/chosen": -3.378633975982666, "rewards/margins": 6.253693580627441, "rewards/rejected": -9.63232707977295, "step": 11926 }, { "epoch": 1.85, "learning_rate": 5.399589190310701e-06, "logits/chosen": -2.259162664413452, "logits/rejected": -2.927527904510498, "logps/chosen": -184.33193969726562, "logps/rejected": -359.97869873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3670127391815186, "rewards/margins": 9.130430221557617, "rewards/rejected": -12.497442245483398, "step": 11927 }, { "epoch": 1.86, "learning_rate": 5.398855749779553e-06, "logits/chosen": -3.0940635204315186, "logits/rejected": -2.7805063724517822, "logps/chosen": -228.33676147460938, "logps/rejected": -214.2647705078125, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -7.624040603637695, "rewards/margins": 4.75422477722168, "rewards/rejected": -12.378265380859375, "step": 11928 }, { "epoch": 1.86, "learning_rate": 5.398122309248405e-06, "logits/chosen": -2.8518168926239014, "logits/rejected": -1.6716701984405518, "logps/chosen": -579.6038818359375, "logps/rejected": -575.6690673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7923011779785156, "rewards/margins": 10.159440994262695, "rewards/rejected": -12.951742172241211, "step": 11929 }, { "epoch": 1.86, "learning_rate": 5.397388868717257e-06, "logits/chosen": -2.815843105316162, "logits/rejected": -2.2532618045806885, "logps/chosen": -143.60198974609375, "logps/rejected": -279.838134765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.5033607482910156, "rewards/margins": 8.671270370483398, "rewards/rejected": -12.174631118774414, "step": 11930 }, { "epoch": 1.86, "learning_rate": 5.396655428186109e-06, "logits/chosen": -2.8429150581359863, "logits/rejected": -2.7845656871795654, "logps/chosen": -424.4044494628906, "logps/rejected": -368.90631103515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.335354804992676, "rewards/margins": 6.510122299194336, "rewards/rejected": -12.845478057861328, "step": 11931 }, { "epoch": 1.86, "learning_rate": 5.395921987654962e-06, "logits/chosen": -2.466148853302002, "logits/rejected": -3.1925790309906006, "logps/chosen": -120.47563934326172, "logps/rejected": -342.09893798828125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.90787410736084, "rewards/margins": 7.267215728759766, "rewards/rejected": -14.175089836120605, "step": 11932 }, { "epoch": 1.86, "learning_rate": 5.395188547123814e-06, "logits/chosen": -2.8078460693359375, "logits/rejected": -2.342254400253296, "logps/chosen": -257.5452880859375, "logps/rejected": -266.8703308105469, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.5120620727539062, "rewards/margins": 6.435843467712402, "rewards/rejected": -8.947905540466309, "step": 11933 }, { "epoch": 1.86, "learning_rate": 5.394455106592666e-06, "logits/chosen": -2.7519710063934326, "logits/rejected": -2.5000245571136475, "logps/chosen": -109.96629333496094, "logps/rejected": -321.11260986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.741692304611206, "rewards/margins": 10.372982025146484, "rewards/rejected": -13.11467456817627, "step": 11934 }, { "epoch": 1.86, "learning_rate": 5.393721666061518e-06, "logits/chosen": -2.3881595134735107, "logits/rejected": -2.7854628562927246, "logps/chosen": -184.10671997070312, "logps/rejected": -327.3773193359375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -4.499922275543213, "rewards/margins": 6.639799118041992, "rewards/rejected": -11.139720916748047, "step": 11935 }, { "epoch": 1.86, "learning_rate": 5.39298822553037e-06, "logits/chosen": -1.5798577070236206, "logits/rejected": -2.8089234828948975, "logps/chosen": -92.94367980957031, "logps/rejected": -365.98175048828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.165679931640625, "rewards/margins": 7.5833659172058105, "rewards/rejected": -13.749045372009277, "step": 11936 }, { "epoch": 1.86, "learning_rate": 5.392254784999222e-06, "logits/chosen": -3.0281765460968018, "logits/rejected": -2.9907684326171875, "logps/chosen": -317.6189270019531, "logps/rejected": -306.567138671875, "loss": 0.0835, "rewards/accuracies": 1.0, "rewards/chosen": -4.382313251495361, "rewards/margins": 3.7644925117492676, "rewards/rejected": -8.146805763244629, "step": 11937 }, { "epoch": 1.86, "learning_rate": 5.391521344468074e-06, "logits/chosen": -2.9681556224823, "logits/rejected": -2.81054425239563, "logps/chosen": -259.0906982421875, "logps/rejected": -515.4384765625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.7580673694610596, "rewards/margins": 8.130173683166504, "rewards/rejected": -10.888240814208984, "step": 11938 }, { "epoch": 1.86, "learning_rate": 5.390787903936926e-06, "logits/chosen": -3.0784952640533447, "logits/rejected": -3.1417629718780518, "logps/chosen": -53.340797424316406, "logps/rejected": -187.99594116210938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.138028144836426, "rewards/margins": 9.043155670166016, "rewards/rejected": -12.181182861328125, "step": 11939 }, { "epoch": 1.86, "learning_rate": 5.390054463405778e-06, "logits/chosen": -3.2347116470336914, "logits/rejected": -3.314822196960449, "logps/chosen": -224.91485595703125, "logps/rejected": -574.5625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0267555713653564, "rewards/margins": 9.01235580444336, "rewards/rejected": -11.039112091064453, "step": 11940 }, { "epoch": 1.86, "learning_rate": 5.389321022874631e-06, "logits/chosen": -3.1621663570404053, "logits/rejected": -2.805161714553833, "logps/chosen": -460.72637939453125, "logps/rejected": -396.6168212890625, "loss": 0.437, "rewards/accuracies": 0.5, "rewards/chosen": -4.640937805175781, "rewards/margins": 3.4305219650268555, "rewards/rejected": -8.071459770202637, "step": 11941 }, { "epoch": 1.86, "learning_rate": 5.3885875823434825e-06, "logits/chosen": -1.6250832080841064, "logits/rejected": -2.836775541305542, "logps/chosen": -393.8174743652344, "logps/rejected": -480.74267578125, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -4.826284885406494, "rewards/margins": 8.53699779510498, "rewards/rejected": -13.363283157348633, "step": 11942 }, { "epoch": 1.86, "learning_rate": 5.387854141812334e-06, "logits/chosen": -3.0337576866149902, "logits/rejected": -2.718050718307495, "logps/chosen": -561.5300903320312, "logps/rejected": -344.1009521484375, "loss": 0.2571, "rewards/accuracies": 1.0, "rewards/chosen": -6.115928649902344, "rewards/margins": 4.428475379943848, "rewards/rejected": -10.544404029846191, "step": 11943 }, { "epoch": 1.86, "learning_rate": 5.387120701281186e-06, "logits/chosen": -2.9748477935791016, "logits/rejected": -3.0163047313690186, "logps/chosen": -302.638916015625, "logps/rejected": -351.63958740234375, "loss": 0.2054, "rewards/accuracies": 1.0, "rewards/chosen": -3.369783401489258, "rewards/margins": 5.610270977020264, "rewards/rejected": -8.98005485534668, "step": 11944 }, { "epoch": 1.86, "learning_rate": 5.386387260750039e-06, "logits/chosen": -3.042997360229492, "logits/rejected": -3.182124376296997, "logps/chosen": -284.5000305175781, "logps/rejected": -256.5169982910156, "loss": 4.5029, "rewards/accuracies": 0.5, "rewards/chosen": -8.108170509338379, "rewards/margins": -1.6189796924591064, "rewards/rejected": -6.489191055297852, "step": 11945 }, { "epoch": 1.86, "learning_rate": 5.385653820218891e-06, "logits/chosen": -2.2188708782196045, "logits/rejected": -2.732215166091919, "logps/chosen": -372.96405029296875, "logps/rejected": -435.435546875, "loss": 0.0899, "rewards/accuracies": 1.0, "rewards/chosen": -4.255163669586182, "rewards/margins": 4.701254367828369, "rewards/rejected": -8.95641803741455, "step": 11946 }, { "epoch": 1.86, "learning_rate": 5.3849203796877435e-06, "logits/chosen": -3.0246856212615967, "logits/rejected": -2.30825138092041, "logps/chosen": -250.90968322753906, "logps/rejected": -148.20751953125, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -3.211761474609375, "rewards/margins": 5.584824562072754, "rewards/rejected": -8.796586036682129, "step": 11947 }, { "epoch": 1.86, "learning_rate": 5.3841869391565954e-06, "logits/chosen": -1.3301045894622803, "logits/rejected": -2.5914134979248047, "logps/chosen": -70.0125961303711, "logps/rejected": -313.2306213378906, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.380288124084473, "rewards/margins": 7.115031719207764, "rewards/rejected": -11.495319366455078, "step": 11948 }, { "epoch": 1.86, "learning_rate": 5.383453498625447e-06, "logits/chosen": -2.070286750793457, "logits/rejected": -2.6508140563964844, "logps/chosen": -372.9376220703125, "logps/rejected": -425.62939453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.460719347000122, "rewards/margins": 8.992100715637207, "rewards/rejected": -12.45281982421875, "step": 11949 }, { "epoch": 1.86, "learning_rate": 5.3827200580943e-06, "logits/chosen": -2.6436028480529785, "logits/rejected": -2.843543529510498, "logps/chosen": -479.40679931640625, "logps/rejected": -617.9450073242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.459146976470947, "rewards/margins": 13.470861434936523, "rewards/rejected": -18.930007934570312, "step": 11950 }, { "epoch": 1.86, "learning_rate": 5.381986617563152e-06, "logits/chosen": -2.8341028690338135, "logits/rejected": -3.090837240219116, "logps/chosen": -692.2606811523438, "logps/rejected": -887.6065673828125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.913276672363281, "rewards/margins": 7.914180278778076, "rewards/rejected": -13.827457427978516, "step": 11951 }, { "epoch": 1.86, "learning_rate": 5.381253177032004e-06, "logits/chosen": -1.4379167556762695, "logits/rejected": -2.8077101707458496, "logps/chosen": -120.18899536132812, "logps/rejected": -493.1420593261719, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -3.697110652923584, "rewards/margins": 7.867334365844727, "rewards/rejected": -11.564445495605469, "step": 11952 }, { "epoch": 1.86, "learning_rate": 5.380519736500856e-06, "logits/chosen": -2.807415246963501, "logits/rejected": -2.8782799243927, "logps/chosen": -162.23841857910156, "logps/rejected": -357.8150939941406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.6309258937835693, "rewards/margins": 10.954830169677734, "rewards/rejected": -14.585756301879883, "step": 11953 }, { "epoch": 1.86, "learning_rate": 5.379786295969708e-06, "logits/chosen": -2.357167959213257, "logits/rejected": -2.8111119270324707, "logps/chosen": -167.50991821289062, "logps/rejected": -399.1183776855469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8907055854797363, "rewards/margins": 9.828481674194336, "rewards/rejected": -13.719188690185547, "step": 11954 }, { "epoch": 1.86, "learning_rate": 5.37905285543856e-06, "logits/chosen": -1.6433385610580444, "logits/rejected": -1.972158432006836, "logps/chosen": -223.14053344726562, "logps/rejected": -350.43310546875, "loss": 0.0723, "rewards/accuracies": 1.0, "rewards/chosen": -4.129352569580078, "rewards/margins": 3.2633769512176514, "rewards/rejected": -7.392729759216309, "step": 11955 }, { "epoch": 1.86, "learning_rate": 5.378319414907412e-06, "logits/chosen": -2.4365122318267822, "logits/rejected": -3.284759998321533, "logps/chosen": -159.29563903808594, "logps/rejected": -330.77447509765625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.543609142303467, "rewards/margins": 6.7288665771484375, "rewards/rejected": -10.272476196289062, "step": 11956 }, { "epoch": 1.86, "learning_rate": 5.377585974376264e-06, "logits/chosen": -2.4698116779327393, "logits/rejected": -2.512091636657715, "logps/chosen": -442.9052734375, "logps/rejected": -613.6510009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0827574729919434, "rewards/margins": 11.375955581665039, "rewards/rejected": -14.45871353149414, "step": 11957 }, { "epoch": 1.86, "learning_rate": 5.376852533845116e-06, "logits/chosen": -1.9906340837478638, "logits/rejected": -2.953219175338745, "logps/chosen": -211.1584930419922, "logps/rejected": -346.1793212890625, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -4.776162147521973, "rewards/margins": 5.187604904174805, "rewards/rejected": -9.963767051696777, "step": 11958 }, { "epoch": 1.86, "learning_rate": 5.376119093313969e-06, "logits/chosen": -2.8960394859313965, "logits/rejected": -1.8594748973846436, "logps/chosen": -365.9587707519531, "logps/rejected": -424.9158630371094, "loss": 0.5474, "rewards/accuracies": 0.5, "rewards/chosen": -6.228734970092773, "rewards/margins": 2.3542308807373047, "rewards/rejected": -8.582965850830078, "step": 11959 }, { "epoch": 1.86, "learning_rate": 5.3753856527828205e-06, "logits/chosen": -3.0727851390838623, "logits/rejected": -3.3038556575775146, "logps/chosen": -284.46392822265625, "logps/rejected": -539.4238891601562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.48357629776001, "rewards/margins": 7.835202217102051, "rewards/rejected": -12.318778038024902, "step": 11960 }, { "epoch": 1.86, "learning_rate": 5.374652212251672e-06, "logits/chosen": -2.6678733825683594, "logits/rejected": -2.576458215713501, "logps/chosen": -258.5616455078125, "logps/rejected": -356.6324462890625, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -5.52003288269043, "rewards/margins": 5.420482635498047, "rewards/rejected": -10.940515518188477, "step": 11961 }, { "epoch": 1.86, "learning_rate": 5.373918771720524e-06, "logits/chosen": -2.761096477508545, "logits/rejected": -2.227592945098877, "logps/chosen": -275.9770202636719, "logps/rejected": -289.9362487792969, "loss": 0.034, "rewards/accuracies": 1.0, "rewards/chosen": -2.1162805557250977, "rewards/margins": 4.079376220703125, "rewards/rejected": -6.195656776428223, "step": 11962 }, { "epoch": 1.86, "learning_rate": 5.373185331189377e-06, "logits/chosen": -3.022364616394043, "logits/rejected": -2.9204845428466797, "logps/chosen": -175.80410766601562, "logps/rejected": -241.19068908691406, "loss": 0.1363, "rewards/accuracies": 1.0, "rewards/chosen": -3.7511467933654785, "rewards/margins": 4.038600921630859, "rewards/rejected": -7.78974723815918, "step": 11963 }, { "epoch": 1.86, "learning_rate": 5.37245189065823e-06, "logits/chosen": -3.0225181579589844, "logits/rejected": -3.106255292892456, "logps/chosen": -143.1284942626953, "logps/rejected": -268.52447509765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.395367383956909, "rewards/margins": 6.497016906738281, "rewards/rejected": -9.89238452911377, "step": 11964 }, { "epoch": 1.86, "learning_rate": 5.371718450127082e-06, "logits/chosen": -2.270035982131958, "logits/rejected": -1.919232726097107, "logps/chosen": -144.98985290527344, "logps/rejected": -234.302734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.392361164093018, "rewards/margins": 9.122664451599121, "rewards/rejected": -13.51502513885498, "step": 11965 }, { "epoch": 1.86, "learning_rate": 5.3709850095959335e-06, "logits/chosen": -2.1511597633361816, "logits/rejected": -2.805452823638916, "logps/chosen": -64.09608459472656, "logps/rejected": -252.96359252929688, "loss": 0.1357, "rewards/accuracies": 1.0, "rewards/chosen": -4.665583610534668, "rewards/margins": 4.261079788208008, "rewards/rejected": -8.92666244506836, "step": 11966 }, { "epoch": 1.86, "learning_rate": 5.370251569064786e-06, "logits/chosen": -2.726276159286499, "logits/rejected": -3.0380561351776123, "logps/chosen": -171.31419372558594, "logps/rejected": -293.45599365234375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.824113845825195, "rewards/margins": 6.6742448806762695, "rewards/rejected": -11.498357772827148, "step": 11967 }, { "epoch": 1.86, "learning_rate": 5.369518128533638e-06, "logits/chosen": -2.748965263366699, "logits/rejected": -2.51540470123291, "logps/chosen": -193.40870666503906, "logps/rejected": -297.8205261230469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.411378383636475, "rewards/margins": 8.607860565185547, "rewards/rejected": -13.01923942565918, "step": 11968 }, { "epoch": 1.86, "learning_rate": 5.36878468800249e-06, "logits/chosen": -1.5124138593673706, "logits/rejected": -2.940451145172119, "logps/chosen": -130.82427978515625, "logps/rejected": -545.09423828125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.398716449737549, "rewards/margins": 8.81106185913086, "rewards/rejected": -13.209779739379883, "step": 11969 }, { "epoch": 1.86, "learning_rate": 5.368051247471342e-06, "logits/chosen": -2.851149559020996, "logits/rejected": -2.866687536239624, "logps/chosen": -446.0533447265625, "logps/rejected": -529.113037109375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -3.060492992401123, "rewards/margins": 9.1271333694458, "rewards/rejected": -12.187626838684082, "step": 11970 }, { "epoch": 1.86, "learning_rate": 5.367317806940194e-06, "logits/chosen": -2.33958101272583, "logits/rejected": -3.109306812286377, "logps/chosen": -132.96768188476562, "logps/rejected": -255.35202026367188, "loss": 0.3402, "rewards/accuracies": 1.0, "rewards/chosen": -4.547619819641113, "rewards/margins": 2.9950203895568848, "rewards/rejected": -7.54263973236084, "step": 11971 }, { "epoch": 1.86, "learning_rate": 5.3665843664090464e-06, "logits/chosen": -1.5065076351165771, "logits/rejected": -3.0005741119384766, "logps/chosen": -106.3323974609375, "logps/rejected": -336.48211669921875, "loss": 0.0905, "rewards/accuracies": 1.0, "rewards/chosen": -5.298837184906006, "rewards/margins": 5.145092010498047, "rewards/rejected": -10.443929672241211, "step": 11972 }, { "epoch": 1.86, "learning_rate": 5.365850925877898e-06, "logits/chosen": -1.8626348972320557, "logits/rejected": -2.939735174179077, "logps/chosen": -150.28411865234375, "logps/rejected": -404.68780517578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.217158555984497, "rewards/margins": 8.949871063232422, "rewards/rejected": -12.167030334472656, "step": 11973 }, { "epoch": 1.86, "learning_rate": 5.36511748534675e-06, "logits/chosen": -2.3734915256500244, "logits/rejected": -3.074566602706909, "logps/chosen": -90.77664947509766, "logps/rejected": -250.14794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.096065044403076, "rewards/margins": 8.874822616577148, "rewards/rejected": -11.970888137817383, "step": 11974 }, { "epoch": 1.86, "learning_rate": 5.364384044815602e-06, "logits/chosen": -1.2372099161148071, "logits/rejected": -2.852794647216797, "logps/chosen": -129.80421447753906, "logps/rejected": -451.0430603027344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.924036502838135, "rewards/margins": 9.749608039855957, "rewards/rejected": -14.67364501953125, "step": 11975 }, { "epoch": 1.86, "learning_rate": 5.363650604284455e-06, "logits/chosen": -2.35019588470459, "logits/rejected": -2.988985538482666, "logps/chosen": -628.35986328125, "logps/rejected": -469.60003662109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8089113235473633, "rewards/margins": 10.608102798461914, "rewards/rejected": -12.417013168334961, "step": 11976 }, { "epoch": 1.86, "learning_rate": 5.362917163753307e-06, "logits/chosen": -2.6239941120147705, "logits/rejected": -0.5926395058631897, "logps/chosen": -312.9792175292969, "logps/rejected": -106.05970001220703, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": -3.215308427810669, "rewards/margins": 4.252279281616211, "rewards/rejected": -7.467587471008301, "step": 11977 }, { "epoch": 1.86, "learning_rate": 5.3621837232221586e-06, "logits/chosen": -2.8815953731536865, "logits/rejected": -2.9405386447906494, "logps/chosen": -203.54632568359375, "logps/rejected": -239.73309326171875, "loss": 0.1568, "rewards/accuracies": 1.0, "rewards/chosen": -4.018721580505371, "rewards/margins": 4.671268939971924, "rewards/rejected": -8.689990997314453, "step": 11978 }, { "epoch": 1.86, "learning_rate": 5.3614502826910104e-06, "logits/chosen": -2.8619580268859863, "logits/rejected": -3.152649164199829, "logps/chosen": -338.7819519042969, "logps/rejected": -396.4155578613281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.7033737897872925, "rewards/margins": 8.983438491821289, "rewards/rejected": -10.686812400817871, "step": 11979 }, { "epoch": 1.86, "learning_rate": 5.360716842159863e-06, "logits/chosen": -0.9591741561889648, "logits/rejected": -1.995941400527954, "logps/chosen": -223.525146484375, "logps/rejected": -494.0545654296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.541922092437744, "rewards/margins": 12.896574020385742, "rewards/rejected": -16.438495635986328, "step": 11980 }, { "epoch": 1.86, "learning_rate": 5.359983401628716e-06, "logits/chosen": -1.224007487297058, "logits/rejected": -2.313985586166382, "logps/chosen": -87.08802795410156, "logps/rejected": -445.11187744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.105201721191406, "rewards/margins": 12.810260772705078, "rewards/rejected": -16.915462493896484, "step": 11981 }, { "epoch": 1.86, "learning_rate": 5.359249961097568e-06, "logits/chosen": -2.0374629497528076, "logits/rejected": -2.9858055114746094, "logps/chosen": -132.60067749023438, "logps/rejected": -293.24969482421875, "loss": 0.1899, "rewards/accuracies": 1.0, "rewards/chosen": -3.960136890411377, "rewards/margins": 5.8820881843566895, "rewards/rejected": -9.842225074768066, "step": 11982 }, { "epoch": 1.86, "learning_rate": 5.35851652056642e-06, "logits/chosen": -1.3030240535736084, "logits/rejected": -3.0042872428894043, "logps/chosen": -141.98178100585938, "logps/rejected": -307.0472412109375, "loss": 0.4704, "rewards/accuracies": 0.5, "rewards/chosen": -4.856869220733643, "rewards/margins": 5.2099528312683105, "rewards/rejected": -10.066822052001953, "step": 11983 }, { "epoch": 1.86, "learning_rate": 5.3577830800352715e-06, "logits/chosen": -1.9365742206573486, "logits/rejected": -2.8104963302612305, "logps/chosen": -114.95872497558594, "logps/rejected": -403.04156494140625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.99600887298584, "rewards/margins": 7.27786111831665, "rewards/rejected": -10.273870468139648, "step": 11984 }, { "epoch": 1.86, "learning_rate": 5.357049639504124e-06, "logits/chosen": -2.4355483055114746, "logits/rejected": -2.860656499862671, "logps/chosen": -519.7557983398438, "logps/rejected": -905.9296875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -5.924376010894775, "rewards/margins": 6.063967227935791, "rewards/rejected": -11.988343238830566, "step": 11985 }, { "epoch": 1.86, "learning_rate": 5.356316198972976e-06, "logits/chosen": -1.185565710067749, "logits/rejected": -2.6511266231536865, "logps/chosen": -119.33242797851562, "logps/rejected": -360.4093017578125, "loss": 1.5553, "rewards/accuracies": 0.5, "rewards/chosen": -7.62711238861084, "rewards/margins": 6.858824729919434, "rewards/rejected": -14.485937118530273, "step": 11986 }, { "epoch": 1.86, "learning_rate": 5.355582758441828e-06, "logits/chosen": -2.8170816898345947, "logits/rejected": -2.822787284851074, "logps/chosen": -322.836669921875, "logps/rejected": -353.12127685546875, "loss": 3.0811, "rewards/accuracies": 0.5, "rewards/chosen": -7.884199142456055, "rewards/margins": 2.0222277641296387, "rewards/rejected": -9.906427383422852, "step": 11987 }, { "epoch": 1.86, "learning_rate": 5.35484931791068e-06, "logits/chosen": -2.111192464828491, "logits/rejected": -3.0863497257232666, "logps/chosen": -90.58672332763672, "logps/rejected": -260.4941711425781, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -2.792475700378418, "rewards/margins": 6.198125839233398, "rewards/rejected": -8.990601539611816, "step": 11988 }, { "epoch": 1.86, "learning_rate": 5.354115877379532e-06, "logits/chosen": -2.748765468597412, "logits/rejected": -2.546257734298706, "logps/chosen": -290.56103515625, "logps/rejected": -252.99533081054688, "loss": 1.3477, "rewards/accuracies": 0.5, "rewards/chosen": -5.554689407348633, "rewards/margins": 2.2892954349517822, "rewards/rejected": -7.843984603881836, "step": 11989 }, { "epoch": 1.86, "learning_rate": 5.3533824368483845e-06, "logits/chosen": -2.590435028076172, "logits/rejected": -2.846980094909668, "logps/chosen": -201.99432373046875, "logps/rejected": -203.40411376953125, "loss": 0.0774, "rewards/accuracies": 1.0, "rewards/chosen": -3.643146514892578, "rewards/margins": 4.788138389587402, "rewards/rejected": -8.431285858154297, "step": 11990 }, { "epoch": 1.86, "learning_rate": 5.352648996317236e-06, "logits/chosen": -2.460864543914795, "logits/rejected": -2.6603004932403564, "logps/chosen": -638.7932739257812, "logps/rejected": -560.50341796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.4294586181640625, "rewards/margins": 6.771902084350586, "rewards/rejected": -12.201360702514648, "step": 11991 }, { "epoch": 1.87, "learning_rate": 5.351915555786088e-06, "logits/chosen": -2.7330422401428223, "logits/rejected": -3.1930058002471924, "logps/chosen": -372.6824035644531, "logps/rejected": -404.592529296875, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -5.6020073890686035, "rewards/margins": 6.8625712394714355, "rewards/rejected": -12.464578628540039, "step": 11992 }, { "epoch": 1.87, "learning_rate": 5.35118211525494e-06, "logits/chosen": -2.6966686248779297, "logits/rejected": -3.024876832962036, "logps/chosen": -89.85638427734375, "logps/rejected": -270.0655517578125, "loss": 0.0462, "rewards/accuracies": 1.0, "rewards/chosen": -3.971928834915161, "rewards/margins": 6.826504230499268, "rewards/rejected": -10.798433303833008, "step": 11993 }, { "epoch": 1.87, "learning_rate": 5.350448674723793e-06, "logits/chosen": -1.3829600811004639, "logits/rejected": -3.087228775024414, "logps/chosen": -86.94723510742188, "logps/rejected": -389.765869140625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.7698802947998047, "rewards/margins": 6.746622085571289, "rewards/rejected": -10.516502380371094, "step": 11994 }, { "epoch": 1.87, "learning_rate": 5.349715234192645e-06, "logits/chosen": -2.9929802417755127, "logits/rejected": -2.9622128009796143, "logps/chosen": -467.8597717285156, "logps/rejected": -362.40728759765625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -2.5821847915649414, "rewards/margins": 6.684337139129639, "rewards/rejected": -9.266521453857422, "step": 11995 }, { "epoch": 1.87, "learning_rate": 5.348981793661497e-06, "logits/chosen": -2.4618237018585205, "logits/rejected": -2.6120290756225586, "logps/chosen": -115.08675384521484, "logps/rejected": -235.75457763671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.9002034664154053, "rewards/margins": 7.658346176147461, "rewards/rejected": -11.558549880981445, "step": 11996 }, { "epoch": 1.87, "learning_rate": 5.348248353130349e-06, "logits/chosen": -2.9699463844299316, "logits/rejected": -3.3181097507476807, "logps/chosen": -168.37747192382812, "logps/rejected": -270.138916015625, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -4.182879447937012, "rewards/margins": 4.822271347045898, "rewards/rejected": -9.005151748657227, "step": 11997 }, { "epoch": 1.87, "learning_rate": 5.347514912599201e-06, "logits/chosen": -1.7169822454452515, "logits/rejected": -2.896218776702881, "logps/chosen": -126.49229431152344, "logps/rejected": -329.54534912109375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.814455509185791, "rewards/margins": 6.174824237823486, "rewards/rejected": -9.989279747009277, "step": 11998 }, { "epoch": 1.87, "learning_rate": 5.346781472068054e-06, "logits/chosen": -2.1050219535827637, "logits/rejected": -2.2825264930725098, "logps/chosen": -775.8438720703125, "logps/rejected": -715.1595458984375, "loss": 0.1265, "rewards/accuracies": 1.0, "rewards/chosen": -7.482657432556152, "rewards/margins": 5.236096382141113, "rewards/rejected": -12.718753814697266, "step": 11999 }, { "epoch": 1.87, "learning_rate": 5.346048031536906e-06, "logits/chosen": -2.5354039669036865, "logits/rejected": -2.868346691131592, "logps/chosen": -86.11476135253906, "logps/rejected": -285.9424133300781, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -2.707123279571533, "rewards/margins": 6.266976356506348, "rewards/rejected": -8.974099159240723, "step": 12000 }, { "epoch": 1.87, "learning_rate": 5.345314591005758e-06, "logits/chosen": -2.6497621536254883, "logits/rejected": -2.424039363861084, "logps/chosen": -398.3788146972656, "logps/rejected": -481.1120910644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.334264039993286, "rewards/margins": 11.111392974853516, "rewards/rejected": -14.445656776428223, "step": 12001 }, { "epoch": 1.87, "learning_rate": 5.3445811504746096e-06, "logits/chosen": -1.2694038152694702, "logits/rejected": -2.800041913986206, "logps/chosen": -94.70001220703125, "logps/rejected": -311.1455078125, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -7.1558074951171875, "rewards/margins": 6.529595851898193, "rewards/rejected": -13.685403823852539, "step": 12002 }, { "epoch": 1.87, "learning_rate": 5.343847709943462e-06, "logits/chosen": -2.7497658729553223, "logits/rejected": -1.99948251247406, "logps/chosen": -443.5843505859375, "logps/rejected": -400.75762939453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.4248719215393066, "rewards/margins": 9.390777587890625, "rewards/rejected": -11.815649032592773, "step": 12003 }, { "epoch": 1.87, "learning_rate": 5.343114269412314e-06, "logits/chosen": -2.2742528915405273, "logits/rejected": -2.807239532470703, "logps/chosen": -403.0531311035156, "logps/rejected": -681.75927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.034956455230713, "rewards/margins": 11.308076858520508, "rewards/rejected": -14.343034744262695, "step": 12004 }, { "epoch": 1.87, "learning_rate": 5.342380828881166e-06, "logits/chosen": -2.6429014205932617, "logits/rejected": -2.8417394161224365, "logps/chosen": -165.08676147460938, "logps/rejected": -220.6379852294922, "loss": 0.8875, "rewards/accuracies": 0.5, "rewards/chosen": -5.7409138679504395, "rewards/margins": 1.4461159706115723, "rewards/rejected": -7.187029838562012, "step": 12005 }, { "epoch": 1.87, "learning_rate": 5.341647388350018e-06, "logits/chosen": -1.6561702489852905, "logits/rejected": -3.0778074264526367, "logps/chosen": -157.32815551757812, "logps/rejected": -655.0811767578125, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -6.263021469116211, "rewards/margins": 7.543787002563477, "rewards/rejected": -13.806808471679688, "step": 12006 }, { "epoch": 1.87, "learning_rate": 5.34091394781887e-06, "logits/chosen": -2.49828839302063, "logits/rejected": -3.150550365447998, "logps/chosen": -156.91995239257812, "logps/rejected": -505.0289306640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.007821083068848, "rewards/margins": 8.804848670959473, "rewards/rejected": -13.81266975402832, "step": 12007 }, { "epoch": 1.87, "learning_rate": 5.3401805072877225e-06, "logits/chosen": -2.04113507270813, "logits/rejected": -3.075305223464966, "logps/chosen": -116.3477554321289, "logps/rejected": -346.4371337890625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.519937515258789, "rewards/margins": 6.565605163574219, "rewards/rejected": -10.085542678833008, "step": 12008 }, { "epoch": 1.87, "learning_rate": 5.339447066756574e-06, "logits/chosen": -2.7402217388153076, "logits/rejected": -1.54310142993927, "logps/chosen": -243.80975341796875, "logps/rejected": -102.02894592285156, "loss": 0.1776, "rewards/accuracies": 1.0, "rewards/chosen": -4.060868740081787, "rewards/margins": 3.357280731201172, "rewards/rejected": -7.418149471282959, "step": 12009 }, { "epoch": 1.87, "learning_rate": 5.338713626225426e-06, "logits/chosen": -1.9783592224121094, "logits/rejected": -3.0205347537994385, "logps/chosen": -298.36297607421875, "logps/rejected": -442.0948181152344, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.7466540336608887, "rewards/margins": 7.203001022338867, "rewards/rejected": -10.949655532836914, "step": 12010 }, { "epoch": 1.87, "learning_rate": 5.337980185694278e-06, "logits/chosen": -2.5754387378692627, "logits/rejected": -2.462804079055786, "logps/chosen": -419.9007263183594, "logps/rejected": -473.44805908203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.9948105812072754, "rewards/margins": 7.238670349121094, "rewards/rejected": -10.233480453491211, "step": 12011 }, { "epoch": 1.87, "learning_rate": 5.337246745163131e-06, "logits/chosen": -2.822115421295166, "logits/rejected": -2.9521219730377197, "logps/chosen": -138.11538696289062, "logps/rejected": -164.17242431640625, "loss": 1.1098, "rewards/accuracies": 0.5, "rewards/chosen": -5.515412330627441, "rewards/margins": 3.267348527908325, "rewards/rejected": -8.782761573791504, "step": 12012 }, { "epoch": 1.87, "learning_rate": 5.336513304631983e-06, "logits/chosen": -2.084951877593994, "logits/rejected": -2.6927192211151123, "logps/chosen": -719.7216186523438, "logps/rejected": -477.77392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.664674758911133, "rewards/margins": 9.09347915649414, "rewards/rejected": -13.758152961730957, "step": 12013 }, { "epoch": 1.87, "learning_rate": 5.3357798641008355e-06, "logits/chosen": -1.3376612663269043, "logits/rejected": -2.7390172481536865, "logps/chosen": -105.36502075195312, "logps/rejected": -335.1884765625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.0682308673858643, "rewards/margins": 7.406761169433594, "rewards/rejected": -9.474991798400879, "step": 12014 }, { "epoch": 1.87, "learning_rate": 5.335046423569687e-06, "logits/chosen": -2.7653093338012695, "logits/rejected": -2.353304147720337, "logps/chosen": -230.3382568359375, "logps/rejected": -204.27899169921875, "loss": 0.3805, "rewards/accuracies": 0.5, "rewards/chosen": -4.955727577209473, "rewards/margins": 5.438151836395264, "rewards/rejected": -10.393878936767578, "step": 12015 }, { "epoch": 1.87, "learning_rate": 5.33431298303854e-06, "logits/chosen": -1.6712287664413452, "logits/rejected": -2.932265520095825, "logps/chosen": -123.24370574951172, "logps/rejected": -388.5786437988281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.005842208862305, "rewards/margins": 9.359070777893066, "rewards/rejected": -13.364912033081055, "step": 12016 }, { "epoch": 1.87, "learning_rate": 5.333579542507392e-06, "logits/chosen": -1.2547917366027832, "logits/rejected": -2.415896415710449, "logps/chosen": -477.2489013671875, "logps/rejected": -654.393798828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.402459144592285, "rewards/margins": 8.300477981567383, "rewards/rejected": -11.702937126159668, "step": 12017 }, { "epoch": 1.87, "learning_rate": 5.332846101976244e-06, "logits/chosen": -2.648641347885132, "logits/rejected": -2.8706748485565186, "logps/chosen": -163.86795043945312, "logps/rejected": -285.56597900390625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.7521705627441406, "rewards/margins": 6.427267551422119, "rewards/rejected": -10.179437637329102, "step": 12018 }, { "epoch": 1.87, "learning_rate": 5.332112661445096e-06, "logits/chosen": -2.4935312271118164, "logits/rejected": -3.042893648147583, "logps/chosen": -69.87396240234375, "logps/rejected": -252.98065185546875, "loss": 0.0357, "rewards/accuracies": 1.0, "rewards/chosen": -3.724991798400879, "rewards/margins": 4.530526161193848, "rewards/rejected": -8.255517959594727, "step": 12019 }, { "epoch": 1.87, "learning_rate": 5.331379220913948e-06, "logits/chosen": -1.1077091693878174, "logits/rejected": -2.241123676300049, "logps/chosen": -185.05648803710938, "logps/rejected": -441.154052734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.178045272827148, "rewards/margins": 9.455240249633789, "rewards/rejected": -13.633285522460938, "step": 12020 }, { "epoch": 1.87, "learning_rate": 5.3306457803828e-06, "logits/chosen": -2.928405523300171, "logits/rejected": -3.4060451984405518, "logps/chosen": -55.32524871826172, "logps/rejected": -266.15032958984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.7530429363250732, "rewards/margins": 8.29422378540039, "rewards/rejected": -12.047266006469727, "step": 12021 }, { "epoch": 1.87, "learning_rate": 5.329912339851652e-06, "logits/chosen": -2.52260160446167, "logits/rejected": -2.977365016937256, "logps/chosen": -134.5281524658203, "logps/rejected": -361.7066345214844, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.866919994354248, "rewards/margins": 6.752778053283691, "rewards/rejected": -10.619698524475098, "step": 12022 }, { "epoch": 1.87, "learning_rate": 5.329178899320504e-06, "logits/chosen": -2.77996826171875, "logits/rejected": -3.0242886543273926, "logps/chosen": -182.23052978515625, "logps/rejected": -456.113037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.618429660797119, "rewards/margins": 9.72258472442627, "rewards/rejected": -12.341014862060547, "step": 12023 }, { "epoch": 1.87, "learning_rate": 5.328445458789356e-06, "logits/chosen": -2.1964468955993652, "logits/rejected": -2.916165590286255, "logps/chosen": -164.39114379882812, "logps/rejected": -300.89727783203125, "loss": 0.4673, "rewards/accuracies": 0.5, "rewards/chosen": -5.215371608734131, "rewards/margins": 2.8520305156707764, "rewards/rejected": -8.067401885986328, "step": 12024 }, { "epoch": 1.87, "learning_rate": 5.327712018258209e-06, "logits/chosen": -2.7579398155212402, "logits/rejected": -2.9489548206329346, "logps/chosen": -351.5107421875, "logps/rejected": -493.0467529296875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.5380067825317383, "rewards/margins": 8.49438190460205, "rewards/rejected": -12.032388687133789, "step": 12025 }, { "epoch": 1.87, "learning_rate": 5.3269785777270606e-06, "logits/chosen": -2.0590310096740723, "logits/rejected": -2.731919765472412, "logps/chosen": -210.4019775390625, "logps/rejected": -305.96270751953125, "loss": 0.1836, "rewards/accuracies": 1.0, "rewards/chosen": -6.141733169555664, "rewards/margins": 3.9927866458892822, "rewards/rejected": -10.134519577026367, "step": 12026 }, { "epoch": 1.87, "learning_rate": 5.3262451371959124e-06, "logits/chosen": -2.075282335281372, "logits/rejected": -2.5490589141845703, "logps/chosen": -169.5945281982422, "logps/rejected": -350.21527099609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.4306676387786865, "rewards/margins": 10.367731094360352, "rewards/rejected": -13.798398971557617, "step": 12027 }, { "epoch": 1.87, "learning_rate": 5.325511696664764e-06, "logits/chosen": -2.784313678741455, "logits/rejected": -2.6021196842193604, "logps/chosen": -95.745361328125, "logps/rejected": -287.96282958984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.854743242263794, "rewards/margins": 8.611072540283203, "rewards/rejected": -10.465816497802734, "step": 12028 }, { "epoch": 1.87, "learning_rate": 5.324778256133616e-06, "logits/chosen": -2.7332100868225098, "logits/rejected": -2.442728281021118, "logps/chosen": -262.398193359375, "logps/rejected": -436.21405029296875, "loss": 0.8345, "rewards/accuracies": 0.5, "rewards/chosen": -5.8040313720703125, "rewards/margins": 3.8769166469573975, "rewards/rejected": -9.680948257446289, "step": 12029 }, { "epoch": 1.87, "learning_rate": 5.324044815602469e-06, "logits/chosen": -3.137944459915161, "logits/rejected": -2.378777503967285, "logps/chosen": -1048.094970703125, "logps/rejected": -672.4072875976562, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -4.992062568664551, "rewards/margins": 6.946769714355469, "rewards/rejected": -11.93883228302002, "step": 12030 }, { "epoch": 1.87, "learning_rate": 5.323311375071322e-06, "logits/chosen": -1.8588807582855225, "logits/rejected": -2.9117748737335205, "logps/chosen": -408.157958984375, "logps/rejected": -729.97216796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.437132358551025, "rewards/margins": 9.417454719543457, "rewards/rejected": -14.85458755493164, "step": 12031 }, { "epoch": 1.87, "learning_rate": 5.3225779345401735e-06, "logits/chosen": -2.1694014072418213, "logits/rejected": -3.078021764755249, "logps/chosen": -99.95098876953125, "logps/rejected": -284.834716796875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -2.925354480743408, "rewards/margins": 8.105677604675293, "rewards/rejected": -11.03103256225586, "step": 12032 }, { "epoch": 1.87, "learning_rate": 5.321844494009025e-06, "logits/chosen": -1.5046184062957764, "logits/rejected": -2.998901844024658, "logps/chosen": -81.0213394165039, "logps/rejected": -639.36962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2778525352478027, "rewards/margins": 11.96617603302002, "rewards/rejected": -15.244028091430664, "step": 12033 }, { "epoch": 1.87, "learning_rate": 5.321111053477878e-06, "logits/chosen": -2.651543617248535, "logits/rejected": -2.745081663131714, "logps/chosen": -633.17822265625, "logps/rejected": -701.349853515625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -6.1867475509643555, "rewards/margins": 7.2113356590271, "rewards/rejected": -13.398083686828613, "step": 12034 }, { "epoch": 1.87, "learning_rate": 5.32037761294673e-06, "logits/chosen": -2.271238088607788, "logits/rejected": -2.7081687450408936, "logps/chosen": -77.853759765625, "logps/rejected": -208.5956268310547, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.9305801391601562, "rewards/margins": 6.371030807495117, "rewards/rejected": -10.301610946655273, "step": 12035 }, { "epoch": 1.87, "learning_rate": 5.319644172415582e-06, "logits/chosen": -2.708596706390381, "logits/rejected": -2.360381841659546, "logps/chosen": -262.0859680175781, "logps/rejected": -289.5738220214844, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -3.5434682369232178, "rewards/margins": 5.835653305053711, "rewards/rejected": -9.379121780395508, "step": 12036 }, { "epoch": 1.87, "learning_rate": 5.318910731884434e-06, "logits/chosen": -2.2117726802825928, "logits/rejected": -2.3586277961730957, "logps/chosen": -235.6645965576172, "logps/rejected": -294.2451477050781, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": -4.099266052246094, "rewards/margins": 4.641086578369141, "rewards/rejected": -8.740352630615234, "step": 12037 }, { "epoch": 1.87, "learning_rate": 5.318177291353286e-06, "logits/chosen": -2.903315544128418, "logits/rejected": -3.168567180633545, "logps/chosen": -100.13957977294922, "logps/rejected": -354.63873291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1435675621032715, "rewards/margins": 10.383857727050781, "rewards/rejected": -13.527425765991211, "step": 12038 }, { "epoch": 1.87, "learning_rate": 5.317443850822138e-06, "logits/chosen": -2.776843547821045, "logits/rejected": -3.005038022994995, "logps/chosen": -200.26513671875, "logps/rejected": -272.8909912109375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -3.573197603225708, "rewards/margins": 7.345980167388916, "rewards/rejected": -10.919178009033203, "step": 12039 }, { "epoch": 1.87, "learning_rate": 5.31671041029099e-06, "logits/chosen": -2.397778272628784, "logits/rejected": -2.900606870651245, "logps/chosen": -58.091060638427734, "logps/rejected": -298.62506103515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.288666248321533, "rewards/margins": 8.898534774780273, "rewards/rejected": -11.187200546264648, "step": 12040 }, { "epoch": 1.87, "learning_rate": 5.315976969759842e-06, "logits/chosen": -2.0297605991363525, "logits/rejected": -3.178114175796509, "logps/chosen": -175.64474487304688, "logps/rejected": -479.6847229003906, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -4.571752548217773, "rewards/margins": 5.516379356384277, "rewards/rejected": -10.08813190460205, "step": 12041 }, { "epoch": 1.87, "learning_rate": 5.315243529228694e-06, "logits/chosen": -3.0372865200042725, "logits/rejected": -3.1006317138671875, "logps/chosen": -100.9256591796875, "logps/rejected": -213.22586059570312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.2300219535827637, "rewards/margins": 8.22396183013916, "rewards/rejected": -10.453983306884766, "step": 12042 }, { "epoch": 1.87, "learning_rate": 5.314510088697547e-06, "logits/chosen": -2.1884851455688477, "logits/rejected": -2.791236400604248, "logps/chosen": -179.6627655029297, "logps/rejected": -370.33477783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7977306842803955, "rewards/margins": 12.106565475463867, "rewards/rejected": -14.904295921325684, "step": 12043 }, { "epoch": 1.87, "learning_rate": 5.313776648166399e-06, "logits/chosen": -2.4791507720947266, "logits/rejected": -3.045574426651001, "logps/chosen": -56.347877502441406, "logps/rejected": -223.50772094726562, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.424405336380005, "rewards/margins": 6.510807037353516, "rewards/rejected": -9.935213088989258, "step": 12044 }, { "epoch": 1.87, "learning_rate": 5.3130432076352505e-06, "logits/chosen": -2.8770997524261475, "logits/rejected": -3.248779773712158, "logps/chosen": -75.2114486694336, "logps/rejected": -253.92428588867188, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -3.2331790924072266, "rewards/margins": 4.694738388061523, "rewards/rejected": -7.92791748046875, "step": 12045 }, { "epoch": 1.87, "learning_rate": 5.312309767104102e-06, "logits/chosen": -1.2188451290130615, "logits/rejected": -2.7286062240600586, "logps/chosen": -116.49517822265625, "logps/rejected": -302.9706726074219, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -2.7327487468719482, "rewards/margins": 5.491480827331543, "rewards/rejected": -8.22422981262207, "step": 12046 }, { "epoch": 1.87, "learning_rate": 5.311576326572955e-06, "logits/chosen": -2.2731707096099854, "logits/rejected": -3.122907876968384, "logps/chosen": -115.84469604492188, "logps/rejected": -398.62872314453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.9187557697296143, "rewards/margins": 9.834229469299316, "rewards/rejected": -11.752985000610352, "step": 12047 }, { "epoch": 1.87, "learning_rate": 5.310842886041808e-06, "logits/chosen": -2.5463645458221436, "logits/rejected": -2.9769628047943115, "logps/chosen": -154.6330108642578, "logps/rejected": -234.56224060058594, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.428160667419434, "rewards/margins": 7.172787666320801, "rewards/rejected": -12.600948333740234, "step": 12048 }, { "epoch": 1.87, "learning_rate": 5.31010944551066e-06, "logits/chosen": -1.4794344902038574, "logits/rejected": -2.6255037784576416, "logps/chosen": -173.80410766601562, "logps/rejected": -493.19061279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.511563301086426, "rewards/margins": 11.120025634765625, "rewards/rejected": -16.631587982177734, "step": 12049 }, { "epoch": 1.87, "learning_rate": 5.309376004979512e-06, "logits/chosen": -2.7491085529327393, "logits/rejected": -1.9419279098510742, "logps/chosen": -537.0236206054688, "logps/rejected": -336.992431640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.7726693153381348, "rewards/margins": 7.575194835662842, "rewards/rejected": -9.347864151000977, "step": 12050 }, { "epoch": 1.87, "learning_rate": 5.3086425644483635e-06, "logits/chosen": -3.043339490890503, "logits/rejected": -2.935356378555298, "logps/chosen": -221.71817016601562, "logps/rejected": -407.5665283203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2593071460723877, "rewards/margins": 8.13504695892334, "rewards/rejected": -11.394353866577148, "step": 12051 }, { "epoch": 1.87, "learning_rate": 5.307909123917216e-06, "logits/chosen": -2.7567977905273438, "logits/rejected": -2.0219600200653076, "logps/chosen": -331.5568542480469, "logps/rejected": -311.76025390625, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": -4.567381381988525, "rewards/margins": 4.413049221038818, "rewards/rejected": -8.980430603027344, "step": 12052 }, { "epoch": 1.87, "learning_rate": 5.307175683386068e-06, "logits/chosen": -1.3048593997955322, "logits/rejected": -2.6778082847595215, "logps/chosen": -158.61561584472656, "logps/rejected": -447.8165283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9005234241485596, "rewards/margins": 10.365406036376953, "rewards/rejected": -14.265928268432617, "step": 12053 }, { "epoch": 1.87, "learning_rate": 5.30644224285492e-06, "logits/chosen": -2.7368524074554443, "logits/rejected": -1.6671686172485352, "logps/chosen": -213.99032592773438, "logps/rejected": -270.4595031738281, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -1.9815528392791748, "rewards/margins": 6.262713432312012, "rewards/rejected": -8.244266510009766, "step": 12054 }, { "epoch": 1.87, "learning_rate": 5.305708802323772e-06, "logits/chosen": -2.636162757873535, "logits/rejected": -2.807856321334839, "logps/chosen": -362.5738830566406, "logps/rejected": -296.5907897949219, "loss": 0.1669, "rewards/accuracies": 1.0, "rewards/chosen": -4.61513090133667, "rewards/margins": 3.220893621444702, "rewards/rejected": -7.836024761199951, "step": 12055 }, { "epoch": 1.87, "learning_rate": 5.304975361792624e-06, "logits/chosen": -1.7823318243026733, "logits/rejected": -2.8347508907318115, "logps/chosen": -159.69699096679688, "logps/rejected": -426.60528564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.465423107147217, "rewards/margins": 9.610014915466309, "rewards/rejected": -13.075437545776367, "step": 12056 }, { "epoch": 1.88, "learning_rate": 5.304241921261476e-06, "logits/chosen": -2.2137036323547363, "logits/rejected": -2.984966993331909, "logps/chosen": -107.30110931396484, "logps/rejected": -262.5325927734375, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -4.3987135887146, "rewards/margins": 4.460834980010986, "rewards/rejected": -8.859548568725586, "step": 12057 }, { "epoch": 1.88, "learning_rate": 5.303508480730328e-06, "logits/chosen": -2.5461525917053223, "logits/rejected": -2.9548745155334473, "logps/chosen": -245.66966247558594, "logps/rejected": -237.94924926757812, "loss": 2.7088, "rewards/accuracies": 0.5, "rewards/chosen": -3.2887802124023438, "rewards/margins": 3.6875147819519043, "rewards/rejected": -6.976294994354248, "step": 12058 }, { "epoch": 1.88, "learning_rate": 5.30277504019918e-06, "logits/chosen": -1.9446214437484741, "logits/rejected": -3.121216058731079, "logps/chosen": -123.78251647949219, "logps/rejected": -445.715576171875, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -3.3591132164001465, "rewards/margins": 8.074315071105957, "rewards/rejected": -11.433427810668945, "step": 12059 }, { "epoch": 1.88, "learning_rate": 5.302041599668032e-06, "logits/chosen": -1.6593459844589233, "logits/rejected": -2.882546901702881, "logps/chosen": -210.21212768554688, "logps/rejected": -424.646240234375, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -4.9024658203125, "rewards/margins": 5.7422990798950195, "rewards/rejected": -10.64476490020752, "step": 12060 }, { "epoch": 1.88, "learning_rate": 5.301308159136885e-06, "logits/chosen": -2.784135580062866, "logits/rejected": -1.513600468635559, "logps/chosen": -959.978759765625, "logps/rejected": -519.7049560546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.982478380203247, "rewards/margins": 10.194737434387207, "rewards/rejected": -12.177215576171875, "step": 12061 }, { "epoch": 1.88, "learning_rate": 5.300574718605737e-06, "logits/chosen": -2.7387754917144775, "logits/rejected": -2.6509084701538086, "logps/chosen": -160.69741821289062, "logps/rejected": -133.47833251953125, "loss": 1.1421, "rewards/accuracies": 0.5, "rewards/chosen": -5.127845764160156, "rewards/margins": 1.0673671960830688, "rewards/rejected": -6.195213317871094, "step": 12062 }, { "epoch": 1.88, "learning_rate": 5.2998412780745885e-06, "logits/chosen": -1.6315851211547852, "logits/rejected": -2.8910603523254395, "logps/chosen": -176.670166015625, "logps/rejected": -392.2451477050781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.540735244750977, "rewards/margins": 8.117202758789062, "rewards/rejected": -14.657938003540039, "step": 12063 }, { "epoch": 1.88, "learning_rate": 5.299107837543441e-06, "logits/chosen": -2.772808313369751, "logits/rejected": -2.382274627685547, "logps/chosen": -538.42919921875, "logps/rejected": -433.09002685546875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.3436386585235596, "rewards/margins": 8.495220184326172, "rewards/rejected": -11.838857650756836, "step": 12064 }, { "epoch": 1.88, "learning_rate": 5.298374397012294e-06, "logits/chosen": -2.59169602394104, "logits/rejected": -2.8150291442871094, "logps/chosen": -275.5650634765625, "logps/rejected": -457.950927734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.681457042694092, "rewards/margins": 9.172436714172363, "rewards/rejected": -13.853893280029297, "step": 12065 }, { "epoch": 1.88, "learning_rate": 5.297640956481146e-06, "logits/chosen": -1.7362602949142456, "logits/rejected": -2.8158347606658936, "logps/chosen": -115.17106628417969, "logps/rejected": -314.0401306152344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5309267044067383, "rewards/margins": 8.703579902648926, "rewards/rejected": -11.234506607055664, "step": 12066 }, { "epoch": 1.88, "learning_rate": 5.296907515949998e-06, "logits/chosen": -2.646592140197754, "logits/rejected": -2.9287467002868652, "logps/chosen": -104.89830017089844, "logps/rejected": -199.08642578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.283591270446777, "rewards/margins": 6.866122245788574, "rewards/rejected": -11.149713516235352, "step": 12067 }, { "epoch": 1.88, "learning_rate": 5.29617407541885e-06, "logits/chosen": -1.9410884380340576, "logits/rejected": -2.73315691947937, "logps/chosen": -157.5519561767578, "logps/rejected": -305.33160400390625, "loss": 1.1196, "rewards/accuracies": 0.5, "rewards/chosen": -5.379697322845459, "rewards/margins": 3.8541250228881836, "rewards/rejected": -9.2338228225708, "step": 12068 }, { "epoch": 1.88, "learning_rate": 5.2954406348877015e-06, "logits/chosen": -2.194777011871338, "logits/rejected": -2.6772661209106445, "logps/chosen": -137.1943817138672, "logps/rejected": -287.5904846191406, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -5.636392593383789, "rewards/margins": 8.016061782836914, "rewards/rejected": -13.652454376220703, "step": 12069 }, { "epoch": 1.88, "learning_rate": 5.294707194356554e-06, "logits/chosen": -0.9774602055549622, "logits/rejected": -2.243812322616577, "logps/chosen": -116.03413391113281, "logps/rejected": -459.00909423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.727827787399292, "rewards/margins": 11.896879196166992, "rewards/rejected": -13.624707221984863, "step": 12070 }, { "epoch": 1.88, "learning_rate": 5.293973753825406e-06, "logits/chosen": -2.220738172531128, "logits/rejected": -2.675584077835083, "logps/chosen": -334.8984680175781, "logps/rejected": -367.3875732421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.237585067749023, "rewards/margins": 6.851632118225098, "rewards/rejected": -14.089216232299805, "step": 12071 }, { "epoch": 1.88, "learning_rate": 5.293240313294258e-06, "logits/chosen": -2.6240854263305664, "logits/rejected": -3.1080117225646973, "logps/chosen": -109.417724609375, "logps/rejected": -270.88067626953125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -6.140310287475586, "rewards/margins": 6.18659782409668, "rewards/rejected": -12.326908111572266, "step": 12072 }, { "epoch": 1.88, "learning_rate": 5.29250687276311e-06, "logits/chosen": -2.78521990776062, "logits/rejected": -2.820765495300293, "logps/chosen": -115.18609619140625, "logps/rejected": -335.695556640625, "loss": 0.1951, "rewards/accuracies": 1.0, "rewards/chosen": -3.6770739555358887, "rewards/margins": 7.46511697769165, "rewards/rejected": -11.142190933227539, "step": 12073 }, { "epoch": 1.88, "learning_rate": 5.291773432231963e-06, "logits/chosen": -2.215348958969116, "logits/rejected": -2.714411973953247, "logps/chosen": -76.90249633789062, "logps/rejected": -273.7637023925781, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.314504384994507, "rewards/margins": 6.233399391174316, "rewards/rejected": -9.547904014587402, "step": 12074 }, { "epoch": 1.88, "learning_rate": 5.2910399917008145e-06, "logits/chosen": -1.2261109352111816, "logits/rejected": -2.724619150161743, "logps/chosen": -125.34536743164062, "logps/rejected": -406.4136962890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.1434593200683594, "rewards/margins": 7.351145267486572, "rewards/rejected": -10.494604110717773, "step": 12075 }, { "epoch": 1.88, "learning_rate": 5.290306551169666e-06, "logits/chosen": -2.678138017654419, "logits/rejected": -2.8948259353637695, "logps/chosen": -124.8374252319336, "logps/rejected": -263.45782470703125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.312664270401001, "rewards/margins": 6.678021430969238, "rewards/rejected": -8.99068546295166, "step": 12076 }, { "epoch": 1.88, "learning_rate": 5.289573110638518e-06, "logits/chosen": -2.479440927505493, "logits/rejected": -3.2925097942352295, "logps/chosen": -98.86322021484375, "logps/rejected": -377.8636779785156, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -5.079640865325928, "rewards/margins": 6.651244640350342, "rewards/rejected": -11.73088550567627, "step": 12077 }, { "epoch": 1.88, "learning_rate": 5.28883967010737e-06, "logits/chosen": -1.2584214210510254, "logits/rejected": -2.529594659805298, "logps/chosen": -61.15781021118164, "logps/rejected": -265.172607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5115303993225098, "rewards/margins": 10.433091163635254, "rewards/rejected": -12.944622039794922, "step": 12078 }, { "epoch": 1.88, "learning_rate": 5.288106229576223e-06, "logits/chosen": -1.560346007347107, "logits/rejected": -2.508540153503418, "logps/chosen": -78.70160675048828, "logps/rejected": -322.5518493652344, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.9726409912109375, "rewards/margins": 7.985329627990723, "rewards/rejected": -12.95797061920166, "step": 12079 }, { "epoch": 1.88, "learning_rate": 5.287372789045075e-06, "logits/chosen": -2.677687644958496, "logits/rejected": -2.699183225631714, "logps/chosen": -141.9419708251953, "logps/rejected": -174.8638916015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.305854558944702, "rewards/margins": 8.693466186523438, "rewards/rejected": -10.999320983886719, "step": 12080 }, { "epoch": 1.88, "learning_rate": 5.2866393485139274e-06, "logits/chosen": -2.532461643218994, "logits/rejected": -3.000080108642578, "logps/chosen": -137.0882568359375, "logps/rejected": -324.02362060546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.140136241912842, "rewards/margins": 9.12947940826416, "rewards/rejected": -11.269615173339844, "step": 12081 }, { "epoch": 1.88, "learning_rate": 5.285905907982779e-06, "logits/chosen": -2.9811298847198486, "logits/rejected": -2.9340388774871826, "logps/chosen": -335.23358154296875, "logps/rejected": -265.3115234375, "loss": 0.3222, "rewards/accuracies": 1.0, "rewards/chosen": -4.994314670562744, "rewards/margins": 3.882920265197754, "rewards/rejected": -8.877235412597656, "step": 12082 }, { "epoch": 1.88, "learning_rate": 5.285172467451632e-06, "logits/chosen": -1.7521792650222778, "logits/rejected": -2.4385008811950684, "logps/chosen": -92.84330749511719, "logps/rejected": -325.9644775390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.830315589904785, "rewards/margins": 7.784999847412109, "rewards/rejected": -12.615315437316895, "step": 12083 }, { "epoch": 1.88, "learning_rate": 5.284439026920484e-06, "logits/chosen": -2.3385369777679443, "logits/rejected": -2.904696464538574, "logps/chosen": -573.0680541992188, "logps/rejected": -540.6658935546875, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.2650787830352783, "rewards/margins": 4.551727294921875, "rewards/rejected": -5.816805839538574, "step": 12084 }, { "epoch": 1.88, "learning_rate": 5.283705586389336e-06, "logits/chosen": -2.338488817214966, "logits/rejected": -2.945169448852539, "logps/chosen": -249.29476928710938, "logps/rejected": -457.9776611328125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.688194274902344, "rewards/margins": 10.030500411987305, "rewards/rejected": -15.718694686889648, "step": 12085 }, { "epoch": 1.88, "learning_rate": 5.282972145858188e-06, "logits/chosen": -1.5403335094451904, "logits/rejected": -1.8432224988937378, "logps/chosen": -174.64666748046875, "logps/rejected": -304.47235107421875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.0869526863098145, "rewards/margins": 7.815101146697998, "rewards/rejected": -9.902053833007812, "step": 12086 }, { "epoch": 1.88, "learning_rate": 5.2822387053270395e-06, "logits/chosen": -2.3735311031341553, "logits/rejected": -2.7071826457977295, "logps/chosen": -131.12469482421875, "logps/rejected": -251.36074829101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.290466785430908, "rewards/margins": 10.117417335510254, "rewards/rejected": -12.40788459777832, "step": 12087 }, { "epoch": 1.88, "learning_rate": 5.281505264795892e-06, "logits/chosen": -1.8656104803085327, "logits/rejected": -1.9968624114990234, "logps/chosen": -195.8453826904297, "logps/rejected": -385.6730041503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.316395282745361, "rewards/margins": 11.515568733215332, "rewards/rejected": -15.831963539123535, "step": 12088 }, { "epoch": 1.88, "learning_rate": 5.280771824264744e-06, "logits/chosen": -2.2528347969055176, "logits/rejected": -2.8324368000030518, "logps/chosen": -217.31976318359375, "logps/rejected": -271.5456237792969, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.540344715118408, "rewards/margins": 6.821575164794922, "rewards/rejected": -11.361919403076172, "step": 12089 }, { "epoch": 1.88, "learning_rate": 5.280038383733596e-06, "logits/chosen": -1.93356192111969, "logits/rejected": -2.768928289413452, "logps/chosen": -329.3519592285156, "logps/rejected": -451.49334716796875, "loss": 1.6057, "rewards/accuracies": 0.5, "rewards/chosen": -6.632714748382568, "rewards/margins": 6.320030212402344, "rewards/rejected": -12.95274543762207, "step": 12090 }, { "epoch": 1.88, "learning_rate": 5.279304943202448e-06, "logits/chosen": -2.677783250808716, "logits/rejected": -2.958378553390503, "logps/chosen": -313.2967224121094, "logps/rejected": -409.59759521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.313923716545105, "rewards/margins": 9.87289047241211, "rewards/rejected": -11.186813354492188, "step": 12091 }, { "epoch": 1.88, "learning_rate": 5.278571502671301e-06, "logits/chosen": -2.637343406677246, "logits/rejected": -3.089050769805908, "logps/chosen": -280.8401794433594, "logps/rejected": -394.2060546875, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/chosen": -2.9661941528320312, "rewards/margins": 8.627038955688477, "rewards/rejected": -11.593233108520508, "step": 12092 }, { "epoch": 1.88, "learning_rate": 5.2778380621401525e-06, "logits/chosen": -2.275580883026123, "logits/rejected": -2.953847646713257, "logps/chosen": -87.85942077636719, "logps/rejected": -176.10406494140625, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": -4.439303398132324, "rewards/margins": 4.319061279296875, "rewards/rejected": -8.7583646774292, "step": 12093 }, { "epoch": 1.88, "learning_rate": 5.277104621609004e-06, "logits/chosen": -1.485095500946045, "logits/rejected": -2.8146605491638184, "logps/chosen": -173.54684448242188, "logps/rejected": -424.9920959472656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1715446710586548, "rewards/margins": 10.235604286193848, "rewards/rejected": -11.407148361206055, "step": 12094 }, { "epoch": 1.88, "learning_rate": 5.276371181077856e-06, "logits/chosen": -2.2293529510498047, "logits/rejected": -2.776700496673584, "logps/chosen": -394.8487854003906, "logps/rejected": -487.79815673828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.4720155000686646, "rewards/margins": 10.014259338378906, "rewards/rejected": -11.486274719238281, "step": 12095 }, { "epoch": 1.88, "learning_rate": 5.275637740546708e-06, "logits/chosen": -1.498360514640808, "logits/rejected": -2.631406307220459, "logps/chosen": -123.30744934082031, "logps/rejected": -354.2305908203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.5639567375183105, "rewards/margins": 11.813577651977539, "rewards/rejected": -13.377534866333008, "step": 12096 }, { "epoch": 1.88, "learning_rate": 5.274904300015561e-06, "logits/chosen": -1.8311539888381958, "logits/rejected": -2.9842162132263184, "logps/chosen": -204.39047241210938, "logps/rejected": -543.6985473632812, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -4.421807289123535, "rewards/margins": 7.468218803405762, "rewards/rejected": -11.890026092529297, "step": 12097 }, { "epoch": 1.88, "learning_rate": 5.274170859484414e-06, "logits/chosen": -2.7412960529327393, "logits/rejected": -2.1424057483673096, "logps/chosen": -644.0213623046875, "logps/rejected": -448.0536193847656, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -6.989909648895264, "rewards/margins": 5.444666862487793, "rewards/rejected": -12.434576034545898, "step": 12098 }, { "epoch": 1.88, "learning_rate": 5.2734374189532655e-06, "logits/chosen": -1.2498546838760376, "logits/rejected": -2.900057792663574, "logps/chosen": -67.37664794921875, "logps/rejected": -413.2986755371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7992782592773438, "rewards/margins": 10.178319931030273, "rewards/rejected": -13.977598190307617, "step": 12099 }, { "epoch": 1.88, "learning_rate": 5.272703978422117e-06, "logits/chosen": -2.7874701023101807, "logits/rejected": -2.864914655685425, "logps/chosen": -176.9278564453125, "logps/rejected": -239.97061157226562, "loss": 0.462, "rewards/accuracies": 0.5, "rewards/chosen": -3.353153705596924, "rewards/margins": 2.895751953125, "rewards/rejected": -6.248906135559082, "step": 12100 }, { "epoch": 1.88, "learning_rate": 5.27197053789097e-06, "logits/chosen": -1.349578857421875, "logits/rejected": -2.752042531967163, "logps/chosen": -131.77145385742188, "logps/rejected": -537.3329467773438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.039194107055664, "rewards/margins": 11.425457000732422, "rewards/rejected": -13.464651107788086, "step": 12101 }, { "epoch": 1.88, "learning_rate": 5.271237097359822e-06, "logits/chosen": -2.816540241241455, "logits/rejected": -3.1193015575408936, "logps/chosen": -110.90370178222656, "logps/rejected": -153.6370086669922, "loss": 0.6162, "rewards/accuracies": 0.5, "rewards/chosen": -5.576377868652344, "rewards/margins": 2.4343690872192383, "rewards/rejected": -8.010746955871582, "step": 12102 }, { "epoch": 1.88, "learning_rate": 5.270503656828674e-06, "logits/chosen": -0.4822973310947418, "logits/rejected": -2.8267316818237305, "logps/chosen": -107.0470962524414, "logps/rejected": -597.26806640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.575499057769775, "rewards/margins": 6.7215576171875, "rewards/rejected": -13.297056198120117, "step": 12103 }, { "epoch": 1.88, "learning_rate": 5.269770216297526e-06, "logits/chosen": -1.3620901107788086, "logits/rejected": -2.6315619945526123, "logps/chosen": -419.7325439453125, "logps/rejected": -441.34295654296875, "loss": 1.1242, "rewards/accuracies": 0.5, "rewards/chosen": -6.26774263381958, "rewards/margins": 2.2511680126190186, "rewards/rejected": -8.51891040802002, "step": 12104 }, { "epoch": 1.88, "learning_rate": 5.269036775766378e-06, "logits/chosen": -1.8186508417129517, "logits/rejected": -2.8154098987579346, "logps/chosen": -343.77044677734375, "logps/rejected": -709.7359619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.521922588348389, "rewards/margins": 16.795238494873047, "rewards/rejected": -21.317161560058594, "step": 12105 }, { "epoch": 1.88, "learning_rate": 5.26830333523523e-06, "logits/chosen": -2.558793544769287, "logits/rejected": -1.737493634223938, "logps/chosen": -467.3580017089844, "logps/rejected": -364.54364013671875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -4.9868268966674805, "rewards/margins": 5.157382488250732, "rewards/rejected": -10.144208908081055, "step": 12106 }, { "epoch": 1.88, "learning_rate": 5.267569894704082e-06, "logits/chosen": -2.748971462249756, "logits/rejected": -2.3980071544647217, "logps/chosen": -503.3569641113281, "logps/rejected": -450.3479919433594, "loss": 0.171, "rewards/accuracies": 1.0, "rewards/chosen": -4.778757095336914, "rewards/margins": 8.275065422058105, "rewards/rejected": -13.05382251739502, "step": 12107 }, { "epoch": 1.88, "learning_rate": 5.266836454172934e-06, "logits/chosen": -2.8979740142822266, "logits/rejected": -2.937671184539795, "logps/chosen": -116.54683685302734, "logps/rejected": -204.03282165527344, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.037046194076538, "rewards/margins": 7.865729331970215, "rewards/rejected": -9.902775764465332, "step": 12108 }, { "epoch": 1.88, "learning_rate": 5.266103013641786e-06, "logits/chosen": -1.8031610250473022, "logits/rejected": -2.8144264221191406, "logps/chosen": -121.61512756347656, "logps/rejected": -260.3337707519531, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -3.702698230743408, "rewards/margins": 6.408785820007324, "rewards/rejected": -10.11148452758789, "step": 12109 }, { "epoch": 1.88, "learning_rate": 5.265369573110639e-06, "logits/chosen": -2.7883317470550537, "logits/rejected": -3.2868144512176514, "logps/chosen": -80.5207290649414, "logps/rejected": -255.7294158935547, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.3141188621520996, "rewards/margins": 8.864246368408203, "rewards/rejected": -11.178365707397461, "step": 12110 }, { "epoch": 1.88, "learning_rate": 5.2646361325794906e-06, "logits/chosen": -2.887683391571045, "logits/rejected": -3.2079739570617676, "logps/chosen": -347.7698059082031, "logps/rejected": -495.16644287109375, "loss": 3.2405, "rewards/accuracies": 0.5, "rewards/chosen": -9.81707763671875, "rewards/margins": -2.572183609008789, "rewards/rejected": -7.244894027709961, "step": 12111 }, { "epoch": 1.88, "learning_rate": 5.2639026920483424e-06, "logits/chosen": -2.5756888389587402, "logits/rejected": -2.0592384338378906, "logps/chosen": -275.8298645019531, "logps/rejected": -324.9553527832031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.6914446353912354, "rewards/margins": 8.003984451293945, "rewards/rejected": -11.695428848266602, "step": 12112 }, { "epoch": 1.88, "learning_rate": 5.263169251517194e-06, "logits/chosen": -2.5846924781799316, "logits/rejected": -3.142080783843994, "logps/chosen": -359.0137023925781, "logps/rejected": -850.3208618164062, "loss": 0.143, "rewards/accuracies": 1.0, "rewards/chosen": -6.978403091430664, "rewards/margins": 4.552132606506348, "rewards/rejected": -11.530536651611328, "step": 12113 }, { "epoch": 1.88, "learning_rate": 5.262435810986047e-06, "logits/chosen": -2.211977005004883, "logits/rejected": -2.6177632808685303, "logps/chosen": -155.42398071289062, "logps/rejected": -234.8539581298828, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.6920478343963623, "rewards/margins": 7.576897144317627, "rewards/rejected": -11.268945693969727, "step": 12114 }, { "epoch": 1.88, "learning_rate": 5.2617023704549e-06, "logits/chosen": -2.4824070930480957, "logits/rejected": -2.416722536087036, "logps/chosen": -209.81080627441406, "logps/rejected": -476.5899658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.462937831878662, "rewards/margins": 17.062820434570312, "rewards/rejected": -19.525758743286133, "step": 12115 }, { "epoch": 1.88, "learning_rate": 5.260968929923752e-06, "logits/chosen": -1.644139051437378, "logits/rejected": -2.6288909912109375, "logps/chosen": -110.65422058105469, "logps/rejected": -358.1270751953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.749948501586914, "rewards/margins": 9.560441970825195, "rewards/rejected": -12.31039047241211, "step": 12116 }, { "epoch": 1.88, "learning_rate": 5.2602354893926035e-06, "logits/chosen": -2.6845595836639404, "logits/rejected": -2.7984259128570557, "logps/chosen": -77.65495300292969, "logps/rejected": -294.365234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.008279323577881, "rewards/margins": 8.722732543945312, "rewards/rejected": -10.731011390686035, "step": 12117 }, { "epoch": 1.88, "learning_rate": 5.259502048861455e-06, "logits/chosen": -2.346463203430176, "logits/rejected": -2.774901866912842, "logps/chosen": -162.30947875976562, "logps/rejected": -419.515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.09203577041626, "rewards/margins": 10.521455764770508, "rewards/rejected": -15.61349105834961, "step": 12118 }, { "epoch": 1.88, "learning_rate": 5.258768608330308e-06, "logits/chosen": -2.854053497314453, "logits/rejected": -3.038569211959839, "logps/chosen": -257.8506164550781, "logps/rejected": -363.33074951171875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -1.602219820022583, "rewards/margins": 8.028908729553223, "rewards/rejected": -9.631128311157227, "step": 12119 }, { "epoch": 1.88, "learning_rate": 5.25803516779916e-06, "logits/chosen": -2.504836082458496, "logits/rejected": -3.116098165512085, "logps/chosen": -210.25518798828125, "logps/rejected": -369.2312927246094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.320675849914551, "rewards/margins": 8.942680358886719, "rewards/rejected": -14.263357162475586, "step": 12120 }, { "epoch": 1.89, "learning_rate": 5.257301727268012e-06, "logits/chosen": -2.9296438694000244, "logits/rejected": -2.673354148864746, "logps/chosen": -149.3475341796875, "logps/rejected": -92.8466796875, "loss": 3.3437, "rewards/accuracies": 0.5, "rewards/chosen": -6.577787399291992, "rewards/margins": -1.093095064163208, "rewards/rejected": -5.484692096710205, "step": 12121 }, { "epoch": 1.89, "learning_rate": 5.256568286736864e-06, "logits/chosen": -2.4762003421783447, "logits/rejected": -2.7401719093322754, "logps/chosen": -74.18494415283203, "logps/rejected": -199.67333984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.892329692840576, "rewards/margins": 8.03140640258789, "rewards/rejected": -10.923735618591309, "step": 12122 }, { "epoch": 1.89, "learning_rate": 5.2558348462057165e-06, "logits/chosen": -2.111145257949829, "logits/rejected": -2.9970831871032715, "logps/chosen": -486.29052734375, "logps/rejected": -660.9196166992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.960275650024414, "rewards/margins": 12.26016616821289, "rewards/rejected": -19.220441818237305, "step": 12123 }, { "epoch": 1.89, "learning_rate": 5.255101405674568e-06, "logits/chosen": -2.8834292888641357, "logits/rejected": -2.364091396331787, "logps/chosen": -244.8723602294922, "logps/rejected": -326.0886535644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.893449783325195, "rewards/margins": 9.187579154968262, "rewards/rejected": -14.08102798461914, "step": 12124 }, { "epoch": 1.89, "learning_rate": 5.25436796514342e-06, "logits/chosen": -2.7835206985473633, "logits/rejected": -2.895263433456421, "logps/chosen": -123.08119201660156, "logps/rejected": -167.1611328125, "loss": 0.0737, "rewards/accuracies": 1.0, "rewards/chosen": -5.887102127075195, "rewards/margins": 4.036596298217773, "rewards/rejected": -9.923698425292969, "step": 12125 }, { "epoch": 1.89, "learning_rate": 5.253634524612272e-06, "logits/chosen": -2.909139633178711, "logits/rejected": -3.14892578125, "logps/chosen": -346.301025390625, "logps/rejected": -485.6955871582031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.790233612060547, "rewards/margins": 8.635700225830078, "rewards/rejected": -12.425933837890625, "step": 12126 }, { "epoch": 1.89, "learning_rate": 5.252901084081124e-06, "logits/chosen": -2.470689058303833, "logits/rejected": -2.9134631156921387, "logps/chosen": -219.49630737304688, "logps/rejected": -375.6243896484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.003080368041992, "rewards/margins": 7.143599510192871, "rewards/rejected": -11.146679878234863, "step": 12127 }, { "epoch": 1.89, "learning_rate": 5.252167643549977e-06, "logits/chosen": -2.821989059448242, "logits/rejected": -2.5264432430267334, "logps/chosen": -148.35549926757812, "logps/rejected": -246.1983184814453, "loss": 0.2135, "rewards/accuracies": 1.0, "rewards/chosen": -4.446289539337158, "rewards/margins": 5.751923561096191, "rewards/rejected": -10.198213577270508, "step": 12128 }, { "epoch": 1.89, "learning_rate": 5.251434203018829e-06, "logits/chosen": -2.2534077167510986, "logits/rejected": -2.9125008583068848, "logps/chosen": -100.6349868774414, "logps/rejected": -205.9464874267578, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.040127754211426, "rewards/margins": 6.656447410583496, "rewards/rejected": -11.696575164794922, "step": 12129 }, { "epoch": 1.89, "learning_rate": 5.2507007624876805e-06, "logits/chosen": -2.019277334213257, "logits/rejected": -2.8223156929016113, "logps/chosen": -76.64646911621094, "logps/rejected": -421.89190673828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.297830581665039, "rewards/margins": 10.351340293884277, "rewards/rejected": -14.649169921875, "step": 12130 }, { "epoch": 1.89, "learning_rate": 5.249967321956533e-06, "logits/chosen": -1.76963472366333, "logits/rejected": -2.409052610397339, "logps/chosen": -212.08441162109375, "logps/rejected": -245.5418701171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5062787532806396, "rewards/margins": 6.947281360626221, "rewards/rejected": -10.453559875488281, "step": 12131 }, { "epoch": 1.89, "learning_rate": 5.249233881425386e-06, "logits/chosen": -2.6516611576080322, "logits/rejected": -2.297912120819092, "logps/chosen": -313.254150390625, "logps/rejected": -408.40264892578125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.17146110534668, "rewards/margins": 8.03642463684082, "rewards/rejected": -12.2078857421875, "step": 12132 }, { "epoch": 1.89, "learning_rate": 5.248500440894238e-06, "logits/chosen": -1.840084195137024, "logits/rejected": -2.7233452796936035, "logps/chosen": -164.7948455810547, "logps/rejected": -334.4476013183594, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.7274465560913086, "rewards/margins": 7.017995834350586, "rewards/rejected": -10.745442390441895, "step": 12133 }, { "epoch": 1.89, "learning_rate": 5.24776700036309e-06, "logits/chosen": -2.906047821044922, "logits/rejected": -2.816554307937622, "logps/chosen": -423.8089599609375, "logps/rejected": -438.7529602050781, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -5.750509738922119, "rewards/margins": 5.164096355438232, "rewards/rejected": -10.914606094360352, "step": 12134 }, { "epoch": 1.89, "learning_rate": 5.2470335598319416e-06, "logits/chosen": -2.4752938747406006, "logits/rejected": -2.708390235900879, "logps/chosen": -163.09251403808594, "logps/rejected": -151.3091583251953, "loss": 0.9859, "rewards/accuracies": 0.5, "rewards/chosen": -6.712427139282227, "rewards/margins": 2.9074547290802, "rewards/rejected": -9.619882583618164, "step": 12135 }, { "epoch": 1.89, "learning_rate": 5.2463001193007934e-06, "logits/chosen": -2.4701895713806152, "logits/rejected": -2.1172006130218506, "logps/chosen": -256.3548889160156, "logps/rejected": -258.694091796875, "loss": 0.7352, "rewards/accuracies": 0.5, "rewards/chosen": -6.263141632080078, "rewards/margins": 6.186928749084473, "rewards/rejected": -12.45007038116455, "step": 12136 }, { "epoch": 1.89, "learning_rate": 5.245566678769646e-06, "logits/chosen": -2.58427095413208, "logits/rejected": -2.023505926132202, "logps/chosen": -236.29859924316406, "logps/rejected": -230.91427612304688, "loss": 1.4137, "rewards/accuracies": 0.5, "rewards/chosen": -5.799855709075928, "rewards/margins": 2.576662540435791, "rewards/rejected": -8.376518249511719, "step": 12137 }, { "epoch": 1.89, "learning_rate": 5.244833238238498e-06, "logits/chosen": -2.712198495864868, "logits/rejected": -2.9241104125976562, "logps/chosen": -439.74053955078125, "logps/rejected": -480.37518310546875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -4.461513519287109, "rewards/margins": 6.065185070037842, "rewards/rejected": -10.52669906616211, "step": 12138 }, { "epoch": 1.89, "learning_rate": 5.24409979770735e-06, "logits/chosen": -1.4299746751785278, "logits/rejected": -2.875322103500366, "logps/chosen": -164.9055633544922, "logps/rejected": -364.07232666015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.122432231903076, "rewards/margins": 8.4774808883667, "rewards/rejected": -12.599912643432617, "step": 12139 }, { "epoch": 1.89, "learning_rate": 5.243366357176202e-06, "logits/chosen": -1.8210407495498657, "logits/rejected": -2.860600233078003, "logps/chosen": -91.80398559570312, "logps/rejected": -249.97703552246094, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.05877161026001, "rewards/margins": 6.145665645599365, "rewards/rejected": -11.204437255859375, "step": 12140 }, { "epoch": 1.89, "learning_rate": 5.2426329166450545e-06, "logits/chosen": -2.654653787612915, "logits/rejected": -2.550374746322632, "logps/chosen": -354.0942687988281, "logps/rejected": -576.8082275390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.76715612411499, "rewards/margins": 11.674019813537598, "rewards/rejected": -18.44117546081543, "step": 12141 }, { "epoch": 1.89, "learning_rate": 5.241899476113906e-06, "logits/chosen": -2.7160422801971436, "logits/rejected": -2.2282276153564453, "logps/chosen": -112.45880126953125, "logps/rejected": -266.6415100097656, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.704445242881775, "rewards/margins": 8.434962272644043, "rewards/rejected": -10.13940715789795, "step": 12142 }, { "epoch": 1.89, "learning_rate": 5.241166035582758e-06, "logits/chosen": -2.5741021633148193, "logits/rejected": -1.8239703178405762, "logps/chosen": -213.70681762695312, "logps/rejected": -206.6798095703125, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -1.70036780834198, "rewards/margins": 7.198054313659668, "rewards/rejected": -8.898422241210938, "step": 12143 }, { "epoch": 1.89, "learning_rate": 5.24043259505161e-06, "logits/chosen": -2.6569290161132812, "logits/rejected": -3.255591869354248, "logps/chosen": -531.7388916015625, "logps/rejected": -582.9540405273438, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -8.257341384887695, "rewards/margins": 6.012127876281738, "rewards/rejected": -14.269469261169434, "step": 12144 }, { "epoch": 1.89, "learning_rate": 5.239699154520462e-06, "logits/chosen": -2.7173280715942383, "logits/rejected": -2.6912853717803955, "logps/chosen": -83.88339233398438, "logps/rejected": -125.84390258789062, "loss": 3.2735, "rewards/accuracies": 0.5, "rewards/chosen": -6.012407302856445, "rewards/margins": 1.0871264934539795, "rewards/rejected": -7.099534034729004, "step": 12145 }, { "epoch": 1.89, "learning_rate": 5.238965713989315e-06, "logits/chosen": -2.414971113204956, "logits/rejected": -2.973471164703369, "logps/chosen": -689.3505859375, "logps/rejected": -609.60107421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.447589874267578, "rewards/margins": 7.757830619812012, "rewards/rejected": -13.205419540405273, "step": 12146 }, { "epoch": 1.89, "learning_rate": 5.238232273458167e-06, "logits/chosen": -2.8387110233306885, "logits/rejected": -2.943502902984619, "logps/chosen": -146.74517822265625, "logps/rejected": -366.4222717285156, "loss": 0.951, "rewards/accuracies": 0.5, "rewards/chosen": -6.359713554382324, "rewards/margins": 5.253024101257324, "rewards/rejected": -11.612737655639648, "step": 12147 }, { "epoch": 1.89, "learning_rate": 5.237498832927019e-06, "logits/chosen": -2.238682746887207, "logits/rejected": -3.065457582473755, "logps/chosen": -98.05990600585938, "logps/rejected": -212.08865356445312, "loss": 1.9102, "rewards/accuracies": 0.0, "rewards/chosen": -6.048033714294434, "rewards/margins": -1.7050461769104004, "rewards/rejected": -4.342988014221191, "step": 12148 }, { "epoch": 1.89, "learning_rate": 5.236765392395871e-06, "logits/chosen": -2.0485165119171143, "logits/rejected": -2.863825559616089, "logps/chosen": -221.29830932617188, "logps/rejected": -559.6171264648438, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.776705741882324, "rewards/margins": 9.424055099487305, "rewards/rejected": -16.200761795043945, "step": 12149 }, { "epoch": 1.89, "learning_rate": 5.236031951864724e-06, "logits/chosen": -2.785062313079834, "logits/rejected": -3.0449531078338623, "logps/chosen": -183.1600341796875, "logps/rejected": -232.5324249267578, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.6230483055114746, "rewards/margins": 6.9963908195495605, "rewards/rejected": -10.619439125061035, "step": 12150 }, { "epoch": 1.89, "learning_rate": 5.235298511333576e-06, "logits/chosen": -2.10945987701416, "logits/rejected": -2.8333559036254883, "logps/chosen": -148.1990966796875, "logps/rejected": -434.0675354003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6061092615127563, "rewards/margins": 11.459417343139648, "rewards/rejected": -13.065526962280273, "step": 12151 }, { "epoch": 1.89, "learning_rate": 5.234565070802428e-06, "logits/chosen": -1.229029655456543, "logits/rejected": -2.728606700897217, "logps/chosen": -95.80200958251953, "logps/rejected": -337.3807373046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.302616596221924, "rewards/margins": 7.181872844696045, "rewards/rejected": -10.484489440917969, "step": 12152 }, { "epoch": 1.89, "learning_rate": 5.23383163027128e-06, "logits/chosen": -2.08068585395813, "logits/rejected": -2.945966958999634, "logps/chosen": -145.79876708984375, "logps/rejected": -508.2962951660156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5729846954345703, "rewards/margins": 9.525810241699219, "rewards/rejected": -12.098794937133789, "step": 12153 }, { "epoch": 1.89, "learning_rate": 5.233098189740132e-06, "logits/chosen": -2.5873234272003174, "logits/rejected": -2.819899320602417, "logps/chosen": -445.72125244140625, "logps/rejected": -578.491943359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.1823015213012695, "rewards/margins": 11.467270851135254, "rewards/rejected": -15.649572372436523, "step": 12154 }, { "epoch": 1.89, "learning_rate": 5.232364749208984e-06, "logits/chosen": -2.5411536693573, "logits/rejected": -2.8521363735198975, "logps/chosen": -102.51631164550781, "logps/rejected": -359.75225830078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.108015060424805, "rewards/margins": 8.096323013305664, "rewards/rejected": -12.204338073730469, "step": 12155 }, { "epoch": 1.89, "learning_rate": 5.231631308677836e-06, "logits/chosen": -2.2473061084747314, "logits/rejected": -2.965200185775757, "logps/chosen": -445.3146667480469, "logps/rejected": -921.5150146484375, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -5.514247894287109, "rewards/margins": 6.98085880279541, "rewards/rejected": -12.49510669708252, "step": 12156 }, { "epoch": 1.89, "learning_rate": 5.230897868146688e-06, "logits/chosen": -2.323234796524048, "logits/rejected": -2.9941747188568115, "logps/chosen": -178.22193908691406, "logps/rejected": -298.7638244628906, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -4.014659881591797, "rewards/margins": 6.578669548034668, "rewards/rejected": -10.593330383300781, "step": 12157 }, { "epoch": 1.89, "learning_rate": 5.23016442761554e-06, "logits/chosen": -2.9091105461120605, "logits/rejected": -2.0024094581604004, "logps/chosen": -337.49005126953125, "logps/rejected": -231.69775390625, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": -1.3509780168533325, "rewards/margins": 3.371541976928711, "rewards/rejected": -4.722519874572754, "step": 12158 }, { "epoch": 1.89, "learning_rate": 5.2294309870843926e-06, "logits/chosen": -3.250190019607544, "logits/rejected": -2.3747172355651855, "logps/chosen": -276.50653076171875, "logps/rejected": -128.8288116455078, "loss": 3.5291, "rewards/accuracies": 0.5, "rewards/chosen": -6.247904300689697, "rewards/margins": 1.2162249088287354, "rewards/rejected": -7.464129447937012, "step": 12159 }, { "epoch": 1.89, "learning_rate": 5.2286975465532444e-06, "logits/chosen": -1.6522281169891357, "logits/rejected": -3.013498067855835, "logps/chosen": -121.46549987792969, "logps/rejected": -542.1900634765625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.0348801612854004, "rewards/margins": 9.219715118408203, "rewards/rejected": -12.254594802856445, "step": 12160 }, { "epoch": 1.89, "learning_rate": 5.227964106022096e-06, "logits/chosen": -3.0216684341430664, "logits/rejected": -2.9123287200927734, "logps/chosen": -194.80113220214844, "logps/rejected": -156.19459533691406, "loss": 0.177, "rewards/accuracies": 1.0, "rewards/chosen": -3.9873948097229004, "rewards/margins": 3.550905704498291, "rewards/rejected": -7.538300514221191, "step": 12161 }, { "epoch": 1.89, "learning_rate": 5.227230665490948e-06, "logits/chosen": -2.6203057765960693, "logits/rejected": -2.7672128677368164, "logps/chosen": -114.17755126953125, "logps/rejected": -379.11004638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8726491928100586, "rewards/margins": 9.66898250579834, "rewards/rejected": -12.541631698608398, "step": 12162 }, { "epoch": 1.89, "learning_rate": 5.226497224959801e-06, "logits/chosen": -1.3082133531570435, "logits/rejected": -1.9341486692428589, "logps/chosen": -183.8817138671875, "logps/rejected": -375.90771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.457550048828125, "rewards/margins": 12.486268997192383, "rewards/rejected": -13.943819046020508, "step": 12163 }, { "epoch": 1.89, "learning_rate": 5.225763784428653e-06, "logits/chosen": -2.910825729370117, "logits/rejected": -3.055539846420288, "logps/chosen": -148.9275665283203, "logps/rejected": -259.5025634765625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.8423399925231934, "rewards/margins": 5.572839260101318, "rewards/rejected": -9.415179252624512, "step": 12164 }, { "epoch": 1.89, "learning_rate": 5.2250303438975055e-06, "logits/chosen": -3.0026659965515137, "logits/rejected": -2.270500421524048, "logps/chosen": -203.52420043945312, "logps/rejected": -221.85369873046875, "loss": 3.4223, "rewards/accuracies": 0.5, "rewards/chosen": -6.694714546203613, "rewards/margins": 2.97379469871521, "rewards/rejected": -9.668509483337402, "step": 12165 }, { "epoch": 1.89, "learning_rate": 5.224296903366357e-06, "logits/chosen": -2.714265823364258, "logits/rejected": -2.974783420562744, "logps/chosen": -652.33203125, "logps/rejected": -684.7913818359375, "loss": 0.2006, "rewards/accuracies": 1.0, "rewards/chosen": -4.747401714324951, "rewards/margins": 8.841388702392578, "rewards/rejected": -13.588790893554688, "step": 12166 }, { "epoch": 1.89, "learning_rate": 5.223563462835209e-06, "logits/chosen": -1.8827641010284424, "logits/rejected": -2.708004951477051, "logps/chosen": -248.60650634765625, "logps/rejected": -374.45550537109375, "loss": 3.4202, "rewards/accuracies": 0.5, "rewards/chosen": -9.08491325378418, "rewards/margins": 0.6393182277679443, "rewards/rejected": -9.724230766296387, "step": 12167 }, { "epoch": 1.89, "learning_rate": 5.222830022304062e-06, "logits/chosen": -1.7466975450515747, "logits/rejected": -2.968327760696411, "logps/chosen": -82.91537475585938, "logps/rejected": -332.42340087890625, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/chosen": -3.3208580017089844, "rewards/margins": 4.629094123840332, "rewards/rejected": -7.949952125549316, "step": 12168 }, { "epoch": 1.89, "learning_rate": 5.222096581772914e-06, "logits/chosen": -1.2853045463562012, "logits/rejected": -2.4336674213409424, "logps/chosen": -130.86277770996094, "logps/rejected": -395.11767578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.262373924255371, "rewards/margins": 9.304147720336914, "rewards/rejected": -15.566522598266602, "step": 12169 }, { "epoch": 1.89, "learning_rate": 5.221363141241766e-06, "logits/chosen": -2.3403329849243164, "logits/rejected": -3.0307490825653076, "logps/chosen": -240.6499481201172, "logps/rejected": -468.82110595703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.376768589019775, "rewards/margins": 8.268280029296875, "rewards/rejected": -12.645048141479492, "step": 12170 }, { "epoch": 1.89, "learning_rate": 5.220629700710618e-06, "logits/chosen": -3.0111637115478516, "logits/rejected": -3.0451905727386475, "logps/chosen": -326.73797607421875, "logps/rejected": -455.3668212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.8047016263008118, "rewards/margins": 13.261539459228516, "rewards/rejected": -14.066241264343262, "step": 12171 }, { "epoch": 1.89, "learning_rate": 5.21989626017947e-06, "logits/chosen": -2.667872428894043, "logits/rejected": -3.2480053901672363, "logps/chosen": -348.99981689453125, "logps/rejected": -457.32965087890625, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.1729636192321777, "rewards/margins": 5.7541279792785645, "rewards/rejected": -7.927091598510742, "step": 12172 }, { "epoch": 1.89, "learning_rate": 5.219162819648322e-06, "logits/chosen": -2.6962451934814453, "logits/rejected": -2.2720398902893066, "logps/chosen": -299.06158447265625, "logps/rejected": -371.37933349609375, "loss": 0.1035, "rewards/accuracies": 1.0, "rewards/chosen": -6.283468246459961, "rewards/margins": 2.5490634441375732, "rewards/rejected": -8.832530975341797, "step": 12173 }, { "epoch": 1.89, "learning_rate": 5.218429379117174e-06, "logits/chosen": -2.745798110961914, "logits/rejected": -1.4420126676559448, "logps/chosen": -696.3812866210938, "logps/rejected": -370.4063415527344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7289066314697266, "rewards/margins": 9.225484848022461, "rewards/rejected": -11.954391479492188, "step": 12174 }, { "epoch": 1.89, "learning_rate": 5.217695938586026e-06, "logits/chosen": -2.8044838905334473, "logits/rejected": -2.027517318725586, "logps/chosen": -218.283203125, "logps/rejected": -122.87992095947266, "loss": 0.4737, "rewards/accuracies": 0.5, "rewards/chosen": -4.459338665008545, "rewards/margins": 2.0797312259674072, "rewards/rejected": -6.539070129394531, "step": 12175 }, { "epoch": 1.89, "learning_rate": 5.216962498054878e-06, "logits/chosen": -2.7577126026153564, "logits/rejected": -3.2406725883483887, "logps/chosen": -174.05825805664062, "logps/rejected": -427.47296142578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.952101230621338, "rewards/margins": 9.191746711730957, "rewards/rejected": -11.143848419189453, "step": 12176 }, { "epoch": 1.89, "learning_rate": 5.216229057523731e-06, "logits/chosen": -2.6482784748077393, "logits/rejected": -3.1579999923706055, "logps/chosen": -127.67023468017578, "logps/rejected": -309.93865966796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0352132320404053, "rewards/margins": 8.519701957702637, "rewards/rejected": -10.554915428161621, "step": 12177 }, { "epoch": 1.89, "learning_rate": 5.2154956169925825e-06, "logits/chosen": -2.876156806945801, "logits/rejected": -2.6413190364837646, "logps/chosen": -244.0262451171875, "logps/rejected": -247.30921936035156, "loss": 0.9291, "rewards/accuracies": 0.5, "rewards/chosen": -8.341341018676758, "rewards/margins": 1.5090365409851074, "rewards/rejected": -9.850377082824707, "step": 12178 }, { "epoch": 1.89, "learning_rate": 5.214762176461434e-06, "logits/chosen": -0.9335291981697083, "logits/rejected": -2.6846706867218018, "logps/chosen": -97.13834381103516, "logps/rejected": -449.6117248535156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7296390533447266, "rewards/margins": 9.299546241760254, "rewards/rejected": -12.029186248779297, "step": 12179 }, { "epoch": 1.89, "learning_rate": 5.214028735930286e-06, "logits/chosen": -2.9000232219696045, "logits/rejected": -2.9502716064453125, "logps/chosen": -365.550048828125, "logps/rejected": -365.83758544921875, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -4.428191184997559, "rewards/margins": 5.839189529418945, "rewards/rejected": -10.267380714416504, "step": 12180 }, { "epoch": 1.89, "learning_rate": 5.213295295399139e-06, "logits/chosen": -1.5045133829116821, "logits/rejected": -3.0260467529296875, "logps/chosen": -112.22808837890625, "logps/rejected": -306.01947021484375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -3.206747055053711, "rewards/margins": 6.138903617858887, "rewards/rejected": -9.345650672912598, "step": 12181 }, { "epoch": 1.89, "learning_rate": 5.212561854867992e-06, "logits/chosen": -2.2003636360168457, "logits/rejected": -2.7857894897460938, "logps/chosen": -319.68182373046875, "logps/rejected": -444.0915832519531, "loss": 0.0414, "rewards/accuracies": 1.0, "rewards/chosen": -5.472186088562012, "rewards/margins": 5.3741865158081055, "rewards/rejected": -10.846373558044434, "step": 12182 }, { "epoch": 1.89, "learning_rate": 5.211828414336844e-06, "logits/chosen": -2.91395902633667, "logits/rejected": -2.567023515701294, "logps/chosen": -126.33915710449219, "logps/rejected": -180.4124755859375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -4.193228244781494, "rewards/margins": 5.3913164138793945, "rewards/rejected": -9.584545135498047, "step": 12183 }, { "epoch": 1.89, "learning_rate": 5.2110949738056955e-06, "logits/chosen": -2.883575439453125, "logits/rejected": -3.2657907009124756, "logps/chosen": -57.35921096801758, "logps/rejected": -224.8958282470703, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.3277368545532227, "rewards/margins": 7.113030910491943, "rewards/rejected": -10.440768241882324, "step": 12184 }, { "epoch": 1.9, "learning_rate": 5.210361533274547e-06, "logits/chosen": -2.320366144180298, "logits/rejected": -3.095818042755127, "logps/chosen": -100.72856903076172, "logps/rejected": -338.36541748046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.145806312561035, "rewards/margins": 6.926180839538574, "rewards/rejected": -10.07198715209961, "step": 12185 }, { "epoch": 1.9, "learning_rate": 5.2096280927434e-06, "logits/chosen": -2.891289472579956, "logits/rejected": -2.4028053283691406, "logps/chosen": -206.4896697998047, "logps/rejected": -233.89566040039062, "loss": 0.1214, "rewards/accuracies": 1.0, "rewards/chosen": -2.795124053955078, "rewards/margins": 4.437894821166992, "rewards/rejected": -7.2330193519592285, "step": 12186 }, { "epoch": 1.9, "learning_rate": 5.208894652212252e-06, "logits/chosen": -2.7908997535705566, "logits/rejected": -2.864332437515259, "logps/chosen": -142.05059814453125, "logps/rejected": -223.91110229492188, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -1.638364315032959, "rewards/margins": 6.350526809692383, "rewards/rejected": -7.9888916015625, "step": 12187 }, { "epoch": 1.9, "learning_rate": 5.208161211681104e-06, "logits/chosen": -2.4067249298095703, "logits/rejected": -2.999877691268921, "logps/chosen": -163.73658752441406, "logps/rejected": -271.10693359375, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -3.1869168281555176, "rewards/margins": 3.904503345489502, "rewards/rejected": -7.0914201736450195, "step": 12188 }, { "epoch": 1.9, "learning_rate": 5.207427771149956e-06, "logits/chosen": -1.8352887630462646, "logits/rejected": -2.8828015327453613, "logps/chosen": -105.77488708496094, "logps/rejected": -380.65069580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.020937919616699, "rewards/margins": 11.311798095703125, "rewards/rejected": -15.332735061645508, "step": 12189 }, { "epoch": 1.9, "learning_rate": 5.206694330618808e-06, "logits/chosen": -2.078202247619629, "logits/rejected": -3.054445505142212, "logps/chosen": -100.1876220703125, "logps/rejected": -374.8254699707031, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.040886878967285, "rewards/margins": 5.878305435180664, "rewards/rejected": -10.91919231414795, "step": 12190 }, { "epoch": 1.9, "learning_rate": 5.20596089008766e-06, "logits/chosen": -1.8044657707214355, "logits/rejected": -2.885612726211548, "logps/chosen": -143.74887084960938, "logps/rejected": -378.7237854003906, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.346139907836914, "rewards/margins": 7.227390766143799, "rewards/rejected": -12.573530197143555, "step": 12191 }, { "epoch": 1.9, "learning_rate": 5.205227449556512e-06, "logits/chosen": -2.0492427349090576, "logits/rejected": -2.249073028564453, "logps/chosen": -1082.922119140625, "logps/rejected": -815.0209350585938, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -6.824711322784424, "rewards/margins": 8.069443702697754, "rewards/rejected": -14.89415454864502, "step": 12192 }, { "epoch": 1.9, "learning_rate": 5.204494009025364e-06, "logits/chosen": -1.840834140777588, "logits/rejected": -2.443115234375, "logps/chosen": -200.48204040527344, "logps/rejected": -392.4405517578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.59755277633667, "rewards/margins": 11.147165298461914, "rewards/rejected": -13.744717597961426, "step": 12193 }, { "epoch": 1.9, "learning_rate": 5.203760568494216e-06, "logits/chosen": -2.6618473529815674, "logits/rejected": -2.8749356269836426, "logps/chosen": -65.64324951171875, "logps/rejected": -260.0083923339844, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.8324432373046875, "rewards/margins": 7.167135238647461, "rewards/rejected": -11.999578475952148, "step": 12194 }, { "epoch": 1.9, "learning_rate": 5.203027127963069e-06, "logits/chosen": -2.8462374210357666, "logits/rejected": -2.847616672515869, "logps/chosen": -225.60589599609375, "logps/rejected": -232.04336547851562, "loss": 0.6735, "rewards/accuracies": 0.5, "rewards/chosen": -4.187028408050537, "rewards/margins": 4.2584710121154785, "rewards/rejected": -8.445499420166016, "step": 12195 }, { "epoch": 1.9, "learning_rate": 5.2022936874319205e-06, "logits/chosen": -2.9186573028564453, "logits/rejected": -2.5427751541137695, "logps/chosen": -380.9356994628906, "logps/rejected": -513.9309692382812, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/chosen": -7.11623477935791, "rewards/margins": 5.150983810424805, "rewards/rejected": -12.267218589782715, "step": 12196 }, { "epoch": 1.9, "learning_rate": 5.201560246900772e-06, "logits/chosen": -1.391668677330017, "logits/rejected": -2.8627312183380127, "logps/chosen": -150.58694458007812, "logps/rejected": -308.98553466796875, "loss": 0.121, "rewards/accuracies": 1.0, "rewards/chosen": -5.373809814453125, "rewards/margins": 3.455301523208618, "rewards/rejected": -8.829111099243164, "step": 12197 }, { "epoch": 1.9, "learning_rate": 5.200826806369625e-06, "logits/chosen": -1.4027992486953735, "logits/rejected": -2.8384017944335938, "logps/chosen": -177.53683471679688, "logps/rejected": -423.1620178222656, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -5.164112091064453, "rewards/margins": 6.176175117492676, "rewards/rejected": -11.340287208557129, "step": 12198 }, { "epoch": 1.9, "learning_rate": 5.200093365838478e-06, "logits/chosen": -2.0845093727111816, "logits/rejected": -2.6368401050567627, "logps/chosen": -92.06807708740234, "logps/rejected": -292.9482421875, "loss": 0.0202, "rewards/accuracies": 1.0, "rewards/chosen": -2.545379161834717, "rewards/margins": 7.303194046020508, "rewards/rejected": -9.848573684692383, "step": 12199 }, { "epoch": 1.9, "learning_rate": 5.19935992530733e-06, "logits/chosen": -2.82015061378479, "logits/rejected": -1.946517825126648, "logps/chosen": -237.38870239257812, "logps/rejected": -173.03817749023438, "loss": 1.5708, "rewards/accuracies": 0.5, "rewards/chosen": -6.9814958572387695, "rewards/margins": -0.19360709190368652, "rewards/rejected": -6.787889003753662, "step": 12200 }, { "epoch": 1.9, "learning_rate": 5.198626484776182e-06, "logits/chosen": -2.981773853302002, "logits/rejected": -3.2010953426361084, "logps/chosen": -144.50830078125, "logps/rejected": -283.21759033203125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.664293050765991, "rewards/margins": 6.924033164978027, "rewards/rejected": -10.588326454162598, "step": 12201 }, { "epoch": 1.9, "learning_rate": 5.1978930442450335e-06, "logits/chosen": -2.5151920318603516, "logits/rejected": -3.0554754734039307, "logps/chosen": -237.0079803466797, "logps/rejected": -351.9103088378906, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.333746910095215, "rewards/margins": 7.665881156921387, "rewards/rejected": -10.999628067016602, "step": 12202 }, { "epoch": 1.9, "learning_rate": 5.197159603713886e-06, "logits/chosen": -2.6285064220428467, "logits/rejected": -2.4735283851623535, "logps/chosen": -631.543701171875, "logps/rejected": -720.885986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5298554301261902, "rewards/margins": 13.636640548706055, "rewards/rejected": -14.166496276855469, "step": 12203 }, { "epoch": 1.9, "learning_rate": 5.196426163182738e-06, "logits/chosen": -3.2209959030151367, "logits/rejected": -3.0657565593719482, "logps/chosen": -78.0272445678711, "logps/rejected": -102.33482360839844, "loss": 0.2773, "rewards/accuracies": 1.0, "rewards/chosen": -4.5754923820495605, "rewards/margins": 3.3506290912628174, "rewards/rejected": -7.926121711730957, "step": 12204 }, { "epoch": 1.9, "learning_rate": 5.19569272265159e-06, "logits/chosen": -2.8780128955841064, "logits/rejected": -1.5660717487335205, "logps/chosen": -217.09161376953125, "logps/rejected": -263.69244384765625, "loss": 1.0186, "rewards/accuracies": 0.5, "rewards/chosen": -5.6823530197143555, "rewards/margins": 1.8685379028320312, "rewards/rejected": -7.550890922546387, "step": 12205 }, { "epoch": 1.9, "learning_rate": 5.194959282120442e-06, "logits/chosen": -2.7296435832977295, "logits/rejected": -2.913106918334961, "logps/chosen": -145.51776123046875, "logps/rejected": -174.19406127929688, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/chosen": -6.33385705947876, "rewards/margins": 3.5402626991271973, "rewards/rejected": -9.874119758605957, "step": 12206 }, { "epoch": 1.9, "learning_rate": 5.194225841589294e-06, "logits/chosen": -2.4678702354431152, "logits/rejected": -2.9038138389587402, "logps/chosen": -251.23532104492188, "logps/rejected": -324.7149658203125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -3.9869794845581055, "rewards/margins": 5.410008430480957, "rewards/rejected": -9.396987915039062, "step": 12207 }, { "epoch": 1.9, "learning_rate": 5.1934924010581465e-06, "logits/chosen": -2.355700969696045, "logits/rejected": -2.956986904144287, "logps/chosen": -228.9571075439453, "logps/rejected": -279.8785095214844, "loss": 1.1019, "rewards/accuracies": 0.5, "rewards/chosen": -5.732614994049072, "rewards/margins": 3.6747939586639404, "rewards/rejected": -9.407408714294434, "step": 12208 }, { "epoch": 1.9, "learning_rate": 5.192758960526998e-06, "logits/chosen": -3.1030173301696777, "logits/rejected": -2.5038716793060303, "logps/chosen": -124.94775390625, "logps/rejected": -135.9326171875, "loss": 2.3278, "rewards/accuracies": 0.5, "rewards/chosen": -6.04789400100708, "rewards/margins": 1.5112082958221436, "rewards/rejected": -7.5591020584106445, "step": 12209 }, { "epoch": 1.9, "learning_rate": 5.19202551999585e-06, "logits/chosen": -2.9001379013061523, "logits/rejected": -2.9296441078186035, "logps/chosen": -95.62391662597656, "logps/rejected": -195.53887939453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.998107671737671, "rewards/margins": 7.83425760269165, "rewards/rejected": -10.832365036010742, "step": 12210 }, { "epoch": 1.9, "learning_rate": 5.191292079464702e-06, "logits/chosen": -2.9036879539489746, "logits/rejected": -3.2159695625305176, "logps/chosen": -183.90855407714844, "logps/rejected": -277.5587158203125, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -5.996740341186523, "rewards/margins": 6.107558250427246, "rewards/rejected": -12.10429859161377, "step": 12211 }, { "epoch": 1.9, "learning_rate": 5.190558638933555e-06, "logits/chosen": -3.0618178844451904, "logits/rejected": -3.451594591140747, "logps/chosen": -70.75867462158203, "logps/rejected": -308.40185546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.895796060562134, "rewards/margins": 10.010406494140625, "rewards/rejected": -12.90620231628418, "step": 12212 }, { "epoch": 1.9, "learning_rate": 5.189825198402407e-06, "logits/chosen": -2.6060733795166016, "logits/rejected": -2.9509117603302, "logps/chosen": -195.52413940429688, "logps/rejected": -245.2146453857422, "loss": 1.413, "rewards/accuracies": 0.5, "rewards/chosen": -5.169065475463867, "rewards/margins": 3.160922050476074, "rewards/rejected": -8.329987525939941, "step": 12213 }, { "epoch": 1.9, "learning_rate": 5.189091757871259e-06, "logits/chosen": -2.741340160369873, "logits/rejected": -2.0281994342803955, "logps/chosen": -411.4540100097656, "logps/rejected": -626.0252685546875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -4.192602634429932, "rewards/margins": 6.467034339904785, "rewards/rejected": -10.659636497497559, "step": 12214 }, { "epoch": 1.9, "learning_rate": 5.188358317340111e-06, "logits/chosen": -2.6605968475341797, "logits/rejected": -2.622757911682129, "logps/chosen": -132.52069091796875, "logps/rejected": -240.0970458984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.618159770965576, "rewards/margins": 7.274040222167969, "rewards/rejected": -9.892200469970703, "step": 12215 }, { "epoch": 1.9, "learning_rate": 5.187624876808963e-06, "logits/chosen": -2.579558849334717, "logits/rejected": -2.9704859256744385, "logps/chosen": -53.86580276489258, "logps/rejected": -183.08099365234375, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -3.7781295776367188, "rewards/margins": 5.5516743659973145, "rewards/rejected": -9.329803466796875, "step": 12216 }, { "epoch": 1.9, "learning_rate": 5.186891436277816e-06, "logits/chosen": -2.5031707286834717, "logits/rejected": -3.021533727645874, "logps/chosen": -119.00408935546875, "logps/rejected": -403.64141845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.586155652999878, "rewards/margins": 11.548901557922363, "rewards/rejected": -13.13505744934082, "step": 12217 }, { "epoch": 1.9, "learning_rate": 5.186157995746668e-06, "logits/chosen": -2.8618762493133545, "logits/rejected": -2.1561176776885986, "logps/chosen": -256.7544860839844, "logps/rejected": -336.0711669921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.2393174171447754, "rewards/margins": 9.779861450195312, "rewards/rejected": -13.01917839050293, "step": 12218 }, { "epoch": 1.9, "learning_rate": 5.18542455521552e-06, "logits/chosen": -2.7031431198120117, "logits/rejected": -2.840050220489502, "logps/chosen": -109.67742919921875, "logps/rejected": -366.68560791015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.245075702667236, "rewards/margins": 8.113311767578125, "rewards/rejected": -13.35838794708252, "step": 12219 }, { "epoch": 1.9, "learning_rate": 5.1846911146843715e-06, "logits/chosen": -1.9831234216690063, "logits/rejected": -2.6241471767425537, "logps/chosen": -144.34765625, "logps/rejected": -254.05296325683594, "loss": 0.5905, "rewards/accuracies": 0.5, "rewards/chosen": -5.578470230102539, "rewards/margins": 4.507909774780273, "rewards/rejected": -10.086380004882812, "step": 12220 }, { "epoch": 1.9, "learning_rate": 5.183957674153224e-06, "logits/chosen": -2.7258141040802, "logits/rejected": -2.8682808876037598, "logps/chosen": -132.61544799804688, "logps/rejected": -280.87860107421875, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -5.2136640548706055, "rewards/margins": 6.172207832336426, "rewards/rejected": -11.385871887207031, "step": 12221 }, { "epoch": 1.9, "learning_rate": 5.183224233622076e-06, "logits/chosen": -3.106050968170166, "logits/rejected": -2.2734549045562744, "logps/chosen": -141.54006958007812, "logps/rejected": -103.30328369140625, "loss": 2.4636, "rewards/accuracies": 0.5, "rewards/chosen": -6.667964935302734, "rewards/margins": 0.47472667694091797, "rewards/rejected": -7.142691612243652, "step": 12222 }, { "epoch": 1.9, "learning_rate": 5.182490793090928e-06, "logits/chosen": -2.9060397148132324, "logits/rejected": -2.3039870262145996, "logps/chosen": -219.2196502685547, "logps/rejected": -256.9728088378906, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.512184143066406, "rewards/margins": 7.8082427978515625, "rewards/rejected": -12.320426940917969, "step": 12223 }, { "epoch": 1.9, "learning_rate": 5.18175735255978e-06, "logits/chosen": -1.9622584581375122, "logits/rejected": -3.1702921390533447, "logps/chosen": -250.1200408935547, "logps/rejected": -455.073486328125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.230306148529053, "rewards/margins": 7.00377082824707, "rewards/rejected": -11.234077453613281, "step": 12224 }, { "epoch": 1.9, "learning_rate": 5.181023912028632e-06, "logits/chosen": -2.1590850353240967, "logits/rejected": -3.1191694736480713, "logps/chosen": -88.37242126464844, "logps/rejected": -371.60723876953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.5981297492980957, "rewards/margins": 9.455284118652344, "rewards/rejected": -12.053413391113281, "step": 12225 }, { "epoch": 1.9, "learning_rate": 5.1802904714974845e-06, "logits/chosen": -2.9039080142974854, "logits/rejected": -2.965071439743042, "logps/chosen": -378.7725830078125, "logps/rejected": -335.00897216796875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.4003992080688477, "rewards/margins": 7.53062629699707, "rewards/rejected": -10.931024551391602, "step": 12226 }, { "epoch": 1.9, "learning_rate": 5.179557030966336e-06, "logits/chosen": -2.5149738788604736, "logits/rejected": -1.3361704349517822, "logps/chosen": -185.09786987304688, "logps/rejected": -181.15501403808594, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.2960715293884277, "rewards/margins": 7.207037925720215, "rewards/rejected": -9.5031099319458, "step": 12227 }, { "epoch": 1.9, "learning_rate": 5.178823590435188e-06, "logits/chosen": -2.7271716594696045, "logits/rejected": -2.1125142574310303, "logps/chosen": -348.18145751953125, "logps/rejected": -369.9801025390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.426950931549072, "rewards/margins": 11.713132858276367, "rewards/rejected": -16.14008331298828, "step": 12228 }, { "epoch": 1.9, "learning_rate": 5.17809014990404e-06, "logits/chosen": -2.7582290172576904, "logits/rejected": -2.697449207305908, "logps/chosen": -192.09750366210938, "logps/rejected": -273.493408203125, "loss": 0.2647, "rewards/accuracies": 1.0, "rewards/chosen": -6.351816654205322, "rewards/margins": 4.035955429077148, "rewards/rejected": -10.387771606445312, "step": 12229 }, { "epoch": 1.9, "learning_rate": 5.177356709372893e-06, "logits/chosen": -2.1403026580810547, "logits/rejected": -2.906949996948242, "logps/chosen": -102.33551788330078, "logps/rejected": -533.2923583984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.930913209915161, "rewards/margins": 10.327950477600098, "rewards/rejected": -13.25886344909668, "step": 12230 }, { "epoch": 1.9, "learning_rate": 5.176623268841745e-06, "logits/chosen": -3.0050933361053467, "logits/rejected": -2.5189948081970215, "logps/chosen": -244.25331115722656, "logps/rejected": -241.8330535888672, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -1.7746193408966064, "rewards/margins": 5.57016134262085, "rewards/rejected": -7.344780921936035, "step": 12231 }, { "epoch": 1.9, "learning_rate": 5.1758898283105975e-06, "logits/chosen": -2.848907709121704, "logits/rejected": -2.5429418087005615, "logps/chosen": -249.50274658203125, "logps/rejected": -277.69305419921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.5338783264160156, "rewards/margins": 8.407499313354492, "rewards/rejected": -10.941377639770508, "step": 12232 }, { "epoch": 1.9, "learning_rate": 5.175156387779449e-06, "logits/chosen": -2.4133083820343018, "logits/rejected": -2.8486862182617188, "logps/chosen": -148.02096557617188, "logps/rejected": -288.682373046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.432267189025879, "rewards/margins": 7.51371955871582, "rewards/rejected": -11.9459867477417, "step": 12233 }, { "epoch": 1.9, "learning_rate": 5.174422947248301e-06, "logits/chosen": -2.7499568462371826, "logits/rejected": -3.083611011505127, "logps/chosen": -192.0700225830078, "logps/rejected": -350.61083984375, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": -4.074244499206543, "rewards/margins": 7.015880584716797, "rewards/rejected": -11.090126037597656, "step": 12234 }, { "epoch": 1.9, "learning_rate": 5.173689506717154e-06, "logits/chosen": -2.7189207077026367, "logits/rejected": -2.252737045288086, "logps/chosen": -487.94305419921875, "logps/rejected": -589.7274169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.409167289733887, "rewards/margins": 9.989774703979492, "rewards/rejected": -16.398942947387695, "step": 12235 }, { "epoch": 1.9, "learning_rate": 5.172956066186006e-06, "logits/chosen": -2.918933153152466, "logits/rejected": -2.878990650177002, "logps/chosen": -202.26971435546875, "logps/rejected": -145.7676544189453, "loss": 0.6126, "rewards/accuracies": 0.5, "rewards/chosen": -4.310886383056641, "rewards/margins": 5.395941734313965, "rewards/rejected": -9.706828117370605, "step": 12236 }, { "epoch": 1.9, "learning_rate": 5.172222625654858e-06, "logits/chosen": -1.3569473028182983, "logits/rejected": -2.8508007526397705, "logps/chosen": -247.0701446533203, "logps/rejected": -624.0411376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.272594451904297, "rewards/margins": 9.415094375610352, "rewards/rejected": -13.687688827514648, "step": 12237 }, { "epoch": 1.9, "learning_rate": 5.17148918512371e-06, "logits/chosen": -2.823002338409424, "logits/rejected": -1.6506640911102295, "logps/chosen": -252.5599822998047, "logps/rejected": -261.58990478515625, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -3.9495949745178223, "rewards/margins": 4.580474853515625, "rewards/rejected": -8.530069351196289, "step": 12238 }, { "epoch": 1.9, "learning_rate": 5.170755744592562e-06, "logits/chosen": -1.785385012626648, "logits/rejected": -2.9081482887268066, "logps/chosen": -104.82633209228516, "logps/rejected": -357.1587219238281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8027005195617676, "rewards/margins": 8.348010063171387, "rewards/rejected": -11.150711059570312, "step": 12239 }, { "epoch": 1.9, "learning_rate": 5.170022304061414e-06, "logits/chosen": -1.7958364486694336, "logits/rejected": -2.9378063678741455, "logps/chosen": -78.34814453125, "logps/rejected": -345.83551025390625, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -4.663959503173828, "rewards/margins": 7.042191505432129, "rewards/rejected": -11.706151008605957, "step": 12240 }, { "epoch": 1.9, "learning_rate": 5.169288863530266e-06, "logits/chosen": -1.286733865737915, "logits/rejected": -3.005788564682007, "logps/chosen": -168.2664031982422, "logps/rejected": -514.0347290039062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.158931732177734, "rewards/margins": 9.978754043579102, "rewards/rejected": -14.137685775756836, "step": 12241 }, { "epoch": 1.9, "learning_rate": 5.168555422999118e-06, "logits/chosen": -2.463122844696045, "logits/rejected": -2.976576328277588, "logps/chosen": -78.8504409790039, "logps/rejected": -444.4073486328125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.04415225982666, "rewards/margins": 6.203306198120117, "rewards/rejected": -11.247458457946777, "step": 12242 }, { "epoch": 1.9, "learning_rate": 5.16782198246797e-06, "logits/chosen": -1.8012162446975708, "logits/rejected": -2.715935468673706, "logps/chosen": -174.98939514160156, "logps/rejected": -381.65606689453125, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/chosen": -3.60376238822937, "rewards/margins": 6.419577598571777, "rewards/rejected": -10.023340225219727, "step": 12243 }, { "epoch": 1.9, "learning_rate": 5.1670885419368226e-06, "logits/chosen": -3.0701661109924316, "logits/rejected": -3.130405902862549, "logps/chosen": -116.36524963378906, "logps/rejected": -233.85690307617188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.1508920192718506, "rewards/margins": 9.497576713562012, "rewards/rejected": -10.648468971252441, "step": 12244 }, { "epoch": 1.9, "learning_rate": 5.1663551014056744e-06, "logits/chosen": -2.788503646850586, "logits/rejected": -3.053571939468384, "logps/chosen": -133.530517578125, "logps/rejected": -417.19683837890625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -2.9290223121643066, "rewards/margins": 9.339649200439453, "rewards/rejected": -12.268671035766602, "step": 12245 }, { "epoch": 1.9, "learning_rate": 5.165621660874526e-06, "logits/chosen": -3.089221715927124, "logits/rejected": -3.2865262031555176, "logps/chosen": -177.1024627685547, "logps/rejected": -284.6685791015625, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -5.037894248962402, "rewards/margins": 6.718923568725586, "rewards/rejected": -11.756818771362305, "step": 12246 }, { "epoch": 1.9, "learning_rate": 5.164888220343378e-06, "logits/chosen": -1.8153780698776245, "logits/rejected": -3.014436960220337, "logps/chosen": -159.25198364257812, "logps/rejected": -372.37847900390625, "loss": 0.0786, "rewards/accuracies": 1.0, "rewards/chosen": -3.9300222396850586, "rewards/margins": 5.5524516105651855, "rewards/rejected": -9.482473373413086, "step": 12247 }, { "epoch": 1.9, "learning_rate": 5.164154779812231e-06, "logits/chosen": -2.2111871242523193, "logits/rejected": -2.966123342514038, "logps/chosen": -342.7803649902344, "logps/rejected": -312.39703369140625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.566749572753906, "rewards/margins": 7.921640396118164, "rewards/rejected": -12.48838996887207, "step": 12248 }, { "epoch": 1.9, "learning_rate": 5.163421339281084e-06, "logits/chosen": -2.561427593231201, "logits/rejected": -2.9879367351531982, "logps/chosen": -515.0016479492188, "logps/rejected": -594.1358032226562, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.326932668685913, "rewards/margins": 7.490538597106934, "rewards/rejected": -10.817471504211426, "step": 12249 }, { "epoch": 1.91, "learning_rate": 5.1626878987499355e-06, "logits/chosen": -2.7417821884155273, "logits/rejected": -2.465092658996582, "logps/chosen": -160.3585662841797, "logps/rejected": -225.817138671875, "loss": 0.795, "rewards/accuracies": 0.5, "rewards/chosen": -5.991029739379883, "rewards/margins": 2.6804351806640625, "rewards/rejected": -8.671464920043945, "step": 12250 }, { "epoch": 1.91, "learning_rate": 5.161954458218787e-06, "logits/chosen": -2.5273640155792236, "logits/rejected": -2.912919759750366, "logps/chosen": -74.2315902709961, "logps/rejected": -241.35671997070312, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -4.642936706542969, "rewards/margins": 5.741119384765625, "rewards/rejected": -10.384056091308594, "step": 12251 }, { "epoch": 1.91, "learning_rate": 5.16122101768764e-06, "logits/chosen": -1.8561465740203857, "logits/rejected": -3.174241304397583, "logps/chosen": -258.68878173828125, "logps/rejected": -543.446533203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.523491382598877, "rewards/margins": 8.753108024597168, "rewards/rejected": -13.276598930358887, "step": 12252 }, { "epoch": 1.91, "learning_rate": 5.160487577156492e-06, "logits/chosen": -1.9770487546920776, "logits/rejected": -2.3879928588867188, "logps/chosen": -203.55902099609375, "logps/rejected": -378.3125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.3209617137908936, "rewards/margins": 11.175642013549805, "rewards/rejected": -13.496603012084961, "step": 12253 }, { "epoch": 1.91, "learning_rate": 5.159754136625344e-06, "logits/chosen": -2.1227152347564697, "logits/rejected": -3.1288721561431885, "logps/chosen": -135.58428955078125, "logps/rejected": -351.469970703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.136738300323486, "rewards/margins": 7.775240898132324, "rewards/rejected": -12.911979675292969, "step": 12254 }, { "epoch": 1.91, "learning_rate": 5.159020696094196e-06, "logits/chosen": -2.1686840057373047, "logits/rejected": -3.052762031555176, "logps/chosen": -292.3365173339844, "logps/rejected": -411.471923828125, "loss": 0.0318, "rewards/accuracies": 1.0, "rewards/chosen": -5.852583885192871, "rewards/margins": 4.238018989562988, "rewards/rejected": -10.09060287475586, "step": 12255 }, { "epoch": 1.91, "learning_rate": 5.158287255563048e-06, "logits/chosen": -1.6882091760635376, "logits/rejected": -2.70412278175354, "logps/chosen": -109.63150787353516, "logps/rejected": -292.0589904785156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.788331985473633, "rewards/margins": 9.998360633850098, "rewards/rejected": -14.786693572998047, "step": 12256 }, { "epoch": 1.91, "learning_rate": 5.1575538150319e-06, "logits/chosen": -2.197720527648926, "logits/rejected": -2.8511464595794678, "logps/chosen": -139.89877319335938, "logps/rejected": -369.5542907714844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.557349681854248, "rewards/margins": 9.98477554321289, "rewards/rejected": -12.542125701904297, "step": 12257 }, { "epoch": 1.91, "learning_rate": 5.156820374500752e-06, "logits/chosen": -2.536623239517212, "logits/rejected": -3.0292975902557373, "logps/chosen": -542.8447875976562, "logps/rejected": -617.4657592773438, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.276709079742432, "rewards/margins": 7.322038650512695, "rewards/rejected": -11.598747253417969, "step": 12258 }, { "epoch": 1.91, "learning_rate": 5.156086933969604e-06, "logits/chosen": -1.0248419046401978, "logits/rejected": -2.958756685256958, "logps/chosen": -64.8106460571289, "logps/rejected": -565.0343627929688, "loss": 0.0694, "rewards/accuracies": 1.0, "rewards/chosen": -4.8634185791015625, "rewards/margins": 8.45572280883789, "rewards/rejected": -13.319141387939453, "step": 12259 }, { "epoch": 1.91, "learning_rate": 5.155353493438456e-06, "logits/chosen": -3.1657161712646484, "logits/rejected": -2.8595852851867676, "logps/chosen": -936.8598022460938, "logps/rejected": -566.1935424804688, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -4.351937770843506, "rewards/margins": 5.189846992492676, "rewards/rejected": -9.541784286499023, "step": 12260 }, { "epoch": 1.91, "learning_rate": 5.154620052907309e-06, "logits/chosen": -2.7303478717803955, "logits/rejected": -2.9453372955322266, "logps/chosen": -116.55958557128906, "logps/rejected": -162.89926147460938, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.640666961669922, "rewards/margins": 7.1368608474731445, "rewards/rejected": -10.77752685546875, "step": 12261 }, { "epoch": 1.91, "learning_rate": 5.153886612376161e-06, "logits/chosen": -2.2891950607299805, "logits/rejected": -2.8196513652801514, "logps/chosen": -329.61602783203125, "logps/rejected": -594.143798828125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.334508895874023, "rewards/margins": 6.54743766784668, "rewards/rejected": -11.881946563720703, "step": 12262 }, { "epoch": 1.91, "learning_rate": 5.1531531718450125e-06, "logits/chosen": -2.8433895111083984, "logits/rejected": -2.3519904613494873, "logps/chosen": -132.5641326904297, "logps/rejected": -170.61376953125, "loss": 1.9429, "rewards/accuracies": 0.5, "rewards/chosen": -7.304425239562988, "rewards/margins": 1.8553438186645508, "rewards/rejected": -9.159769058227539, "step": 12263 }, { "epoch": 1.91, "learning_rate": 5.152419731313864e-06, "logits/chosen": -2.7901952266693115, "logits/rejected": -1.868872880935669, "logps/chosen": -618.439697265625, "logps/rejected": -403.3695983886719, "loss": 0.3618, "rewards/accuracies": 1.0, "rewards/chosen": -7.118905067443848, "rewards/margins": 1.1514172554016113, "rewards/rejected": -8.2703218460083, "step": 12264 }, { "epoch": 1.91, "learning_rate": 5.151686290782717e-06, "logits/chosen": -2.724879741668701, "logits/rejected": -2.4850521087646484, "logps/chosen": -549.638427734375, "logps/rejected": -440.10205078125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -1.1850414276123047, "rewards/margins": 9.107502937316895, "rewards/rejected": -10.2925443649292, "step": 12265 }, { "epoch": 1.91, "learning_rate": 5.15095285025157e-06, "logits/chosen": -2.7480452060699463, "logits/rejected": -2.541478395462036, "logps/chosen": -407.316162109375, "logps/rejected": -616.0232543945312, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/chosen": -4.195693016052246, "rewards/margins": 4.217642307281494, "rewards/rejected": -8.413334846496582, "step": 12266 }, { "epoch": 1.91, "learning_rate": 5.150219409720422e-06, "logits/chosen": -2.9018092155456543, "logits/rejected": -2.5643692016601562, "logps/chosen": -1029.7864990234375, "logps/rejected": -782.2255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7537063360214233, "rewards/margins": 12.231216430664062, "rewards/rejected": -13.984922409057617, "step": 12267 }, { "epoch": 1.91, "learning_rate": 5.1494859691892736e-06, "logits/chosen": -2.945266008377075, "logits/rejected": -2.7934255599975586, "logps/chosen": -125.98640441894531, "logps/rejected": -175.3459014892578, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/chosen": -3.1214914321899414, "rewards/margins": 5.574761867523193, "rewards/rejected": -8.696252822875977, "step": 12268 }, { "epoch": 1.91, "learning_rate": 5.1487525286581254e-06, "logits/chosen": -1.626878261566162, "logits/rejected": -2.574190139770508, "logps/chosen": -105.95440673828125, "logps/rejected": -379.48651123046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.810844898223877, "rewards/margins": 8.866301536560059, "rewards/rejected": -12.677145957946777, "step": 12269 }, { "epoch": 1.91, "learning_rate": 5.148019088126978e-06, "logits/chosen": -1.5430711507797241, "logits/rejected": -2.5956177711486816, "logps/chosen": -104.66191101074219, "logps/rejected": -433.9818115234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.475743532180786, "rewards/margins": 12.551960945129395, "rewards/rejected": -15.027704238891602, "step": 12270 }, { "epoch": 1.91, "learning_rate": 5.14728564759583e-06, "logits/chosen": -2.540992259979248, "logits/rejected": -3.016167402267456, "logps/chosen": -276.5432434082031, "logps/rejected": -330.0268859863281, "loss": 0.0807, "rewards/accuracies": 1.0, "rewards/chosen": -6.518551826477051, "rewards/margins": 4.270817756652832, "rewards/rejected": -10.789369583129883, "step": 12271 }, { "epoch": 1.91, "learning_rate": 5.146552207064682e-06, "logits/chosen": -2.6599280834198, "logits/rejected": -2.673877716064453, "logps/chosen": -109.93571472167969, "logps/rejected": -183.902099609375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.7767467498779297, "rewards/margins": 6.6349334716796875, "rewards/rejected": -10.411680221557617, "step": 12272 }, { "epoch": 1.91, "learning_rate": 5.145818766533534e-06, "logits/chosen": -2.214348554611206, "logits/rejected": -2.9289088249206543, "logps/chosen": -111.47644805908203, "logps/rejected": -350.5128173828125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.1828043460845947, "rewards/margins": 5.549197196960449, "rewards/rejected": -8.732001304626465, "step": 12273 }, { "epoch": 1.91, "learning_rate": 5.145085326002386e-06, "logits/chosen": -2.433128833770752, "logits/rejected": -2.9053151607513428, "logps/chosen": -77.32975769042969, "logps/rejected": -278.07330322265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.7778000831604004, "rewards/margins": 7.495516777038574, "rewards/rejected": -11.273317337036133, "step": 12274 }, { "epoch": 1.91, "learning_rate": 5.144351885471238e-06, "logits/chosen": -2.6037662029266357, "logits/rejected": -2.960073232650757, "logps/chosen": -721.2264404296875, "logps/rejected": -622.0466918945312, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -5.393141269683838, "rewards/margins": 4.252324104309082, "rewards/rejected": -9.645465850830078, "step": 12275 }, { "epoch": 1.91, "learning_rate": 5.14361844494009e-06, "logits/chosen": -2.252701759338379, "logits/rejected": -2.707733631134033, "logps/chosen": -238.45034790039062, "logps/rejected": -275.1320495605469, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": -4.910519123077393, "rewards/margins": 5.3506903648376465, "rewards/rejected": -10.261209487915039, "step": 12276 }, { "epoch": 1.91, "learning_rate": 5.142885004408942e-06, "logits/chosen": -1.0236965417861938, "logits/rejected": -2.58414363861084, "logps/chosen": -85.25274658203125, "logps/rejected": -308.83172607421875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -4.068995952606201, "rewards/margins": 4.852538585662842, "rewards/rejected": -8.921534538269043, "step": 12277 }, { "epoch": 1.91, "learning_rate": 5.142151563877794e-06, "logits/chosen": -2.8374321460723877, "logits/rejected": -3.087660312652588, "logps/chosen": -88.73773956298828, "logps/rejected": -313.96783447265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2740848064422607, "rewards/margins": 8.789889335632324, "rewards/rejected": -11.063974380493164, "step": 12278 }, { "epoch": 1.91, "learning_rate": 5.141418123346647e-06, "logits/chosen": -2.864973306655884, "logits/rejected": -2.60652494430542, "logps/chosen": -236.0418701171875, "logps/rejected": -289.16546630859375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -3.846693515777588, "rewards/margins": 6.75758171081543, "rewards/rejected": -10.604275703430176, "step": 12279 }, { "epoch": 1.91, "learning_rate": 5.140684682815499e-06, "logits/chosen": -3.0145437717437744, "logits/rejected": -2.419037103652954, "logps/chosen": -124.00057983398438, "logps/rejected": -207.69839477539062, "loss": 0.7221, "rewards/accuracies": 0.5, "rewards/chosen": -4.101067066192627, "rewards/margins": 5.892958641052246, "rewards/rejected": -9.994026184082031, "step": 12280 }, { "epoch": 1.91, "learning_rate": 5.1399512422843505e-06, "logits/chosen": -2.8577167987823486, "logits/rejected": -1.5750961303710938, "logps/chosen": -871.7529907226562, "logps/rejected": -480.7837219238281, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -3.2562224864959717, "rewards/margins": 5.574731349945068, "rewards/rejected": -8.830953598022461, "step": 12281 }, { "epoch": 1.91, "learning_rate": 5.139217801753203e-06, "logits/chosen": -2.6553986072540283, "logits/rejected": -2.868215322494507, "logps/chosen": -86.4686279296875, "logps/rejected": -179.19534301757812, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -5.253401279449463, "rewards/margins": 4.858381748199463, "rewards/rejected": -10.111783027648926, "step": 12282 }, { "epoch": 1.91, "learning_rate": 5.138484361222055e-06, "logits/chosen": -0.4262949526309967, "logits/rejected": -3.004394054412842, "logps/chosen": -104.16172790527344, "logps/rejected": -624.9705810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.734087944030762, "rewards/margins": 12.240347862243652, "rewards/rejected": -16.974435806274414, "step": 12283 }, { "epoch": 1.91, "learning_rate": 5.137750920690908e-06, "logits/chosen": -1.8454933166503906, "logits/rejected": -3.013850212097168, "logps/chosen": -169.96099853515625, "logps/rejected": -442.43475341796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.194712162017822, "rewards/margins": 9.189562797546387, "rewards/rejected": -14.384275436401367, "step": 12284 }, { "epoch": 1.91, "learning_rate": 5.13701748015976e-06, "logits/chosen": -2.8701789379119873, "logits/rejected": -2.2985880374908447, "logps/chosen": -637.8223266601562, "logps/rejected": -449.478515625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.843186855316162, "rewards/margins": 6.829111576080322, "rewards/rejected": -10.672298431396484, "step": 12285 }, { "epoch": 1.91, "learning_rate": 5.136284039628612e-06, "logits/chosen": -2.8896403312683105, "logits/rejected": -3.124539375305176, "logps/chosen": -104.7271728515625, "logps/rejected": -194.36566162109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.267949104309082, "rewards/margins": 7.487285614013672, "rewards/rejected": -10.755233764648438, "step": 12286 }, { "epoch": 1.91, "learning_rate": 5.1355505990974635e-06, "logits/chosen": -1.636148452758789, "logits/rejected": -2.943380832672119, "logps/chosen": -142.45494079589844, "logps/rejected": -422.2621154785156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.0126025676727295, "rewards/margins": 8.615503311157227, "rewards/rejected": -11.628105163574219, "step": 12287 }, { "epoch": 1.91, "learning_rate": 5.134817158566316e-06, "logits/chosen": -1.6856721639633179, "logits/rejected": -2.9338159561157227, "logps/chosen": -110.07963562011719, "logps/rejected": -325.312255859375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -5.8684306144714355, "rewards/margins": 6.317889213562012, "rewards/rejected": -12.186319351196289, "step": 12288 }, { "epoch": 1.91, "learning_rate": 5.134083718035168e-06, "logits/chosen": -2.4749677181243896, "logits/rejected": -2.9240121841430664, "logps/chosen": -228.3942108154297, "logps/rejected": -376.84197998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0332841873168945, "rewards/margins": 10.900459289550781, "rewards/rejected": -13.933743476867676, "step": 12289 }, { "epoch": 1.91, "learning_rate": 5.13335027750402e-06, "logits/chosen": -2.9429383277893066, "logits/rejected": -2.226929187774658, "logps/chosen": -469.0171203613281, "logps/rejected": -264.7419738769531, "loss": 1.7593, "rewards/accuracies": 0.0, "rewards/chosen": -6.668148994445801, "rewards/margins": -1.4500336647033691, "rewards/rejected": -5.218115329742432, "step": 12290 }, { "epoch": 1.91, "learning_rate": 5.132616836972872e-06, "logits/chosen": -2.890226125717163, "logits/rejected": -2.785625696182251, "logps/chosen": -116.01681518554688, "logps/rejected": -194.41184997558594, "loss": 0.196, "rewards/accuracies": 1.0, "rewards/chosen": -1.8852490186691284, "rewards/margins": 4.749890327453613, "rewards/rejected": -6.635139465332031, "step": 12291 }, { "epoch": 1.91, "learning_rate": 5.131883396441724e-06, "logits/chosen": -2.7384350299835205, "logits/rejected": -2.9409759044647217, "logps/chosen": -47.8770751953125, "logps/rejected": -246.61846923828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6330945491790771, "rewards/margins": 10.04773998260498, "rewards/rejected": -11.680834770202637, "step": 12292 }, { "epoch": 1.91, "learning_rate": 5.1311499559105764e-06, "logits/chosen": -2.581923246383667, "logits/rejected": -3.000077724456787, "logps/chosen": -232.60719299316406, "logps/rejected": -358.7870788574219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.0497024059295654, "rewards/margins": 8.592124938964844, "rewards/rejected": -11.641827583312988, "step": 12293 }, { "epoch": 1.91, "learning_rate": 5.130416515379428e-06, "logits/chosen": -2.481449604034424, "logits/rejected": -2.8178772926330566, "logps/chosen": -257.1358947753906, "logps/rejected": -427.759521484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.4882583618164062, "rewards/margins": 7.570432662963867, "rewards/rejected": -11.058691024780273, "step": 12294 }, { "epoch": 1.91, "learning_rate": 5.12968307484828e-06, "logits/chosen": -3.104567289352417, "logits/rejected": -2.8297603130340576, "logps/chosen": -270.391845703125, "logps/rejected": -148.67230224609375, "loss": 1.3794, "rewards/accuracies": 0.5, "rewards/chosen": -6.507528305053711, "rewards/margins": 2.1622862815856934, "rewards/rejected": -8.669814109802246, "step": 12295 }, { "epoch": 1.91, "learning_rate": 5.128949634317132e-06, "logits/chosen": -1.6779145002365112, "logits/rejected": -2.202059745788574, "logps/chosen": -180.44142150878906, "logps/rejected": -345.35467529296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.1183929443359375, "rewards/margins": 9.682443618774414, "rewards/rejected": -10.800836563110352, "step": 12296 }, { "epoch": 1.91, "learning_rate": 5.128216193785985e-06, "logits/chosen": -3.1162734031677246, "logits/rejected": -1.8858284950256348, "logps/chosen": -1243.8516845703125, "logps/rejected": -466.83599853515625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -3.5733368396759033, "rewards/margins": 4.856300354003906, "rewards/rejected": -8.429637908935547, "step": 12297 }, { "epoch": 1.91, "learning_rate": 5.127482753254837e-06, "logits/chosen": -2.7781615257263184, "logits/rejected": -3.0817174911499023, "logps/chosen": -136.04624938964844, "logps/rejected": -346.66131591796875, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -3.5513696670532227, "rewards/margins": 4.832247734069824, "rewards/rejected": -8.383617401123047, "step": 12298 }, { "epoch": 1.91, "learning_rate": 5.1267493127236886e-06, "logits/chosen": -1.6921710968017578, "logits/rejected": -2.957723379135132, "logps/chosen": -191.93191528320312, "logps/rejected": -529.9296264648438, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.194226264953613, "rewards/margins": 8.054113388061523, "rewards/rejected": -12.248340606689453, "step": 12299 }, { "epoch": 1.91, "learning_rate": 5.126015872192541e-06, "logits/chosen": -2.244098424911499, "logits/rejected": -2.8472073078155518, "logps/chosen": -109.43434143066406, "logps/rejected": -123.95635986328125, "loss": 0.3516, "rewards/accuracies": 1.0, "rewards/chosen": -5.861724853515625, "rewards/margins": 1.0138094425201416, "rewards/rejected": -6.875534534454346, "step": 12300 }, { "epoch": 1.91, "learning_rate": 5.125282431661394e-06, "logits/chosen": -2.269519805908203, "logits/rejected": -2.9921669960021973, "logps/chosen": -213.60281372070312, "logps/rejected": -364.81463623046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3632493019104004, "rewards/margins": 8.741398811340332, "rewards/rejected": -12.10464859008789, "step": 12301 }, { "epoch": 1.91, "learning_rate": 5.124548991130246e-06, "logits/chosen": -2.8231565952301025, "logits/rejected": -2.9677042961120605, "logps/chosen": -208.30093383789062, "logps/rejected": -339.0838928222656, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -3.7391154766082764, "rewards/margins": 4.637497425079346, "rewards/rejected": -8.37661361694336, "step": 12302 }, { "epoch": 1.91, "learning_rate": 5.123815550599098e-06, "logits/chosen": -1.8453418016433716, "logits/rejected": -3.077643632888794, "logps/chosen": -90.91172790527344, "logps/rejected": -372.6402587890625, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -4.451672554016113, "rewards/margins": 3.7880489826202393, "rewards/rejected": -8.239721298217773, "step": 12303 }, { "epoch": 1.91, "learning_rate": 5.12308211006795e-06, "logits/chosen": -1.8781719207763672, "logits/rejected": -2.4834144115448, "logps/chosen": -185.63330078125, "logps/rejected": -425.8468017578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.336706638336182, "rewards/margins": 10.630308151245117, "rewards/rejected": -14.96701431274414, "step": 12304 }, { "epoch": 1.91, "learning_rate": 5.1223486695368015e-06, "logits/chosen": -1.4993572235107422, "logits/rejected": -2.9302287101745605, "logps/chosen": -158.12071228027344, "logps/rejected": -454.4820861816406, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.153909206390381, "rewards/margins": 7.139959812164307, "rewards/rejected": -9.293869018554688, "step": 12305 }, { "epoch": 1.91, "learning_rate": 5.121615229005654e-06, "logits/chosen": -3.121786594390869, "logits/rejected": -2.58482027053833, "logps/chosen": -548.1905517578125, "logps/rejected": -365.09857177734375, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -4.822747707366943, "rewards/margins": 4.157785415649414, "rewards/rejected": -8.980533599853516, "step": 12306 }, { "epoch": 1.91, "learning_rate": 5.120881788474506e-06, "logits/chosen": -1.2103227376937866, "logits/rejected": -2.3904027938842773, "logps/chosen": -238.60577392578125, "logps/rejected": -406.7030944824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4513862133026123, "rewards/margins": 10.293792724609375, "rewards/rejected": -13.745179176330566, "step": 12307 }, { "epoch": 1.91, "learning_rate": 5.120148347943358e-06, "logits/chosen": -2.964486598968506, "logits/rejected": -3.0011889934539795, "logps/chosen": -536.1126708984375, "logps/rejected": -523.6190185546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.385539531707764, "rewards/margins": 6.765040397644043, "rewards/rejected": -11.150579452514648, "step": 12308 }, { "epoch": 1.91, "learning_rate": 5.11941490741221e-06, "logits/chosen": -2.2611169815063477, "logits/rejected": -2.467674493789673, "logps/chosen": -281.40594482421875, "logps/rejected": -344.982421875, "loss": 3.0224, "rewards/accuracies": 0.5, "rewards/chosen": -8.30848503112793, "rewards/margins": 4.017757415771484, "rewards/rejected": -12.326242446899414, "step": 12309 }, { "epoch": 1.91, "learning_rate": 5.118681466881063e-06, "logits/chosen": -2.0391225814819336, "logits/rejected": -2.937394857406616, "logps/chosen": -233.45870971679688, "logps/rejected": -532.8760986328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.298964023590088, "rewards/margins": 10.096145629882812, "rewards/rejected": -13.395110130310059, "step": 12310 }, { "epoch": 1.91, "learning_rate": 5.1179480263499145e-06, "logits/chosen": -2.825382709503174, "logits/rejected": -2.980712413787842, "logps/chosen": -48.18594741821289, "logps/rejected": -166.5790557861328, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.127497911453247, "rewards/margins": 7.195822715759277, "rewards/rejected": -10.323320388793945, "step": 12311 }, { "epoch": 1.91, "learning_rate": 5.117214585818766e-06, "logits/chosen": -2.961728096008301, "logits/rejected": -3.3566031455993652, "logps/chosen": -55.688201904296875, "logps/rejected": -190.5443572998047, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -3.700695037841797, "rewards/margins": 6.816831588745117, "rewards/rejected": -10.517526626586914, "step": 12312 }, { "epoch": 1.91, "learning_rate": 5.116481145287618e-06, "logits/chosen": -1.8357720375061035, "logits/rejected": -2.8661417961120605, "logps/chosen": -142.802734375, "logps/rejected": -344.7480773925781, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.8307204246520996, "rewards/margins": 10.351709365844727, "rewards/rejected": -13.182430267333984, "step": 12313 }, { "epoch": 1.92, "learning_rate": 5.11574770475647e-06, "logits/chosen": -2.989622116088867, "logits/rejected": -2.5172667503356934, "logps/chosen": -438.68560791015625, "logps/rejected": -533.688232421875, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/chosen": -3.8481063842773438, "rewards/margins": 4.2213897705078125, "rewards/rejected": -8.069496154785156, "step": 12314 }, { "epoch": 1.92, "learning_rate": 5.115014264225323e-06, "logits/chosen": -2.026979684829712, "logits/rejected": -2.723999261856079, "logps/chosen": -241.34324645996094, "logps/rejected": -372.4619140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.383241653442383, "rewards/margins": 8.792425155639648, "rewards/rejected": -14.175666809082031, "step": 12315 }, { "epoch": 1.92, "learning_rate": 5.114280823694175e-06, "logits/chosen": -2.448683023452759, "logits/rejected": -2.7855305671691895, "logps/chosen": -99.66317749023438, "logps/rejected": -374.20513916015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.087498664855957, "rewards/margins": 6.708683490753174, "rewards/rejected": -10.796182632446289, "step": 12316 }, { "epoch": 1.92, "learning_rate": 5.1135473831630275e-06, "logits/chosen": -0.8817853927612305, "logits/rejected": -1.6310075521469116, "logps/chosen": -228.02435302734375, "logps/rejected": -468.118896484375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.730254650115967, "rewards/margins": 8.82793140411377, "rewards/rejected": -13.558185577392578, "step": 12317 }, { "epoch": 1.92, "learning_rate": 5.112813942631879e-06, "logits/chosen": -2.4037625789642334, "logits/rejected": -3.130234956741333, "logps/chosen": -102.60569763183594, "logps/rejected": -347.1108093261719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.196866989135742, "rewards/margins": 10.538689613342285, "rewards/rejected": -12.735556602478027, "step": 12318 }, { "epoch": 1.92, "learning_rate": 5.112080502100732e-06, "logits/chosen": -2.293349504470825, "logits/rejected": -3.203127145767212, "logps/chosen": -317.9154357910156, "logps/rejected": -559.2345581054688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9469246864318848, "rewards/margins": 9.452634811401367, "rewards/rejected": -12.399559020996094, "step": 12319 }, { "epoch": 1.92, "learning_rate": 5.111347061569584e-06, "logits/chosen": -2.9716062545776367, "logits/rejected": -2.8327105045318604, "logps/chosen": -150.5450897216797, "logps/rejected": -272.49212646484375, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -1.4773621559143066, "rewards/margins": 5.528321266174316, "rewards/rejected": -7.005683422088623, "step": 12320 }, { "epoch": 1.92, "learning_rate": 5.110613621038436e-06, "logits/chosen": -2.8488831520080566, "logits/rejected": -2.9720048904418945, "logps/chosen": -151.0816650390625, "logps/rejected": -316.1671142578125, "loss": 0.4816, "rewards/accuracies": 0.5, "rewards/chosen": -4.145933628082275, "rewards/margins": 2.0250020027160645, "rewards/rejected": -6.17093563079834, "step": 12321 }, { "epoch": 1.92, "learning_rate": 5.109880180507288e-06, "logits/chosen": -2.6111676692962646, "logits/rejected": -2.9462239742279053, "logps/chosen": -92.69419860839844, "logps/rejected": -146.34584045410156, "loss": 0.2758, "rewards/accuracies": 1.0, "rewards/chosen": -4.135246276855469, "rewards/margins": 3.976628303527832, "rewards/rejected": -8.1118745803833, "step": 12322 }, { "epoch": 1.92, "learning_rate": 5.1091467399761396e-06, "logits/chosen": -2.3331799507141113, "logits/rejected": -2.9640414714813232, "logps/chosen": -151.96827697753906, "logps/rejected": -299.865478515625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.497211456298828, "rewards/margins": 5.956326484680176, "rewards/rejected": -9.453537940979004, "step": 12323 }, { "epoch": 1.92, "learning_rate": 5.108413299444992e-06, "logits/chosen": -2.3581502437591553, "logits/rejected": -3.094728469848633, "logps/chosen": -187.6859130859375, "logps/rejected": -447.7806396484375, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -6.257994651794434, "rewards/margins": 6.31161642074585, "rewards/rejected": -12.569610595703125, "step": 12324 }, { "epoch": 1.92, "learning_rate": 5.107679858913844e-06, "logits/chosen": -3.0273170471191406, "logits/rejected": -3.232137680053711, "logps/chosen": -561.701904296875, "logps/rejected": -563.1486206054688, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -5.65853214263916, "rewards/margins": 5.964838027954102, "rewards/rejected": -11.623371124267578, "step": 12325 }, { "epoch": 1.92, "learning_rate": 5.106946418382696e-06, "logits/chosen": -0.9890900254249573, "logits/rejected": -2.645617961883545, "logps/chosen": -247.66229248046875, "logps/rejected": -512.8519287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8133749961853027, "rewards/margins": 11.242226600646973, "rewards/rejected": -14.055601119995117, "step": 12326 }, { "epoch": 1.92, "learning_rate": 5.106212977851548e-06, "logits/chosen": -2.060135841369629, "logits/rejected": -2.8712990283966064, "logps/chosen": -208.39910888671875, "logps/rejected": -245.89208984375, "loss": 0.3176, "rewards/accuracies": 1.0, "rewards/chosen": -5.3037943840026855, "rewards/margins": 1.529483437538147, "rewards/rejected": -6.833277702331543, "step": 12327 }, { "epoch": 1.92, "learning_rate": 5.105479537320401e-06, "logits/chosen": -1.8651543855667114, "logits/rejected": -3.0859174728393555, "logps/chosen": -104.103515625, "logps/rejected": -318.8302307128906, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -1.4235206842422485, "rewards/margins": 7.298572540283203, "rewards/rejected": -8.72209358215332, "step": 12328 }, { "epoch": 1.92, "learning_rate": 5.1047460967892525e-06, "logits/chosen": -2.8339285850524902, "logits/rejected": -3.117006301879883, "logps/chosen": -185.74664306640625, "logps/rejected": -328.20819091796875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.7318310737609863, "rewards/margins": 7.360024452209473, "rewards/rejected": -10.0918550491333, "step": 12329 }, { "epoch": 1.92, "learning_rate": 5.104012656258104e-06, "logits/chosen": -1.8783237934112549, "logits/rejected": -2.8238630294799805, "logps/chosen": -192.11968994140625, "logps/rejected": -290.8179931640625, "loss": 1.8637, "rewards/accuracies": 0.5, "rewards/chosen": -5.577689170837402, "rewards/margins": -0.012920618057250977, "rewards/rejected": -5.564768314361572, "step": 12330 }, { "epoch": 1.92, "learning_rate": 5.103279215726956e-06, "logits/chosen": -2.7614097595214844, "logits/rejected": -2.3874258995056152, "logps/chosen": -156.17446899414062, "logps/rejected": -130.5133056640625, "loss": 0.0826, "rewards/accuracies": 1.0, "rewards/chosen": -3.005873918533325, "rewards/margins": 4.391548156738281, "rewards/rejected": -7.397421836853027, "step": 12331 }, { "epoch": 1.92, "learning_rate": 5.102545775195808e-06, "logits/chosen": -2.7187721729278564, "logits/rejected": -3.124789237976074, "logps/chosen": -51.90508270263672, "logps/rejected": -300.01910400390625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -3.0577878952026367, "rewards/margins": 5.0427937507629395, "rewards/rejected": -8.100582122802734, "step": 12332 }, { "epoch": 1.92, "learning_rate": 5.101812334664661e-06, "logits/chosen": -2.2904622554779053, "logits/rejected": -3.156496286392212, "logps/chosen": -317.8517150878906, "logps/rejected": -445.64190673828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.496638774871826, "rewards/margins": 8.680404663085938, "rewards/rejected": -11.177043914794922, "step": 12333 }, { "epoch": 1.92, "learning_rate": 5.101078894133514e-06, "logits/chosen": -2.65297794342041, "logits/rejected": -2.5449881553649902, "logps/chosen": -693.566650390625, "logps/rejected": -484.41864013671875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.423130512237549, "rewards/margins": 6.249974250793457, "rewards/rejected": -9.673105239868164, "step": 12334 }, { "epoch": 1.92, "learning_rate": 5.1003454536023655e-06, "logits/chosen": -2.7452855110168457, "logits/rejected": -3.04862380027771, "logps/chosen": -62.976539611816406, "logps/rejected": -214.91819763183594, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.6658108234405518, "rewards/margins": 5.800774574279785, "rewards/rejected": -9.466585159301758, "step": 12335 }, { "epoch": 1.92, "learning_rate": 5.099612013071217e-06, "logits/chosen": -2.7128238677978516, "logits/rejected": -2.8514645099639893, "logps/chosen": -92.56298065185547, "logps/rejected": -281.5888366699219, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.692105770111084, "rewards/margins": 7.585039138793945, "rewards/rejected": -11.277145385742188, "step": 12336 }, { "epoch": 1.92, "learning_rate": 5.09887857254007e-06, "logits/chosen": -1.8543776273727417, "logits/rejected": -2.627822160720825, "logps/chosen": -105.52099609375, "logps/rejected": -356.36077880859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.964446067810059, "rewards/margins": 7.868541717529297, "rewards/rejected": -12.832988739013672, "step": 12337 }, { "epoch": 1.92, "learning_rate": 5.098145132008922e-06, "logits/chosen": -2.1936230659484863, "logits/rejected": -2.872197389602661, "logps/chosen": -102.81249237060547, "logps/rejected": -327.61627197265625, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -6.377752304077148, "rewards/margins": 6.293282508850098, "rewards/rejected": -12.671035766601562, "step": 12338 }, { "epoch": 1.92, "learning_rate": 5.097411691477774e-06, "logits/chosen": -2.079493761062622, "logits/rejected": -2.8320767879486084, "logps/chosen": -101.11311340332031, "logps/rejected": -236.9632110595703, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.677670478820801, "rewards/margins": 8.321708679199219, "rewards/rejected": -13.99937915802002, "step": 12339 }, { "epoch": 1.92, "learning_rate": 5.096678250946626e-06, "logits/chosen": -2.701977491378784, "logits/rejected": -2.8083415031433105, "logps/chosen": -267.864990234375, "logps/rejected": -356.3431396484375, "loss": 0.0849, "rewards/accuracies": 1.0, "rewards/chosen": -4.263693809509277, "rewards/margins": 5.003314971923828, "rewards/rejected": -9.267008781433105, "step": 12340 }, { "epoch": 1.92, "learning_rate": 5.0959448104154785e-06, "logits/chosen": -2.529296636581421, "logits/rejected": -2.892021417617798, "logps/chosen": -40.554229736328125, "logps/rejected": -233.5933837890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.40891695022583, "rewards/margins": 9.328033447265625, "rewards/rejected": -11.736950874328613, "step": 12341 }, { "epoch": 1.92, "learning_rate": 5.09521136988433e-06, "logits/chosen": -2.5687003135681152, "logits/rejected": -2.948896884918213, "logps/chosen": -281.7117004394531, "logps/rejected": -443.9166259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.321629524230957, "rewards/margins": 9.145476341247559, "rewards/rejected": -14.467105865478516, "step": 12342 }, { "epoch": 1.92, "learning_rate": 5.094477929353182e-06, "logits/chosen": -2.659295082092285, "logits/rejected": -2.8969316482543945, "logps/chosen": -632.2572021484375, "logps/rejected": -629.3060302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.674090623855591, "rewards/margins": 13.297755241394043, "rewards/rejected": -16.971845626831055, "step": 12343 }, { "epoch": 1.92, "learning_rate": 5.093744488822034e-06, "logits/chosen": -1.6618869304656982, "logits/rejected": -2.9096202850341797, "logps/chosen": -241.95252990722656, "logps/rejected": -205.18557739257812, "loss": 0.8426, "rewards/accuracies": 0.5, "rewards/chosen": -3.7756268978118896, "rewards/margins": 3.8672165870666504, "rewards/rejected": -7.642843246459961, "step": 12344 }, { "epoch": 1.92, "learning_rate": 5.093011048290886e-06, "logits/chosen": -2.733494520187378, "logits/rejected": -3.166710376739502, "logps/chosen": -167.85403442382812, "logps/rejected": -336.62261962890625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.254397392272949, "rewards/margins": 7.726770401000977, "rewards/rejected": -9.981167793273926, "step": 12345 }, { "epoch": 1.92, "learning_rate": 5.092277607759739e-06, "logits/chosen": -2.0722081661224365, "logits/rejected": -2.7473649978637695, "logps/chosen": -320.7729187011719, "logps/rejected": -573.3939208984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.0258636474609375, "rewards/margins": 7.158868789672852, "rewards/rejected": -13.184732437133789, "step": 12346 }, { "epoch": 1.92, "learning_rate": 5.091544167228591e-06, "logits/chosen": -2.8014402389526367, "logits/rejected": -2.080961227416992, "logps/chosen": -298.40380859375, "logps/rejected": -249.291748046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": 0.793411374092102, "rewards/margins": 10.113059043884277, "rewards/rejected": -9.319647789001465, "step": 12347 }, { "epoch": 1.92, "learning_rate": 5.0908107266974425e-06, "logits/chosen": -3.083544969558716, "logits/rejected": -2.986804723739624, "logps/chosen": -428.33184814453125, "logps/rejected": -488.46697998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9044713973999023, "rewards/margins": 11.024734497070312, "rewards/rejected": -14.929206848144531, "step": 12348 }, { "epoch": 1.92, "learning_rate": 5.090077286166294e-06, "logits/chosen": -1.677211046218872, "logits/rejected": -1.894409418106079, "logps/chosen": -260.32049560546875, "logps/rejected": -332.1365051269531, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.7326226234436035, "rewards/margins": 6.520990371704102, "rewards/rejected": -10.253613471984863, "step": 12349 }, { "epoch": 1.92, "learning_rate": 5.089343845635147e-06, "logits/chosen": -2.9533915519714355, "logits/rejected": -2.586094856262207, "logps/chosen": -191.83773803710938, "logps/rejected": -302.94207763671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9909801483154297, "rewards/margins": 9.47869873046875, "rewards/rejected": -12.46967887878418, "step": 12350 }, { "epoch": 1.92, "learning_rate": 5.088610405104e-06, "logits/chosen": -2.7087271213531494, "logits/rejected": -2.812830924987793, "logps/chosen": -306.9628601074219, "logps/rejected": -503.8875732421875, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -7.052769660949707, "rewards/margins": 8.676895141601562, "rewards/rejected": -15.729665756225586, "step": 12351 }, { "epoch": 1.92, "learning_rate": 5.087876964572852e-06, "logits/chosen": -2.371234655380249, "logits/rejected": -2.8597443103790283, "logps/chosen": -670.9515991210938, "logps/rejected": -638.4617919921875, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": -7.571011543273926, "rewards/margins": 5.504900932312012, "rewards/rejected": -13.075912475585938, "step": 12352 }, { "epoch": 1.92, "learning_rate": 5.0871435240417035e-06, "logits/chosen": -1.7141371965408325, "logits/rejected": -3.019960641860962, "logps/chosen": -98.07974243164062, "logps/rejected": -416.27081298828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.5289306640625, "rewards/margins": 10.822659492492676, "rewards/rejected": -15.351590156555176, "step": 12353 }, { "epoch": 1.92, "learning_rate": 5.086410083510555e-06, "logits/chosen": -2.412895917892456, "logits/rejected": -2.6517300605773926, "logps/chosen": -212.533935546875, "logps/rejected": -322.49163818359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.550457000732422, "rewards/margins": 8.922316551208496, "rewards/rejected": -11.472773551940918, "step": 12354 }, { "epoch": 1.92, "learning_rate": 5.085676642979408e-06, "logits/chosen": -2.519848108291626, "logits/rejected": -2.6558234691619873, "logps/chosen": -372.03521728515625, "logps/rejected": -341.4771423339844, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -4.28984260559082, "rewards/margins": 6.288945198059082, "rewards/rejected": -10.578786849975586, "step": 12355 }, { "epoch": 1.92, "learning_rate": 5.08494320244826e-06, "logits/chosen": -2.8300509452819824, "logits/rejected": -1.7724112272262573, "logps/chosen": -342.1677551269531, "logps/rejected": -319.6382751464844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5401504039764404, "rewards/margins": 10.476118087768555, "rewards/rejected": -12.016267776489258, "step": 12356 }, { "epoch": 1.92, "learning_rate": 5.084209761917112e-06, "logits/chosen": -2.7579731941223145, "logits/rejected": -2.8885247707366943, "logps/chosen": -150.87997436523438, "logps/rejected": -195.5247802734375, "loss": 0.0389, "rewards/accuracies": 1.0, "rewards/chosen": -3.414811134338379, "rewards/margins": 3.7519423961639404, "rewards/rejected": -7.166753768920898, "step": 12357 }, { "epoch": 1.92, "learning_rate": 5.083476321385964e-06, "logits/chosen": -2.3721694946289062, "logits/rejected": -2.9193365573883057, "logps/chosen": -110.92375183105469, "logps/rejected": -404.99884033203125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.7742791175842285, "rewards/margins": 7.369926929473877, "rewards/rejected": -11.144206047058105, "step": 12358 }, { "epoch": 1.92, "learning_rate": 5.0827428808548165e-06, "logits/chosen": -2.834549903869629, "logits/rejected": -2.495471954345703, "logps/chosen": -419.79229736328125, "logps/rejected": -508.2142639160156, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.874454021453857, "rewards/margins": 9.758050918579102, "rewards/rejected": -15.632505416870117, "step": 12359 }, { "epoch": 1.92, "learning_rate": 5.082009440323668e-06, "logits/chosen": -2.350142002105713, "logits/rejected": -3.153373956680298, "logps/chosen": -148.58309936523438, "logps/rejected": -365.0035400390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.521609306335449, "rewards/margins": 7.2541680335998535, "rewards/rejected": -10.775777816772461, "step": 12360 }, { "epoch": 1.92, "learning_rate": 5.08127599979252e-06, "logits/chosen": -1.969328761100769, "logits/rejected": -2.6705760955810547, "logps/chosen": -107.99537658691406, "logps/rejected": -268.16064453125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.3665883541107178, "rewards/margins": 8.66232967376709, "rewards/rejected": -12.02891731262207, "step": 12361 }, { "epoch": 1.92, "learning_rate": 5.080542559261372e-06, "logits/chosen": -2.8166751861572266, "logits/rejected": -2.627725601196289, "logps/chosen": -433.3946228027344, "logps/rejected": -603.8532104492188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.841427803039551, "rewards/margins": 13.177021026611328, "rewards/rejected": -18.018447875976562, "step": 12362 }, { "epoch": 1.92, "learning_rate": 5.079809118730224e-06, "logits/chosen": -2.250717878341675, "logits/rejected": -2.980459451675415, "logps/chosen": -421.2098388671875, "logps/rejected": -538.8638305664062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.821279048919678, "rewards/margins": 10.918649673461914, "rewards/rejected": -15.73992919921875, "step": 12363 }, { "epoch": 1.92, "learning_rate": 5.079075678199077e-06, "logits/chosen": -2.552448034286499, "logits/rejected": -2.941037893295288, "logps/chosen": -127.42436218261719, "logps/rejected": -183.66159057617188, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -4.4422101974487305, "rewards/margins": 6.701208114624023, "rewards/rejected": -11.143418312072754, "step": 12364 }, { "epoch": 1.92, "learning_rate": 5.078342237667929e-06, "logits/chosen": -2.732192039489746, "logits/rejected": -2.773386240005493, "logps/chosen": -92.23271942138672, "logps/rejected": -98.4460220336914, "loss": 1.3552, "rewards/accuracies": 0.5, "rewards/chosen": -5.418216228485107, "rewards/margins": 1.3190052509307861, "rewards/rejected": -6.737221717834473, "step": 12365 }, { "epoch": 1.92, "learning_rate": 5.0776087971367805e-06, "logits/chosen": -2.575314998626709, "logits/rejected": -2.7877140045166016, "logps/chosen": -190.97970581054688, "logps/rejected": -341.17633056640625, "loss": 0.4181, "rewards/accuracies": 0.5, "rewards/chosen": -8.322002410888672, "rewards/margins": 4.2707929611206055, "rewards/rejected": -12.592796325683594, "step": 12366 }, { "epoch": 1.92, "learning_rate": 5.076875356605633e-06, "logits/chosen": -2.6736247539520264, "logits/rejected": -3.0680768489837646, "logps/chosen": -206.0343780517578, "logps/rejected": -284.29180908203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.0658340454101562, "rewards/margins": 8.158927917480469, "rewards/rejected": -11.224761962890625, "step": 12367 }, { "epoch": 1.92, "learning_rate": 5.076141916074486e-06, "logits/chosen": -2.7575314044952393, "logits/rejected": -3.203037977218628, "logps/chosen": -134.36859130859375, "logps/rejected": -506.264892578125, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -6.563941478729248, "rewards/margins": 6.381543159484863, "rewards/rejected": -12.945484161376953, "step": 12368 }, { "epoch": 1.92, "learning_rate": 5.075408475543338e-06, "logits/chosen": -2.8472719192504883, "logits/rejected": -3.105858325958252, "logps/chosen": -648.0970458984375, "logps/rejected": -665.8302001953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.340524673461914, "rewards/margins": 7.993130683898926, "rewards/rejected": -12.333654403686523, "step": 12369 }, { "epoch": 1.92, "learning_rate": 5.07467503501219e-06, "logits/chosen": -2.7608067989349365, "logits/rejected": -2.9406578540802, "logps/chosen": -216.21163940429688, "logps/rejected": -400.1456298828125, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": -3.381760597229004, "rewards/margins": 9.512991905212402, "rewards/rejected": -12.894752502441406, "step": 12370 }, { "epoch": 1.92, "learning_rate": 5.073941594481042e-06, "logits/chosen": -2.6782870292663574, "logits/rejected": -3.023263692855835, "logps/chosen": -283.5603332519531, "logps/rejected": -441.748291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.993631362915039, "rewards/margins": 10.706534385681152, "rewards/rejected": -15.700165748596191, "step": 12371 }, { "epoch": 1.92, "learning_rate": 5.0732081539498935e-06, "logits/chosen": -2.159085750579834, "logits/rejected": -2.864840030670166, "logps/chosen": -251.50198364257812, "logps/rejected": -538.489990234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5885181427001953, "rewards/margins": 10.084700584411621, "rewards/rejected": -12.6732177734375, "step": 12372 }, { "epoch": 1.92, "learning_rate": 5.072474713418746e-06, "logits/chosen": -2.44707989692688, "logits/rejected": -2.877852439880371, "logps/chosen": -98.84129333496094, "logps/rejected": -247.70242309570312, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.949541091918945, "rewards/margins": 7.160434722900391, "rewards/rejected": -13.109975814819336, "step": 12373 }, { "epoch": 1.92, "learning_rate": 5.071741272887598e-06, "logits/chosen": -2.653353214263916, "logits/rejected": -3.0548925399780273, "logps/chosen": -169.89096069335938, "logps/rejected": -373.84381103515625, "loss": 0.23, "rewards/accuracies": 1.0, "rewards/chosen": -4.648538112640381, "rewards/margins": 3.933711290359497, "rewards/rejected": -8.582249641418457, "step": 12374 }, { "epoch": 1.92, "learning_rate": 5.07100783235645e-06, "logits/chosen": -2.357686996459961, "logits/rejected": -2.993359088897705, "logps/chosen": -251.1404266357422, "logps/rejected": -383.4552307128906, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.259730815887451, "rewards/margins": 5.194701671600342, "rewards/rejected": -8.454432487487793, "step": 12375 }, { "epoch": 1.92, "learning_rate": 5.070274391825302e-06, "logits/chosen": -2.5261592864990234, "logits/rejected": -1.304864764213562, "logps/chosen": -310.42596435546875, "logps/rejected": -256.81036376953125, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -4.579698085784912, "rewards/margins": 4.335663795471191, "rewards/rejected": -8.915361404418945, "step": 12376 }, { "epoch": 1.92, "learning_rate": 5.0695409512941546e-06, "logits/chosen": -2.7621302604675293, "logits/rejected": -2.9724419116973877, "logps/chosen": -177.88687133789062, "logps/rejected": -225.70263671875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -5.462176322937012, "rewards/margins": 4.462486743927002, "rewards/rejected": -9.924663543701172, "step": 12377 }, { "epoch": 1.93, "learning_rate": 5.0688075107630064e-06, "logits/chosen": -2.933119058609009, "logits/rejected": -2.3021254539489746, "logps/chosen": -336.3983459472656, "logps/rejected": -279.5414123535156, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.7898712158203125, "rewards/margins": 7.4711713790893555, "rewards/rejected": -10.261042594909668, "step": 12378 }, { "epoch": 1.93, "learning_rate": 5.068074070231858e-06, "logits/chosen": -2.210759162902832, "logits/rejected": -3.1878018379211426, "logps/chosen": -199.79270935058594, "logps/rejected": -552.3675537109375, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -5.616274833679199, "rewards/margins": 7.862105369567871, "rewards/rejected": -13.47838020324707, "step": 12379 }, { "epoch": 1.93, "learning_rate": 5.06734062970071e-06, "logits/chosen": -2.403815507888794, "logits/rejected": -3.0287654399871826, "logps/chosen": -65.71554565429688, "logps/rejected": -253.73507690429688, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.84566068649292, "rewards/margins": 7.403636455535889, "rewards/rejected": -11.249298095703125, "step": 12380 }, { "epoch": 1.93, "learning_rate": 5.066607189169562e-06, "logits/chosen": -2.559948205947876, "logits/rejected": -2.728973150253296, "logps/chosen": -185.43182373046875, "logps/rejected": -251.83448791503906, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.7528395652770996, "rewards/margins": 6.086184501647949, "rewards/rejected": -9.83902359008789, "step": 12381 }, { "epoch": 1.93, "learning_rate": 5.065873748638415e-06, "logits/chosen": -2.669499397277832, "logits/rejected": -1.6719090938568115, "logps/chosen": -479.5218505859375, "logps/rejected": -408.94482421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.717653751373291, "rewards/margins": 10.407634735107422, "rewards/rejected": -13.125288009643555, "step": 12382 }, { "epoch": 1.93, "learning_rate": 5.065140308107267e-06, "logits/chosen": -2.142209053039551, "logits/rejected": -2.7778804302215576, "logps/chosen": -182.28086853027344, "logps/rejected": -376.5836486816406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.720974922180176, "rewards/margins": 9.179914474487305, "rewards/rejected": -11.900888442993164, "step": 12383 }, { "epoch": 1.93, "learning_rate": 5.064406867576119e-06, "logits/chosen": -2.9160335063934326, "logits/rejected": -2.6708431243896484, "logps/chosen": -262.00115966796875, "logps/rejected": -241.38046264648438, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -4.226611137390137, "rewards/margins": 6.966635227203369, "rewards/rejected": -11.193246841430664, "step": 12384 }, { "epoch": 1.93, "learning_rate": 5.063673427044971e-06, "logits/chosen": -2.4938559532165527, "logits/rejected": -2.7963922023773193, "logps/chosen": -389.40185546875, "logps/rejected": -509.3797607421875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.810969591140747, "rewards/margins": 8.637031555175781, "rewards/rejected": -11.448001861572266, "step": 12385 }, { "epoch": 1.93, "learning_rate": 5.062939986513824e-06, "logits/chosen": -2.235025405883789, "logits/rejected": -2.894550323486328, "logps/chosen": -104.32437133789062, "logps/rejected": -365.43310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.217625617980957, "rewards/margins": 11.761308670043945, "rewards/rejected": -14.978935241699219, "step": 12386 }, { "epoch": 1.93, "learning_rate": 5.062206545982676e-06, "logits/chosen": -1.9291001558303833, "logits/rejected": -2.6116771697998047, "logps/chosen": -228.512939453125, "logps/rejected": -392.77178955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.381628036499023, "rewards/margins": 10.329872131347656, "rewards/rejected": -14.71150016784668, "step": 12387 }, { "epoch": 1.93, "learning_rate": 5.061473105451528e-06, "logits/chosen": -1.5894055366516113, "logits/rejected": -2.9220311641693115, "logps/chosen": -136.0653076171875, "logps/rejected": -327.34881591796875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": -6.594593048095703, "rewards/margins": 6.434525489807129, "rewards/rejected": -13.029119491577148, "step": 12388 }, { "epoch": 1.93, "learning_rate": 5.06073966492038e-06, "logits/chosen": -2.963251829147339, "logits/rejected": -2.9161972999572754, "logps/chosen": -119.31622314453125, "logps/rejected": -327.0910339355469, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -1.773873209953308, "rewards/margins": 5.978405475616455, "rewards/rejected": -7.7522783279418945, "step": 12389 }, { "epoch": 1.93, "learning_rate": 5.060006224389232e-06, "logits/chosen": -2.809366226196289, "logits/rejected": -2.8733251094818115, "logps/chosen": -374.3866271972656, "logps/rejected": -480.768798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9499473571777344, "rewards/margins": 11.061966896057129, "rewards/rejected": -15.011914253234863, "step": 12390 }, { "epoch": 1.93, "learning_rate": 5.059272783858084e-06, "logits/chosen": -2.8415987491607666, "logits/rejected": -1.6559070348739624, "logps/chosen": -363.68231201171875, "logps/rejected": -297.0633544921875, "loss": 0.2396, "rewards/accuracies": 1.0, "rewards/chosen": -4.536462306976318, "rewards/margins": 3.692305326461792, "rewards/rejected": -8.228767395019531, "step": 12391 }, { "epoch": 1.93, "learning_rate": 5.058539343326936e-06, "logits/chosen": -2.5764355659484863, "logits/rejected": -2.9941506385803223, "logps/chosen": -121.58820343017578, "logps/rejected": -360.40704345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.288968086242676, "rewards/margins": 10.470911026000977, "rewards/rejected": -14.759879112243652, "step": 12392 }, { "epoch": 1.93, "learning_rate": 5.057805902795788e-06, "logits/chosen": -2.4361793994903564, "logits/rejected": -2.9901280403137207, "logps/chosen": -177.26730346679688, "logps/rejected": -357.5977783203125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -5.8351874351501465, "rewards/margins": 5.008432388305664, "rewards/rejected": -10.843620300292969, "step": 12393 }, { "epoch": 1.93, "learning_rate": 5.05707246226464e-06, "logits/chosen": -2.0965840816497803, "logits/rejected": -2.9287447929382324, "logps/chosen": -118.07864379882812, "logps/rejected": -351.8074951171875, "loss": 2.8746, "rewards/accuracies": 0.5, "rewards/chosen": -6.750053882598877, "rewards/margins": 0.8848903179168701, "rewards/rejected": -7.634944438934326, "step": 12394 }, { "epoch": 1.93, "learning_rate": 5.056339021733493e-06, "logits/chosen": -2.8373217582702637, "logits/rejected": -2.435964822769165, "logps/chosen": -713.52294921875, "logps/rejected": -536.7096557617188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.610995292663574, "rewards/margins": 8.773776054382324, "rewards/rejected": -14.384771347045898, "step": 12395 }, { "epoch": 1.93, "learning_rate": 5.0556055812023445e-06, "logits/chosen": -2.721879005432129, "logits/rejected": -2.082956552505493, "logps/chosen": -405.6310729980469, "logps/rejected": -389.53302001953125, "loss": 0.6499, "rewards/accuracies": 0.5, "rewards/chosen": -5.232050895690918, "rewards/margins": 7.458610534667969, "rewards/rejected": -12.690661430358887, "step": 12396 }, { "epoch": 1.93, "learning_rate": 5.054872140671196e-06, "logits/chosen": -2.6459944248199463, "logits/rejected": -1.9027166366577148, "logps/chosen": -361.15826416015625, "logps/rejected": -236.544921875, "loss": 1.2069, "rewards/accuracies": 0.5, "rewards/chosen": -8.50643253326416, "rewards/margins": -0.7436003684997559, "rewards/rejected": -7.762831687927246, "step": 12397 }, { "epoch": 1.93, "learning_rate": 5.054138700140048e-06, "logits/chosen": -2.7965080738067627, "logits/rejected": -1.7244226932525635, "logps/chosen": -312.577392578125, "logps/rejected": -362.1704406738281, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.070222854614258, "rewards/margins": 6.636650085449219, "rewards/rejected": -11.706872940063477, "step": 12398 }, { "epoch": 1.93, "learning_rate": 5.053405259608901e-06, "logits/chosen": -1.843502163887024, "logits/rejected": -2.890535354614258, "logps/chosen": -436.92462158203125, "logps/rejected": -428.38043212890625, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -5.773801326751709, "rewards/margins": 5.630501747131348, "rewards/rejected": -11.404302597045898, "step": 12399 }, { "epoch": 1.93, "learning_rate": 5.052671819077753e-06, "logits/chosen": -2.540942668914795, "logits/rejected": -3.0969250202178955, "logps/chosen": -265.1878662109375, "logps/rejected": -587.7283325195312, "loss": 0.4829, "rewards/accuracies": 0.5, "rewards/chosen": -9.630122184753418, "rewards/margins": 0.8625171184539795, "rewards/rejected": -10.492639541625977, "step": 12400 }, { "epoch": 1.93, "learning_rate": 5.0519383785466056e-06, "logits/chosen": -2.224011182785034, "logits/rejected": -2.97717022895813, "logps/chosen": -200.59515380859375, "logps/rejected": -408.7147216796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.170219421386719, "rewards/margins": 10.018829345703125, "rewards/rejected": -14.189048767089844, "step": 12401 }, { "epoch": 1.93, "learning_rate": 5.0512049380154574e-06, "logits/chosen": -2.9284400939941406, "logits/rejected": -3.2524726390838623, "logps/chosen": -189.85023498535156, "logps/rejected": -261.794189453125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -4.695366859436035, "rewards/margins": 5.770204067230225, "rewards/rejected": -10.465570449829102, "step": 12402 }, { "epoch": 1.93, "learning_rate": 5.050471497484309e-06, "logits/chosen": -2.1945550441741943, "logits/rejected": -3.028674840927124, "logps/chosen": -148.53414916992188, "logps/rejected": -390.8624572753906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6437809467315674, "rewards/margins": 8.189689636230469, "rewards/rejected": -11.833470344543457, "step": 12403 }, { "epoch": 1.93, "learning_rate": 5.049738056953162e-06, "logits/chosen": -2.516005039215088, "logits/rejected": -3.1453959941864014, "logps/chosen": -516.9841918945312, "logps/rejected": -645.4267578125, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -5.258735656738281, "rewards/margins": 7.0319623947143555, "rewards/rejected": -12.290699005126953, "step": 12404 }, { "epoch": 1.93, "learning_rate": 5.049004616422014e-06, "logits/chosen": -1.8251053094863892, "logits/rejected": -2.770418167114258, "logps/chosen": -180.55596923828125, "logps/rejected": -407.0886535644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.996675968170166, "rewards/margins": 12.858002662658691, "rewards/rejected": -15.854679107666016, "step": 12405 }, { "epoch": 1.93, "learning_rate": 5.048271175890866e-06, "logits/chosen": -2.196300983428955, "logits/rejected": -2.992936134338379, "logps/chosen": -91.26553344726562, "logps/rejected": -332.883544921875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -3.2566874027252197, "rewards/margins": 10.154699325561523, "rewards/rejected": -13.411386489868164, "step": 12406 }, { "epoch": 1.93, "learning_rate": 5.047537735359718e-06, "logits/chosen": -2.1914429664611816, "logits/rejected": -2.7738964557647705, "logps/chosen": -126.94278717041016, "logps/rejected": -351.28466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.268004894256592, "rewards/margins": 12.32672119140625, "rewards/rejected": -17.5947265625, "step": 12407 }, { "epoch": 1.93, "learning_rate": 5.04680429482857e-06, "logits/chosen": -2.6732699871063232, "logits/rejected": -2.745393753051758, "logps/chosen": -208.60824584960938, "logps/rejected": -216.65902709960938, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -4.920768737792969, "rewards/margins": 5.774491310119629, "rewards/rejected": -10.695260047912598, "step": 12408 }, { "epoch": 1.93, "learning_rate": 5.046070854297422e-06, "logits/chosen": -2.8229894638061523, "logits/rejected": -2.757197856903076, "logps/chosen": -584.40234375, "logps/rejected": -358.46990966796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.5515670776367188, "rewards/margins": 8.080606460571289, "rewards/rejected": -11.632173538208008, "step": 12409 }, { "epoch": 1.93, "learning_rate": 5.045337413766274e-06, "logits/chosen": -2.620208501815796, "logits/rejected": -2.859971046447754, "logps/chosen": -54.83274841308594, "logps/rejected": -187.6151123046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.070591926574707, "rewards/margins": 8.004528999328613, "rewards/rejected": -11.07512092590332, "step": 12410 }, { "epoch": 1.93, "learning_rate": 5.044603973235126e-06, "logits/chosen": -1.8439737558364868, "logits/rejected": -2.8029019832611084, "logps/chosen": -128.85260009765625, "logps/rejected": -438.623779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.455473899841309, "rewards/margins": 10.250284194946289, "rewards/rejected": -14.705758094787598, "step": 12411 }, { "epoch": 1.93, "learning_rate": 5.043870532703978e-06, "logits/chosen": -1.8354285955429077, "logits/rejected": -2.840658664703369, "logps/chosen": -116.75981903076172, "logps/rejected": -354.9728698730469, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.646970748901367, "rewards/margins": 10.477449417114258, "rewards/rejected": -13.124420166015625, "step": 12412 }, { "epoch": 1.93, "learning_rate": 5.043137092172831e-06, "logits/chosen": -2.5820631980895996, "logits/rejected": -2.969252109527588, "logps/chosen": -125.16316986083984, "logps/rejected": -368.2916259765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.47660493850708, "rewards/margins": 8.604937553405762, "rewards/rejected": -14.081542015075684, "step": 12413 }, { "epoch": 1.93, "learning_rate": 5.0424036516416825e-06, "logits/chosen": -2.7430315017700195, "logits/rejected": -0.7201789617538452, "logps/chosen": -466.0291442871094, "logps/rejected": -132.38284301757812, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/chosen": -1.1431381702423096, "rewards/margins": 7.363555908203125, "rewards/rejected": -8.506693840026855, "step": 12414 }, { "epoch": 1.93, "learning_rate": 5.041670211110534e-06, "logits/chosen": -2.8034090995788574, "logits/rejected": -2.8141677379608154, "logps/chosen": -144.13665771484375, "logps/rejected": -217.5676727294922, "loss": 0.0839, "rewards/accuracies": 1.0, "rewards/chosen": -3.4756202697753906, "rewards/margins": 4.9336371421813965, "rewards/rejected": -8.409257888793945, "step": 12415 }, { "epoch": 1.93, "learning_rate": 5.040936770579386e-06, "logits/chosen": -1.3050833940505981, "logits/rejected": -2.324158191680908, "logps/chosen": -164.55426025390625, "logps/rejected": -502.85302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5483756065368652, "rewards/margins": 13.928893089294434, "rewards/rejected": -17.47726821899414, "step": 12416 }, { "epoch": 1.93, "learning_rate": 5.040203330048239e-06, "logits/chosen": -2.726330280303955, "logits/rejected": -2.489366292953491, "logps/chosen": -161.7257080078125, "logps/rejected": -259.8393859863281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.237531900405884, "rewards/margins": 8.191476821899414, "rewards/rejected": -10.429008483886719, "step": 12417 }, { "epoch": 1.93, "learning_rate": 5.039469889517092e-06, "logits/chosen": -3.0483410358428955, "logits/rejected": -2.570266008377075, "logps/chosen": -223.9447021484375, "logps/rejected": -192.007080078125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.1401405334472656, "rewards/margins": 5.5507731437683105, "rewards/rejected": -7.690913200378418, "step": 12418 }, { "epoch": 1.93, "learning_rate": 5.038736448985944e-06, "logits/chosen": -2.892347812652588, "logits/rejected": -2.5689284801483154, "logps/chosen": -598.4788818359375, "logps/rejected": -533.1851806640625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -7.594931602478027, "rewards/margins": 5.353604316711426, "rewards/rejected": -12.948535919189453, "step": 12419 }, { "epoch": 1.93, "learning_rate": 5.0380030084547955e-06, "logits/chosen": -2.921327590942383, "logits/rejected": -3.076178550720215, "logps/chosen": -215.17300415039062, "logps/rejected": -278.7408752441406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.690964460372925, "rewards/margins": 7.660655498504639, "rewards/rejected": -10.351619720458984, "step": 12420 }, { "epoch": 1.93, "learning_rate": 5.037269567923647e-06, "logits/chosen": -3.0332517623901367, "logits/rejected": -3.372978925704956, "logps/chosen": -162.87271118164062, "logps/rejected": -342.2606201171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.665417194366455, "rewards/margins": 9.329290390014648, "rewards/rejected": -12.994708061218262, "step": 12421 }, { "epoch": 1.93, "learning_rate": 5.0365361273925e-06, "logits/chosen": -2.7689340114593506, "logits/rejected": -2.7944819927215576, "logps/chosen": -177.99923706054688, "logps/rejected": -347.7271728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9931139945983887, "rewards/margins": 9.740148544311523, "rewards/rejected": -11.73326301574707, "step": 12422 }, { "epoch": 1.93, "learning_rate": 5.035802686861352e-06, "logits/chosen": -2.771592378616333, "logits/rejected": -2.3539516925811768, "logps/chosen": -364.546142578125, "logps/rejected": -375.506591796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.783180236816406, "rewards/margins": 9.208646774291992, "rewards/rejected": -14.991827011108398, "step": 12423 }, { "epoch": 1.93, "learning_rate": 5.035069246330204e-06, "logits/chosen": -2.007761240005493, "logits/rejected": -3.158545970916748, "logps/chosen": -161.33404541015625, "logps/rejected": -404.9971923828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.6377556324005127, "rewards/margins": 9.201745986938477, "rewards/rejected": -12.839502334594727, "step": 12424 }, { "epoch": 1.93, "learning_rate": 5.034335805799056e-06, "logits/chosen": -2.986541986465454, "logits/rejected": -1.6730769872665405, "logps/chosen": -438.1142272949219, "logps/rejected": -317.02459716796875, "loss": 0.2125, "rewards/accuracies": 1.0, "rewards/chosen": -5.9272308349609375, "rewards/margins": 3.2266666889190674, "rewards/rejected": -9.153897285461426, "step": 12425 }, { "epoch": 1.93, "learning_rate": 5.0336023652679084e-06, "logits/chosen": -2.850937604904175, "logits/rejected": -2.988050699234009, "logps/chosen": -66.06983947753906, "logps/rejected": -158.78636169433594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.607893466949463, "rewards/margins": 7.03458309173584, "rewards/rejected": -10.642477035522461, "step": 12426 }, { "epoch": 1.93, "learning_rate": 5.03286892473676e-06, "logits/chosen": -2.989182472229004, "logits/rejected": -2.5679566860198975, "logps/chosen": -183.8353271484375, "logps/rejected": -158.599609375, "loss": 0.4223, "rewards/accuracies": 0.5, "rewards/chosen": -7.228271007537842, "rewards/margins": 1.6087238788604736, "rewards/rejected": -8.836995124816895, "step": 12427 }, { "epoch": 1.93, "learning_rate": 5.032135484205612e-06, "logits/chosen": -1.3239407539367676, "logits/rejected": -2.7483627796173096, "logps/chosen": -175.2608642578125, "logps/rejected": -452.1686096191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9012296199798584, "rewards/margins": 10.308351516723633, "rewards/rejected": -14.20958137512207, "step": 12428 }, { "epoch": 1.93, "learning_rate": 5.031402043674464e-06, "logits/chosen": -1.239974856376648, "logits/rejected": -2.800231456756592, "logps/chosen": -250.1614990234375, "logps/rejected": -583.182861328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.650151252746582, "rewards/margins": 9.00369644165039, "rewards/rejected": -11.653846740722656, "step": 12429 }, { "epoch": 1.93, "learning_rate": 5.030668603143316e-06, "logits/chosen": -2.7225821018218994, "logits/rejected": -2.808443784713745, "logps/chosen": -206.98902893066406, "logps/rejected": -226.9456329345703, "loss": 2.78, "rewards/accuracies": 0.5, "rewards/chosen": -6.962557792663574, "rewards/margins": 3.0316662788391113, "rewards/rejected": -9.994224548339844, "step": 12430 }, { "epoch": 1.93, "learning_rate": 5.029935162612169e-06, "logits/chosen": -1.750000238418579, "logits/rejected": -2.933061361312866, "logps/chosen": -210.67800903320312, "logps/rejected": -354.1192626953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.948699474334717, "rewards/margins": 7.480954647064209, "rewards/rejected": -13.429654121398926, "step": 12431 }, { "epoch": 1.93, "learning_rate": 5.0292017220810206e-06, "logits/chosen": -2.8069679737091064, "logits/rejected": -2.3767735958099365, "logps/chosen": -282.3395690917969, "logps/rejected": -347.58837890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.880285263061523, "rewards/margins": 9.731660842895508, "rewards/rejected": -14.611946105957031, "step": 12432 }, { "epoch": 1.93, "learning_rate": 5.0284682815498724e-06, "logits/chosen": -2.7437477111816406, "logits/rejected": -2.9007434844970703, "logps/chosen": -312.59002685546875, "logps/rejected": -426.84716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.098108291625977, "rewards/margins": 9.512296676635742, "rewards/rejected": -13.610404968261719, "step": 12433 }, { "epoch": 1.93, "learning_rate": 5.027734841018725e-06, "logits/chosen": -2.7593882083892822, "logits/rejected": -1.8850263357162476, "logps/chosen": -400.5459899902344, "logps/rejected": -411.264892578125, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/chosen": -2.8614494800567627, "rewards/margins": 10.528450012207031, "rewards/rejected": -13.389899253845215, "step": 12434 }, { "epoch": 1.93, "learning_rate": 5.027001400487578e-06, "logits/chosen": -2.814058303833008, "logits/rejected": -1.5686006546020508, "logps/chosen": -267.2625732421875, "logps/rejected": -218.5019989013672, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -5.072792053222656, "rewards/margins": 7.104579925537109, "rewards/rejected": -12.177371978759766, "step": 12435 }, { "epoch": 1.93, "learning_rate": 5.02626795995643e-06, "logits/chosen": -2.796034574508667, "logits/rejected": -2.069446325302124, "logps/chosen": -292.80841064453125, "logps/rejected": -452.38616943359375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -4.425168037414551, "rewards/margins": 8.544783592224121, "rewards/rejected": -12.969951629638672, "step": 12436 }, { "epoch": 1.93, "learning_rate": 5.025534519425282e-06, "logits/chosen": -2.6175148487091064, "logits/rejected": -2.619596004486084, "logps/chosen": -515.30322265625, "logps/rejected": -534.092041015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.912249803543091, "rewards/margins": 8.271627426147461, "rewards/rejected": -12.183877944946289, "step": 12437 }, { "epoch": 1.93, "learning_rate": 5.0248010788941335e-06, "logits/chosen": -2.5961267948150635, "logits/rejected": -2.9590871334075928, "logps/chosen": -657.9462280273438, "logps/rejected": -637.1439208984375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.918370246887207, "rewards/margins": 6.010087013244629, "rewards/rejected": -11.928457260131836, "step": 12438 }, { "epoch": 1.93, "learning_rate": 5.024067638362986e-06, "logits/chosen": -1.7338602542877197, "logits/rejected": -2.415151357650757, "logps/chosen": -108.50149536132812, "logps/rejected": -356.2811279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.941126823425293, "rewards/margins": 13.03689956665039, "rewards/rejected": -16.978025436401367, "step": 12439 }, { "epoch": 1.93, "learning_rate": 5.023334197831838e-06, "logits/chosen": -2.1980817317962646, "logits/rejected": -3.0345191955566406, "logps/chosen": -38.60130310058594, "logps/rejected": -311.705078125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.2941598892211914, "rewards/margins": 9.76891803741455, "rewards/rejected": -12.063077926635742, "step": 12440 }, { "epoch": 1.93, "learning_rate": 5.02260075730069e-06, "logits/chosen": -2.744375467300415, "logits/rejected": -2.550243854522705, "logps/chosen": -306.8800048828125, "logps/rejected": -425.34173583984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.6587419509887695, "rewards/margins": 7.226625442504883, "rewards/rejected": -14.885367393493652, "step": 12441 }, { "epoch": 1.93, "learning_rate": 5.021867316769542e-06, "logits/chosen": -1.8770983219146729, "logits/rejected": -2.7921547889709473, "logps/chosen": -189.47007751464844, "logps/rejected": -473.3058166503906, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.6714701652526855, "rewards/margins": 6.355014801025391, "rewards/rejected": -11.026485443115234, "step": 12442 }, { "epoch": 1.94, "learning_rate": 5.021133876238394e-06, "logits/chosen": -2.5098443031311035, "logits/rejected": -2.8847081661224365, "logps/chosen": -172.38388061523438, "logps/rejected": -267.394775390625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -5.337658882141113, "rewards/margins": 5.967536449432373, "rewards/rejected": -11.305194854736328, "step": 12443 }, { "epoch": 1.94, "learning_rate": 5.0204004357072465e-06, "logits/chosen": -2.5383734703063965, "logits/rejected": -2.9077467918395996, "logps/chosen": -183.2095489501953, "logps/rejected": -409.836669921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.871223449707031, "rewards/margins": 9.104087829589844, "rewards/rejected": -13.975311279296875, "step": 12444 }, { "epoch": 1.94, "learning_rate": 5.019666995176098e-06, "logits/chosen": -1.7737728357315063, "logits/rejected": -2.726163148880005, "logps/chosen": -128.90744018554688, "logps/rejected": -472.1945495605469, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -4.0707926750183105, "rewards/margins": 8.717401504516602, "rewards/rejected": -12.78819465637207, "step": 12445 }, { "epoch": 1.94, "learning_rate": 5.01893355464495e-06, "logits/chosen": -2.410903215408325, "logits/rejected": -3.2146174907684326, "logps/chosen": -545.2742919921875, "logps/rejected": -605.7587890625, "loss": 0.0704, "rewards/accuracies": 1.0, "rewards/chosen": -6.635321617126465, "rewards/margins": 4.848119735717773, "rewards/rejected": -11.483441352844238, "step": 12446 }, { "epoch": 1.94, "learning_rate": 5.018200114113802e-06, "logits/chosen": -2.3230581283569336, "logits/rejected": -3.2175543308258057, "logps/chosen": -77.89826965332031, "logps/rejected": -530.4502563476562, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.624913692474365, "rewards/margins": 9.204599380493164, "rewards/rejected": -14.829513549804688, "step": 12447 }, { "epoch": 1.94, "learning_rate": 5.017466673582655e-06, "logits/chosen": -2.1744210720062256, "logits/rejected": -2.0541229248046875, "logps/chosen": -357.38995361328125, "logps/rejected": -441.4721984863281, "loss": 0.3102, "rewards/accuracies": 1.0, "rewards/chosen": -4.270422458648682, "rewards/margins": 8.0626220703125, "rewards/rejected": -12.333044052124023, "step": 12448 }, { "epoch": 1.94, "learning_rate": 5.016733233051507e-06, "logits/chosen": -1.9391121864318848, "logits/rejected": -2.659830331802368, "logps/chosen": -201.8070068359375, "logps/rejected": -264.1839294433594, "loss": 0.0329, "rewards/accuracies": 1.0, "rewards/chosen": -4.769774913787842, "rewards/margins": 5.044029235839844, "rewards/rejected": -9.813804626464844, "step": 12449 }, { "epoch": 1.94, "learning_rate": 5.015999792520359e-06, "logits/chosen": -2.4560468196868896, "logits/rejected": -3.008911609649658, "logps/chosen": -115.81721496582031, "logps/rejected": -273.1341857910156, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.6475024223327637, "rewards/margins": 6.627647399902344, "rewards/rejected": -10.275150299072266, "step": 12450 }, { "epoch": 1.94, "learning_rate": 5.015266351989211e-06, "logits/chosen": -2.376485824584961, "logits/rejected": -2.6958110332489014, "logps/chosen": -108.05585479736328, "logps/rejected": -225.83258056640625, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -4.564072608947754, "rewards/margins": 4.518145561218262, "rewards/rejected": -9.082218170166016, "step": 12451 }, { "epoch": 1.94, "learning_rate": 5.014532911458063e-06, "logits/chosen": -2.8777859210968018, "logits/rejected": -2.387866973876953, "logps/chosen": -177.83169555664062, "logps/rejected": -436.216796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.7470741271972656, "rewards/margins": 8.435328483581543, "rewards/rejected": -12.182402610778809, "step": 12452 }, { "epoch": 1.94, "learning_rate": 5.013799470926916e-06, "logits/chosen": -2.8125576972961426, "logits/rejected": -3.180680274963379, "logps/chosen": -527.2716064453125, "logps/rejected": -411.82867431640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.6989240646362305, "rewards/margins": 7.651323318481445, "rewards/rejected": -14.35024642944336, "step": 12453 }, { "epoch": 1.94, "learning_rate": 5.013066030395768e-06, "logits/chosen": -2.178396701812744, "logits/rejected": -2.7699615955352783, "logps/chosen": -258.6802673339844, "logps/rejected": -291.6920166015625, "loss": 0.1085, "rewards/accuracies": 1.0, "rewards/chosen": -5.044681072235107, "rewards/margins": 4.203676223754883, "rewards/rejected": -9.248357772827148, "step": 12454 }, { "epoch": 1.94, "learning_rate": 5.01233258986462e-06, "logits/chosen": -2.5068631172180176, "logits/rejected": -2.3698575496673584, "logps/chosen": -212.21896362304688, "logps/rejected": -390.8636779785156, "loss": 1.3579, "rewards/accuracies": 0.5, "rewards/chosen": -6.3450398445129395, "rewards/margins": 4.6152663230896, "rewards/rejected": -10.960306167602539, "step": 12455 }, { "epoch": 1.94, "learning_rate": 5.0115991493334716e-06, "logits/chosen": -2.735023021697998, "logits/rejected": -2.9818761348724365, "logps/chosen": -221.92083740234375, "logps/rejected": -383.2541809082031, "loss": 0.1544, "rewards/accuracies": 1.0, "rewards/chosen": -5.287717342376709, "rewards/margins": 4.058995246887207, "rewards/rejected": -9.346712112426758, "step": 12456 }, { "epoch": 1.94, "learning_rate": 5.010865708802324e-06, "logits/chosen": -1.2349188327789307, "logits/rejected": -2.24428391456604, "logps/chosen": -127.88734436035156, "logps/rejected": -274.052490234375, "loss": 0.7135, "rewards/accuracies": 0.5, "rewards/chosen": -7.610532760620117, "rewards/margins": 2.746396541595459, "rewards/rejected": -10.356929779052734, "step": 12457 }, { "epoch": 1.94, "learning_rate": 5.010132268271176e-06, "logits/chosen": -2.8255422115325928, "logits/rejected": -2.9104514122009277, "logps/chosen": -54.77090835571289, "logps/rejected": -209.04942321777344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.784761905670166, "rewards/margins": 9.59365177154541, "rewards/rejected": -12.378413200378418, "step": 12458 }, { "epoch": 1.94, "learning_rate": 5.009398827740028e-06, "logits/chosen": -2.320200204849243, "logits/rejected": -2.8182835578918457, "logps/chosen": -129.0469970703125, "logps/rejected": -296.6915283203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.855230331420898, "rewards/margins": 8.711674690246582, "rewards/rejected": -13.566904067993164, "step": 12459 }, { "epoch": 1.94, "learning_rate": 5.00866538720888e-06, "logits/chosen": -2.1946980953216553, "logits/rejected": -2.955869674682617, "logps/chosen": -210.48704528808594, "logps/rejected": -372.93792724609375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.8073863983154297, "rewards/margins": 7.29600191116333, "rewards/rejected": -11.103387832641602, "step": 12460 }, { "epoch": 1.94, "learning_rate": 5.007931946677732e-06, "logits/chosen": -3.08030366897583, "logits/rejected": -3.2351884841918945, "logps/chosen": -245.36004638671875, "logps/rejected": -274.8708801269531, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -5.432138919830322, "rewards/margins": 5.458091735839844, "rewards/rejected": -10.890230178833008, "step": 12461 }, { "epoch": 1.94, "learning_rate": 5.0071985061465845e-06, "logits/chosen": -2.8084139823913574, "logits/rejected": -2.7089672088623047, "logps/chosen": -219.50680541992188, "logps/rejected": -325.25445556640625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -1.5449601411819458, "rewards/margins": 9.096941947937012, "rewards/rejected": -10.641901969909668, "step": 12462 }, { "epoch": 1.94, "learning_rate": 5.006465065615436e-06, "logits/chosen": -3.0508382320404053, "logits/rejected": -3.1677582263946533, "logps/chosen": -850.4530029296875, "logps/rejected": -778.83447265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.7389678955078125, "rewards/margins": 7.377192497253418, "rewards/rejected": -13.116161346435547, "step": 12463 }, { "epoch": 1.94, "learning_rate": 5.005731625084288e-06, "logits/chosen": -1.9549068212509155, "logits/rejected": -2.6942732334136963, "logps/chosen": -270.1248779296875, "logps/rejected": -362.6826171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.502645969390869, "rewards/margins": 9.628106117248535, "rewards/rejected": -13.130752563476562, "step": 12464 }, { "epoch": 1.94, "learning_rate": 5.00499818455314e-06, "logits/chosen": -2.3125851154327393, "logits/rejected": -2.9041430950164795, "logps/chosen": -156.37591552734375, "logps/rejected": -333.7465515136719, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.27156925201416, "rewards/margins": 8.956140518188477, "rewards/rejected": -12.22770881652832, "step": 12465 }, { "epoch": 1.94, "learning_rate": 5.004264744021993e-06, "logits/chosen": -2.52951717376709, "logits/rejected": -2.0607850551605225, "logps/chosen": -421.6739196777344, "logps/rejected": -237.67178344726562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4204440116882324, "rewards/margins": 9.318389892578125, "rewards/rejected": -12.7388334274292, "step": 12466 }, { "epoch": 1.94, "learning_rate": 5.003531303490845e-06, "logits/chosen": -2.8625221252441406, "logits/rejected": -2.9312291145324707, "logps/chosen": -323.658203125, "logps/rejected": -514.38330078125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -3.248537540435791, "rewards/margins": 9.291465759277344, "rewards/rejected": -12.540002822875977, "step": 12467 }, { "epoch": 1.94, "learning_rate": 5.0027978629596975e-06, "logits/chosen": -3.0461220741271973, "logits/rejected": -2.130950689315796, "logps/chosen": -170.9101104736328, "logps/rejected": -173.53228759765625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -4.780333518981934, "rewards/margins": 5.7979936599731445, "rewards/rejected": -10.578327178955078, "step": 12468 }, { "epoch": 1.94, "learning_rate": 5.002064422428549e-06, "logits/chosen": -3.1246895790100098, "logits/rejected": -2.847107410430908, "logps/chosen": -287.18902587890625, "logps/rejected": -184.0950164794922, "loss": 0.1197, "rewards/accuracies": 1.0, "rewards/chosen": -6.6744866371154785, "rewards/margins": 5.2860107421875, "rewards/rejected": -11.960497856140137, "step": 12469 }, { "epoch": 1.94, "learning_rate": 5.001330981897401e-06, "logits/chosen": -1.684583306312561, "logits/rejected": -2.865980863571167, "logps/chosen": -215.43763732910156, "logps/rejected": -402.3837890625, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -4.862364768981934, "rewards/margins": 6.4617719650268555, "rewards/rejected": -11.324136734008789, "step": 12470 }, { "epoch": 1.94, "learning_rate": 5.000597541366254e-06, "logits/chosen": -2.3220391273498535, "logits/rejected": -2.961766481399536, "logps/chosen": -153.73715209960938, "logps/rejected": -499.7277526855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5575687885284424, "rewards/margins": 14.418127059936523, "rewards/rejected": -17.97569465637207, "step": 12471 }, { "epoch": 1.94, "learning_rate": 4.999864100835106e-06, "logits/chosen": -2.6166648864746094, "logits/rejected": -3.1158578395843506, "logps/chosen": -47.857521057128906, "logps/rejected": -236.11624145507812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.01662540435791, "rewards/margins": 8.31507396697998, "rewards/rejected": -11.33169937133789, "step": 12472 }, { "epoch": 1.94, "learning_rate": 4.999130660303958e-06, "logits/chosen": -2.5574779510498047, "logits/rejected": -2.6476762294769287, "logps/chosen": -223.28298950195312, "logps/rejected": -386.20001220703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.892518043518066, "rewards/margins": 8.136940956115723, "rewards/rejected": -14.029458999633789, "step": 12473 }, { "epoch": 1.94, "learning_rate": 4.99839721977281e-06, "logits/chosen": -2.7265024185180664, "logits/rejected": -2.4495222568511963, "logps/chosen": -338.8077392578125, "logps/rejected": -349.96728515625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -5.104552268981934, "rewards/margins": 5.892617225646973, "rewards/rejected": -10.997169494628906, "step": 12474 }, { "epoch": 1.94, "learning_rate": 4.997663779241662e-06, "logits/chosen": -2.8103179931640625, "logits/rejected": -2.493682622909546, "logps/chosen": -415.7542724609375, "logps/rejected": -612.318359375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.026970863342285, "rewards/margins": 8.072736740112305, "rewards/rejected": -14.09970760345459, "step": 12475 }, { "epoch": 1.94, "learning_rate": 4.996930338710514e-06, "logits/chosen": -2.6433019638061523, "logits/rejected": -2.6495351791381836, "logps/chosen": -362.7271423339844, "logps/rejected": -359.48748779296875, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -5.914957046508789, "rewards/margins": 4.960086822509766, "rewards/rejected": -10.875043869018555, "step": 12476 }, { "epoch": 1.94, "learning_rate": 4.996196898179366e-06, "logits/chosen": -2.9334769248962402, "logits/rejected": -2.4509787559509277, "logps/chosen": -472.4703369140625, "logps/rejected": -352.8804626464844, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.5998895168304443, "rewards/margins": 6.135099411010742, "rewards/rejected": -8.734989166259766, "step": 12477 }, { "epoch": 1.94, "learning_rate": 4.995463457648218e-06, "logits/chosen": -2.6166653633117676, "logits/rejected": -2.13714599609375, "logps/chosen": -268.4508361816406, "logps/rejected": -359.3634338378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7848665714263916, "rewards/margins": 12.454532623291016, "rewards/rejected": -15.239398956298828, "step": 12478 }, { "epoch": 1.94, "learning_rate": 4.99473001711707e-06, "logits/chosen": -3.001709461212158, "logits/rejected": -1.038130760192871, "logps/chosen": -394.4981994628906, "logps/rejected": -95.41950225830078, "loss": 3.3068, "rewards/accuracies": 0.5, "rewards/chosen": -9.274526596069336, "rewards/margins": -1.7927188873291016, "rewards/rejected": -7.481807708740234, "step": 12479 }, { "epoch": 1.94, "learning_rate": 4.993996576585923e-06, "logits/chosen": -3.05275821685791, "logits/rejected": -3.0065128803253174, "logps/chosen": -151.9809112548828, "logps/rejected": -131.28363037109375, "loss": 3.7007, "rewards/accuracies": 0.5, "rewards/chosen": -9.206149101257324, "rewards/margins": -0.5424835681915283, "rewards/rejected": -8.663665771484375, "step": 12480 }, { "epoch": 1.94, "learning_rate": 4.9932631360547745e-06, "logits/chosen": -2.2540395259857178, "logits/rejected": -2.898545980453491, "logps/chosen": -395.16607666015625, "logps/rejected": -414.74273681640625, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -6.609569549560547, "rewards/margins": 5.49960994720459, "rewards/rejected": -12.109179496765137, "step": 12481 }, { "epoch": 1.94, "learning_rate": 4.992529695523626e-06, "logits/chosen": -3.075263023376465, "logits/rejected": -2.8915634155273438, "logps/chosen": -522.019775390625, "logps/rejected": -342.104248046875, "loss": 2.1633, "rewards/accuracies": 0.5, "rewards/chosen": -8.196961402893066, "rewards/margins": 0.3017411231994629, "rewards/rejected": -8.498703002929688, "step": 12482 }, { "epoch": 1.94, "learning_rate": 4.991796254992478e-06, "logits/chosen": -2.9836764335632324, "logits/rejected": -2.4464035034179688, "logps/chosen": -400.3917236328125, "logps/rejected": -418.4628601074219, "loss": 2.0271, "rewards/accuracies": 0.5, "rewards/chosen": -7.856226921081543, "rewards/margins": 2.1330506801605225, "rewards/rejected": -9.989277839660645, "step": 12483 }, { "epoch": 1.94, "learning_rate": 4.991062814461331e-06, "logits/chosen": -2.483013153076172, "logits/rejected": -2.6540932655334473, "logps/chosen": -180.68609619140625, "logps/rejected": -341.0501708984375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -2.244046926498413, "rewards/margins": 6.960021018981934, "rewards/rejected": -9.20406723022461, "step": 12484 }, { "epoch": 1.94, "learning_rate": 4.990329373930184e-06, "logits/chosen": -2.45931339263916, "logits/rejected": -3.116926908493042, "logps/chosen": -168.7342529296875, "logps/rejected": -362.84735107421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.7200074195861816, "rewards/margins": 6.57218074798584, "rewards/rejected": -10.29218864440918, "step": 12485 }, { "epoch": 1.94, "learning_rate": 4.9895959333990355e-06, "logits/chosen": -2.942326068878174, "logits/rejected": -3.1155147552490234, "logps/chosen": -129.88589477539062, "logps/rejected": -232.9288330078125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.3989992141723633, "rewards/margins": 6.794795036315918, "rewards/rejected": -10.193794250488281, "step": 12486 }, { "epoch": 1.94, "learning_rate": 4.988862492867887e-06, "logits/chosen": -3.098395347595215, "logits/rejected": -2.8161537647247314, "logps/chosen": -594.7030639648438, "logps/rejected": -452.4024963378906, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.103553771972656, "rewards/margins": 7.9146809577941895, "rewards/rejected": -14.018234252929688, "step": 12487 }, { "epoch": 1.94, "learning_rate": 4.98812905233674e-06, "logits/chosen": -0.7518442273139954, "logits/rejected": -2.8891215324401855, "logps/chosen": -92.58111572265625, "logps/rejected": -590.4050903320312, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.882066011428833, "rewards/margins": 7.239750862121582, "rewards/rejected": -11.121816635131836, "step": 12488 }, { "epoch": 1.94, "learning_rate": 4.987395611805592e-06, "logits/chosen": -2.2175869941711426, "logits/rejected": -2.8894097805023193, "logps/chosen": -222.86404418945312, "logps/rejected": -538.7390747070312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.256474494934082, "rewards/margins": 8.024957656860352, "rewards/rejected": -13.281432151794434, "step": 12489 }, { "epoch": 1.94, "learning_rate": 4.986662171274444e-06, "logits/chosen": -1.4765838384628296, "logits/rejected": -3.010340452194214, "logps/chosen": -104.00003051757812, "logps/rejected": -453.5357971191406, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.445723056793213, "rewards/margins": 7.240383148193359, "rewards/rejected": -11.686105728149414, "step": 12490 }, { "epoch": 1.94, "learning_rate": 4.985928730743296e-06, "logits/chosen": -2.713322162628174, "logits/rejected": -3.072169065475464, "logps/chosen": -72.6334228515625, "logps/rejected": -182.1263427734375, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -2.84291934967041, "rewards/margins": 5.936403751373291, "rewards/rejected": -8.77932357788086, "step": 12491 }, { "epoch": 1.94, "learning_rate": 4.985195290212148e-06, "logits/chosen": -0.9841556549072266, "logits/rejected": -2.3606081008911133, "logps/chosen": -199.2605438232422, "logps/rejected": -532.0095825195312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.9530510902404785, "rewards/margins": 10.44958209991455, "rewards/rejected": -15.402633666992188, "step": 12492 }, { "epoch": 1.94, "learning_rate": 4.984461849681e-06, "logits/chosen": -2.125185251235962, "logits/rejected": -2.595459222793579, "logps/chosen": -127.72662353515625, "logps/rejected": -319.3623352050781, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -7.075163841247559, "rewards/margins": 7.763699054718018, "rewards/rejected": -14.838863372802734, "step": 12493 }, { "epoch": 1.94, "learning_rate": 4.983728409149852e-06, "logits/chosen": -2.5781960487365723, "logits/rejected": -3.0107204914093018, "logps/chosen": -157.8721466064453, "logps/rejected": -303.36676025390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.4838361740112305, "rewards/margins": 7.745275497436523, "rewards/rejected": -10.229111671447754, "step": 12494 }, { "epoch": 1.94, "learning_rate": 4.982994968618704e-06, "logits/chosen": -2.451525926589966, "logits/rejected": -3.0016560554504395, "logps/chosen": -118.00877380371094, "logps/rejected": -268.977294921875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.577256202697754, "rewards/margins": 7.56633186340332, "rewards/rejected": -12.14358901977539, "step": 12495 }, { "epoch": 1.94, "learning_rate": 4.982261528087556e-06, "logits/chosen": -2.0958776473999023, "logits/rejected": -3.07186222076416, "logps/chosen": -77.80706787109375, "logps/rejected": -297.0481262207031, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.451417446136475, "rewards/margins": 7.939000129699707, "rewards/rejected": -12.390417098999023, "step": 12496 }, { "epoch": 1.94, "learning_rate": 4.981528087556409e-06, "logits/chosen": -2.097628593444824, "logits/rejected": -2.887190580368042, "logps/chosen": -77.52896118164062, "logps/rejected": -342.6073303222656, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.2837021350860596, "rewards/margins": 8.497095108032227, "rewards/rejected": -11.780797958374023, "step": 12497 }, { "epoch": 1.94, "learning_rate": 4.980794647025261e-06, "logits/chosen": -2.060741662979126, "logits/rejected": -2.5727758407592773, "logps/chosen": -124.90303039550781, "logps/rejected": -388.1373291015625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.770508289337158, "rewards/margins": 10.756378173828125, "rewards/rejected": -14.526885986328125, "step": 12498 }, { "epoch": 1.94, "learning_rate": 4.9800612064941125e-06, "logits/chosen": -2.593228578567505, "logits/rejected": -3.328813076019287, "logps/chosen": -336.6579284667969, "logps/rejected": -479.14666748046875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -4.2225565910339355, "rewards/margins": 6.315011024475098, "rewards/rejected": -10.537567138671875, "step": 12499 }, { "epoch": 1.94, "learning_rate": 4.979327765962964e-06, "logits/chosen": -3.084869861602783, "logits/rejected": -3.252720355987549, "logps/chosen": -212.30654907226562, "logps/rejected": -285.94921875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.6288392543792725, "rewards/margins": 7.589200019836426, "rewards/rejected": -11.218039512634277, "step": 12500 }, { "epoch": 1.94, "learning_rate": 4.978594325431817e-06, "logits/chosen": -2.8808021545410156, "logits/rejected": -3.2469778060913086, "logps/chosen": -145.3983154296875, "logps/rejected": -241.51449584960938, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.241971015930176, "rewards/margins": 6.1870622634887695, "rewards/rejected": -10.429033279418945, "step": 12501 }, { "epoch": 1.94, "learning_rate": 4.97786088490067e-06, "logits/chosen": -2.0777463912963867, "logits/rejected": -3.030623197555542, "logps/chosen": -216.1361846923828, "logps/rejected": -333.21649169921875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.480366230010986, "rewards/margins": 7.707385063171387, "rewards/rejected": -12.187751770019531, "step": 12502 }, { "epoch": 1.94, "learning_rate": 4.977127444369522e-06, "logits/chosen": -1.9774729013442993, "logits/rejected": -2.556095838546753, "logps/chosen": -388.44110107421875, "logps/rejected": -480.92706298828125, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -7.175681114196777, "rewards/margins": 4.989559173583984, "rewards/rejected": -12.165241241455078, "step": 12503 }, { "epoch": 1.94, "learning_rate": 4.976394003838374e-06, "logits/chosen": -1.757350206375122, "logits/rejected": -2.8928792476654053, "logps/chosen": -117.2410659790039, "logps/rejected": -275.8227844238281, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -5.359736442565918, "rewards/margins": 4.536108493804932, "rewards/rejected": -9.895845413208008, "step": 12504 }, { "epoch": 1.94, "learning_rate": 4.9756605633072255e-06, "logits/chosen": -2.973074197769165, "logits/rejected": -2.5526280403137207, "logps/chosen": -723.0107421875, "logps/rejected": -462.6228332519531, "loss": 1.3898, "rewards/accuracies": 0.5, "rewards/chosen": -6.505720138549805, "rewards/margins": 3.0514655113220215, "rewards/rejected": -9.557185173034668, "step": 12505 }, { "epoch": 1.94, "learning_rate": 4.974927122776078e-06, "logits/chosen": -2.0514607429504395, "logits/rejected": -2.977074146270752, "logps/chosen": -254.78858947753906, "logps/rejected": -517.7799682617188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7724335193634033, "rewards/margins": 9.25667953491211, "rewards/rejected": -12.029112815856934, "step": 12506 }, { "epoch": 1.95, "learning_rate": 4.97419368224493e-06, "logits/chosen": -2.5663039684295654, "logits/rejected": -2.60896897315979, "logps/chosen": -275.53570556640625, "logps/rejected": -367.88763427734375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -4.5185089111328125, "rewards/margins": 7.569194316864014, "rewards/rejected": -12.087703704833984, "step": 12507 }, { "epoch": 1.95, "learning_rate": 4.973460241713782e-06, "logits/chosen": -2.512193441390991, "logits/rejected": -2.810992479324341, "logps/chosen": -121.2911605834961, "logps/rejected": -265.8385009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.670396566390991, "rewards/margins": 9.05813217163086, "rewards/rejected": -12.72852897644043, "step": 12508 }, { "epoch": 1.95, "learning_rate": 4.972726801182634e-06, "logits/chosen": -2.4228017330169678, "logits/rejected": -2.7463185787200928, "logps/chosen": -380.5875244140625, "logps/rejected": -394.7527160644531, "loss": 0.3831, "rewards/accuracies": 0.5, "rewards/chosen": -3.8751072883605957, "rewards/margins": 6.697963714599609, "rewards/rejected": -10.573070526123047, "step": 12509 }, { "epoch": 1.95, "learning_rate": 4.971993360651486e-06, "logits/chosen": -2.826296329498291, "logits/rejected": -2.1266136169433594, "logps/chosen": -373.09185791015625, "logps/rejected": -426.9166259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5148611068725586, "rewards/margins": 10.918925285339355, "rewards/rejected": -14.433786392211914, "step": 12510 }, { "epoch": 1.95, "learning_rate": 4.9712599201203384e-06, "logits/chosen": -2.5420305728912354, "logits/rejected": -3.123537302017212, "logps/chosen": -94.15913391113281, "logps/rejected": -489.7632141113281, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.457065582275391, "rewards/margins": 9.656023979187012, "rewards/rejected": -14.113090515136719, "step": 12511 }, { "epoch": 1.95, "learning_rate": 4.97052647958919e-06, "logits/chosen": -1.8784698247909546, "logits/rejected": -2.778921127319336, "logps/chosen": -170.70217895507812, "logps/rejected": -344.6199951171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.204382419586182, "rewards/margins": 6.731766700744629, "rewards/rejected": -11.936149597167969, "step": 12512 }, { "epoch": 1.95, "learning_rate": 4.969793039058042e-06, "logits/chosen": -2.3153038024902344, "logits/rejected": -2.8087213039398193, "logps/chosen": -187.8753662109375, "logps/rejected": -312.71453857421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.661317825317383, "rewards/margins": 9.471729278564453, "rewards/rejected": -14.133047103881836, "step": 12513 }, { "epoch": 1.95, "learning_rate": 4.969059598526894e-06, "logits/chosen": -2.488976240158081, "logits/rejected": -2.6634609699249268, "logps/chosen": -456.0770568847656, "logps/rejected": -376.70623779296875, "loss": 1.1603, "rewards/accuracies": 0.5, "rewards/chosen": -6.788226127624512, "rewards/margins": 1.0059304237365723, "rewards/rejected": -7.794157028198242, "step": 12514 }, { "epoch": 1.95, "learning_rate": 4.968326157995747e-06, "logits/chosen": -2.1166770458221436, "logits/rejected": -3.150973320007324, "logps/chosen": -345.8609313964844, "logps/rejected": -437.02532958984375, "loss": 0.0823, "rewards/accuracies": 1.0, "rewards/chosen": -6.928544044494629, "rewards/margins": 5.15557336807251, "rewards/rejected": -12.084117889404297, "step": 12515 }, { "epoch": 1.95, "learning_rate": 4.967592717464599e-06, "logits/chosen": -2.943852663040161, "logits/rejected": -3.190916061401367, "logps/chosen": -201.10891723632812, "logps/rejected": -268.07135009765625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.305872917175293, "rewards/margins": 6.5235514640808105, "rewards/rejected": -12.829423904418945, "step": 12516 }, { "epoch": 1.95, "learning_rate": 4.9668592769334505e-06, "logits/chosen": -2.525949478149414, "logits/rejected": -3.0121450424194336, "logps/chosen": -159.85963439941406, "logps/rejected": -254.49908447265625, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -6.468111038208008, "rewards/margins": 3.194932699203491, "rewards/rejected": -9.663043975830078, "step": 12517 }, { "epoch": 1.95, "learning_rate": 4.966125836402303e-06, "logits/chosen": -3.0145103931427, "logits/rejected": -2.4603171348571777, "logps/chosen": -189.07228088378906, "logps/rejected": -254.4296417236328, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -3.1284337043762207, "rewards/margins": 5.665599346160889, "rewards/rejected": -8.79403305053711, "step": 12518 }, { "epoch": 1.95, "learning_rate": 4.965392395871155e-06, "logits/chosen": -2.701385974884033, "logits/rejected": -2.620476245880127, "logps/chosen": -324.61346435546875, "logps/rejected": -377.115966796875, "loss": 0.1368, "rewards/accuracies": 1.0, "rewards/chosen": -10.629925727844238, "rewards/margins": 1.990004539489746, "rewards/rejected": -12.619930267333984, "step": 12519 }, { "epoch": 1.95, "learning_rate": 4.964658955340008e-06, "logits/chosen": -3.043919563293457, "logits/rejected": -3.1763710975646973, "logps/chosen": -174.505126953125, "logps/rejected": -235.86776733398438, "loss": 0.49, "rewards/accuracies": 0.5, "rewards/chosen": -7.314428329467773, "rewards/margins": 3.64850115776062, "rewards/rejected": -10.962929725646973, "step": 12520 }, { "epoch": 1.95, "learning_rate": 4.96392551480886e-06, "logits/chosen": -2.098844289779663, "logits/rejected": -2.9985766410827637, "logps/chosen": -216.5003204345703, "logps/rejected": -415.007080078125, "loss": 0.6582, "rewards/accuracies": 0.5, "rewards/chosen": -7.685250282287598, "rewards/margins": 1.466773271560669, "rewards/rejected": -9.152023315429688, "step": 12521 }, { "epoch": 1.95, "learning_rate": 4.963192074277712e-06, "logits/chosen": -2.8531863689422607, "logits/rejected": -2.819577693939209, "logps/chosen": -115.00625610351562, "logps/rejected": -221.05447387695312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.172393798828125, "rewards/margins": 7.676860332489014, "rewards/rejected": -14.849254608154297, "step": 12522 }, { "epoch": 1.95, "learning_rate": 4.9624586337465635e-06, "logits/chosen": -1.5612739324569702, "logits/rejected": -2.671790599822998, "logps/chosen": -144.60511779785156, "logps/rejected": -445.94775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.494747161865234, "rewards/margins": 12.196529388427734, "rewards/rejected": -16.69127655029297, "step": 12523 }, { "epoch": 1.95, "learning_rate": 4.961725193215416e-06, "logits/chosen": -2.724727153778076, "logits/rejected": -1.874455213546753, "logps/chosen": -610.950439453125, "logps/rejected": -483.5585632324219, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.775737762451172, "rewards/margins": 8.406132698059082, "rewards/rejected": -14.181869506835938, "step": 12524 }, { "epoch": 1.95, "learning_rate": 4.960991752684268e-06, "logits/chosen": -3.180990695953369, "logits/rejected": -3.2979750633239746, "logps/chosen": -73.12077331542969, "logps/rejected": -160.75927734375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.967837333679199, "rewards/margins": 5.793167591094971, "rewards/rejected": -9.761005401611328, "step": 12525 }, { "epoch": 1.95, "learning_rate": 4.96025831215312e-06, "logits/chosen": -2.713568925857544, "logits/rejected": -3.229175329208374, "logps/chosen": -292.9474182128906, "logps/rejected": -482.59326171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.256348609924316, "rewards/margins": 7.228874683380127, "rewards/rejected": -11.485223770141602, "step": 12526 }, { "epoch": 1.95, "learning_rate": 4.959524871621972e-06, "logits/chosen": -2.7721447944641113, "logits/rejected": -2.2127091884613037, "logps/chosen": -208.229736328125, "logps/rejected": -227.69583129882812, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.7444772720336914, "rewards/margins": 6.903478622436523, "rewards/rejected": -10.647954940795898, "step": 12527 }, { "epoch": 1.95, "learning_rate": 4.958791431090825e-06, "logits/chosen": -2.2376911640167236, "logits/rejected": -2.570927143096924, "logps/chosen": -394.77301025390625, "logps/rejected": -517.0367431640625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.554625034332275, "rewards/margins": 8.292139053344727, "rewards/rejected": -14.846763610839844, "step": 12528 }, { "epoch": 1.95, "learning_rate": 4.9580579905596765e-06, "logits/chosen": -2.6017467975616455, "logits/rejected": -3.0309689044952393, "logps/chosen": -148.96209716796875, "logps/rejected": -276.5503845214844, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -1.8637168407440186, "rewards/margins": 7.763948917388916, "rewards/rejected": -9.627666473388672, "step": 12529 }, { "epoch": 1.95, "learning_rate": 4.957324550028528e-06, "logits/chosen": -2.2534379959106445, "logits/rejected": -2.8782947063446045, "logps/chosen": -83.4319076538086, "logps/rejected": -220.44088745117188, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.7069132328033447, "rewards/margins": 8.082456588745117, "rewards/rejected": -11.789369583129883, "step": 12530 }, { "epoch": 1.95, "learning_rate": 4.95659110949738e-06, "logits/chosen": -2.5837786197662354, "logits/rejected": -2.9906349182128906, "logps/chosen": -55.276344299316406, "logps/rejected": -243.6826934814453, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.900639533996582, "rewards/margins": 8.628210067749023, "rewards/rejected": -12.528850555419922, "step": 12531 }, { "epoch": 1.95, "learning_rate": 4.955857668966232e-06, "logits/chosen": -3.032357931137085, "logits/rejected": -2.0953612327575684, "logps/chosen": -342.11309814453125, "logps/rejected": -271.69183349609375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -1.9400863647460938, "rewards/margins": 7.5004987716674805, "rewards/rejected": -9.440585136413574, "step": 12532 }, { "epoch": 1.95, "learning_rate": 4.955124228435085e-06, "logits/chosen": -2.2481768131256104, "logits/rejected": -2.5857176780700684, "logps/chosen": -268.97576904296875, "logps/rejected": -428.8634338378906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.329829216003418, "rewards/margins": 10.50045394897461, "rewards/rejected": -14.830284118652344, "step": 12533 }, { "epoch": 1.95, "learning_rate": 4.954390787903937e-06, "logits/chosen": -1.5670826435089111, "logits/rejected": -2.7417750358581543, "logps/chosen": -231.423095703125, "logps/rejected": -427.822021484375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.080404758453369, "rewards/margins": 6.785238742828369, "rewards/rejected": -12.865643501281738, "step": 12534 }, { "epoch": 1.95, "learning_rate": 4.9536573473727894e-06, "logits/chosen": -2.8741955757141113, "logits/rejected": -3.0703811645507812, "logps/chosen": -110.10285949707031, "logps/rejected": -259.2969665527344, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.8253021240234375, "rewards/margins": 7.3998260498046875, "rewards/rejected": -12.225128173828125, "step": 12535 }, { "epoch": 1.95, "learning_rate": 4.952923906841641e-06, "logits/chosen": -2.7983410358428955, "logits/rejected": -2.972707986831665, "logps/chosen": -120.52497863769531, "logps/rejected": -379.42730712890625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -5.461098670959473, "rewards/margins": 8.281450271606445, "rewards/rejected": -13.742548942565918, "step": 12536 }, { "epoch": 1.95, "learning_rate": 4.952190466310494e-06, "logits/chosen": -1.6430280208587646, "logits/rejected": -2.6318180561065674, "logps/chosen": -305.97283935546875, "logps/rejected": -308.854736328125, "loss": 0.8502, "rewards/accuracies": 0.5, "rewards/chosen": -5.537850379943848, "rewards/margins": 2.7808055877685547, "rewards/rejected": -8.318655967712402, "step": 12537 }, { "epoch": 1.95, "learning_rate": 4.951457025779346e-06, "logits/chosen": -1.3653019666671753, "logits/rejected": -2.835986375808716, "logps/chosen": -133.58364868164062, "logps/rejected": -463.5602111816406, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.7174224853515625, "rewards/margins": 7.856200695037842, "rewards/rejected": -14.573622703552246, "step": 12538 }, { "epoch": 1.95, "learning_rate": 4.950723585248198e-06, "logits/chosen": -2.575014591217041, "logits/rejected": -1.4833884239196777, "logps/chosen": -223.64047241210938, "logps/rejected": -199.95217895507812, "loss": 0.0757, "rewards/accuracies": 1.0, "rewards/chosen": -4.420685291290283, "rewards/margins": 4.777463912963867, "rewards/rejected": -9.198148727416992, "step": 12539 }, { "epoch": 1.95, "learning_rate": 4.94999014471705e-06, "logits/chosen": -2.811596155166626, "logits/rejected": -2.9233639240264893, "logps/chosen": -476.07025146484375, "logps/rejected": -459.4979248046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.316819667816162, "rewards/margins": 9.940237045288086, "rewards/rejected": -13.257057189941406, "step": 12540 }, { "epoch": 1.95, "learning_rate": 4.9492567041859016e-06, "logits/chosen": -1.8697772026062012, "logits/rejected": -3.0580263137817383, "logps/chosen": -144.9766387939453, "logps/rejected": -587.9218139648438, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -7.14862060546875, "rewards/margins": 7.797671318054199, "rewards/rejected": -14.94629192352295, "step": 12541 }, { "epoch": 1.95, "learning_rate": 4.948523263654754e-06, "logits/chosen": -1.712537407875061, "logits/rejected": -2.601853609085083, "logps/chosen": -131.74705505371094, "logps/rejected": -566.78271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.475317478179932, "rewards/margins": 12.334420204162598, "rewards/rejected": -17.809738159179688, "step": 12542 }, { "epoch": 1.95, "learning_rate": 4.947789823123606e-06, "logits/chosen": -2.0880579948425293, "logits/rejected": -2.750553846359253, "logps/chosen": -300.033203125, "logps/rejected": -426.9062805175781, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.603919982910156, "rewards/margins": 8.116846084594727, "rewards/rejected": -12.720766067504883, "step": 12543 }, { "epoch": 1.95, "learning_rate": 4.947056382592458e-06, "logits/chosen": -2.5582244396209717, "logits/rejected": -3.1992671489715576, "logps/chosen": -121.4561767578125, "logps/rejected": -353.0399169921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.1825480461120605, "rewards/margins": 8.714751243591309, "rewards/rejected": -13.897298812866211, "step": 12544 }, { "epoch": 1.95, "learning_rate": 4.94632294206131e-06, "logits/chosen": -2.7293379306793213, "logits/rejected": -2.8751494884490967, "logps/chosen": -410.3612976074219, "logps/rejected": -358.0281982421875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -5.619925022125244, "rewards/margins": 5.852665901184082, "rewards/rejected": -11.472591400146484, "step": 12545 }, { "epoch": 1.95, "learning_rate": 4.945589501530163e-06, "logits/chosen": -3.065786123275757, "logits/rejected": -3.1314122676849365, "logps/chosen": -188.19688415527344, "logps/rejected": -254.40570068359375, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -4.236832141876221, "rewards/margins": 7.930825233459473, "rewards/rejected": -12.167657852172852, "step": 12546 }, { "epoch": 1.95, "learning_rate": 4.9448560609990145e-06, "logits/chosen": -2.872791051864624, "logits/rejected": -3.1230857372283936, "logps/chosen": -479.2181396484375, "logps/rejected": -524.799560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.266393184661865, "rewards/margins": 8.952140808105469, "rewards/rejected": -14.218533515930176, "step": 12547 }, { "epoch": 1.95, "learning_rate": 4.944122620467866e-06, "logits/chosen": -3.013671398162842, "logits/rejected": -2.292823314666748, "logps/chosen": -393.6044921875, "logps/rejected": -268.55548095703125, "loss": 3.0599, "rewards/accuracies": 0.5, "rewards/chosen": -8.434006690979004, "rewards/margins": -0.028941869735717773, "rewards/rejected": -8.405064582824707, "step": 12548 }, { "epoch": 1.95, "learning_rate": 4.943389179936718e-06, "logits/chosen": -2.207847833633423, "logits/rejected": -3.3014211654663086, "logps/chosen": -68.70204162597656, "logps/rejected": -302.57598876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8881378173828125, "rewards/margins": 10.285660743713379, "rewards/rejected": -14.173798561096191, "step": 12549 }, { "epoch": 1.95, "learning_rate": 4.94265573940557e-06, "logits/chosen": -2.6664481163024902, "logits/rejected": -2.976482629776001, "logps/chosen": -82.54946899414062, "logps/rejected": -234.5582275390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.313478469848633, "rewards/margins": 8.546659469604492, "rewards/rejected": -13.860137939453125, "step": 12550 }, { "epoch": 1.95, "learning_rate": 4.941922298874423e-06, "logits/chosen": -0.7892223000526428, "logits/rejected": -2.8739612102508545, "logps/chosen": -269.5230712890625, "logps/rejected": -824.4434814453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.735686779022217, "rewards/margins": 8.647764205932617, "rewards/rejected": -16.383451461791992, "step": 12551 }, { "epoch": 1.95, "learning_rate": 4.941188858343276e-06, "logits/chosen": -3.0727882385253906, "logits/rejected": -2.666377305984497, "logps/chosen": -472.44964599609375, "logps/rejected": -466.4701843261719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.619717597961426, "rewards/margins": 8.074585914611816, "rewards/rejected": -11.694303512573242, "step": 12552 }, { "epoch": 1.95, "learning_rate": 4.9404554178121275e-06, "logits/chosen": -2.7173750400543213, "logits/rejected": -2.690382242202759, "logps/chosen": -202.516357421875, "logps/rejected": -290.0284423828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.119138240814209, "rewards/margins": 7.685051918029785, "rewards/rejected": -11.804189682006836, "step": 12553 }, { "epoch": 1.95, "learning_rate": 4.939721977280979e-06, "logits/chosen": -2.25298810005188, "logits/rejected": -3.003718614578247, "logps/chosen": -188.31097412109375, "logps/rejected": -282.6686706542969, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/chosen": -5.000924110412598, "rewards/margins": 5.692018985748291, "rewards/rejected": -10.692943572998047, "step": 12554 }, { "epoch": 1.95, "learning_rate": 4.938988536749832e-06, "logits/chosen": -1.9802472591400146, "logits/rejected": -3.0032315254211426, "logps/chosen": -369.9239501953125, "logps/rejected": -531.19970703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.914653778076172, "rewards/margins": 7.74722957611084, "rewards/rejected": -11.661883354187012, "step": 12555 }, { "epoch": 1.95, "learning_rate": 4.938255096218684e-06, "logits/chosen": -3.1393678188323975, "logits/rejected": -3.0915839672088623, "logps/chosen": -346.846435546875, "logps/rejected": -357.64447021484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.09871244430542, "rewards/margins": 8.748530387878418, "rewards/rejected": -12.84724235534668, "step": 12556 }, { "epoch": 1.95, "learning_rate": 4.937521655687536e-06, "logits/chosen": -3.032817840576172, "logits/rejected": -3.150306224822998, "logps/chosen": -388.79083251953125, "logps/rejected": -392.9743347167969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8220138549804688, "rewards/margins": 9.045251846313477, "rewards/rejected": -12.867265701293945, "step": 12557 }, { "epoch": 1.95, "learning_rate": 4.936788215156388e-06, "logits/chosen": -2.849910020828247, "logits/rejected": -1.9294662475585938, "logps/chosen": -275.6239013671875, "logps/rejected": -260.7889404296875, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -5.954008102416992, "rewards/margins": 5.2018232345581055, "rewards/rejected": -11.155831336975098, "step": 12558 }, { "epoch": 1.95, "learning_rate": 4.93605477462524e-06, "logits/chosen": -1.9035321474075317, "logits/rejected": -2.71138596534729, "logps/chosen": -157.73544311523438, "logps/rejected": -375.583740234375, "loss": 0.5958, "rewards/accuracies": 0.5, "rewards/chosen": -6.741175174713135, "rewards/margins": 5.540311813354492, "rewards/rejected": -12.281486511230469, "step": 12559 }, { "epoch": 1.95, "learning_rate": 4.935321334094092e-06, "logits/chosen": -2.933196783065796, "logits/rejected": -2.9878900051116943, "logps/chosen": -84.36676025390625, "logps/rejected": -205.19583129882812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.835234642028809, "rewards/margins": 9.01987075805664, "rewards/rejected": -13.855104446411133, "step": 12560 }, { "epoch": 1.95, "learning_rate": 4.934587893562944e-06, "logits/chosen": -2.4090187549591064, "logits/rejected": -2.871797561645508, "logps/chosen": -398.1554260253906, "logps/rejected": -447.09442138671875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.6274094581604, "rewards/margins": 7.867398262023926, "rewards/rejected": -13.494808197021484, "step": 12561 }, { "epoch": 1.95, "learning_rate": 4.933854453031796e-06, "logits/chosen": -2.764899969100952, "logits/rejected": -3.130038261413574, "logps/chosen": -118.91329193115234, "logps/rejected": -300.49310302734375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.3582329750061035, "rewards/margins": 5.858724117279053, "rewards/rejected": -11.216957092285156, "step": 12562 }, { "epoch": 1.95, "learning_rate": 4.933121012500648e-06, "logits/chosen": -2.026787519454956, "logits/rejected": -3.2593488693237305, "logps/chosen": -99.06417846679688, "logps/rejected": -390.50054931640625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -6.795354843139648, "rewards/margins": 6.6485748291015625, "rewards/rejected": -13.443929672241211, "step": 12563 }, { "epoch": 1.95, "learning_rate": 4.932387571969501e-06, "logits/chosen": -1.4030367136001587, "logits/rejected": -2.7462799549102783, "logps/chosen": -154.26947021484375, "logps/rejected": -377.14501953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.870927572250366, "rewards/margins": 8.21157169342041, "rewards/rejected": -12.082498550415039, "step": 12564 }, { "epoch": 1.95, "learning_rate": 4.9316541314383526e-06, "logits/chosen": -2.7110447883605957, "logits/rejected": -2.6344404220581055, "logps/chosen": -219.5783233642578, "logps/rejected": -378.93756103515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8944597244262695, "rewards/margins": 10.787134170532227, "rewards/rejected": -13.681593894958496, "step": 12565 }, { "epoch": 1.95, "learning_rate": 4.9309206909072044e-06, "logits/chosen": -3.0067925453186035, "logits/rejected": -2.9633026123046875, "logps/chosen": -390.0277099609375, "logps/rejected": -364.81134033203125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -5.4768877029418945, "rewards/margins": 6.2910614013671875, "rewards/rejected": -11.767949104309082, "step": 12566 }, { "epoch": 1.95, "learning_rate": 4.930187250376056e-06, "logits/chosen": -2.6140992641448975, "logits/rejected": -2.6674556732177734, "logps/chosen": -733.0477905273438, "logps/rejected": -555.92724609375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -5.674585819244385, "rewards/margins": 6.8202619552612305, "rewards/rejected": -12.494848251342773, "step": 12567 }, { "epoch": 1.95, "learning_rate": 4.929453809844909e-06, "logits/chosen": -2.650848865509033, "logits/rejected": -3.1079986095428467, "logps/chosen": -104.25418853759766, "logps/rejected": -235.3631134033203, "loss": 0.1107, "rewards/accuracies": 1.0, "rewards/chosen": -7.28117561340332, "rewards/margins": 3.7009389400482178, "rewards/rejected": -10.982114791870117, "step": 12568 }, { "epoch": 1.95, "learning_rate": 4.928720369313762e-06, "logits/chosen": -2.956662178039551, "logits/rejected": -2.6193299293518066, "logps/chosen": -443.2660827636719, "logps/rejected": -356.44122314453125, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": -8.19968318939209, "rewards/margins": 3.90129017829895, "rewards/rejected": -12.100973129272461, "step": 12569 }, { "epoch": 1.95, "learning_rate": 4.927986928782614e-06, "logits/chosen": -2.7850685119628906, "logits/rejected": -1.749424695968628, "logps/chosen": -370.2449645996094, "logps/rejected": -142.2526397705078, "loss": 0.8986, "rewards/accuracies": 0.5, "rewards/chosen": -6.678432464599609, "rewards/margins": 1.4013197422027588, "rewards/rejected": -8.079751968383789, "step": 12570 }, { "epoch": 1.96, "learning_rate": 4.9272534882514655e-06, "logits/chosen": -2.428044557571411, "logits/rejected": -3.026700496673584, "logps/chosen": -116.00907897949219, "logps/rejected": -414.4779052734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.260242938995361, "rewards/margins": 9.516862869262695, "rewards/rejected": -13.777105331420898, "step": 12571 }, { "epoch": 1.96, "learning_rate": 4.926520047720317e-06, "logits/chosen": -1.7542006969451904, "logits/rejected": -2.6992123126983643, "logps/chosen": -114.36122131347656, "logps/rejected": -351.20068359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.633238792419434, "rewards/margins": 8.11696720123291, "rewards/rejected": -12.750205993652344, "step": 12572 }, { "epoch": 1.96, "learning_rate": 4.92578660718917e-06, "logits/chosen": -2.7501330375671387, "logits/rejected": -2.147392749786377, "logps/chosen": -287.0071716308594, "logps/rejected": -338.3932800292969, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.62370491027832, "rewards/margins": 7.335445404052734, "rewards/rejected": -11.959150314331055, "step": 12573 }, { "epoch": 1.96, "learning_rate": 4.925053166658022e-06, "logits/chosen": -3.098283052444458, "logits/rejected": -3.2177512645721436, "logps/chosen": -105.56916809082031, "logps/rejected": -286.8277893066406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9131228923797607, "rewards/margins": 10.631416320800781, "rewards/rejected": -13.544538497924805, "step": 12574 }, { "epoch": 1.96, "learning_rate": 4.924319726126874e-06, "logits/chosen": -2.4357123374938965, "logits/rejected": -2.965172529220581, "logps/chosen": -146.48550415039062, "logps/rejected": -350.3248291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.8032327890396118, "rewards/margins": 10.698476791381836, "rewards/rejected": -11.501708984375, "step": 12575 }, { "epoch": 1.96, "learning_rate": 4.923586285595726e-06, "logits/chosen": -1.6943998336791992, "logits/rejected": -2.8418710231781006, "logps/chosen": -86.20973205566406, "logps/rejected": -437.49188232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.576753616333008, "rewards/margins": 13.024871826171875, "rewards/rejected": -16.601625442504883, "step": 12576 }, { "epoch": 1.96, "learning_rate": 4.9228528450645785e-06, "logits/chosen": -1.0526232719421387, "logits/rejected": -2.76458477973938, "logps/chosen": -110.72515869140625, "logps/rejected": -426.3522033691406, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": -5.200611114501953, "rewards/margins": 3.936673164367676, "rewards/rejected": -9.137284278869629, "step": 12577 }, { "epoch": 1.96, "learning_rate": 4.92211940453343e-06, "logits/chosen": -2.286853075027466, "logits/rejected": -2.8666787147521973, "logps/chosen": -187.44363403320312, "logps/rejected": -465.1347351074219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.203769683837891, "rewards/margins": 8.06046199798584, "rewards/rejected": -12.26423168182373, "step": 12578 }, { "epoch": 1.96, "learning_rate": 4.921385964002282e-06, "logits/chosen": -2.7521326541900635, "logits/rejected": -2.5918359756469727, "logps/chosen": -214.38909912109375, "logps/rejected": -214.85836791992188, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -5.496506690979004, "rewards/margins": 4.596410751342773, "rewards/rejected": -10.092917442321777, "step": 12579 }, { "epoch": 1.96, "learning_rate": 4.920652523471134e-06, "logits/chosen": -2.8802390098571777, "logits/rejected": -2.6263482570648193, "logps/chosen": -798.3643188476562, "logps/rejected": -601.0308227539062, "loss": 0.3451, "rewards/accuracies": 1.0, "rewards/chosen": -4.8310136795043945, "rewards/margins": 3.8480679988861084, "rewards/rejected": -8.679081916809082, "step": 12580 }, { "epoch": 1.96, "learning_rate": 4.919919082939986e-06, "logits/chosen": -1.9449275732040405, "logits/rejected": -2.7531063556671143, "logps/chosen": -390.643798828125, "logps/rejected": -580.80029296875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -8.255537033081055, "rewards/margins": 6.520689964294434, "rewards/rejected": -14.776226997375488, "step": 12581 }, { "epoch": 1.96, "learning_rate": 4.919185642408839e-06, "logits/chosen": -1.8146636486053467, "logits/rejected": -3.0839946269989014, "logps/chosen": -273.9906921386719, "logps/rejected": -595.4197998046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.49612283706665, "rewards/margins": 7.846707344055176, "rewards/rejected": -12.342830657958984, "step": 12582 }, { "epoch": 1.96, "learning_rate": 4.918452201877691e-06, "logits/chosen": -3.0228395462036133, "logits/rejected": -2.6288399696350098, "logps/chosen": -250.56475830078125, "logps/rejected": -202.0059356689453, "loss": 0.1156, "rewards/accuracies": 1.0, "rewards/chosen": -7.973066806793213, "rewards/margins": 2.2007808685302734, "rewards/rejected": -10.173847198486328, "step": 12583 }, { "epoch": 1.96, "learning_rate": 4.9177187613465425e-06, "logits/chosen": -2.9851794242858887, "logits/rejected": -2.692898750305176, "logps/chosen": -465.1422424316406, "logps/rejected": -554.35302734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.187058925628662, "rewards/margins": 8.784849166870117, "rewards/rejected": -13.971908569335938, "step": 12584 }, { "epoch": 1.96, "learning_rate": 4.916985320815395e-06, "logits/chosen": -0.8631370067596436, "logits/rejected": -2.7338602542877197, "logps/chosen": -86.19635009765625, "logps/rejected": -419.0561218261719, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.921639919281006, "rewards/margins": 7.249298095703125, "rewards/rejected": -11.170937538146973, "step": 12585 }, { "epoch": 1.96, "learning_rate": 4.916251880284248e-06, "logits/chosen": -2.480759620666504, "logits/rejected": -3.044630527496338, "logps/chosen": -103.15963745117188, "logps/rejected": -304.89288330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.2464141845703125, "rewards/margins": 11.281038284301758, "rewards/rejected": -15.527451515197754, "step": 12586 }, { "epoch": 1.96, "learning_rate": 4.9155184397531e-06, "logits/chosen": -2.672605276107788, "logits/rejected": -2.815746307373047, "logps/chosen": -158.0467071533203, "logps/rejected": -313.24139404296875, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.068505048751831, "rewards/margins": 10.233304977416992, "rewards/rejected": -12.301809310913086, "step": 12587 }, { "epoch": 1.96, "learning_rate": 4.914784999221952e-06, "logits/chosen": -2.666724920272827, "logits/rejected": -2.185746669769287, "logps/chosen": -193.8921356201172, "logps/rejected": -379.63818359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.192363739013672, "rewards/margins": 9.069140434265137, "rewards/rejected": -13.261504173278809, "step": 12588 }, { "epoch": 1.96, "learning_rate": 4.9140515586908036e-06, "logits/chosen": -3.0066609382629395, "logits/rejected": -2.6500086784362793, "logps/chosen": -513.9896850585938, "logps/rejected": -365.1300964355469, "loss": 2.5167, "rewards/accuracies": 0.5, "rewards/chosen": -7.795031547546387, "rewards/margins": 3.078601598739624, "rewards/rejected": -10.87363338470459, "step": 12589 }, { "epoch": 1.96, "learning_rate": 4.9133181181596554e-06, "logits/chosen": -2.689800262451172, "logits/rejected": -3.0583274364471436, "logps/chosen": -216.722412109375, "logps/rejected": -491.5238037109375, "loss": 1.4105, "rewards/accuracies": 0.5, "rewards/chosen": -6.893584251403809, "rewards/margins": 3.084319591522217, "rewards/rejected": -9.977903366088867, "step": 12590 }, { "epoch": 1.96, "learning_rate": 4.912584677628508e-06, "logits/chosen": -2.257261276245117, "logits/rejected": -1.6656672954559326, "logps/chosen": -151.11859130859375, "logps/rejected": -219.1217041015625, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -3.996127128601074, "rewards/margins": 5.538606643676758, "rewards/rejected": -9.534733772277832, "step": 12591 }, { "epoch": 1.96, "learning_rate": 4.91185123709736e-06, "logits/chosen": -2.167299747467041, "logits/rejected": -3.2384321689605713, "logps/chosen": -124.07292175292969, "logps/rejected": -451.0856628417969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.937716960906982, "rewards/margins": 9.24707317352295, "rewards/rejected": -14.18479061126709, "step": 12592 }, { "epoch": 1.96, "learning_rate": 4.911117796566212e-06, "logits/chosen": -2.354970693588257, "logits/rejected": -3.257082223892212, "logps/chosen": -101.35160827636719, "logps/rejected": -435.3822326660156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.8204545974731445, "rewards/margins": 11.510347366333008, "rewards/rejected": -16.330801010131836, "step": 12593 }, { "epoch": 1.96, "learning_rate": 4.910384356035064e-06, "logits/chosen": -2.8620848655700684, "logits/rejected": -3.088726758956909, "logps/chosen": -121.03996276855469, "logps/rejected": -260.7918701171875, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -3.993459463119507, "rewards/margins": 6.47796630859375, "rewards/rejected": -10.471426010131836, "step": 12594 }, { "epoch": 1.96, "learning_rate": 4.9096509155039165e-06, "logits/chosen": -3.1320579051971436, "logits/rejected": -2.8169920444488525, "logps/chosen": -489.93670654296875, "logps/rejected": -532.8016357421875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -4.380126953125, "rewards/margins": 8.477919578552246, "rewards/rejected": -12.858046531677246, "step": 12595 }, { "epoch": 1.96, "learning_rate": 4.908917474972768e-06, "logits/chosen": -3.2056350708007812, "logits/rejected": -2.7907633781433105, "logps/chosen": -125.57940673828125, "logps/rejected": -206.62078857421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2931617498397827, "rewards/margins": 9.254989624023438, "rewards/rejected": -10.548151969909668, "step": 12596 }, { "epoch": 1.96, "learning_rate": 4.90818403444162e-06, "logits/chosen": -3.1232059001922607, "logits/rejected": -2.6068928241729736, "logps/chosen": -302.40185546875, "logps/rejected": -165.46859741210938, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": -5.291615009307861, "rewards/margins": 4.0598344802856445, "rewards/rejected": -9.351449966430664, "step": 12597 }, { "epoch": 1.96, "learning_rate": 4.907450593910472e-06, "logits/chosen": -2.1946163177490234, "logits/rejected": -2.899043083190918, "logps/chosen": -208.24295043945312, "logps/rejected": -459.77288818359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.1543335914611816, "rewards/margins": 8.186505317687988, "rewards/rejected": -10.340839385986328, "step": 12598 }, { "epoch": 1.96, "learning_rate": 4.906717153379324e-06, "logits/chosen": -2.827322006225586, "logits/rejected": -2.9631264209747314, "logps/chosen": -213.7356414794922, "logps/rejected": -358.0069885253906, "loss": 1.1551, "rewards/accuracies": 0.5, "rewards/chosen": -8.43055534362793, "rewards/margins": 0.5716233253479004, "rewards/rejected": -9.002178192138672, "step": 12599 }, { "epoch": 1.96, "learning_rate": 4.905983712848177e-06, "logits/chosen": -1.3210294246673584, "logits/rejected": -2.6835832595825195, "logps/chosen": -159.2734375, "logps/rejected": -411.8778381347656, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.375633716583252, "rewards/margins": 7.29582405090332, "rewards/rejected": -13.671457290649414, "step": 12600 }, { "epoch": 1.96, "learning_rate": 4.905250272317029e-06, "logits/chosen": -3.041491985321045, "logits/rejected": -2.5037589073181152, "logps/chosen": -319.11590576171875, "logps/rejected": -285.90704345703125, "loss": 0.6263, "rewards/accuracies": 0.5, "rewards/chosen": -4.479795932769775, "rewards/margins": 2.4878671169281006, "rewards/rejected": -6.967662811279297, "step": 12601 }, { "epoch": 1.96, "learning_rate": 4.904516831785881e-06, "logits/chosen": -3.159879684448242, "logits/rejected": -2.975372791290283, "logps/chosen": -271.8756103515625, "logps/rejected": -351.0340576171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.766998291015625, "rewards/margins": 7.957564353942871, "rewards/rejected": -11.724563598632812, "step": 12602 }, { "epoch": 1.96, "learning_rate": 4.903783391254733e-06, "logits/chosen": -1.9701443910598755, "logits/rejected": -2.367835521697998, "logps/chosen": -576.2322387695312, "logps/rejected": -842.7730712890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.321237087249756, "rewards/margins": 12.185251235961914, "rewards/rejected": -17.506488800048828, "step": 12603 }, { "epoch": 1.96, "learning_rate": 4.903049950723586e-06, "logits/chosen": -2.3270926475524902, "logits/rejected": -3.0513429641723633, "logps/chosen": -139.04299926757812, "logps/rejected": -465.099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4504144191741943, "rewards/margins": 14.377164840698242, "rewards/rejected": -16.827579498291016, "step": 12604 }, { "epoch": 1.96, "learning_rate": 4.902316510192438e-06, "logits/chosen": -2.6190969944000244, "logits/rejected": -2.187664270401001, "logps/chosen": -235.45123291015625, "logps/rejected": -314.301025390625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.393815040588379, "rewards/margins": 8.303430557250977, "rewards/rejected": -12.697245597839355, "step": 12605 }, { "epoch": 1.96, "learning_rate": 4.90158306966129e-06, "logits/chosen": -1.1963046789169312, "logits/rejected": -2.7061986923217773, "logps/chosen": -111.79373931884766, "logps/rejected": -334.54290771484375, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -7.400474548339844, "rewards/margins": 5.497261047363281, "rewards/rejected": -12.897735595703125, "step": 12606 }, { "epoch": 1.96, "learning_rate": 4.900849629130142e-06, "logits/chosen": -2.6047286987304688, "logits/rejected": -2.873750686645508, "logps/chosen": -187.83851623535156, "logps/rejected": -301.9087219238281, "loss": 0.4363, "rewards/accuracies": 0.5, "rewards/chosen": -5.8952836990356445, "rewards/margins": 1.3868675231933594, "rewards/rejected": -7.282151222229004, "step": 12607 }, { "epoch": 1.96, "learning_rate": 4.9001161885989935e-06, "logits/chosen": -2.5723276138305664, "logits/rejected": -2.01523756980896, "logps/chosen": -416.54803466796875, "logps/rejected": -493.2649230957031, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.7978105545043945, "rewards/margins": 6.503422737121582, "rewards/rejected": -12.301233291625977, "step": 12608 }, { "epoch": 1.96, "learning_rate": 4.899382748067846e-06, "logits/chosen": -2.855149269104004, "logits/rejected": -2.5311625003814697, "logps/chosen": -332.4090270996094, "logps/rejected": -219.7850341796875, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -7.672533988952637, "rewards/margins": 4.443600654602051, "rewards/rejected": -12.116134643554688, "step": 12609 }, { "epoch": 1.96, "learning_rate": 4.898649307536698e-06, "logits/chosen": -2.0147640705108643, "logits/rejected": -2.696908950805664, "logps/chosen": -467.30914306640625, "logps/rejected": -674.1280517578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.82663106918335, "rewards/margins": 8.283042907714844, "rewards/rejected": -13.109674453735352, "step": 12610 }, { "epoch": 1.96, "learning_rate": 4.89791586700555e-06, "logits/chosen": -2.433915376663208, "logits/rejected": -2.7246646881103516, "logps/chosen": -125.04252624511719, "logps/rejected": -300.55731201171875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -4.765295505523682, "rewards/margins": 8.535629272460938, "rewards/rejected": -13.300924301147461, "step": 12611 }, { "epoch": 1.96, "learning_rate": 4.897182426474402e-06, "logits/chosen": -1.6540753841400146, "logits/rejected": -2.801027536392212, "logps/chosen": -114.97993469238281, "logps/rejected": -434.3852844238281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.444814682006836, "rewards/margins": 11.43124771118164, "rewards/rejected": -14.876062393188477, "step": 12612 }, { "epoch": 1.96, "learning_rate": 4.896448985943255e-06, "logits/chosen": -2.889925956726074, "logits/rejected": -2.359720468521118, "logps/chosen": -205.89492797851562, "logps/rejected": -147.5358123779297, "loss": 1.1703, "rewards/accuracies": 0.5, "rewards/chosen": -8.383500099182129, "rewards/margins": 0.23757123947143555, "rewards/rejected": -8.621070861816406, "step": 12613 }, { "epoch": 1.96, "learning_rate": 4.8957155454121065e-06, "logits/chosen": -2.8299994468688965, "logits/rejected": -2.80092716217041, "logps/chosen": -494.7987976074219, "logps/rejected": -375.40777587890625, "loss": 3.0013, "rewards/accuracies": 0.5, "rewards/chosen": -7.686179161071777, "rewards/margins": 0.299175500869751, "rewards/rejected": -7.985354423522949, "step": 12614 }, { "epoch": 1.96, "learning_rate": 4.894982104880958e-06, "logits/chosen": -1.4413777589797974, "logits/rejected": -2.335113763809204, "logps/chosen": -268.5269775390625, "logps/rejected": -340.300537109375, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -7.156731605529785, "rewards/margins": 3.448366165161133, "rewards/rejected": -10.605097770690918, "step": 12615 }, { "epoch": 1.96, "learning_rate": 4.89424866434981e-06, "logits/chosen": -2.6740782260894775, "logits/rejected": -2.413320779800415, "logps/chosen": -151.8192138671875, "logps/rejected": -442.9635314941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5918567180633545, "rewards/margins": 13.656307220458984, "rewards/rejected": -16.2481632232666, "step": 12616 }, { "epoch": 1.96, "learning_rate": 4.893515223818662e-06, "logits/chosen": -1.8229798078536987, "logits/rejected": -2.6639482975006104, "logps/chosen": -259.21844482421875, "logps/rejected": -425.7972412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.261806488037109, "rewards/margins": 9.604963302612305, "rewards/rejected": -14.866769790649414, "step": 12617 }, { "epoch": 1.96, "learning_rate": 4.892781783287515e-06, "logits/chosen": -2.30305814743042, "logits/rejected": -2.772545337677002, "logps/chosen": -219.86761474609375, "logps/rejected": -389.75335693359375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.088278293609619, "rewards/margins": 6.592988967895508, "rewards/rejected": -9.681266784667969, "step": 12618 }, { "epoch": 1.96, "learning_rate": 4.8920483427563675e-06, "logits/chosen": -2.1801693439483643, "logits/rejected": -3.31506609916687, "logps/chosen": -259.44476318359375, "logps/rejected": -545.66552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.203863620758057, "rewards/margins": 9.712101936340332, "rewards/rejected": -13.915966033935547, "step": 12619 }, { "epoch": 1.96, "learning_rate": 4.891314902225219e-06, "logits/chosen": -2.083829879760742, "logits/rejected": -2.999603271484375, "logps/chosen": -87.32400512695312, "logps/rejected": -291.43853759765625, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.894000768661499, "rewards/margins": 7.235448837280273, "rewards/rejected": -10.129449844360352, "step": 12620 }, { "epoch": 1.96, "learning_rate": 4.890581461694071e-06, "logits/chosen": -1.5400872230529785, "logits/rejected": -2.9421777725219727, "logps/chosen": -56.07716369628906, "logps/rejected": -250.9608154296875, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -4.061402320861816, "rewards/margins": 4.734789848327637, "rewards/rejected": -8.796192169189453, "step": 12621 }, { "epoch": 1.96, "learning_rate": 4.889848021162924e-06, "logits/chosen": -2.7157084941864014, "logits/rejected": -2.714489221572876, "logps/chosen": -149.39862060546875, "logps/rejected": -227.92404174804688, "loss": 3.0407, "rewards/accuracies": 0.5, "rewards/chosen": -8.060312271118164, "rewards/margins": 2.066551446914673, "rewards/rejected": -10.126863479614258, "step": 12622 }, { "epoch": 1.96, "learning_rate": 4.889114580631776e-06, "logits/chosen": -1.7874001264572144, "logits/rejected": -3.080104112625122, "logps/chosen": -155.9345703125, "logps/rejected": -432.89483642578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.2888081073760986, "rewards/margins": 9.133674621582031, "rewards/rejected": -11.42248249053955, "step": 12623 }, { "epoch": 1.96, "learning_rate": 4.888381140100628e-06, "logits/chosen": -2.8656005859375, "logits/rejected": -2.7854971885681152, "logps/chosen": -123.09559631347656, "logps/rejected": -271.7277526855469, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -6.649763584136963, "rewards/margins": 3.462357997894287, "rewards/rejected": -10.11212158203125, "step": 12624 }, { "epoch": 1.96, "learning_rate": 4.88764769956948e-06, "logits/chosen": -2.907721996307373, "logits/rejected": -2.280165672302246, "logps/chosen": -238.0755615234375, "logps/rejected": -185.44418334960938, "loss": 0.8238, "rewards/accuracies": 0.5, "rewards/chosen": -4.955416202545166, "rewards/margins": 1.3088804483413696, "rewards/rejected": -6.264296531677246, "step": 12625 }, { "epoch": 1.96, "learning_rate": 4.886914259038332e-06, "logits/chosen": -3.0178921222686768, "logits/rejected": -3.21258544921875, "logps/chosen": -66.7291030883789, "logps/rejected": -226.51922607421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.0994675159454346, "rewards/margins": 9.25023078918457, "rewards/rejected": -11.34969711303711, "step": 12626 }, { "epoch": 1.96, "learning_rate": 4.886180818507184e-06, "logits/chosen": -1.6576123237609863, "logits/rejected": -2.6489346027374268, "logps/chosen": -148.19102478027344, "logps/rejected": -346.69873046875, "loss": 1.2264, "rewards/accuracies": 0.5, "rewards/chosen": -3.377624988555908, "rewards/margins": 2.6069984436035156, "rewards/rejected": -5.984623432159424, "step": 12627 }, { "epoch": 1.96, "learning_rate": 4.885447377976036e-06, "logits/chosen": -1.8163237571716309, "logits/rejected": -2.973609685897827, "logps/chosen": -213.96429443359375, "logps/rejected": -563.001708984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.422710418701172, "rewards/margins": 9.079547882080078, "rewards/rejected": -11.50225830078125, "step": 12628 }, { "epoch": 1.96, "learning_rate": 4.884713937444888e-06, "logits/chosen": -3.183954954147339, "logits/rejected": -3.3413467407226562, "logps/chosen": -96.03138732910156, "logps/rejected": -244.46929931640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -1.0050982236862183, "rewards/margins": 8.985698699951172, "rewards/rejected": -9.99079704284668, "step": 12629 }, { "epoch": 1.96, "learning_rate": 4.88398049691374e-06, "logits/chosen": -0.8396883010864258, "logits/rejected": -2.4538843631744385, "logps/chosen": -100.74803161621094, "logps/rejected": -450.88873291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.695687770843506, "rewards/margins": 11.374954223632812, "rewards/rejected": -15.070642471313477, "step": 12630 }, { "epoch": 1.96, "learning_rate": 4.883247056382593e-06, "logits/chosen": -2.8321971893310547, "logits/rejected": -2.8785455226898193, "logps/chosen": -92.90351867675781, "logps/rejected": -424.9199523925781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.2274210453033447, "rewards/margins": 9.257226943969727, "rewards/rejected": -12.484647750854492, "step": 12631 }, { "epoch": 1.96, "learning_rate": 4.8825136158514445e-06, "logits/chosen": -2.829660654067993, "logits/rejected": -2.178744316101074, "logps/chosen": -193.7334442138672, "logps/rejected": -166.69027709960938, "loss": 0.7653, "rewards/accuracies": 0.5, "rewards/chosen": -7.687865734100342, "rewards/margins": 0.006614208221435547, "rewards/rejected": -7.694479942321777, "step": 12632 }, { "epoch": 1.96, "learning_rate": 4.881780175320296e-06, "logits/chosen": -3.094101905822754, "logits/rejected": -2.716625690460205, "logps/chosen": -1108.866943359375, "logps/rejected": -662.0277099609375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.750756740570068, "rewards/margins": 5.851495742797852, "rewards/rejected": -11.602252960205078, "step": 12633 }, { "epoch": 1.96, "learning_rate": 4.881046734789148e-06, "logits/chosen": -2.6292433738708496, "logits/rejected": -2.758502244949341, "logps/chosen": -153.99810791015625, "logps/rejected": -213.51136779785156, "loss": 0.1216, "rewards/accuracies": 1.0, "rewards/chosen": -4.050797462463379, "rewards/margins": 3.885895013809204, "rewards/rejected": -7.936692714691162, "step": 12634 }, { "epoch": 1.97, "learning_rate": 4.880313294258001e-06, "logits/chosen": -2.765151262283325, "logits/rejected": -2.8455963134765625, "logps/chosen": -302.764404296875, "logps/rejected": -431.081298828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.483328342437744, "rewards/margins": 7.554281711578369, "rewards/rejected": -12.037610054016113, "step": 12635 }, { "epoch": 1.97, "learning_rate": 4.879579853726854e-06, "logits/chosen": -2.4928138256073, "logits/rejected": -2.028550624847412, "logps/chosen": -476.9093933105469, "logps/rejected": -521.5361328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.982626914978027, "rewards/margins": 8.815018653869629, "rewards/rejected": -16.797645568847656, "step": 12636 }, { "epoch": 1.97, "learning_rate": 4.878846413195706e-06, "logits/chosen": -2.1397287845611572, "logits/rejected": -3.100696086883545, "logps/chosen": -98.37539672851562, "logps/rejected": -273.71112060546875, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -4.198139667510986, "rewards/margins": 3.9138383865356445, "rewards/rejected": -8.111978530883789, "step": 12637 }, { "epoch": 1.97, "learning_rate": 4.8781129726645575e-06, "logits/chosen": -1.7866817712783813, "logits/rejected": -2.696805715560913, "logps/chosen": -256.9781494140625, "logps/rejected": -324.94647216796875, "loss": 0.32, "rewards/accuracies": 1.0, "rewards/chosen": -4.607540130615234, "rewards/margins": 3.581212282180786, "rewards/rejected": -8.188753128051758, "step": 12638 }, { "epoch": 1.97, "learning_rate": 4.877379532133409e-06, "logits/chosen": -2.8564751148223877, "logits/rejected": -2.8053576946258545, "logps/chosen": -127.57077026367188, "logps/rejected": -208.44297790527344, "loss": 0.2079, "rewards/accuracies": 1.0, "rewards/chosen": -5.954946041107178, "rewards/margins": 3.5744590759277344, "rewards/rejected": -9.52940559387207, "step": 12639 }, { "epoch": 1.97, "learning_rate": 4.876646091602262e-06, "logits/chosen": -2.917984962463379, "logits/rejected": -1.0980442762374878, "logps/chosen": -339.8199157714844, "logps/rejected": -127.44798278808594, "loss": 0.7077, "rewards/accuracies": 0.5, "rewards/chosen": -5.319723606109619, "rewards/margins": 0.24447941780090332, "rewards/rejected": -5.564203262329102, "step": 12640 }, { "epoch": 1.97, "learning_rate": 4.875912651071114e-06, "logits/chosen": -2.651453971862793, "logits/rejected": -2.5923216342926025, "logps/chosen": -168.27081298828125, "logps/rejected": -383.18939208984375, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -3.3916451930999756, "rewards/margins": 8.785663604736328, "rewards/rejected": -12.177309036254883, "step": 12641 }, { "epoch": 1.97, "learning_rate": 4.875179210539966e-06, "logits/chosen": -2.5983030796051025, "logits/rejected": -2.408315896987915, "logps/chosen": -197.63754272460938, "logps/rejected": -260.0278015136719, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -6.273825645446777, "rewards/margins": 5.255674362182617, "rewards/rejected": -11.529500007629395, "step": 12642 }, { "epoch": 1.97, "learning_rate": 4.874445770008818e-06, "logits/chosen": -3.0019478797912598, "logits/rejected": -3.0633795261383057, "logps/chosen": -437.2004089355469, "logps/rejected": -399.477783203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.2881956100463867, "rewards/margins": 8.377287864685059, "rewards/rejected": -11.665483474731445, "step": 12643 }, { "epoch": 1.97, "learning_rate": 4.8737123294776704e-06, "logits/chosen": -2.7046868801116943, "logits/rejected": -3.2030725479125977, "logps/chosen": -107.81934356689453, "logps/rejected": -245.63967895507812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -2.888582706451416, "rewards/margins": 7.588743209838867, "rewards/rejected": -10.477325439453125, "step": 12644 }, { "epoch": 1.97, "learning_rate": 4.872978888946522e-06, "logits/chosen": -2.0103561878204346, "logits/rejected": -3.0356991291046143, "logps/chosen": -271.05596923828125, "logps/rejected": -401.19500732421875, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/chosen": -3.406254768371582, "rewards/margins": 6.330731391906738, "rewards/rejected": -9.73698616027832, "step": 12645 }, { "epoch": 1.97, "learning_rate": 4.872245448415374e-06, "logits/chosen": -2.8549270629882812, "logits/rejected": -3.055434465408325, "logps/chosen": -120.56388854980469, "logps/rejected": -296.76708984375, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -3.6280550956726074, "rewards/margins": 4.956058979034424, "rewards/rejected": -8.584114074707031, "step": 12646 }, { "epoch": 1.97, "learning_rate": 4.871512007884226e-06, "logits/chosen": -0.6394829154014587, "logits/rejected": -2.7557594776153564, "logps/chosen": -115.3919448852539, "logps/rejected": -666.9436645507812, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -5.161181449890137, "rewards/margins": 8.245523452758789, "rewards/rejected": -13.406704902648926, "step": 12647 }, { "epoch": 1.97, "learning_rate": 4.870778567353078e-06, "logits/chosen": -2.941185235977173, "logits/rejected": -2.893324613571167, "logps/chosen": -164.42831420898438, "logps/rejected": -266.15838623046875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.2947428226470947, "rewards/margins": 6.599984169006348, "rewards/rejected": -9.89472770690918, "step": 12648 }, { "epoch": 1.97, "learning_rate": 4.870045126821931e-06, "logits/chosen": -2.5628035068511963, "logits/rejected": -2.829300880432129, "logps/chosen": -565.1740112304688, "logps/rejected": -474.52569580078125, "loss": 1.6463, "rewards/accuracies": 0.5, "rewards/chosen": -4.386742115020752, "rewards/margins": 2.4704113006591797, "rewards/rejected": -6.857153415679932, "step": 12649 }, { "epoch": 1.97, "learning_rate": 4.8693116862907825e-06, "logits/chosen": -3.1644906997680664, "logits/rejected": -2.7199759483337402, "logps/chosen": -186.04611206054688, "logps/rejected": -159.08021545410156, "loss": 0.7609, "rewards/accuracies": 0.0, "rewards/chosen": -5.1677141189575195, "rewards/margins": -0.1292881965637207, "rewards/rejected": -5.038425922393799, "step": 12650 }, { "epoch": 1.97, "learning_rate": 4.8685782457596344e-06, "logits/chosen": -2.8537449836730957, "logits/rejected": -2.822028875350952, "logps/chosen": -409.0250244140625, "logps/rejected": -475.7283935546875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -4.0677947998046875, "rewards/margins": 7.345921516418457, "rewards/rejected": -11.413716316223145, "step": 12651 }, { "epoch": 1.97, "learning_rate": 4.867844805228487e-06, "logits/chosen": -2.0333194732666016, "logits/rejected": -2.9151699542999268, "logps/chosen": -80.85719299316406, "logps/rejected": -260.84417724609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.790005922317505, "rewards/margins": 8.136256217956543, "rewards/rejected": -10.926261901855469, "step": 12652 }, { "epoch": 1.97, "learning_rate": 4.86711136469734e-06, "logits/chosen": -1.512795329093933, "logits/rejected": -2.7859230041503906, "logps/chosen": -110.02677917480469, "logps/rejected": -274.10479736328125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.98480224609375, "rewards/margins": 6.6905012130737305, "rewards/rejected": -11.67530345916748, "step": 12653 }, { "epoch": 1.97, "learning_rate": 4.866377924166192e-06, "logits/chosen": -2.7427947521209717, "logits/rejected": -2.303785562515259, "logps/chosen": -229.3824005126953, "logps/rejected": -208.894775390625, "loss": 0.2256, "rewards/accuracies": 1.0, "rewards/chosen": -4.528914928436279, "rewards/margins": 4.937414169311523, "rewards/rejected": -9.466328620910645, "step": 12654 }, { "epoch": 1.97, "learning_rate": 4.865644483635044e-06, "logits/chosen": -2.7308545112609863, "logits/rejected": -2.873305320739746, "logps/chosen": -191.4466552734375, "logps/rejected": -190.86109924316406, "loss": 2.1017, "rewards/accuracies": 0.5, "rewards/chosen": -8.077095031738281, "rewards/margins": -0.1750197410583496, "rewards/rejected": -7.90207576751709, "step": 12655 }, { "epoch": 1.97, "learning_rate": 4.8649110431038955e-06, "logits/chosen": -2.526925802230835, "logits/rejected": -2.804595708847046, "logps/chosen": -581.552734375, "logps/rejected": -526.7449951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7583303451538086, "rewards/margins": 10.579349517822266, "rewards/rejected": -12.337678909301758, "step": 12656 }, { "epoch": 1.97, "learning_rate": 4.864177602572747e-06, "logits/chosen": -1.4393235445022583, "logits/rejected": -2.47908091545105, "logps/chosen": -170.79359436035156, "logps/rejected": -413.01910400390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.5254368782043457, "rewards/margins": 8.323709487915039, "rewards/rejected": -11.849146842956543, "step": 12657 }, { "epoch": 1.97, "learning_rate": 4.8634441620416e-06, "logits/chosen": -2.493285655975342, "logits/rejected": -2.6423864364624023, "logps/chosen": -270.9353332519531, "logps/rejected": -296.7749328613281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.669720649719238, "rewards/margins": 8.305730819702148, "rewards/rejected": -12.975452423095703, "step": 12658 }, { "epoch": 1.97, "learning_rate": 4.862710721510452e-06, "logits/chosen": -2.6369383335113525, "logits/rejected": -2.097935199737549, "logps/chosen": -252.20753479003906, "logps/rejected": -233.74737548828125, "loss": 0.0638, "rewards/accuracies": 1.0, "rewards/chosen": -6.004724025726318, "rewards/margins": 3.115004777908325, "rewards/rejected": -9.119729042053223, "step": 12659 }, { "epoch": 1.97, "learning_rate": 4.861977280979304e-06, "logits/chosen": -2.024174928665161, "logits/rejected": -2.8591206073760986, "logps/chosen": -138.19789123535156, "logps/rejected": -443.5050048828125, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -6.019655227661133, "rewards/margins": 5.368636131286621, "rewards/rejected": -11.388290405273438, "step": 12660 }, { "epoch": 1.97, "learning_rate": 4.861243840448156e-06, "logits/chosen": -2.925314426422119, "logits/rejected": -2.7971768379211426, "logps/chosen": -264.3677978515625, "logps/rejected": -268.721923828125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -2.69936466217041, "rewards/margins": 6.558132171630859, "rewards/rejected": -9.25749683380127, "step": 12661 }, { "epoch": 1.97, "learning_rate": 4.8605103999170085e-06, "logits/chosen": -3.1238059997558594, "logits/rejected": -0.7839598655700684, "logps/chosen": -611.8436279296875, "logps/rejected": -146.76596069335938, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -0.5809784531593323, "rewards/margins": 5.999154090881348, "rewards/rejected": -6.580132484436035, "step": 12662 }, { "epoch": 1.97, "learning_rate": 4.85977695938586e-06, "logits/chosen": -2.901906728744507, "logits/rejected": -2.255635976791382, "logps/chosen": -228.0360107421875, "logps/rejected": -303.435302734375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -4.881252288818359, "rewards/margins": 6.4485063552856445, "rewards/rejected": -11.329758644104004, "step": 12663 }, { "epoch": 1.97, "learning_rate": 4.859043518854712e-06, "logits/chosen": -2.5371506214141846, "logits/rejected": -2.9268264770507812, "logps/chosen": -302.51788330078125, "logps/rejected": -448.07183837890625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.745697021484375, "rewards/margins": 8.176029205322266, "rewards/rejected": -11.921727180480957, "step": 12664 }, { "epoch": 1.97, "learning_rate": 4.858310078323564e-06, "logits/chosen": -2.3324460983276367, "logits/rejected": -3.082357406616211, "logps/chosen": -233.39715576171875, "logps/rejected": -395.4018859863281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9588069915771484, "rewards/margins": 8.317453384399414, "rewards/rejected": -12.276260375976562, "step": 12665 }, { "epoch": 1.97, "learning_rate": 4.857576637792416e-06, "logits/chosen": -1.36494779586792, "logits/rejected": -2.4182183742523193, "logps/chosen": -84.05793762207031, "logps/rejected": -278.7485656738281, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -3.402078151702881, "rewards/margins": 6.316289901733398, "rewards/rejected": -9.718368530273438, "step": 12666 }, { "epoch": 1.97, "learning_rate": 4.856843197261269e-06, "logits/chosen": -1.802547574043274, "logits/rejected": -3.12801456451416, "logps/chosen": -66.51944732666016, "logps/rejected": -166.33389282226562, "loss": 0.5176, "rewards/accuracies": 0.5, "rewards/chosen": -5.136920928955078, "rewards/margins": 2.5907580852508545, "rewards/rejected": -7.727679252624512, "step": 12667 }, { "epoch": 1.97, "learning_rate": 4.856109756730121e-06, "logits/chosen": -2.3976588249206543, "logits/rejected": -2.934509754180908, "logps/chosen": -147.39088439941406, "logps/rejected": -331.06964111328125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.866419792175293, "rewards/margins": 5.878751754760742, "rewards/rejected": -9.745171546936035, "step": 12668 }, { "epoch": 1.97, "learning_rate": 4.855376316198973e-06, "logits/chosen": -1.604967474937439, "logits/rejected": -2.7501566410064697, "logps/chosen": -150.4986572265625, "logps/rejected": -312.19873046875, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -4.384073257446289, "rewards/margins": 4.948385238647461, "rewards/rejected": -9.33245849609375, "step": 12669 }, { "epoch": 1.97, "learning_rate": 4.854642875667825e-06, "logits/chosen": -2.806657075881958, "logits/rejected": -3.300086736679077, "logps/chosen": -191.6033935546875, "logps/rejected": -245.09422302246094, "loss": 0.0464, "rewards/accuracies": 1.0, "rewards/chosen": -6.374566078186035, "rewards/margins": 3.882725954055786, "rewards/rejected": -10.257291793823242, "step": 12670 }, { "epoch": 1.97, "learning_rate": 4.853909435136678e-06, "logits/chosen": -2.4731931686401367, "logits/rejected": -2.829878807067871, "logps/chosen": -245.81448364257812, "logps/rejected": -421.7355041503906, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -4.584277629852295, "rewards/margins": 5.26304292678833, "rewards/rejected": -9.847320556640625, "step": 12671 }, { "epoch": 1.97, "learning_rate": 4.85317599460553e-06, "logits/chosen": -2.765401601791382, "logits/rejected": -3.329132080078125, "logps/chosen": -175.7869415283203, "logps/rejected": -398.4713134765625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -4.590020179748535, "rewards/margins": 5.357856750488281, "rewards/rejected": -9.9478759765625, "step": 12672 }, { "epoch": 1.97, "learning_rate": 4.852442554074382e-06, "logits/chosen": -2.5528430938720703, "logits/rejected": -2.989872455596924, "logps/chosen": -122.76881408691406, "logps/rejected": -293.3342590332031, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -5.024791717529297, "rewards/margins": 5.826343536376953, "rewards/rejected": -10.85113525390625, "step": 12673 }, { "epoch": 1.97, "learning_rate": 4.8517091135432336e-06, "logits/chosen": -3.068915843963623, "logits/rejected": -2.457852363586426, "logps/chosen": -220.6298828125, "logps/rejected": -133.06275939941406, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -3.6805291175842285, "rewards/margins": 4.213414192199707, "rewards/rejected": -7.893942832946777, "step": 12674 }, { "epoch": 1.97, "learning_rate": 4.850975673012086e-06, "logits/chosen": -2.120823383331299, "logits/rejected": -2.9741809368133545, "logps/chosen": -191.8998565673828, "logps/rejected": -319.3694152832031, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.1101455688476562, "rewards/margins": 7.529715538024902, "rewards/rejected": -9.639861106872559, "step": 12675 }, { "epoch": 1.97, "learning_rate": 4.850242232480938e-06, "logits/chosen": -2.4823861122131348, "logits/rejected": -2.8916213512420654, "logps/chosen": -103.16520690917969, "logps/rejected": -392.736572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0266964435577393, "rewards/margins": 11.839418411254883, "rewards/rejected": -14.866114616394043, "step": 12676 }, { "epoch": 1.97, "learning_rate": 4.84950879194979e-06, "logits/chosen": -2.7033731937408447, "logits/rejected": -2.973693370819092, "logps/chosen": -308.6783142089844, "logps/rejected": -483.2867736816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.953542470932007, "rewards/margins": 12.034467697143555, "rewards/rejected": -14.98801040649414, "step": 12677 }, { "epoch": 1.97, "learning_rate": 4.848775351418642e-06, "logits/chosen": -3.014519214630127, "logits/rejected": -2.6889736652374268, "logps/chosen": -353.0046691894531, "logps/rejected": -434.49755859375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.7192025184631348, "rewards/margins": 11.004404067993164, "rewards/rejected": -13.723608016967773, "step": 12678 }, { "epoch": 1.97, "learning_rate": 4.848041910887494e-06, "logits/chosen": -2.121943950653076, "logits/rejected": -2.9876692295074463, "logps/chosen": -150.6233673095703, "logps/rejected": -651.9713745117188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.947722911834717, "rewards/margins": 11.137359619140625, "rewards/rejected": -15.0850830078125, "step": 12679 }, { "epoch": 1.97, "learning_rate": 4.8473084703563465e-06, "logits/chosen": -2.816526174545288, "logits/rejected": -1.118759274482727, "logps/chosen": -627.06103515625, "logps/rejected": -395.37353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.551011085510254, "rewards/margins": 10.60340690612793, "rewards/rejected": -16.154417037963867, "step": 12680 }, { "epoch": 1.97, "learning_rate": 4.846575029825198e-06, "logits/chosen": -3.339150905609131, "logits/rejected": -3.0651865005493164, "logps/chosen": -516.0008544921875, "logps/rejected": -413.3316650390625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.269639015197754, "rewards/margins": 6.71466064453125, "rewards/rejected": -10.984298706054688, "step": 12681 }, { "epoch": 1.97, "learning_rate": 4.84584158929405e-06, "logits/chosen": -2.4752449989318848, "logits/rejected": -2.841076374053955, "logps/chosen": -64.02835083007812, "logps/rejected": -206.51248168945312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.1960062980651855, "rewards/margins": 6.49932336807251, "rewards/rejected": -9.695329666137695, "step": 12682 }, { "epoch": 1.97, "learning_rate": 4.845108148762902e-06, "logits/chosen": -2.0982251167297363, "logits/rejected": -1.2466208934783936, "logps/chosen": -289.42291259765625, "logps/rejected": -301.2919006347656, "loss": 0.1221, "rewards/accuracies": 1.0, "rewards/chosen": -4.0355048179626465, "rewards/margins": 7.08975887298584, "rewards/rejected": -11.125264167785645, "step": 12683 }, { "epoch": 1.97, "learning_rate": 4.844374708231755e-06, "logits/chosen": -2.7931149005889893, "logits/rejected": -2.839277982711792, "logps/chosen": -75.7429428100586, "logps/rejected": -265.07391357421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8754889965057373, "rewards/margins": 9.32171630859375, "rewards/rejected": -12.19720458984375, "step": 12684 }, { "epoch": 1.97, "learning_rate": 4.843641267700607e-06, "logits/chosen": -1.359618067741394, "logits/rejected": -2.925624132156372, "logps/chosen": -86.91220092773438, "logps/rejected": -298.56903076171875, "loss": 0.1162, "rewards/accuracies": 1.0, "rewards/chosen": -5.395650386810303, "rewards/margins": 4.217825889587402, "rewards/rejected": -9.613476753234863, "step": 12685 }, { "epoch": 1.97, "learning_rate": 4.842907827169459e-06, "logits/chosen": -1.540159821510315, "logits/rejected": -2.9255239963531494, "logps/chosen": -157.11309814453125, "logps/rejected": -467.64434814453125, "loss": 0.2235, "rewards/accuracies": 1.0, "rewards/chosen": -7.425632476806641, "rewards/margins": 3.653150796890259, "rewards/rejected": -11.07878303527832, "step": 12686 }, { "epoch": 1.97, "learning_rate": 4.842174386638311e-06, "logits/chosen": -1.6210997104644775, "logits/rejected": -2.5182368755340576, "logps/chosen": -307.92431640625, "logps/rejected": -609.9802856445312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9940438270568848, "rewards/margins": 10.835445404052734, "rewards/rejected": -13.829489707946777, "step": 12687 }, { "epoch": 1.97, "learning_rate": 4.841440946107163e-06, "logits/chosen": -2.667478561401367, "logits/rejected": -2.4188215732574463, "logps/chosen": -332.5036315917969, "logps/rejected": -378.45648193359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.3543496131896973, "rewards/margins": 7.8116865158081055, "rewards/rejected": -11.166036605834961, "step": 12688 }, { "epoch": 1.97, "learning_rate": 4.840707505576016e-06, "logits/chosen": -2.7049720287323, "logits/rejected": -2.3181495666503906, "logps/chosen": -184.8323974609375, "logps/rejected": -444.2933349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.359749794006348, "rewards/margins": 10.45532512664795, "rewards/rejected": -15.815074920654297, "step": 12689 }, { "epoch": 1.97, "learning_rate": 4.839974065044868e-06, "logits/chosen": -2.9911327362060547, "logits/rejected": -3.2263102531433105, "logps/chosen": -429.1455383300781, "logps/rejected": -454.0006408691406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.239462852478027, "rewards/margins": 8.88818645477295, "rewards/rejected": -13.127649307250977, "step": 12690 }, { "epoch": 1.97, "learning_rate": 4.83924062451372e-06, "logits/chosen": -2.2833619117736816, "logits/rejected": -2.8991739749908447, "logps/chosen": -190.73739624023438, "logps/rejected": -688.1965942382812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.627793788909912, "rewards/margins": 11.92405891418457, "rewards/rejected": -16.55185317993164, "step": 12691 }, { "epoch": 1.97, "learning_rate": 4.838507183982572e-06, "logits/chosen": -2.852663993835449, "logits/rejected": -2.2992279529571533, "logps/chosen": -189.704833984375, "logps/rejected": -285.0195007324219, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.447357654571533, "rewards/margins": 7.157013416290283, "rewards/rejected": -11.604371070861816, "step": 12692 }, { "epoch": 1.97, "learning_rate": 4.837773743451424e-06, "logits/chosen": -1.6998144388198853, "logits/rejected": -2.893371343612671, "logps/chosen": -271.9029846191406, "logps/rejected": -513.1148071289062, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.612525224685669, "rewards/margins": 6.779241561889648, "rewards/rejected": -10.391766548156738, "step": 12693 }, { "epoch": 1.97, "learning_rate": 4.837040302920276e-06, "logits/chosen": -3.016263008117676, "logits/rejected": -3.1055855751037598, "logps/chosen": -259.0100402832031, "logps/rejected": -396.0099182128906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.1166130006313324, "rewards/margins": 11.18207836151123, "rewards/rejected": -11.298690795898438, "step": 12694 }, { "epoch": 1.97, "learning_rate": 4.836306862389128e-06, "logits/chosen": -2.3327476978302, "logits/rejected": -3.3375084400177, "logps/chosen": -96.85247802734375, "logps/rejected": -335.61083984375, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/chosen": -3.375889301300049, "rewards/margins": 5.130906105041504, "rewards/rejected": -8.506794929504395, "step": 12695 }, { "epoch": 1.97, "learning_rate": 4.83557342185798e-06, "logits/chosen": -2.543022871017456, "logits/rejected": -2.9260756969451904, "logps/chosen": -234.87371826171875, "logps/rejected": -249.90374755859375, "loss": 0.7845, "rewards/accuracies": 0.5, "rewards/chosen": -5.031314373016357, "rewards/margins": 3.025254487991333, "rewards/rejected": -8.05656909942627, "step": 12696 }, { "epoch": 1.97, "learning_rate": 4.834839981326832e-06, "logits/chosen": -2.6362531185150146, "logits/rejected": -2.8760323524475098, "logps/chosen": -102.67071533203125, "logps/rejected": -240.7551727294922, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -5.335428237915039, "rewards/margins": 5.381844997406006, "rewards/rejected": -10.717273712158203, "step": 12697 }, { "epoch": 1.97, "learning_rate": 4.8341065407956846e-06, "logits/chosen": -2.704455614089966, "logits/rejected": -1.729573130607605, "logps/chosen": -653.1900024414062, "logps/rejected": -496.66693115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.739032745361328, "rewards/margins": 10.332077980041504, "rewards/rejected": -15.071110725402832, "step": 12698 }, { "epoch": 1.97, "learning_rate": 4.8333731002645364e-06, "logits/chosen": -2.075989007949829, "logits/rejected": -2.8209550380706787, "logps/chosen": -386.2407531738281, "logps/rejected": -709.5133666992188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.069878101348877, "rewards/margins": 10.277362823486328, "rewards/rejected": -13.347241401672363, "step": 12699 }, { "epoch": 1.98, "learning_rate": 4.832639659733388e-06, "logits/chosen": -1.9562046527862549, "logits/rejected": -2.9676856994628906, "logps/chosen": -250.392578125, "logps/rejected": -455.1218566894531, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.7849440574645996, "rewards/margins": 6.672394752502441, "rewards/rejected": -10.4573392868042, "step": 12700 }, { "epoch": 1.98, "learning_rate": 4.83190621920224e-06, "logits/chosen": -2.4812190532684326, "logits/rejected": -2.711061716079712, "logps/chosen": -206.68582153320312, "logps/rejected": -208.10464477539062, "loss": 2.6357, "rewards/accuracies": 0.5, "rewards/chosen": -5.7592291831970215, "rewards/margins": 2.0214996337890625, "rewards/rejected": -7.780728816986084, "step": 12701 }, { "epoch": 1.98, "learning_rate": 4.831172778671093e-06, "logits/chosen": -2.0354270935058594, "logits/rejected": -2.8039886951446533, "logps/chosen": -167.8043670654297, "logps/rejected": -383.92205810546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.948805332183838, "rewards/margins": 7.06533145904541, "rewards/rejected": -13.014137268066406, "step": 12702 }, { "epoch": 1.98, "learning_rate": 4.830439338139945e-06, "logits/chosen": -3.039045810699463, "logits/rejected": -2.4487853050231934, "logps/chosen": -299.1220703125, "logps/rejected": -392.77203369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7352631092071533, "rewards/margins": 15.009843826293945, "rewards/rejected": -17.745105743408203, "step": 12703 }, { "epoch": 1.98, "learning_rate": 4.8297058976087975e-06, "logits/chosen": -2.3582723140716553, "logits/rejected": -2.57126784324646, "logps/chosen": -231.3441619873047, "logps/rejected": -314.90289306640625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -5.959531784057617, "rewards/margins": 5.198368072509766, "rewards/rejected": -11.157899856567383, "step": 12704 }, { "epoch": 1.98, "learning_rate": 4.828972457077649e-06, "logits/chosen": -2.770580768585205, "logits/rejected": -2.935645341873169, "logps/chosen": -191.68911743164062, "logps/rejected": -218.8203125, "loss": 1.0995, "rewards/accuracies": 0.5, "rewards/chosen": -7.691174507141113, "rewards/margins": 1.3561925888061523, "rewards/rejected": -9.047367095947266, "step": 12705 }, { "epoch": 1.98, "learning_rate": 4.828239016546501e-06, "logits/chosen": -3.076251745223999, "logits/rejected": -2.464829921722412, "logps/chosen": -331.2598876953125, "logps/rejected": -313.0790100097656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.7436286211013794, "rewards/margins": 9.567155838012695, "rewards/rejected": -10.310785293579102, "step": 12706 }, { "epoch": 1.98, "learning_rate": 4.827505576015354e-06, "logits/chosen": -2.7220168113708496, "logits/rejected": -3.164259672164917, "logps/chosen": -191.3753662109375, "logps/rejected": -388.00054931640625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -2.8602564334869385, "rewards/margins": 9.591796875, "rewards/rejected": -12.45205307006836, "step": 12707 }, { "epoch": 1.98, "learning_rate": 4.826772135484206e-06, "logits/chosen": -2.5678064823150635, "logits/rejected": -3.126535177230835, "logps/chosen": -267.65264892578125, "logps/rejected": -415.50262451171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2020180225372314, "rewards/margins": 8.947394371032715, "rewards/rejected": -12.149412155151367, "step": 12708 }, { "epoch": 1.98, "learning_rate": 4.826038694953058e-06, "logits/chosen": -2.533409595489502, "logits/rejected": -3.1614725589752197, "logps/chosen": -123.22876739501953, "logps/rejected": -198.69580078125, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": -4.851393699645996, "rewards/margins": 5.415284633636475, "rewards/rejected": -10.266678810119629, "step": 12709 }, { "epoch": 1.98, "learning_rate": 4.82530525442191e-06, "logits/chosen": -2.8466320037841797, "logits/rejected": -2.470813035964966, "logps/chosen": -420.4248962402344, "logps/rejected": -434.24578857421875, "loss": 0.1385, "rewards/accuracies": 1.0, "rewards/chosen": -3.0510506629943848, "rewards/margins": 3.6219303607940674, "rewards/rejected": -6.672981262207031, "step": 12710 }, { "epoch": 1.98, "learning_rate": 4.824571813890762e-06, "logits/chosen": -1.0075327157974243, "logits/rejected": -1.527677059173584, "logps/chosen": -366.0198974609375, "logps/rejected": -351.6004638671875, "loss": 1.3736, "rewards/accuracies": 0.5, "rewards/chosen": -5.770395278930664, "rewards/margins": 4.060107231140137, "rewards/rejected": -9.8305025100708, "step": 12711 }, { "epoch": 1.98, "learning_rate": 4.823838373359614e-06, "logits/chosen": -2.3822243213653564, "logits/rejected": -2.780890464782715, "logps/chosen": -175.10183715820312, "logps/rejected": -193.94003295898438, "loss": 0.0962, "rewards/accuracies": 1.0, "rewards/chosen": -5.36146354675293, "rewards/margins": 3.2970380783081055, "rewards/rejected": -8.658501625061035, "step": 12712 }, { "epoch": 1.98, "learning_rate": 4.823104932828466e-06, "logits/chosen": -2.89363956451416, "logits/rejected": -2.6538965702056885, "logps/chosen": -147.03958129882812, "logps/rejected": -349.2218017578125, "loss": 1.7582, "rewards/accuracies": 0.5, "rewards/chosen": -6.063600540161133, "rewards/margins": 4.766637802124023, "rewards/rejected": -10.830238342285156, "step": 12713 }, { "epoch": 1.98, "learning_rate": 4.822371492297318e-06, "logits/chosen": -3.00124454498291, "logits/rejected": -2.893355369567871, "logps/chosen": -191.1534423828125, "logps/rejected": -273.0850830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2402055263519287, "rewards/margins": 11.299501419067383, "rewards/rejected": -14.53970718383789, "step": 12714 }, { "epoch": 1.98, "learning_rate": 4.821638051766171e-06, "logits/chosen": -1.9396718740463257, "logits/rejected": -2.8519065380096436, "logps/chosen": -177.78988647460938, "logps/rejected": -296.51666259765625, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -4.839277744293213, "rewards/margins": 4.741243362426758, "rewards/rejected": -9.580521583557129, "step": 12715 }, { "epoch": 1.98, "learning_rate": 4.820904611235023e-06, "logits/chosen": -2.807159662246704, "logits/rejected": -2.4679811000823975, "logps/chosen": -751.5396728515625, "logps/rejected": -488.96173095703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.130189418792725, "rewards/margins": 8.168380737304688, "rewards/rejected": -12.29857063293457, "step": 12716 }, { "epoch": 1.98, "learning_rate": 4.8201711707038745e-06, "logits/chosen": -2.339219808578491, "logits/rejected": -2.9862163066864014, "logps/chosen": -107.5257568359375, "logps/rejected": -581.7191772460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8732779026031494, "rewards/margins": 11.307985305786133, "rewards/rejected": -15.18126392364502, "step": 12717 }, { "epoch": 1.98, "learning_rate": 4.819437730172726e-06, "logits/chosen": -2.513978958129883, "logits/rejected": -2.8629088401794434, "logps/chosen": -131.16354370117188, "logps/rejected": -217.6178741455078, "loss": 0.0934, "rewards/accuracies": 1.0, "rewards/chosen": -5.531451225280762, "rewards/margins": 5.623834133148193, "rewards/rejected": -11.155285835266113, "step": 12718 }, { "epoch": 1.98, "learning_rate": 4.818704289641578e-06, "logits/chosen": -2.6637213230133057, "logits/rejected": -2.8441035747528076, "logps/chosen": -253.92176818847656, "logps/rejected": -275.1860656738281, "loss": 0.3775, "rewards/accuracies": 0.5, "rewards/chosen": -4.413093090057373, "rewards/margins": 3.975411891937256, "rewards/rejected": -8.388504981994629, "step": 12719 }, { "epoch": 1.98, "learning_rate": 4.817970849110431e-06, "logits/chosen": -1.9869178533554077, "logits/rejected": -2.9067702293395996, "logps/chosen": -144.9969024658203, "logps/rejected": -365.88311767578125, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -4.131953239440918, "rewards/margins": 7.149898529052734, "rewards/rejected": -11.281850814819336, "step": 12720 }, { "epoch": 1.98, "learning_rate": 4.817237408579284e-06, "logits/chosen": -2.4478418827056885, "logits/rejected": -3.0801444053649902, "logps/chosen": -188.70616149902344, "logps/rejected": -377.55169677734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.4348446130752563, "rewards/margins": 7.964605331420898, "rewards/rejected": -9.399450302124023, "step": 12721 }, { "epoch": 1.98, "learning_rate": 4.8165039680481356e-06, "logits/chosen": -2.9664440155029297, "logits/rejected": -2.2970902919769287, "logps/chosen": -218.73324584960938, "logps/rejected": -191.92892456054688, "loss": 0.1711, "rewards/accuracies": 1.0, "rewards/chosen": -3.5246405601501465, "rewards/margins": 4.90943717956543, "rewards/rejected": -8.434077262878418, "step": 12722 }, { "epoch": 1.98, "learning_rate": 4.8157705275169874e-06, "logits/chosen": -2.1710972785949707, "logits/rejected": -2.856961488723755, "logps/chosen": -240.92019653320312, "logps/rejected": -361.50933837890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.886120796203613, "rewards/margins": 8.109359741210938, "rewards/rejected": -12.99548053741455, "step": 12723 }, { "epoch": 1.98, "learning_rate": 4.81503708698584e-06, "logits/chosen": -2.832510471343994, "logits/rejected": -2.8791158199310303, "logps/chosen": -99.59676361083984, "logps/rejected": -146.52377319335938, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -4.176827907562256, "rewards/margins": 6.917922019958496, "rewards/rejected": -11.094749450683594, "step": 12724 }, { "epoch": 1.98, "learning_rate": 4.814303646454692e-06, "logits/chosen": -2.762005567550659, "logits/rejected": -3.0256576538085938, "logps/chosen": -60.060585021972656, "logps/rejected": -241.74217224121094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3120713233947754, "rewards/margins": 9.100713729858398, "rewards/rejected": -11.412784576416016, "step": 12725 }, { "epoch": 1.98, "learning_rate": 4.813570205923544e-06, "logits/chosen": -2.2208893299102783, "logits/rejected": -2.7700281143188477, "logps/chosen": -115.31461334228516, "logps/rejected": -234.8221435546875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.396385192871094, "rewards/margins": 6.2679033279418945, "rewards/rejected": -10.664288520812988, "step": 12726 }, { "epoch": 1.98, "learning_rate": 4.812836765392396e-06, "logits/chosen": -2.6075549125671387, "logits/rejected": -2.8534345626831055, "logps/chosen": -147.07713317871094, "logps/rejected": -339.823486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.993774890899658, "rewards/margins": 10.89228630065918, "rewards/rejected": -13.886061668395996, "step": 12727 }, { "epoch": 1.98, "learning_rate": 4.812103324861248e-06, "logits/chosen": -1.892133116722107, "logits/rejected": -2.912415027618408, "logps/chosen": -103.2213363647461, "logps/rejected": -300.3550109863281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.8664767742156982, "rewards/margins": 7.986577987670898, "rewards/rejected": -11.853055000305176, "step": 12728 }, { "epoch": 1.98, "learning_rate": 4.8113698843301e-06, "logits/chosen": -2.3513100147247314, "logits/rejected": -2.8641860485076904, "logps/chosen": -157.23080444335938, "logps/rejected": -243.70755004882812, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.555943012237549, "rewards/margins": 6.6513285636901855, "rewards/rejected": -10.207271575927734, "step": 12729 }, { "epoch": 1.98, "learning_rate": 4.810636443798952e-06, "logits/chosen": -2.703057050704956, "logits/rejected": -2.799842357635498, "logps/chosen": -292.0456848144531, "logps/rejected": -322.62774658203125, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -4.945897102355957, "rewards/margins": 5.658730506896973, "rewards/rejected": -10.60462760925293, "step": 12730 }, { "epoch": 1.98, "learning_rate": 4.809903003267804e-06, "logits/chosen": -2.7073018550872803, "logits/rejected": -2.642249822616577, "logps/chosen": -156.57205200195312, "logps/rejected": -322.491943359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.517401695251465, "rewards/margins": 6.449712753295898, "rewards/rejected": -11.967114448547363, "step": 12731 }, { "epoch": 1.98, "learning_rate": 4.809169562736656e-06, "logits/chosen": -2.7267284393310547, "logits/rejected": -3.051856279373169, "logps/chosen": -359.3994445800781, "logps/rejected": -427.4469299316406, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.294773101806641, "rewards/margins": 7.116656303405762, "rewards/rejected": -13.411429405212402, "step": 12732 }, { "epoch": 1.98, "learning_rate": 4.808436122205509e-06, "logits/chosen": -2.8092410564422607, "logits/rejected": -3.1597726345062256, "logps/chosen": -255.62347412109375, "logps/rejected": -428.1221923828125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.147810935974121, "rewards/margins": 6.851151466369629, "rewards/rejected": -10.99896240234375, "step": 12733 }, { "epoch": 1.98, "learning_rate": 4.807702681674361e-06, "logits/chosen": -3.0117862224578857, "logits/rejected": -3.174551248550415, "logps/chosen": -161.48655700683594, "logps/rejected": -283.0276184082031, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.9929847717285156, "rewards/margins": 8.318611145019531, "rewards/rejected": -11.311595916748047, "step": 12734 }, { "epoch": 1.98, "learning_rate": 4.8069692411432125e-06, "logits/chosen": -2.663494348526001, "logits/rejected": -2.970093011856079, "logps/chosen": -168.98098754882812, "logps/rejected": -428.192138671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.412797451019287, "rewards/margins": 10.30662727355957, "rewards/rejected": -12.719425201416016, "step": 12735 }, { "epoch": 1.98, "learning_rate": 4.806235800612064e-06, "logits/chosen": -1.7045173645019531, "logits/rejected": -2.9801666736602783, "logps/chosen": -49.39906311035156, "logps/rejected": -238.58163452148438, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -3.4658238887786865, "rewards/margins": 5.565852642059326, "rewards/rejected": -9.031676292419434, "step": 12736 }, { "epoch": 1.98, "learning_rate": 4.805502360080917e-06, "logits/chosen": -2.981379270553589, "logits/rejected": -2.6299915313720703, "logps/chosen": -485.8246765136719, "logps/rejected": -501.83917236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.037651062011719, "rewards/margins": 11.18750286102295, "rewards/rejected": -17.225154876708984, "step": 12737 }, { "epoch": 1.98, "learning_rate": 4.80476891954977e-06, "logits/chosen": -2.7681844234466553, "logits/rejected": -2.434548854827881, "logps/chosen": -604.220703125, "logps/rejected": -546.3609619140625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.643532752990723, "rewards/margins": 7.333556175231934, "rewards/rejected": -11.977088928222656, "step": 12738 }, { "epoch": 1.98, "learning_rate": 4.804035479018622e-06, "logits/chosen": -1.8953754901885986, "logits/rejected": -2.794365167617798, "logps/chosen": -216.90159606933594, "logps/rejected": -453.9267883300781, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.888815402984619, "rewards/margins": 7.679725646972656, "rewards/rejected": -11.568540573120117, "step": 12739 }, { "epoch": 1.98, "learning_rate": 4.803302038487474e-06, "logits/chosen": -2.3712430000305176, "logits/rejected": -2.3913674354553223, "logps/chosen": -102.721435546875, "logps/rejected": -494.5479431152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.570070505142212, "rewards/margins": 12.471563339233398, "rewards/rejected": -15.041633605957031, "step": 12740 }, { "epoch": 1.98, "learning_rate": 4.8025685979563255e-06, "logits/chosen": -2.007011890411377, "logits/rejected": -2.928610324859619, "logps/chosen": -133.6536865234375, "logps/rejected": -465.2191162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.990223407745361, "rewards/margins": 11.58169174194336, "rewards/rejected": -16.571914672851562, "step": 12741 }, { "epoch": 1.98, "learning_rate": 4.801835157425178e-06, "logits/chosen": -1.7059698104858398, "logits/rejected": -2.92232608795166, "logps/chosen": -235.39515686035156, "logps/rejected": -393.80072021484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6664867401123047, "rewards/margins": 9.302146911621094, "rewards/rejected": -12.968633651733398, "step": 12742 }, { "epoch": 1.98, "learning_rate": 4.80110171689403e-06, "logits/chosen": -1.6947453022003174, "logits/rejected": -2.725804567337036, "logps/chosen": -189.84588623046875, "logps/rejected": -457.866943359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4041779041290283, "rewards/margins": 9.545490264892578, "rewards/rejected": -12.949667930603027, "step": 12743 }, { "epoch": 1.98, "learning_rate": 4.800368276362882e-06, "logits/chosen": -2.8954670429229736, "logits/rejected": -2.0684638023376465, "logps/chosen": -266.88909912109375, "logps/rejected": -327.2633361816406, "loss": 0.6248, "rewards/accuracies": 0.5, "rewards/chosen": -4.925414085388184, "rewards/margins": 3.5470945835113525, "rewards/rejected": -8.472508430480957, "step": 12744 }, { "epoch": 1.98, "learning_rate": 4.799634835831734e-06, "logits/chosen": -2.437758207321167, "logits/rejected": -3.1436028480529785, "logps/chosen": -84.983642578125, "logps/rejected": -312.58367919921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.444714069366455, "rewards/margins": 7.9724907875061035, "rewards/rejected": -10.417204856872559, "step": 12745 }, { "epoch": 1.98, "learning_rate": 4.798901395300586e-06, "logits/chosen": -2.6127889156341553, "logits/rejected": -3.0384175777435303, "logps/chosen": -80.85566711425781, "logps/rejected": -261.044921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.50221848487854, "rewards/margins": 8.4191312789917, "rewards/rejected": -11.92134952545166, "step": 12746 }, { "epoch": 1.98, "learning_rate": 4.7981679547694385e-06, "logits/chosen": -2.9087061882019043, "logits/rejected": -2.0207560062408447, "logps/chosen": -289.7608947753906, "logps/rejected": -256.18365478515625, "loss": 0.0316, "rewards/accuracies": 1.0, "rewards/chosen": -6.922715663909912, "rewards/margins": 4.723514556884766, "rewards/rejected": -11.646230697631836, "step": 12747 }, { "epoch": 1.98, "learning_rate": 4.79743451423829e-06, "logits/chosen": -2.199249505996704, "logits/rejected": -2.9070703983306885, "logps/chosen": -78.19007873535156, "logps/rejected": -228.93844604492188, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/chosen": -6.014873504638672, "rewards/margins": 4.485919952392578, "rewards/rejected": -10.50079345703125, "step": 12748 }, { "epoch": 1.98, "learning_rate": 4.796701073707142e-06, "logits/chosen": -1.0732167959213257, "logits/rejected": -1.9372440576553345, "logps/chosen": -254.5802001953125, "logps/rejected": -498.7094421386719, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.098142147064209, "rewards/margins": 9.922845840454102, "rewards/rejected": -13.020988464355469, "step": 12749 }, { "epoch": 1.98, "learning_rate": 4.795967633175994e-06, "logits/chosen": -2.819973945617676, "logits/rejected": -1.8082947731018066, "logps/chosen": -216.83096313476562, "logps/rejected": -195.74407958984375, "loss": 0.42, "rewards/accuracies": 0.5, "rewards/chosen": -4.63352108001709, "rewards/margins": 4.678341865539551, "rewards/rejected": -9.31186294555664, "step": 12750 }, { "epoch": 1.98, "learning_rate": 4.795234192644847e-06, "logits/chosen": -2.9067270755767822, "logits/rejected": -3.026130199432373, "logps/chosen": -56.684669494628906, "logps/rejected": -264.2137145996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.943678855895996, "rewards/margins": 10.79799747467041, "rewards/rejected": -13.741676330566406, "step": 12751 }, { "epoch": 1.98, "learning_rate": 4.794500752113699e-06, "logits/chosen": -2.5059452056884766, "logits/rejected": -3.006354331970215, "logps/chosen": -143.86207580566406, "logps/rejected": -313.9664001464844, "loss": 0.128, "rewards/accuracies": 1.0, "rewards/chosen": -4.501868724822998, "rewards/margins": 4.594588279724121, "rewards/rejected": -9.096456527709961, "step": 12752 }, { "epoch": 1.98, "learning_rate": 4.7937673115825506e-06, "logits/chosen": -1.9164540767669678, "logits/rejected": -2.672901153564453, "logps/chosen": -427.9868469238281, "logps/rejected": -405.4524841308594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.01431131362915, "rewards/margins": 8.53980541229248, "rewards/rejected": -12.554117202758789, "step": 12753 }, { "epoch": 1.98, "learning_rate": 4.793033871051403e-06, "logits/chosen": -2.800032138824463, "logits/rejected": -1.4958744049072266, "logps/chosen": -230.06951904296875, "logps/rejected": -58.474815368652344, "loss": 4.6759, "rewards/accuracies": 0.0, "rewards/chosen": -8.871390342712402, "rewards/margins": -4.613032817840576, "rewards/rejected": -4.258358001708984, "step": 12754 }, { "epoch": 1.98, "learning_rate": 4.792300430520255e-06, "logits/chosen": -2.591980218887329, "logits/rejected": -2.80129075050354, "logps/chosen": -135.46450805664062, "logps/rejected": -441.50006103515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0070621967315674, "rewards/margins": 9.084458351135254, "rewards/rejected": -12.091520309448242, "step": 12755 }, { "epoch": 1.98, "learning_rate": 4.791566989989108e-06, "logits/chosen": -2.8246917724609375, "logits/rejected": -2.7266621589660645, "logps/chosen": -197.32174682617188, "logps/rejected": -195.59959411621094, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -3.018773078918457, "rewards/margins": 7.501307487487793, "rewards/rejected": -10.52008056640625, "step": 12756 }, { "epoch": 1.98, "learning_rate": 4.79083354945796e-06, "logits/chosen": -2.5186054706573486, "logits/rejected": -2.8704798221588135, "logps/chosen": -107.55514526367188, "logps/rejected": -258.40802001953125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.7540640830993652, "rewards/margins": 7.09282112121582, "rewards/rejected": -10.846885681152344, "step": 12757 }, { "epoch": 1.98, "learning_rate": 4.790100108926812e-06, "logits/chosen": -1.7621318101882935, "logits/rejected": -2.8429477214813232, "logps/chosen": -175.36227416992188, "logps/rejected": -487.6561279296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.099625825881958, "rewards/margins": 10.68809986114502, "rewards/rejected": -12.787725448608398, "step": 12758 }, { "epoch": 1.98, "learning_rate": 4.7893666683956635e-06, "logits/chosen": -2.409791946411133, "logits/rejected": -2.7857131958007812, "logps/chosen": -112.84628295898438, "logps/rejected": -277.54705810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0076751708984375, "rewards/margins": 9.907306671142578, "rewards/rejected": -12.914981842041016, "step": 12759 }, { "epoch": 1.98, "learning_rate": 4.788633227864516e-06, "logits/chosen": -2.982642650604248, "logits/rejected": -3.0067038536071777, "logps/chosen": -368.94207763671875, "logps/rejected": -250.05935668945312, "loss": 0.1104, "rewards/accuracies": 1.0, "rewards/chosen": -5.655696868896484, "rewards/margins": 5.307818412780762, "rewards/rejected": -10.96351432800293, "step": 12760 }, { "epoch": 1.98, "learning_rate": 4.787899787333368e-06, "logits/chosen": -2.979614019393921, "logits/rejected": -2.9277913570404053, "logps/chosen": -266.60113525390625, "logps/rejected": -126.51524353027344, "loss": 1.7675, "rewards/accuracies": 0.5, "rewards/chosen": -7.255480766296387, "rewards/margins": -0.3180224895477295, "rewards/rejected": -6.937458515167236, "step": 12761 }, { "epoch": 1.98, "learning_rate": 4.78716634680222e-06, "logits/chosen": -2.778320074081421, "logits/rejected": -2.939365863800049, "logps/chosen": -97.88076782226562, "logps/rejected": -373.89483642578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.7404463291168213, "rewards/margins": 7.704806804656982, "rewards/rejected": -11.445253372192383, "step": 12762 }, { "epoch": 1.98, "learning_rate": 4.786432906271072e-06, "logits/chosen": -2.4013795852661133, "logits/rejected": -2.9304168224334717, "logps/chosen": -631.0671997070312, "logps/rejected": -545.3065185546875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -3.6430461406707764, "rewards/margins": 6.428105354309082, "rewards/rejected": -10.071151733398438, "step": 12763 }, { "epoch": 1.99, "learning_rate": 4.785699465739925e-06, "logits/chosen": -2.546057939529419, "logits/rejected": -2.915205478668213, "logps/chosen": -609.1405639648438, "logps/rejected": -760.6578369140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.369847297668457, "rewards/margins": 9.098734855651855, "rewards/rejected": -13.468582153320312, "step": 12764 }, { "epoch": 1.99, "learning_rate": 4.7849660252087765e-06, "logits/chosen": -2.732889413833618, "logits/rejected": -2.4143762588500977, "logps/chosen": -720.1797485351562, "logps/rejected": -640.8824462890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.610729455947876, "rewards/margins": 9.779648780822754, "rewards/rejected": -11.39037799835205, "step": 12765 }, { "epoch": 1.99, "learning_rate": 4.784232584677628e-06, "logits/chosen": -1.8356057405471802, "logits/rejected": -2.713292360305786, "logps/chosen": -108.72975158691406, "logps/rejected": -429.38604736328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.148874044418335, "rewards/margins": 8.537826538085938, "rewards/rejected": -11.686700820922852, "step": 12766 }, { "epoch": 1.99, "learning_rate": 4.78349914414648e-06, "logits/chosen": -3.0347743034362793, "logits/rejected": -2.7702414989471436, "logps/chosen": -350.7393798828125, "logps/rejected": -340.02978515625, "loss": 3.333, "rewards/accuracies": 0.5, "rewards/chosen": -9.004600524902344, "rewards/margins": 1.6442492008209229, "rewards/rejected": -10.648849487304688, "step": 12767 }, { "epoch": 1.99, "learning_rate": 4.782765703615332e-06, "logits/chosen": -3.1904919147491455, "logits/rejected": -2.6165671348571777, "logps/chosen": -487.67999267578125, "logps/rejected": -411.95654296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.448505878448486, "rewards/margins": 8.960222244262695, "rewards/rejected": -16.408727645874023, "step": 12768 }, { "epoch": 1.99, "learning_rate": 4.782032263084185e-06, "logits/chosen": -2.533604383468628, "logits/rejected": -1.9338278770446777, "logps/chosen": -424.1287841796875, "logps/rejected": -493.4228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.922967553138733, "rewards/margins": 12.760237693786621, "rewards/rejected": -14.683204650878906, "step": 12769 }, { "epoch": 1.99, "learning_rate": 4.781298822553037e-06, "logits/chosen": -2.377631425857544, "logits/rejected": -2.9341204166412354, "logps/chosen": -169.9261474609375, "logps/rejected": -353.769287109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.2631943225860596, "rewards/margins": 7.940316200256348, "rewards/rejected": -11.203510284423828, "step": 12770 }, { "epoch": 1.99, "learning_rate": 4.7805653820218895e-06, "logits/chosen": -2.698085069656372, "logits/rejected": -2.486177921295166, "logps/chosen": -293.67425537109375, "logps/rejected": -538.0992431640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.172365665435791, "rewards/margins": 9.222272872924805, "rewards/rejected": -14.394638061523438, "step": 12771 }, { "epoch": 1.99, "learning_rate": 4.779831941490741e-06, "logits/chosen": -1.173418402671814, "logits/rejected": -1.5898765325546265, "logps/chosen": -60.238739013671875, "logps/rejected": -180.86422729492188, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -3.501704454421997, "rewards/margins": 5.603389739990234, "rewards/rejected": -9.105093955993652, "step": 12772 }, { "epoch": 1.99, "learning_rate": 4.779098500959594e-06, "logits/chosen": -1.624635100364685, "logits/rejected": -3.0217220783233643, "logps/chosen": -214.23898315429688, "logps/rejected": -431.8463439941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3308653831481934, "rewards/margins": 10.476848602294922, "rewards/rejected": -13.807714462280273, "step": 12773 }, { "epoch": 1.99, "learning_rate": 4.778365060428446e-06, "logits/chosen": -2.989600658416748, "logits/rejected": -2.7902395725250244, "logps/chosen": -346.888427734375, "logps/rejected": -235.28533935546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.350085735321045, "rewards/margins": 9.117339134216309, "rewards/rejected": -12.467424392700195, "step": 12774 }, { "epoch": 1.99, "learning_rate": 4.777631619897298e-06, "logits/chosen": -1.5721756219863892, "logits/rejected": -2.3460772037506104, "logps/chosen": -111.92044067382812, "logps/rejected": -318.7167663574219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.2210029363632202, "rewards/margins": 11.493179321289062, "rewards/rejected": -12.714181900024414, "step": 12775 }, { "epoch": 1.99, "learning_rate": 4.77689817936615e-06, "logits/chosen": -2.9362690448760986, "logits/rejected": -2.7722599506378174, "logps/chosen": -447.86285400390625, "logps/rejected": -366.9464111328125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -3.379958391189575, "rewards/margins": 8.683235168457031, "rewards/rejected": -12.063192367553711, "step": 12776 }, { "epoch": 1.99, "learning_rate": 4.776164738835002e-06, "logits/chosen": -2.781844139099121, "logits/rejected": -2.7385239601135254, "logps/chosen": -339.45135498046875, "logps/rejected": -286.9289855957031, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.536773681640625, "rewards/margins": 6.838509559631348, "rewards/rejected": -10.375283241271973, "step": 12777 }, { "epoch": 1.99, "learning_rate": 4.775431298303854e-06, "logits/chosen": -2.4678282737731934, "logits/rejected": -2.6374049186706543, "logps/chosen": -217.99586486816406, "logps/rejected": -509.55126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7930984497070312, "rewards/margins": 13.806463241577148, "rewards/rejected": -16.59956169128418, "step": 12778 }, { "epoch": 1.99, "learning_rate": 4.774697857772706e-06, "logits/chosen": -2.7188050746917725, "logits/rejected": -1.7913542985916138, "logps/chosen": -157.1364288330078, "logps/rejected": -122.37338256835938, "loss": 0.1323, "rewards/accuracies": 1.0, "rewards/chosen": -5.04414176940918, "rewards/margins": 2.0266571044921875, "rewards/rejected": -7.070798873901367, "step": 12779 }, { "epoch": 1.99, "learning_rate": 4.773964417241558e-06, "logits/chosen": -2.671598196029663, "logits/rejected": -3.136118173599243, "logps/chosen": -112.99662780761719, "logps/rejected": -334.3518371582031, "loss": 1.4015, "rewards/accuracies": 0.5, "rewards/chosen": -6.886197090148926, "rewards/margins": 2.968677043914795, "rewards/rejected": -9.854874610900879, "step": 12780 }, { "epoch": 1.99, "learning_rate": 4.77323097671041e-06, "logits/chosen": -3.1346805095672607, "logits/rejected": -2.87436580657959, "logps/chosen": -343.1815185546875, "logps/rejected": -391.3012390136719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.108334541320801, "rewards/margins": 7.923001289367676, "rewards/rejected": -12.031335830688477, "step": 12781 }, { "epoch": 1.99, "learning_rate": 4.772497536179263e-06, "logits/chosen": -2.4301936626434326, "logits/rejected": -3.017726421356201, "logps/chosen": -72.00182342529297, "logps/rejected": -179.54637145996094, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -2.7429046630859375, "rewards/margins": 6.446568489074707, "rewards/rejected": -9.189474105834961, "step": 12782 }, { "epoch": 1.99, "learning_rate": 4.7717640956481145e-06, "logits/chosen": -2.5970377922058105, "logits/rejected": -2.875004291534424, "logps/chosen": -145.1829833984375, "logps/rejected": -250.96405029296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.856959581375122, "rewards/margins": 7.136923789978027, "rewards/rejected": -9.99388313293457, "step": 12783 }, { "epoch": 1.99, "learning_rate": 4.771030655116966e-06, "logits/chosen": -1.9936836957931519, "logits/rejected": -2.9305663108825684, "logps/chosen": -214.71841430664062, "logps/rejected": -369.3962707519531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.721721649169922, "rewards/margins": 8.543582916259766, "rewards/rejected": -11.265304565429688, "step": 12784 }, { "epoch": 1.99, "learning_rate": 4.770297214585818e-06, "logits/chosen": -2.223353385925293, "logits/rejected": -3.0307154655456543, "logps/chosen": -292.3387756347656, "logps/rejected": -506.06097412109375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -6.425849437713623, "rewards/margins": 5.768411636352539, "rewards/rejected": -12.19426155090332, "step": 12785 }, { "epoch": 1.99, "learning_rate": 4.76956377405467e-06, "logits/chosen": -2.0946202278137207, "logits/rejected": -2.9543023109436035, "logps/chosen": -194.83523559570312, "logps/rejected": -500.7050476074219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8303866386413574, "rewards/margins": 11.496706008911133, "rewards/rejected": -15.327092170715332, "step": 12786 }, { "epoch": 1.99, "learning_rate": 4.768830333523523e-06, "logits/chosen": -2.574460506439209, "logits/rejected": -3.0417749881744385, "logps/chosen": -286.85260009765625, "logps/rejected": -435.6105041503906, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.5402023792266846, "rewards/margins": 8.044076919555664, "rewards/rejected": -11.58427906036377, "step": 12787 }, { "epoch": 1.99, "learning_rate": 4.768096892992376e-06, "logits/chosen": -2.940937042236328, "logits/rejected": -1.5424355268478394, "logps/chosen": -243.4667205810547, "logps/rejected": -122.21190643310547, "loss": 0.4542, "rewards/accuracies": 0.5, "rewards/chosen": -2.4951303005218506, "rewards/margins": 1.3751598596572876, "rewards/rejected": -3.8702902793884277, "step": 12788 }, { "epoch": 1.99, "learning_rate": 4.7673634524612275e-06, "logits/chosen": -3.021850109100342, "logits/rejected": -2.359968662261963, "logps/chosen": -396.318115234375, "logps/rejected": -327.24920654296875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -7.713157653808594, "rewards/margins": 6.9466938972473145, "rewards/rejected": -14.659852027893066, "step": 12789 }, { "epoch": 1.99, "learning_rate": 4.766630011930079e-06, "logits/chosen": -2.5686542987823486, "logits/rejected": -2.995119333267212, "logps/chosen": -203.0702362060547, "logps/rejected": -325.2478332519531, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.7896203994750977, "rewards/margins": 7.456795692443848, "rewards/rejected": -11.246416091918945, "step": 12790 }, { "epoch": 1.99, "learning_rate": 4.765896571398932e-06, "logits/chosen": -2.2656052112579346, "logits/rejected": -3.0580661296844482, "logps/chosen": -198.99356079101562, "logps/rejected": -443.51226806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5126259326934814, "rewards/margins": 12.060603141784668, "rewards/rejected": -14.57322883605957, "step": 12791 }, { "epoch": 1.99, "learning_rate": 4.765163130867784e-06, "logits/chosen": -3.0722172260284424, "logits/rejected": -3.1073808670043945, "logps/chosen": -163.20236206054688, "logps/rejected": -289.400634765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.820255756378174, "rewards/margins": 8.370363235473633, "rewards/rejected": -12.190618515014648, "step": 12792 }, { "epoch": 1.99, "learning_rate": 4.764429690336636e-06, "logits/chosen": -1.8438690900802612, "logits/rejected": -2.751357316970825, "logps/chosen": -129.00146484375, "logps/rejected": -357.9685363769531, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.279856204986572, "rewards/margins": 8.007627487182617, "rewards/rejected": -12.287484169006348, "step": 12793 }, { "epoch": 1.99, "learning_rate": 4.763696249805488e-06, "logits/chosen": -1.8413015604019165, "logits/rejected": -2.780226707458496, "logps/chosen": -49.24030685424805, "logps/rejected": -236.38864135742188, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -3.3810784816741943, "rewards/margins": 4.887716770172119, "rewards/rejected": -8.268795013427734, "step": 12794 }, { "epoch": 1.99, "learning_rate": 4.76296280927434e-06, "logits/chosen": -1.9851056337356567, "logits/rejected": -3.0890471935272217, "logps/chosen": -294.36260986328125, "logps/rejected": -397.8853454589844, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -4.750970840454102, "rewards/margins": 6.570775985717773, "rewards/rejected": -11.321746826171875, "step": 12795 }, { "epoch": 1.99, "learning_rate": 4.762229368743192e-06, "logits/chosen": -3.1408894062042236, "logits/rejected": -3.2886335849761963, "logps/chosen": -50.75086975097656, "logps/rejected": -184.86575317382812, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.601125717163086, "rewards/margins": 7.297057151794434, "rewards/rejected": -8.898183822631836, "step": 12796 }, { "epoch": 1.99, "learning_rate": 4.761495928212044e-06, "logits/chosen": -2.939552068710327, "logits/rejected": -2.974682569503784, "logps/chosen": -285.499267578125, "logps/rejected": -318.7794494628906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.093774795532227, "rewards/margins": 8.685928344726562, "rewards/rejected": -12.779702186584473, "step": 12797 }, { "epoch": 1.99, "learning_rate": 4.760762487680896e-06, "logits/chosen": -2.8994784355163574, "logits/rejected": -1.7451261281967163, "logps/chosen": -349.5708312988281, "logps/rejected": -350.74066162109375, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": -5.303351402282715, "rewards/margins": 4.647976875305176, "rewards/rejected": -9.95132827758789, "step": 12798 }, { "epoch": 1.99, "learning_rate": 4.760029047149748e-06, "logits/chosen": -2.80387544631958, "logits/rejected": -3.003865957260132, "logps/chosen": -551.8360595703125, "logps/rejected": -448.05682373046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.2332658767700195, "rewards/margins": 8.917452812194824, "rewards/rejected": -16.150718688964844, "step": 12799 }, { "epoch": 1.99, "learning_rate": 4.759295606618601e-06, "logits/chosen": -2.664445400238037, "logits/rejected": -1.2527610063552856, "logps/chosen": -798.1051025390625, "logps/rejected": -426.8049621582031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.3907264471054077, "rewards/margins": 10.1671781539917, "rewards/rejected": -11.557905197143555, "step": 12800 }, { "epoch": 1.99, "learning_rate": 4.758562166087453e-06, "logits/chosen": -2.1903018951416016, "logits/rejected": -2.5992698669433594, "logps/chosen": -457.5445556640625, "logps/rejected": -489.1296081542969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.850926399230957, "rewards/margins": 7.772859573364258, "rewards/rejected": -12.623785018920898, "step": 12801 }, { "epoch": 1.99, "learning_rate": 4.7578287255563045e-06, "logits/chosen": -2.672567129135132, "logits/rejected": -2.1214723587036133, "logps/chosen": -164.7684783935547, "logps/rejected": -155.23410034179688, "loss": 0.1414, "rewards/accuracies": 1.0, "rewards/chosen": -5.456062316894531, "rewards/margins": 2.299703598022461, "rewards/rejected": -7.755765914916992, "step": 12802 }, { "epoch": 1.99, "learning_rate": 4.757095285025156e-06, "logits/chosen": -2.603356122970581, "logits/rejected": -3.0667171478271484, "logps/chosen": -104.43658447265625, "logps/rejected": -234.6371307373047, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.337876319885254, "rewards/margins": 7.042484760284424, "rewards/rejected": -10.380361557006836, "step": 12803 }, { "epoch": 1.99, "learning_rate": 4.756361844494009e-06, "logits/chosen": -2.924051523208618, "logits/rejected": -3.250206708908081, "logps/chosen": -224.55857849121094, "logps/rejected": -440.0836181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7827677726745605, "rewards/margins": 10.660561561584473, "rewards/rejected": -14.443328857421875, "step": 12804 }, { "epoch": 1.99, "learning_rate": 4.755628403962862e-06, "logits/chosen": -2.6020703315734863, "logits/rejected": -3.0725908279418945, "logps/chosen": -346.1097412109375, "logps/rejected": -440.01849365234375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -7.461787223815918, "rewards/margins": 7.996997356414795, "rewards/rejected": -15.458784103393555, "step": 12805 }, { "epoch": 1.99, "learning_rate": 4.754894963431714e-06, "logits/chosen": -2.777315378189087, "logits/rejected": -2.9538118839263916, "logps/chosen": -95.07898712158203, "logps/rejected": -201.14190673828125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.599820137023926, "rewards/margins": 6.34031343460083, "rewards/rejected": -9.940134048461914, "step": 12806 }, { "epoch": 1.99, "learning_rate": 4.7541615229005656e-06, "logits/chosen": -2.6269171237945557, "logits/rejected": -2.868725061416626, "logps/chosen": -125.30851745605469, "logps/rejected": -278.097900390625, "loss": 0.0308, "rewards/accuracies": 1.0, "rewards/chosen": -4.6301069259643555, "rewards/margins": 4.378729820251465, "rewards/rejected": -9.00883674621582, "step": 12807 }, { "epoch": 1.99, "learning_rate": 4.7534280823694174e-06, "logits/chosen": -2.4788572788238525, "logits/rejected": -2.9838435649871826, "logps/chosen": -202.1473388671875, "logps/rejected": -385.108154296875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -2.414886474609375, "rewards/margins": 7.60073709487915, "rewards/rejected": -10.015623092651367, "step": 12808 }, { "epoch": 1.99, "learning_rate": 4.75269464183827e-06, "logits/chosen": -2.1295690536499023, "logits/rejected": -2.9757273197174072, "logps/chosen": -123.54841613769531, "logps/rejected": -358.58502197265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.8409016132354736, "rewards/margins": 8.85696792602539, "rewards/rejected": -12.697870254516602, "step": 12809 }, { "epoch": 1.99, "learning_rate": 4.751961201307122e-06, "logits/chosen": -1.565982699394226, "logits/rejected": -2.5514564514160156, "logps/chosen": -159.0661163330078, "logps/rejected": -438.9088134765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.617473602294922, "rewards/margins": 12.596214294433594, "rewards/rejected": -15.213687896728516, "step": 12810 }, { "epoch": 1.99, "learning_rate": 4.751227760775974e-06, "logits/chosen": -2.7880351543426514, "logits/rejected": -2.9488089084625244, "logps/chosen": -283.2200927734375, "logps/rejected": -371.61474609375, "loss": 1.2647, "rewards/accuracies": 0.5, "rewards/chosen": -11.60506820678711, "rewards/margins": 1.630845069885254, "rewards/rejected": -13.23591423034668, "step": 12811 }, { "epoch": 1.99, "learning_rate": 4.750494320244826e-06, "logits/chosen": -2.9199211597442627, "logits/rejected": -2.8701446056365967, "logps/chosen": -188.3968048095703, "logps/rejected": -200.74757385253906, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/chosen": -5.296884536743164, "rewards/margins": 4.394621849060059, "rewards/rejected": -9.691506385803223, "step": 12812 }, { "epoch": 1.99, "learning_rate": 4.7497608797136785e-06, "logits/chosen": -2.8883657455444336, "logits/rejected": -2.7494583129882812, "logps/chosen": -277.1285705566406, "logps/rejected": -181.51255798339844, "loss": 0.1077, "rewards/accuracies": 1.0, "rewards/chosen": -3.8038535118103027, "rewards/margins": 3.6316232681274414, "rewards/rejected": -7.435477256774902, "step": 12813 }, { "epoch": 1.99, "learning_rate": 4.74902743918253e-06, "logits/chosen": -1.2512727975845337, "logits/rejected": -2.958686351776123, "logps/chosen": -144.38746643066406, "logps/rejected": -534.3026733398438, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.780133247375488, "rewards/margins": 7.4638166427612305, "rewards/rejected": -14.243949890136719, "step": 12814 }, { "epoch": 1.99, "learning_rate": 4.748293998651382e-06, "logits/chosen": -2.236549139022827, "logits/rejected": -2.9941697120666504, "logps/chosen": -346.4307861328125, "logps/rejected": -609.5143432617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5409600734710693, "rewards/margins": 13.184708595275879, "rewards/rejected": -16.725669860839844, "step": 12815 }, { "epoch": 1.99, "learning_rate": 4.747560558120234e-06, "logits/chosen": -2.0291075706481934, "logits/rejected": -2.7978806495666504, "logps/chosen": -107.60855865478516, "logps/rejected": -216.62677001953125, "loss": 0.0294, "rewards/accuracies": 1.0, "rewards/chosen": -5.804874420166016, "rewards/margins": 4.195652961730957, "rewards/rejected": -10.000528335571289, "step": 12816 }, { "epoch": 1.99, "learning_rate": 4.746827117589086e-06, "logits/chosen": -2.766571521759033, "logits/rejected": -2.8848838806152344, "logps/chosen": -46.74602508544922, "logps/rejected": -212.14230346679688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7957892417907715, "rewards/margins": 9.597658157348633, "rewards/rejected": -12.393447875976562, "step": 12817 }, { "epoch": 1.99, "learning_rate": 4.746093677057939e-06, "logits/chosen": -2.7954468727111816, "logits/rejected": -2.622483015060425, "logps/chosen": -284.51708984375, "logps/rejected": -333.5124206542969, "loss": 0.6521, "rewards/accuracies": 0.5, "rewards/chosen": -6.740756511688232, "rewards/margins": 3.643014430999756, "rewards/rejected": -10.383770942687988, "step": 12818 }, { "epoch": 1.99, "learning_rate": 4.745360236526791e-06, "logits/chosen": -2.9119460582733154, "logits/rejected": -3.1121749877929688, "logps/chosen": -170.34083557128906, "logps/rejected": -341.53387451171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.209484577178955, "rewards/margins": 6.7446184158325195, "rewards/rejected": -11.954103469848633, "step": 12819 }, { "epoch": 1.99, "learning_rate": 4.7446267959956425e-06, "logits/chosen": -2.4731528759002686, "logits/rejected": -2.815150499343872, "logps/chosen": -412.22503662109375, "logps/rejected": -517.314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.037187099456787, "rewards/margins": 11.283123970031738, "rewards/rejected": -14.320310592651367, "step": 12820 }, { "epoch": 1.99, "learning_rate": 4.743893355464495e-06, "logits/chosen": -1.8921849727630615, "logits/rejected": -2.9334399700164795, "logps/chosen": -84.53754425048828, "logps/rejected": -326.52923583984375, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -4.793521404266357, "rewards/margins": 6.750187873840332, "rewards/rejected": -11.543709754943848, "step": 12821 }, { "epoch": 1.99, "learning_rate": 4.743159914933348e-06, "logits/chosen": -2.6408212184906006, "logits/rejected": -1.5964672565460205, "logps/chosen": -190.21841430664062, "logps/rejected": -163.6318359375, "loss": 1.4196, "rewards/accuracies": 0.5, "rewards/chosen": -7.328461647033691, "rewards/margins": 2.9387238025665283, "rewards/rejected": -10.26718521118164, "step": 12822 }, { "epoch": 1.99, "learning_rate": 4.7424264744022e-06, "logits/chosen": -2.421841859817505, "logits/rejected": -2.85783052444458, "logps/chosen": -108.9503173828125, "logps/rejected": -307.1934509277344, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -4.6744537353515625, "rewards/margins": 4.1970930099487305, "rewards/rejected": -8.871546745300293, "step": 12823 }, { "epoch": 1.99, "learning_rate": 4.741693033871052e-06, "logits/chosen": -1.4124284982681274, "logits/rejected": -2.699119806289673, "logps/chosen": -116.97563171386719, "logps/rejected": -307.90594482421875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.52086067199707, "rewards/margins": 7.533833026885986, "rewards/rejected": -14.054693222045898, "step": 12824 }, { "epoch": 1.99, "learning_rate": 4.740959593339904e-06, "logits/chosen": -2.7980129718780518, "logits/rejected": -2.722165584564209, "logps/chosen": -259.68505859375, "logps/rejected": -426.68218994140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.6851534843444824, "rewards/margins": 9.839567184448242, "rewards/rejected": -13.524721145629883, "step": 12825 }, { "epoch": 1.99, "learning_rate": 4.7402261528087555e-06, "logits/chosen": -2.7024118900299072, "logits/rejected": -2.0083110332489014, "logps/chosen": -256.755126953125, "logps/rejected": -176.12631225585938, "loss": 1.3442, "rewards/accuracies": 0.5, "rewards/chosen": -5.38557767868042, "rewards/margins": 2.844541072845459, "rewards/rejected": -8.230118751525879, "step": 12826 }, { "epoch": 1.99, "learning_rate": 4.739492712277608e-06, "logits/chosen": -2.550104856491089, "logits/rejected": -3.068138837814331, "logps/chosen": -92.13964080810547, "logps/rejected": -201.71942138671875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.848757266998291, "rewards/margins": 5.808712005615234, "rewards/rejected": -8.657469749450684, "step": 12827 }, { "epoch": 2.0, "learning_rate": 4.73875927174646e-06, "logits/chosen": -1.675382137298584, "logits/rejected": -3.140476703643799, "logps/chosen": -263.7208251953125, "logps/rejected": -527.2454833984375, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -4.366710186004639, "rewards/margins": 7.4390106201171875, "rewards/rejected": -11.805721282958984, "step": 12828 }, { "epoch": 2.0, "learning_rate": 4.738025831215312e-06, "logits/chosen": -2.894493579864502, "logits/rejected": -2.7957749366760254, "logps/chosen": -425.3494567871094, "logps/rejected": -853.1651611328125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.78394889831543, "rewards/margins": 6.9850006103515625, "rewards/rejected": -13.768949508666992, "step": 12829 }, { "epoch": 2.0, "learning_rate": 4.737292390684164e-06, "logits/chosen": -1.927592396736145, "logits/rejected": -3.0715115070343018, "logps/chosen": -120.98487091064453, "logps/rejected": -438.3725280761719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.303462505340576, "rewards/margins": 11.32055950164795, "rewards/rejected": -15.624021530151367, "step": 12830 }, { "epoch": 2.0, "learning_rate": 4.7365589501530166e-06, "logits/chosen": -2.695718288421631, "logits/rejected": -2.703946352005005, "logps/chosen": -182.90687561035156, "logps/rejected": -413.9231872558594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7705516815185547, "rewards/margins": 10.488940238952637, "rewards/rejected": -14.259491920471191, "step": 12831 }, { "epoch": 2.0, "learning_rate": 4.7358255096218684e-06, "logits/chosen": -2.9716248512268066, "logits/rejected": -2.7807843685150146, "logps/chosen": -458.802978515625, "logps/rejected": -417.47259521484375, "loss": 0.2171, "rewards/accuracies": 1.0, "rewards/chosen": -5.232522964477539, "rewards/margins": 5.176846504211426, "rewards/rejected": -10.409369468688965, "step": 12832 }, { "epoch": 2.0, "learning_rate": 4.73509206909072e-06, "logits/chosen": -2.0628628730773926, "logits/rejected": -2.774390697479248, "logps/chosen": -153.85693359375, "logps/rejected": -256.65142822265625, "loss": 0.6425, "rewards/accuracies": 0.5, "rewards/chosen": -6.089940547943115, "rewards/margins": 2.2945358753204346, "rewards/rejected": -8.384476661682129, "step": 12833 }, { "epoch": 2.0, "learning_rate": 4.734358628559572e-06, "logits/chosen": -1.969156265258789, "logits/rejected": -2.622443675994873, "logps/chosen": -181.9412841796875, "logps/rejected": -392.3094482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.382156848907471, "rewards/margins": 10.115608215332031, "rewards/rejected": -14.497764587402344, "step": 12834 }, { "epoch": 2.0, "learning_rate": 4.733625188028424e-06, "logits/chosen": -2.706512689590454, "logits/rejected": -2.958618402481079, "logps/chosen": -124.14447021484375, "logps/rejected": -330.332763671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.0137240886688232, "rewards/margins": 9.106363296508789, "rewards/rejected": -12.120087623596191, "step": 12835 }, { "epoch": 2.0, "learning_rate": 4.732891747497277e-06, "logits/chosen": -1.9093841314315796, "logits/rejected": -2.70664381980896, "logps/chosen": -278.7639465332031, "logps/rejected": -356.07647705078125, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -3.907823324203491, "rewards/margins": 6.052865505218506, "rewards/rejected": -9.960689544677734, "step": 12836 }, { "epoch": 2.0, "learning_rate": 4.732158306966129e-06, "logits/chosen": -2.748483896255493, "logits/rejected": -2.622796058654785, "logps/chosen": -193.63616943359375, "logps/rejected": -411.18450927734375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -5.407629013061523, "rewards/margins": 7.869072437286377, "rewards/rejected": -13.276700973510742, "step": 12837 }, { "epoch": 2.0, "learning_rate": 4.731424866434981e-06, "logits/chosen": -2.58493971824646, "logits/rejected": -3.0178751945495605, "logps/chosen": -108.06661224365234, "logps/rejected": -224.14727783203125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.00259256362915, "rewards/margins": 6.2873430252075195, "rewards/rejected": -10.289936065673828, "step": 12838 }, { "epoch": 2.0, "learning_rate": 4.730691425903833e-06, "logits/chosen": -1.786249041557312, "logits/rejected": -2.6923036575317383, "logps/chosen": -74.48884582519531, "logps/rejected": -263.56732177734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.218552827835083, "rewards/margins": 9.770252227783203, "rewards/rejected": -12.988804817199707, "step": 12839 }, { "epoch": 2.0, "learning_rate": 4.729957985372686e-06, "logits/chosen": -2.750825881958008, "logits/rejected": -2.0138933658599854, "logps/chosen": -413.6393127441406, "logps/rejected": -519.4319458007812, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.887589454650879, "rewards/margins": 5.927567958831787, "rewards/rejected": -10.815156936645508, "step": 12840 }, { "epoch": 2.0, "learning_rate": 4.729224544841538e-06, "logits/chosen": -3.121767044067383, "logits/rejected": -2.44099760055542, "logps/chosen": -169.79791259765625, "logps/rejected": -50.293575286865234, "loss": 1.9029, "rewards/accuracies": 0.0, "rewards/chosen": -5.232048511505127, "rewards/margins": -1.6566437482833862, "rewards/rejected": -3.575404644012451, "step": 12841 }, { "epoch": 2.0, "learning_rate": 4.72849110431039e-06, "logits/chosen": -2.943924903869629, "logits/rejected": -3.011403799057007, "logps/chosen": -51.759857177734375, "logps/rejected": -220.12596130371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4308643341064453, "rewards/margins": 9.559833526611328, "rewards/rejected": -11.990697860717773, "step": 12842 }, { "epoch": 2.0, "learning_rate": 4.727757663779242e-06, "logits/chosen": -2.7157247066497803, "logits/rejected": -2.1906471252441406, "logps/chosen": -419.04656982421875, "logps/rejected": -379.0020751953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4577102661132812, "rewards/margins": 9.250676155090332, "rewards/rejected": -12.708386421203613, "step": 12843 }, { "epoch": 2.0, "learning_rate": 4.7270242232480935e-06, "logits/chosen": -2.804004192352295, "logits/rejected": -2.8279104232788086, "logps/chosen": -403.685302734375, "logps/rejected": -450.0585632324219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.183941841125488, "rewards/margins": 10.561728477478027, "rewards/rejected": -15.745670318603516, "step": 12844 }, { "epoch": 2.0, "learning_rate": 4.726290782716946e-06, "logits/chosen": -0.6004499197006226, "logits/rejected": -1.6642224788665771, "logps/chosen": -180.79335021972656, "logps/rejected": -462.3244934082031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.387109756469727, "rewards/margins": 9.257723808288574, "rewards/rejected": -13.6448335647583, "step": 12845 }, { "epoch": 2.0, "learning_rate": 4.725557342185798e-06, "logits/chosen": -2.77668833732605, "logits/rejected": -2.2316553592681885, "logps/chosen": -380.26171875, "logps/rejected": -526.8861083984375, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -7.686200141906738, "rewards/margins": 4.899444580078125, "rewards/rejected": -12.585644721984863, "step": 12846 }, { "epoch": 2.0, "learning_rate": 4.72482390165465e-06, "logits/chosen": -3.1459476947784424, "logits/rejected": -3.1827566623687744, "logps/chosen": -197.160400390625, "logps/rejected": -330.1239929199219, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -5.209451675415039, "rewards/margins": 5.566832542419434, "rewards/rejected": -10.776284217834473, "step": 12847 }, { "epoch": 2.0, "learning_rate": 4.724090461123502e-06, "logits/chosen": -1.9245847463607788, "logits/rejected": -3.1416003704071045, "logps/chosen": -83.12307739257812, "logps/rejected": -346.89453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.401605606079102, "rewards/margins": 8.745145797729492, "rewards/rejected": -13.146751403808594, "step": 12848 }, { "epoch": 2.0, "learning_rate": 4.723357020592355e-06, "logits/chosen": -2.7344560623168945, "logits/rejected": -2.2926790714263916, "logps/chosen": -299.7945556640625, "logps/rejected": -315.51776123046875, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -3.8994507789611816, "rewards/margins": 8.109583854675293, "rewards/rejected": -12.009034156799316, "step": 12849 }, { "epoch": 2.0, "learning_rate": 4.7226235800612065e-06, "logits/chosen": -1.9663769006729126, "logits/rejected": -1.9702683687210083, "logps/chosen": -549.5943603515625, "logps/rejected": -345.0158386230469, "loss": 0.577, "rewards/accuracies": 0.5, "rewards/chosen": -2.6337554454803467, "rewards/margins": 7.039958953857422, "rewards/rejected": -9.673714637756348, "step": 12850 }, { "epoch": 2.0, "learning_rate": 4.721890139530058e-06, "logits/chosen": -1.138938069343567, "logits/rejected": -1.5465285778045654, "logps/chosen": -214.82373046875, "logps/rejected": -472.2148742675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.106166124343872, "rewards/margins": 10.810772895812988, "rewards/rejected": -12.916938781738281, "step": 12851 }, { "epoch": 2.0, "learning_rate": 4.72115669899891e-06, "logits/chosen": -1.9995968341827393, "logits/rejected": -2.711467742919922, "logps/chosen": -189.05081176757812, "logps/rejected": -310.990478515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.756853103637695, "rewards/margins": 6.774479866027832, "rewards/rejected": -11.531333923339844, "step": 12852 }, { "epoch": 2.0, "learning_rate": 4.720423258467762e-06, "logits/chosen": -2.5654423236846924, "logits/rejected": -2.85677170753479, "logps/chosen": -584.652587890625, "logps/rejected": -545.089111328125, "loss": 1.6165, "rewards/accuracies": 0.5, "rewards/chosen": -7.135287284851074, "rewards/margins": 3.558803081512451, "rewards/rejected": -10.694089889526367, "step": 12853 }, { "epoch": 2.0, "learning_rate": 4.719689817936615e-06, "logits/chosen": -1.754127025604248, "logits/rejected": -2.8985629081726074, "logps/chosen": -92.03951263427734, "logps/rejected": -471.00732421875, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -4.49479341506958, "rewards/margins": 9.293840408325195, "rewards/rejected": -13.788633346557617, "step": 12854 }, { "epoch": 2.0, "learning_rate": 4.7189563774054676e-06, "logits/chosen": -3.223526954650879, "logits/rejected": -3.1336405277252197, "logps/chosen": -111.65564727783203, "logps/rejected": -111.45279693603516, "loss": 0.0842, "rewards/accuracies": 1.0, "rewards/chosen": -2.878378391265869, "rewards/margins": 3.7716524600982666, "rewards/rejected": -6.650031089782715, "step": 12855 }, { "epoch": 2.0, "learning_rate": 4.7182229368743194e-06, "logits/chosen": -2.7684683799743652, "logits/rejected": -2.420707941055298, "logps/chosen": -187.93223571777344, "logps/rejected": -197.81504821777344, "loss": 0.2062, "rewards/accuracies": 1.0, "rewards/chosen": -7.646791934967041, "rewards/margins": 2.541409969329834, "rewards/rejected": -10.188201904296875, "step": 12856 }, { "epoch": 2.0, "learning_rate": 4.717489496343171e-06, "logits/chosen": -2.8484601974487305, "logits/rejected": -2.763421058654785, "logps/chosen": -297.80340576171875, "logps/rejected": -241.08056640625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -5.510438919067383, "rewards/margins": 5.002802848815918, "rewards/rejected": -10.5132417678833, "step": 12857 }, { "epoch": 2.0, "learning_rate": 4.716756055812024e-06, "logits/chosen": -2.1692206859588623, "logits/rejected": -2.9810678958892822, "logps/chosen": -78.83900451660156, "logps/rejected": -280.1297607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8507351875305176, "rewards/margins": 10.229040145874023, "rewards/rejected": -13.0797758102417, "step": 12858 }, { "epoch": 2.0, "learning_rate": 4.716022615280876e-06, "logits/chosen": -2.739415168762207, "logits/rejected": -2.9535043239593506, "logps/chosen": -302.8387451171875, "logps/rejected": -463.78070068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6940488815307617, "rewards/margins": 11.973751068115234, "rewards/rejected": -15.667799949645996, "step": 12859 }, { "epoch": 2.0, "learning_rate": 4.715289174749728e-06, "logits/chosen": -3.149690866470337, "logits/rejected": -2.7798120975494385, "logps/chosen": -1164.905029296875, "logps/rejected": -634.3836669921875, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/chosen": -5.233716011047363, "rewards/margins": 3.8179750442504883, "rewards/rejected": -9.051691055297852, "step": 12860 }, { "epoch": 2.0, "learning_rate": 4.71455573421858e-06, "logits/chosen": -2.5526375770568848, "logits/rejected": -2.0068626403808594, "logps/chosen": -172.80458068847656, "logps/rejected": -250.32321166992188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2890872955322266, "rewards/margins": 8.956851959228516, "rewards/rejected": -11.245939254760742, "step": 12861 }, { "epoch": 2.0, "learning_rate": 4.713822293687432e-06, "logits/chosen": -2.0036699771881104, "logits/rejected": -2.9046261310577393, "logps/chosen": -236.54751586914062, "logps/rejected": -357.37274169921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.010142803192139, "rewards/margins": 8.410078048706055, "rewards/rejected": -12.420221328735352, "step": 12862 }, { "epoch": 2.0, "learning_rate": 4.713088853156284e-06, "logits/chosen": -2.220228672027588, "logits/rejected": -2.8262710571289062, "logps/chosen": -449.401123046875, "logps/rejected": -551.8395385742188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.383172988891602, "rewards/margins": 10.089197158813477, "rewards/rejected": -15.472370147705078, "step": 12863 }, { "epoch": 2.0, "learning_rate": 4.712355412625136e-06, "logits/chosen": -2.0554590225219727, "logits/rejected": -2.9129819869995117, "logps/chosen": -202.8641357421875, "logps/rejected": -442.95660400390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.676374435424805, "rewards/margins": 6.98646354675293, "rewards/rejected": -12.662837982177734, "step": 12864 }, { "epoch": 2.0, "learning_rate": 4.711621972093988e-06, "logits/chosen": -2.7329537868499756, "logits/rejected": -2.564967393875122, "logps/chosen": -391.016357421875, "logps/rejected": -634.321044921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.500262498855591, "rewards/margins": 8.807450294494629, "rewards/rejected": -11.30771255493164, "step": 12865 }, { "epoch": 2.0, "learning_rate": 4.71088853156284e-06, "logits/chosen": -2.8190524578094482, "logits/rejected": -2.9664318561553955, "logps/chosen": -232.37594604492188, "logps/rejected": -352.4485168457031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.88208270072937, "rewards/margins": 7.9537577629089355, "rewards/rejected": -11.835840225219727, "step": 12866 }, { "epoch": 2.0, "learning_rate": 4.710155091031693e-06, "logits/chosen": -1.7101325988769531, "logits/rejected": -2.9042129516601562, "logps/chosen": -98.62968444824219, "logps/rejected": -393.5938720703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.643059730529785, "rewards/margins": 10.602361679077148, "rewards/rejected": -14.245420455932617, "step": 12867 }, { "epoch": 2.0, "learning_rate": 4.7094216505005445e-06, "logits/chosen": -2.301812171936035, "logits/rejected": -3.1009128093719482, "logps/chosen": -47.12799835205078, "logps/rejected": -387.43603515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.1544995307922363, "rewards/margins": 10.520492553710938, "rewards/rejected": -13.674991607666016, "step": 12868 }, { "epoch": 2.0, "learning_rate": 4.708688209969396e-06, "logits/chosen": -2.6427783966064453, "logits/rejected": -2.468271255493164, "logps/chosen": -400.3617248535156, "logps/rejected": -457.817626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0377559661865234, "rewards/margins": 13.879457473754883, "rewards/rejected": -16.917213439941406, "step": 12869 }, { "epoch": 2.0, "learning_rate": 4.707954769438248e-06, "logits/chosen": -2.6636154651641846, "logits/rejected": -2.760650396347046, "logps/chosen": -450.95989990234375, "logps/rejected": -530.5736083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.586184501647949, "rewards/margins": 10.650785446166992, "rewards/rejected": -15.236970901489258, "step": 12870 }, { "epoch": 2.0, "learning_rate": 4.707221328907101e-06, "logits/chosen": -1.160249948501587, "logits/rejected": -2.616628408432007, "logps/chosen": -162.44119262695312, "logps/rejected": -386.56427001953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.0179924964904785, "rewards/margins": 8.358431816101074, "rewards/rejected": -13.376423835754395, "step": 12871 }, { "epoch": 2.0, "learning_rate": 4.706487888375954e-06, "logits/chosen": -2.498305320739746, "logits/rejected": -2.648040294647217, "logps/chosen": -290.56158447265625, "logps/rejected": -399.1192321777344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4812264442443848, "rewards/margins": 11.36522102355957, "rewards/rejected": -13.846446990966797, "step": 12872 }, { "epoch": 2.0, "learning_rate": 4.705754447844806e-06, "logits/chosen": -2.326704263687134, "logits/rejected": -3.35396671295166, "logps/chosen": -327.3240661621094, "logps/rejected": -394.2452392578125, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/chosen": -2.3939619064331055, "rewards/margins": 6.288632392883301, "rewards/rejected": -8.682594299316406, "step": 12873 }, { "epoch": 2.0, "learning_rate": 4.7050210073136575e-06, "logits/chosen": -3.3092408180236816, "logits/rejected": -3.132842540740967, "logps/chosen": -122.75105285644531, "logps/rejected": -124.27629089355469, "loss": 0.2234, "rewards/accuracies": 1.0, "rewards/chosen": -3.574028730392456, "rewards/margins": 4.0444254875183105, "rewards/rejected": -7.6184539794921875, "step": 12874 }, { "epoch": 2.0, "learning_rate": 4.704287566782509e-06, "logits/chosen": -2.8483834266662598, "logits/rejected": -3.08842396736145, "logps/chosen": -157.02151489257812, "logps/rejected": -538.4722290039062, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.086679458618164, "rewards/margins": 6.7614312171936035, "rewards/rejected": -11.84811019897461, "step": 12875 }, { "epoch": 2.0, "learning_rate": 4.703554126251362e-06, "logits/chosen": -1.6230462789535522, "logits/rejected": -2.907217264175415, "logps/chosen": -153.1231231689453, "logps/rejected": -496.76910400390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3808536529541016, "rewards/margins": 11.374675750732422, "rewards/rejected": -14.755529403686523, "step": 12876 }, { "epoch": 2.0, "learning_rate": 4.702820685720214e-06, "logits/chosen": -2.921731948852539, "logits/rejected": -1.5604032278060913, "logps/chosen": -279.9173278808594, "logps/rejected": -227.33453369140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.4849988222122192, "rewards/margins": 9.171690940856934, "rewards/rejected": -10.656689643859863, "step": 12877 }, { "epoch": 2.0, "learning_rate": 4.702087245189066e-06, "logits/chosen": -2.165226936340332, "logits/rejected": -2.811892032623291, "logps/chosen": -129.5194091796875, "logps/rejected": -202.21047973632812, "loss": 1.0075, "rewards/accuracies": 0.5, "rewards/chosen": -5.0828657150268555, "rewards/margins": 2.5663037300109863, "rewards/rejected": -7.649169921875, "step": 12878 }, { "epoch": 2.0, "learning_rate": 4.701353804657918e-06, "logits/chosen": -1.8788108825683594, "logits/rejected": -2.9744772911071777, "logps/chosen": -89.18798828125, "logps/rejected": -388.9078369140625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.221055030822754, "rewards/margins": 7.11021614074707, "rewards/rejected": -12.33127212524414, "step": 12879 }, { "epoch": 2.0, "learning_rate": 4.7006203641267705e-06, "logits/chosen": -2.0300633907318115, "logits/rejected": -2.996492385864258, "logps/chosen": -356.99945068359375, "logps/rejected": -474.5452880859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.564443111419678, "rewards/margins": 7.067803382873535, "rewards/rejected": -11.632246017456055, "step": 12880 }, { "epoch": 2.0, "learning_rate": 4.699886923595622e-06, "logits/chosen": -3.2583515644073486, "logits/rejected": -3.173438787460327, "logps/chosen": -505.23773193359375, "logps/rejected": -380.8079833984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.59317684173584, "rewards/margins": 9.2762451171875, "rewards/rejected": -14.86942195892334, "step": 12881 }, { "epoch": 2.0, "learning_rate": 4.699153483064474e-06, "logits/chosen": -2.629976987838745, "logits/rejected": -2.704106092453003, "logps/chosen": -309.016845703125, "logps/rejected": -286.3485107421875, "loss": 0.0818, "rewards/accuracies": 1.0, "rewards/chosen": -2.3540937900543213, "rewards/margins": 5.789037704467773, "rewards/rejected": -8.143131256103516, "step": 12882 }, { "epoch": 2.0, "learning_rate": 4.698420042533326e-06, "logits/chosen": -2.652803897857666, "logits/rejected": -2.7419064044952393, "logps/chosen": -152.1766815185547, "logps/rejected": -254.404541015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.46311354637146, "rewards/margins": 7.959317207336426, "rewards/rejected": -10.422430038452148, "step": 12883 }, { "epoch": 2.0, "learning_rate": 4.697686602002178e-06, "logits/chosen": -2.043058395385742, "logits/rejected": -2.691887617111206, "logps/chosen": -284.4039611816406, "logps/rejected": -445.7261657714844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.279771566390991, "rewards/margins": 11.110827445983887, "rewards/rejected": -14.39059829711914, "step": 12884 }, { "epoch": 2.0, "learning_rate": 4.696953161471031e-06, "logits/chosen": -2.496821641921997, "logits/rejected": -3.044322967529297, "logps/chosen": -182.3596954345703, "logps/rejected": -248.18170166015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.0584341287612915, "rewards/margins": 8.544132232666016, "rewards/rejected": -9.602566719055176, "step": 12885 }, { "epoch": 2.0, "learning_rate": 4.6962197209398826e-06, "logits/chosen": -1.324205756187439, "logits/rejected": -3.1643433570861816, "logps/chosen": -122.36228942871094, "logps/rejected": -500.195068359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.850415229797363, "rewards/margins": 7.777200222015381, "rewards/rejected": -13.627615928649902, "step": 12886 }, { "epoch": 2.0, "learning_rate": 4.6954862804087345e-06, "logits/chosen": -2.9150469303131104, "logits/rejected": -1.700596570968628, "logps/chosen": -665.8255004882812, "logps/rejected": -432.42486572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.145990014076233, "rewards/margins": 10.153783798217773, "rewards/rejected": -11.299773216247559, "step": 12887 }, { "epoch": 2.0, "learning_rate": 4.694752839877587e-06, "logits/chosen": -2.532646656036377, "logits/rejected": -3.178636312484741, "logps/chosen": -99.37873840332031, "logps/rejected": -260.70098876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.5664501190185547, "rewards/margins": 8.298893928527832, "rewards/rejected": -11.865345001220703, "step": 12888 }, { "epoch": 2.0, "learning_rate": 4.69401939934644e-06, "logits/chosen": -2.2095906734466553, "logits/rejected": -3.059144973754883, "logps/chosen": -96.54354858398438, "logps/rejected": -375.7564697265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.2063417434692383, "rewards/margins": 7.267198085784912, "rewards/rejected": -9.473539352416992, "step": 12889 }, { "epoch": 2.0, "learning_rate": 4.693285958815292e-06, "logits/chosen": -2.7356019020080566, "logits/rejected": -2.5071113109588623, "logps/chosen": -273.939697265625, "logps/rejected": -339.7157897949219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.720755100250244, "rewards/margins": 8.715206146240234, "rewards/rejected": -12.43596076965332, "step": 12890 }, { "epoch": 2.0, "learning_rate": 4.692552518284144e-06, "logits/chosen": -1.7569644451141357, "logits/rejected": -2.6655454635620117, "logps/chosen": -240.3592529296875, "logps/rejected": -404.5606689453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.388174533843994, "rewards/margins": 8.148172378540039, "rewards/rejected": -11.536346435546875, "step": 12891 }, { "epoch": 2.0, "learning_rate": 4.6918190777529955e-06, "logits/chosen": -1.7136268615722656, "logits/rejected": -2.6759402751922607, "logps/chosen": -95.12206268310547, "logps/rejected": -267.8400573730469, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.7719926834106445, "rewards/margins": 6.792113304138184, "rewards/rejected": -10.564105987548828, "step": 12892 }, { "epoch": 2.01, "learning_rate": 4.691085637221847e-06, "logits/chosen": -2.2411882877349854, "logits/rejected": -3.023339033126831, "logps/chosen": -57.654449462890625, "logps/rejected": -363.05816650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.401005506515503, "rewards/margins": 10.728233337402344, "rewards/rejected": -14.129239082336426, "step": 12893 }, { "epoch": 2.01, "learning_rate": 4.6903521966907e-06, "logits/chosen": -1.9394289255142212, "logits/rejected": -3.150580406188965, "logps/chosen": -112.63691711425781, "logps/rejected": -346.2081298828125, "loss": 0.2922, "rewards/accuracies": 1.0, "rewards/chosen": -5.530095100402832, "rewards/margins": 3.8443055152893066, "rewards/rejected": -9.374401092529297, "step": 12894 }, { "epoch": 2.01, "learning_rate": 4.689618756159552e-06, "logits/chosen": -1.8876584768295288, "logits/rejected": -2.812368392944336, "logps/chosen": -80.64665985107422, "logps/rejected": -258.7781677246094, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -5.643251895904541, "rewards/margins": 6.547170639038086, "rewards/rejected": -12.190422058105469, "step": 12895 }, { "epoch": 2.01, "learning_rate": 4.688885315628404e-06, "logits/chosen": -2.4596104621887207, "logits/rejected": -3.179847002029419, "logps/chosen": -247.59518432617188, "logps/rejected": -515.2938842773438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.6886305809020996, "rewards/margins": 11.780444145202637, "rewards/rejected": -15.469075202941895, "step": 12896 }, { "epoch": 2.01, "learning_rate": 4.688151875097256e-06, "logits/chosen": -1.4640048742294312, "logits/rejected": -2.813544988632202, "logps/chosen": -161.67337036132812, "logps/rejected": -492.31915283203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.768567085266113, "rewards/margins": 10.19654655456543, "rewards/rejected": -14.96511459350586, "step": 12897 }, { "epoch": 2.01, "learning_rate": 4.6874184345661085e-06, "logits/chosen": -2.9389595985412598, "logits/rejected": -3.0291757583618164, "logps/chosen": -133.48562622070312, "logps/rejected": -193.744873046875, "loss": 0.8515, "rewards/accuracies": 0.5, "rewards/chosen": -7.968585968017578, "rewards/margins": 0.9343323707580566, "rewards/rejected": -8.902917861938477, "step": 12898 }, { "epoch": 2.01, "learning_rate": 4.68668499403496e-06, "logits/chosen": -1.6477129459381104, "logits/rejected": -2.9122602939605713, "logps/chosen": -90.55320739746094, "logps/rejected": -254.50511169433594, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/chosen": -3.7054615020751953, "rewards/margins": 8.304718017578125, "rewards/rejected": -12.01017951965332, "step": 12899 }, { "epoch": 2.01, "learning_rate": 4.685951553503812e-06, "logits/chosen": -2.1053829193115234, "logits/rejected": -1.5255008935928345, "logps/chosen": -303.3059387207031, "logps/rejected": -227.55917358398438, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -3.2757675647735596, "rewards/margins": 7.729008197784424, "rewards/rejected": -11.004776000976562, "step": 12900 }, { "epoch": 2.01, "learning_rate": 4.685218112972664e-06, "logits/chosen": -2.759906053543091, "logits/rejected": -2.902733564376831, "logps/chosen": -238.19772338867188, "logps/rejected": -387.8170166015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4439642429351807, "rewards/margins": 12.25613021850586, "rewards/rejected": -14.700094223022461, "step": 12901 }, { "epoch": 2.01, "learning_rate": 4.684484672441517e-06, "logits/chosen": -2.521583080291748, "logits/rejected": -2.8361496925354004, "logps/chosen": -64.57597351074219, "logps/rejected": -254.2826385498047, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.90874981880188, "rewards/margins": 8.692680358886719, "rewards/rejected": -12.60142993927002, "step": 12902 }, { "epoch": 2.01, "learning_rate": 4.683751231910369e-06, "logits/chosen": -1.9975647926330566, "logits/rejected": -3.069641351699829, "logps/chosen": -147.4103240966797, "logps/rejected": -399.51861572265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.760646343231201, "rewards/margins": 7.2508392333984375, "rewards/rejected": -11.011486053466797, "step": 12903 }, { "epoch": 2.01, "learning_rate": 4.683017791379221e-06, "logits/chosen": -2.0836181640625, "logits/rejected": -2.385499954223633, "logps/chosen": -214.3191375732422, "logps/rejected": -401.6181945800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8774967193603516, "rewards/margins": 10.966333389282227, "rewards/rejected": -14.843830108642578, "step": 12904 }, { "epoch": 2.01, "learning_rate": 4.682284350848073e-06, "logits/chosen": -2.649658203125, "logits/rejected": -2.6619088649749756, "logps/chosen": -868.8380126953125, "logps/rejected": -805.2423095703125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.0356125831604, "rewards/margins": 8.443299293518066, "rewards/rejected": -14.478912353515625, "step": 12905 }, { "epoch": 2.01, "learning_rate": 4.681550910316925e-06, "logits/chosen": -2.906212329864502, "logits/rejected": -3.1012818813323975, "logps/chosen": -187.04522705078125, "logps/rejected": -302.666748046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -1.6546180248260498, "rewards/margins": 8.826645851135254, "rewards/rejected": -10.481264114379883, "step": 12906 }, { "epoch": 2.01, "learning_rate": 4.680817469785778e-06, "logits/chosen": -2.6092731952667236, "logits/rejected": -1.4289082288742065, "logps/chosen": -279.22467041015625, "logps/rejected": -251.48727416992188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.23005485534668, "rewards/margins": 7.939436435699463, "rewards/rejected": -12.169490814208984, "step": 12907 }, { "epoch": 2.01, "learning_rate": 4.68008402925463e-06, "logits/chosen": -2.7926390171051025, "logits/rejected": -1.7264482975006104, "logps/chosen": -457.3658142089844, "logps/rejected": -311.955810546875, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -5.511415958404541, "rewards/margins": 5.314829349517822, "rewards/rejected": -10.826245307922363, "step": 12908 }, { "epoch": 2.01, "learning_rate": 4.679350588723482e-06, "logits/chosen": -2.957890510559082, "logits/rejected": -2.235297441482544, "logps/chosen": -653.5725708007812, "logps/rejected": -358.8343505859375, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -5.775805473327637, "rewards/margins": 4.661286354064941, "rewards/rejected": -10.437091827392578, "step": 12909 }, { "epoch": 2.01, "learning_rate": 4.678617148192334e-06, "logits/chosen": -2.852691173553467, "logits/rejected": -2.4254884719848633, "logps/chosen": -359.3377685546875, "logps/rejected": -471.44610595703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.9516873359680176, "rewards/margins": 8.886499404907227, "rewards/rejected": -11.838186264038086, "step": 12910 }, { "epoch": 2.01, "learning_rate": 4.677883707661186e-06, "logits/chosen": -1.5732448101043701, "logits/rejected": -2.6748452186584473, "logps/chosen": -165.4120330810547, "logps/rejected": -255.75306701660156, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -4.207979202270508, "rewards/margins": 6.066034317016602, "rewards/rejected": -10.27401351928711, "step": 12911 }, { "epoch": 2.01, "learning_rate": 4.677150267130038e-06, "logits/chosen": -2.4119598865509033, "logits/rejected": -2.927813768386841, "logps/chosen": -244.39527893066406, "logps/rejected": -488.66754150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4863457679748535, "rewards/margins": 13.379364013671875, "rewards/rejected": -16.86570930480957, "step": 12912 }, { "epoch": 2.01, "learning_rate": 4.67641682659889e-06, "logits/chosen": -2.3375847339630127, "logits/rejected": -3.160733222961426, "logps/chosen": -164.90145874023438, "logps/rejected": -375.8623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.775890827178955, "rewards/margins": 10.696968078613281, "rewards/rejected": -12.472859382629395, "step": 12913 }, { "epoch": 2.01, "learning_rate": 4.675683386067742e-06, "logits/chosen": -1.5453662872314453, "logits/rejected": -3.1103358268737793, "logps/chosen": -222.21347045898438, "logps/rejected": -563.079833984375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.945950984954834, "rewards/margins": 8.375320434570312, "rewards/rejected": -12.321270942687988, "step": 12914 }, { "epoch": 2.01, "learning_rate": 4.674949945536594e-06, "logits/chosen": -2.342653274536133, "logits/rejected": -3.0270004272460938, "logps/chosen": -120.60157012939453, "logps/rejected": -251.11520385742188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.186594486236572, "rewards/margins": 6.306946754455566, "rewards/rejected": -11.493541717529297, "step": 12915 }, { "epoch": 2.01, "learning_rate": 4.6742165050054465e-06, "logits/chosen": -2.696134567260742, "logits/rejected": -2.669804811477661, "logps/chosen": -510.9266357421875, "logps/rejected": -494.0249938964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6765960454940796, "rewards/margins": 10.594508171081543, "rewards/rejected": -12.27110481262207, "step": 12916 }, { "epoch": 2.01, "learning_rate": 4.673483064474298e-06, "logits/chosen": -2.8328447341918945, "logits/rejected": -1.6958919763565063, "logps/chosen": -366.3310546875, "logps/rejected": -313.76263427734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.060051918029785, "rewards/margins": 9.455193519592285, "rewards/rejected": -12.51524543762207, "step": 12917 }, { "epoch": 2.01, "learning_rate": 4.67274962394315e-06, "logits/chosen": -2.3748273849487305, "logits/rejected": -3.290799379348755, "logps/chosen": -124.32759094238281, "logps/rejected": -385.4659423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8708982467651367, "rewards/margins": 10.29123306274414, "rewards/rejected": -14.162131309509277, "step": 12918 }, { "epoch": 2.01, "learning_rate": 4.672016183412002e-06, "logits/chosen": -2.0933969020843506, "logits/rejected": -3.034334897994995, "logps/chosen": -213.63865661621094, "logps/rejected": -456.50341796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.26052713394165, "rewards/margins": 9.55473804473877, "rewards/rejected": -13.815265655517578, "step": 12919 }, { "epoch": 2.01, "learning_rate": 4.671282742880855e-06, "logits/chosen": -0.7472636699676514, "logits/rejected": -1.907631278038025, "logps/chosen": -221.87081909179688, "logps/rejected": -493.40252685546875, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -4.846846103668213, "rewards/margins": 9.252727508544922, "rewards/rejected": -14.099574089050293, "step": 12920 }, { "epoch": 2.01, "learning_rate": 4.670549302349707e-06, "logits/chosen": -1.5418277978897095, "logits/rejected": -2.900238275527954, "logps/chosen": -172.8330841064453, "logps/rejected": -534.2175903320312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.931065559387207, "rewards/margins": 8.079214096069336, "rewards/rejected": -12.01028060913086, "step": 12921 }, { "epoch": 2.01, "learning_rate": 4.6698158618185595e-06, "logits/chosen": -2.8249385356903076, "logits/rejected": -3.083707571029663, "logps/chosen": -125.38386535644531, "logps/rejected": -320.614501953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.939493179321289, "rewards/margins": 7.984548568725586, "rewards/rejected": -13.924041748046875, "step": 12922 }, { "epoch": 2.01, "learning_rate": 4.669082421287411e-06, "logits/chosen": -2.808900833129883, "logits/rejected": -1.3404065370559692, "logps/chosen": -831.5858154296875, "logps/rejected": -540.8458251953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -1.0032105445861816, "rewards/margins": 10.520590782165527, "rewards/rejected": -11.52380084991455, "step": 12923 }, { "epoch": 2.01, "learning_rate": 4.668348980756263e-06, "logits/chosen": -2.9850919246673584, "logits/rejected": -2.2420196533203125, "logps/chosen": -171.78564453125, "logps/rejected": -166.99679565429688, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -1.8535265922546387, "rewards/margins": 7.533098220825195, "rewards/rejected": -9.386625289916992, "step": 12924 }, { "epoch": 2.01, "learning_rate": 4.667615540225116e-06, "logits/chosen": -2.3187668323516846, "logits/rejected": -3.2049098014831543, "logps/chosen": -170.5176544189453, "logps/rejected": -328.0503845214844, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -2.4528322219848633, "rewards/margins": 7.158388614654541, "rewards/rejected": -9.611221313476562, "step": 12925 }, { "epoch": 2.01, "learning_rate": 4.666882099693968e-06, "logits/chosen": -2.925408363342285, "logits/rejected": -2.9373157024383545, "logps/chosen": -154.9266357421875, "logps/rejected": -219.9312744140625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.7408483028411865, "rewards/margins": 7.276867866516113, "rewards/rejected": -11.017716407775879, "step": 12926 }, { "epoch": 2.01, "learning_rate": 4.66614865916282e-06, "logits/chosen": -1.9315062761306763, "logits/rejected": -3.036149024963379, "logps/chosen": -255.254638671875, "logps/rejected": -442.95166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7550101280212402, "rewards/margins": 10.249916076660156, "rewards/rejected": -14.004926681518555, "step": 12927 }, { "epoch": 2.01, "learning_rate": 4.665415218631672e-06, "logits/chosen": -1.4445021152496338, "logits/rejected": -2.5869016647338867, "logps/chosen": -138.17300415039062, "logps/rejected": -408.92926025390625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.745716094970703, "rewards/margins": 8.354926109313965, "rewards/rejected": -14.100641250610352, "step": 12928 }, { "epoch": 2.01, "learning_rate": 4.664681778100524e-06, "logits/chosen": -2.137348175048828, "logits/rejected": -2.812849521636963, "logps/chosen": -141.18917846679688, "logps/rejected": -294.9609375, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/chosen": -6.95421028137207, "rewards/margins": 4.1342267990112305, "rewards/rejected": -11.0884370803833, "step": 12929 }, { "epoch": 2.01, "learning_rate": 4.663948337569376e-06, "logits/chosen": -2.4052979946136475, "logits/rejected": -3.185128688812256, "logps/chosen": -71.45751953125, "logps/rejected": -263.4927978515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.587160587310791, "rewards/margins": 8.63101577758789, "rewards/rejected": -14.218175888061523, "step": 12930 }, { "epoch": 2.01, "learning_rate": 4.663214897038228e-06, "logits/chosen": -3.1577506065368652, "logits/rejected": -2.9360806941986084, "logps/chosen": -145.84112548828125, "logps/rejected": -173.91737365722656, "loss": 1.369, "rewards/accuracies": 0.5, "rewards/chosen": -4.619851589202881, "rewards/margins": 1.9903336763381958, "rewards/rejected": -6.610185623168945, "step": 12931 }, { "epoch": 2.01, "learning_rate": 4.66248145650708e-06, "logits/chosen": -1.4011794328689575, "logits/rejected": -2.26753830909729, "logps/chosen": -230.77169799804688, "logps/rejected": -365.93707275390625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -1.4559707641601562, "rewards/margins": 6.898633003234863, "rewards/rejected": -8.35460376739502, "step": 12932 }, { "epoch": 2.01, "learning_rate": 4.661748015975932e-06, "logits/chosen": -2.578573226928711, "logits/rejected": -1.716745376586914, "logps/chosen": -471.5710754394531, "logps/rejected": -466.3026123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.09976053237915, "rewards/margins": 11.21677303314209, "rewards/rejected": -16.3165340423584, "step": 12933 }, { "epoch": 2.01, "learning_rate": 4.661014575444785e-06, "logits/chosen": -2.746023416519165, "logits/rejected": -2.086893081665039, "logps/chosen": -223.20486450195312, "logps/rejected": -205.64027404785156, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -3.703023672103882, "rewards/margins": 5.6881537437438965, "rewards/rejected": -9.3911771774292, "step": 12934 }, { "epoch": 2.01, "learning_rate": 4.6602811349136365e-06, "logits/chosen": -1.9694470167160034, "logits/rejected": -2.8133180141448975, "logps/chosen": -159.56271362304688, "logps/rejected": -307.5481262207031, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.6463379859924316, "rewards/margins": 6.127859115600586, "rewards/rejected": -9.774197578430176, "step": 12935 }, { "epoch": 2.01, "learning_rate": 4.659547694382488e-06, "logits/chosen": -2.5019912719726562, "logits/rejected": -2.723454236984253, "logps/chosen": -131.69378662109375, "logps/rejected": -145.98480224609375, "loss": 0.0996, "rewards/accuracies": 1.0, "rewards/chosen": -6.577948570251465, "rewards/margins": 3.5019471645355225, "rewards/rejected": -10.07989501953125, "step": 12936 }, { "epoch": 2.01, "learning_rate": 4.65881425385134e-06, "logits/chosen": -2.1261515617370605, "logits/rejected": -1.967675805091858, "logps/chosen": -306.49444580078125, "logps/rejected": -424.3279113769531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.2787656784057617, "rewards/margins": 10.472101211547852, "rewards/rejected": -13.75086784362793, "step": 12937 }, { "epoch": 2.01, "learning_rate": 4.658080813320193e-06, "logits/chosen": -2.5609519481658936, "logits/rejected": -3.1385104656219482, "logps/chosen": -226.32797241210938, "logps/rejected": -449.72076416015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.98812198638916, "rewards/margins": 7.996172904968262, "rewards/rejected": -13.984294891357422, "step": 12938 }, { "epoch": 2.01, "learning_rate": 4.657347372789046e-06, "logits/chosen": -2.8135476112365723, "logits/rejected": -2.913609743118286, "logps/chosen": -107.78516387939453, "logps/rejected": -313.8288879394531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.705165863037109, "rewards/margins": 8.394044876098633, "rewards/rejected": -13.099210739135742, "step": 12939 }, { "epoch": 2.01, "learning_rate": 4.6566139322578976e-06, "logits/chosen": -1.7012885808944702, "logits/rejected": -0.8596428632736206, "logps/chosen": -653.6015625, "logps/rejected": -584.80712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9257936477661133, "rewards/margins": 12.302499771118164, "rewards/rejected": -16.22829246520996, "step": 12940 }, { "epoch": 2.01, "learning_rate": 4.6558804917267494e-06, "logits/chosen": -1.4502211809158325, "logits/rejected": -2.977006435394287, "logps/chosen": -121.77531433105469, "logps/rejected": -376.83563232421875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.315816402435303, "rewards/margins": 7.263703346252441, "rewards/rejected": -12.579520225524902, "step": 12941 }, { "epoch": 2.01, "learning_rate": 4.655147051195601e-06, "logits/chosen": -1.6537425518035889, "logits/rejected": -2.897125005722046, "logps/chosen": -117.86358642578125, "logps/rejected": -391.816650390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.180035352706909, "rewards/margins": 8.89126968383789, "rewards/rejected": -12.071304321289062, "step": 12942 }, { "epoch": 2.01, "learning_rate": 4.654413610664454e-06, "logits/chosen": -1.7671340703964233, "logits/rejected": -2.8779232501983643, "logps/chosen": -210.98101806640625, "logps/rejected": -421.3351745605469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.496768951416016, "rewards/margins": 7.318794250488281, "rewards/rejected": -11.815563201904297, "step": 12943 }, { "epoch": 2.01, "learning_rate": 4.653680170133306e-06, "logits/chosen": -2.932645559310913, "logits/rejected": -2.6215522289276123, "logps/chosen": -531.5010375976562, "logps/rejected": -525.5631103515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.114306926727295, "rewards/margins": 9.611409187316895, "rewards/rejected": -13.725716590881348, "step": 12944 }, { "epoch": 2.01, "learning_rate": 4.652946729602158e-06, "logits/chosen": -2.9827914237976074, "logits/rejected": -3.3349123001098633, "logps/chosen": -238.81234741210938, "logps/rejected": -391.096923828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.228883743286133, "rewards/margins": 9.162872314453125, "rewards/rejected": -13.391756057739258, "step": 12945 }, { "epoch": 2.01, "learning_rate": 4.65221328907101e-06, "logits/chosen": -2.5963633060455322, "logits/rejected": -2.2376556396484375, "logps/chosen": -304.7150573730469, "logps/rejected": -403.47515869140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7149319648742676, "rewards/margins": 10.161468505859375, "rewards/rejected": -13.8764009475708, "step": 12946 }, { "epoch": 2.01, "learning_rate": 4.651479848539862e-06, "logits/chosen": -2.8411152362823486, "logits/rejected": -1.8497024774551392, "logps/chosen": -573.834228515625, "logps/rejected": -396.86505126953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9261536598205566, "rewards/margins": 9.284579277038574, "rewards/rejected": -13.210732460021973, "step": 12947 }, { "epoch": 2.01, "learning_rate": 4.650746408008714e-06, "logits/chosen": -2.281015157699585, "logits/rejected": -3.029472827911377, "logps/chosen": -82.5293960571289, "logps/rejected": -410.41339111328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1300737857818604, "rewards/margins": 10.896078109741211, "rewards/rejected": -14.026151657104492, "step": 12948 }, { "epoch": 2.01, "learning_rate": 4.650012967477566e-06, "logits/chosen": -3.1122336387634277, "logits/rejected": -2.9104363918304443, "logps/chosen": -337.42218017578125, "logps/rejected": -311.1642150878906, "loss": 0.1314, "rewards/accuracies": 1.0, "rewards/chosen": -4.01780366897583, "rewards/margins": 6.259284973144531, "rewards/rejected": -10.277088165283203, "step": 12949 }, { "epoch": 2.01, "learning_rate": 4.649279526946418e-06, "logits/chosen": -2.621026039123535, "logits/rejected": -3.0861406326293945, "logps/chosen": -682.6085205078125, "logps/rejected": -545.442626953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.711052656173706, "rewards/margins": 7.412844181060791, "rewards/rejected": -11.123897552490234, "step": 12950 }, { "epoch": 2.01, "learning_rate": 4.648546086415271e-06, "logits/chosen": -3.2324724197387695, "logits/rejected": -3.166543483734131, "logps/chosen": -103.82496643066406, "logps/rejected": -230.118896484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.387599468231201, "rewards/margins": 8.674421310424805, "rewards/rejected": -12.062020301818848, "step": 12951 }, { "epoch": 2.01, "learning_rate": 4.647812645884123e-06, "logits/chosen": -2.2694642543792725, "logits/rejected": -2.7151577472686768, "logps/chosen": -97.98345184326172, "logps/rejected": -400.41986083984375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.8518948554992676, "rewards/margins": 7.454632759094238, "rewards/rejected": -11.306528091430664, "step": 12952 }, { "epoch": 2.01, "learning_rate": 4.6470792053529745e-06, "logits/chosen": -1.8483264446258545, "logits/rejected": -2.8490843772888184, "logps/chosen": -205.1842041015625, "logps/rejected": -353.01251220703125, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -4.5893235206604, "rewards/margins": 5.189030647277832, "rewards/rejected": -9.77835464477539, "step": 12953 }, { "epoch": 2.01, "learning_rate": 4.646345764821826e-06, "logits/chosen": -2.803753137588501, "logits/rejected": -1.9856661558151245, "logps/chosen": -285.5826416015625, "logps/rejected": -293.09454345703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.371095657348633, "rewards/margins": 11.124322891235352, "rewards/rejected": -14.495418548583984, "step": 12954 }, { "epoch": 2.01, "learning_rate": 4.645612324290679e-06, "logits/chosen": -2.8150484561920166, "logits/rejected": -2.331479072570801, "logps/chosen": -126.98712158203125, "logps/rejected": -249.38833618164062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.9057530164718628, "rewards/margins": 10.182138442993164, "rewards/rejected": -12.087891578674316, "step": 12955 }, { "epoch": 2.01, "learning_rate": 4.644878883759532e-06, "logits/chosen": -2.0782852172851562, "logits/rejected": -3.271514415740967, "logps/chosen": -138.21820068359375, "logps/rejected": -420.42694091796875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.082705020904541, "rewards/margins": 8.023192405700684, "rewards/rejected": -11.105897903442383, "step": 12956 }, { "epoch": 2.02, "learning_rate": 4.644145443228384e-06, "logits/chosen": -2.9204914569854736, "logits/rejected": -2.212768077850342, "logps/chosen": -223.98468017578125, "logps/rejected": -275.31195068359375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.279967308044434, "rewards/margins": 9.786928176879883, "rewards/rejected": -17.06689453125, "step": 12957 }, { "epoch": 2.02, "learning_rate": 4.643412002697236e-06, "logits/chosen": -2.727155923843384, "logits/rejected": -1.7473788261413574, "logps/chosen": -513.805908203125, "logps/rejected": -360.31524658203125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.634535789489746, "rewards/margins": 6.384793281555176, "rewards/rejected": -11.019329071044922, "step": 12958 }, { "epoch": 2.02, "learning_rate": 4.6426785621660875e-06, "logits/chosen": -1.5546354055404663, "logits/rejected": -2.9357945919036865, "logps/chosen": -94.31532287597656, "logps/rejected": -448.4915466308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5250110626220703, "rewards/margins": 11.836936950683594, "rewards/rejected": -15.361948013305664, "step": 12959 }, { "epoch": 2.02, "learning_rate": 4.64194512163494e-06, "logits/chosen": -2.9337265491485596, "logits/rejected": -2.3946166038513184, "logps/chosen": -169.45883178710938, "logps/rejected": -112.3314208984375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -2.6838769912719727, "rewards/margins": 6.376206398010254, "rewards/rejected": -9.060083389282227, "step": 12960 }, { "epoch": 2.02, "learning_rate": 4.641211681103792e-06, "logits/chosen": -2.808379888534546, "logits/rejected": -2.5195939540863037, "logps/chosen": -148.7532958984375, "logps/rejected": -194.02828979492188, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -2.2640480995178223, "rewards/margins": 4.802150249481201, "rewards/rejected": -7.066198348999023, "step": 12961 }, { "epoch": 2.02, "learning_rate": 4.640478240572644e-06, "logits/chosen": -2.8190786838531494, "logits/rejected": -2.929349422454834, "logps/chosen": -150.43507385253906, "logps/rejected": -277.75091552734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.3707165718078613, "rewards/margins": 8.616779327392578, "rewards/rejected": -11.987496376037598, "step": 12962 }, { "epoch": 2.02, "learning_rate": 4.639744800041496e-06, "logits/chosen": -2.5819015502929688, "logits/rejected": -2.9741008281707764, "logps/chosen": -108.87401580810547, "logps/rejected": -246.65109252929688, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -5.310091495513916, "rewards/margins": 4.6656317710876465, "rewards/rejected": -9.975723266601562, "step": 12963 }, { "epoch": 2.02, "learning_rate": 4.639011359510348e-06, "logits/chosen": -2.1750435829162598, "logits/rejected": -3.014390230178833, "logps/chosen": -79.05520629882812, "logps/rejected": -307.2230224609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9664812088012695, "rewards/margins": 8.095256805419922, "rewards/rejected": -12.061738014221191, "step": 12964 }, { "epoch": 2.02, "learning_rate": 4.6382779189792004e-06, "logits/chosen": -2.8719913959503174, "logits/rejected": -2.8278372287750244, "logps/chosen": -198.69105529785156, "logps/rejected": -228.60496520996094, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -8.974305152893066, "rewards/margins": 4.341843605041504, "rewards/rejected": -13.31614875793457, "step": 12965 }, { "epoch": 2.02, "learning_rate": 4.637544478448052e-06, "logits/chosen": -2.2926015853881836, "logits/rejected": -2.8172948360443115, "logps/chosen": -62.63038635253906, "logps/rejected": -191.6397247314453, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -4.916308403015137, "rewards/margins": 4.5144243240356445, "rewards/rejected": -9.430732727050781, "step": 12966 }, { "epoch": 2.02, "learning_rate": 4.636811037916904e-06, "logits/chosen": -2.097835063934326, "logits/rejected": -3.0167572498321533, "logps/chosen": -302.33587646484375, "logps/rejected": -769.6353759765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.236335754394531, "rewards/margins": 8.475493431091309, "rewards/rejected": -14.711828231811523, "step": 12967 }, { "epoch": 2.02, "learning_rate": 4.636077597385756e-06, "logits/chosen": -2.2694830894470215, "logits/rejected": -2.972264528274536, "logps/chosen": -348.4578857421875, "logps/rejected": -483.98065185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4725341796875, "rewards/margins": 10.138071060180664, "rewards/rejected": -12.610605239868164, "step": 12968 }, { "epoch": 2.02, "learning_rate": 4.635344156854609e-06, "logits/chosen": -2.021512508392334, "logits/rejected": -2.876322031021118, "logps/chosen": -221.45413208007812, "logps/rejected": -407.63226318359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.382047653198242, "rewards/margins": 8.0866117477417, "rewards/rejected": -13.468659400939941, "step": 12969 }, { "epoch": 2.02, "learning_rate": 4.634610716323461e-06, "logits/chosen": -2.652416944503784, "logits/rejected": -3.245626926422119, "logps/chosen": -536.900146484375, "logps/rejected": -643.4091186523438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8633065223693848, "rewards/margins": 10.444171905517578, "rewards/rejected": -13.307477951049805, "step": 12970 }, { "epoch": 2.02, "learning_rate": 4.6338772757923126e-06, "logits/chosen": -2.954752206802368, "logits/rejected": -1.849014163017273, "logps/chosen": -144.68658447265625, "logps/rejected": -145.33493041992188, "loss": 0.0266, "rewards/accuracies": 1.0, "rewards/chosen": -3.0901737213134766, "rewards/margins": 6.731283187866211, "rewards/rejected": -9.821456909179688, "step": 12971 }, { "epoch": 2.02, "learning_rate": 4.633143835261165e-06, "logits/chosen": -2.3226993083953857, "logits/rejected": -2.6139638423919678, "logps/chosen": -111.104248046875, "logps/rejected": -238.07498168945312, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -4.656746864318848, "rewards/margins": 6.001923084259033, "rewards/rejected": -10.658670425415039, "step": 12972 }, { "epoch": 2.02, "learning_rate": 4.632410394730017e-06, "logits/chosen": -2.85600209236145, "logits/rejected": -2.9451558589935303, "logps/chosen": -181.6532745361328, "logps/rejected": -259.1059875488281, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -6.581576347351074, "rewards/margins": 6.265929222106934, "rewards/rejected": -12.847505569458008, "step": 12973 }, { "epoch": 2.02, "learning_rate": 4.63167695419887e-06, "logits/chosen": -2.2705323696136475, "logits/rejected": -3.103651762008667, "logps/chosen": -86.53765869140625, "logps/rejected": -229.57391357421875, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -3.3420658111572266, "rewards/margins": 4.8226470947265625, "rewards/rejected": -8.164712905883789, "step": 12974 }, { "epoch": 2.02, "learning_rate": 4.630943513667722e-06, "logits/chosen": -2.5703067779541016, "logits/rejected": -1.9386496543884277, "logps/chosen": -428.88372802734375, "logps/rejected": -471.47296142578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.83635139465332, "rewards/margins": 9.19347858428955, "rewards/rejected": -14.029829978942871, "step": 12975 }, { "epoch": 2.02, "learning_rate": 4.630210073136574e-06, "logits/chosen": -1.5986669063568115, "logits/rejected": -2.3356878757476807, "logps/chosen": -154.44781494140625, "logps/rejected": -344.1546325683594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.4429147243499756, "rewards/margins": 7.429515838623047, "rewards/rejected": -10.872430801391602, "step": 12976 }, { "epoch": 2.02, "learning_rate": 4.6294766326054255e-06, "logits/chosen": -2.978794813156128, "logits/rejected": -2.540386915206909, "logps/chosen": -396.14739990234375, "logps/rejected": -364.3092041015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.80385422706604, "rewards/margins": 8.830793380737305, "rewards/rejected": -11.634647369384766, "step": 12977 }, { "epoch": 2.02, "learning_rate": 4.628743192074278e-06, "logits/chosen": -1.3958075046539307, "logits/rejected": -2.720432758331299, "logps/chosen": -166.865966796875, "logps/rejected": -455.0909423828125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.674160480499268, "rewards/margins": 6.319466590881348, "rewards/rejected": -10.993627548217773, "step": 12978 }, { "epoch": 2.02, "learning_rate": 4.62800975154313e-06, "logits/chosen": -1.9006065130233765, "logits/rejected": -2.9062323570251465, "logps/chosen": -131.57766723632812, "logps/rejected": -356.060546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.4266815185546875, "rewards/margins": 7.78533411026001, "rewards/rejected": -12.212015151977539, "step": 12979 }, { "epoch": 2.02, "learning_rate": 4.627276311011982e-06, "logits/chosen": -1.5509464740753174, "logits/rejected": -2.3116977214813232, "logps/chosen": -357.0038146972656, "logps/rejected": -664.48779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.463240146636963, "rewards/margins": 17.58080291748047, "rewards/rejected": -24.044044494628906, "step": 12980 }, { "epoch": 2.02, "learning_rate": 4.626542870480834e-06, "logits/chosen": -1.1106411218643188, "logits/rejected": -2.866574764251709, "logps/chosen": -59.996849060058594, "logps/rejected": -722.76611328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.350759983062744, "rewards/margins": 9.486005783081055, "rewards/rejected": -13.83676528930664, "step": 12981 }, { "epoch": 2.02, "learning_rate": 4.625809429949686e-06, "logits/chosen": -3.052823305130005, "logits/rejected": -2.069148302078247, "logps/chosen": -292.85418701171875, "logps/rejected": -297.98004150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6744499206542969, "rewards/margins": 9.690095901489258, "rewards/rejected": -11.364545822143555, "step": 12982 }, { "epoch": 2.02, "learning_rate": 4.6250759894185385e-06, "logits/chosen": -2.5785861015319824, "logits/rejected": -2.944211959838867, "logps/chosen": -119.72883605957031, "logps/rejected": -355.61651611328125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.6155829429626465, "rewards/margins": 8.562468528747559, "rewards/rejected": -14.178051948547363, "step": 12983 }, { "epoch": 2.02, "learning_rate": 4.62434254888739e-06, "logits/chosen": -2.6020121574401855, "logits/rejected": -2.8118538856506348, "logps/chosen": -471.6313781738281, "logps/rejected": -500.6711120605469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.3091630935668945, "rewards/margins": 8.03127670288086, "rewards/rejected": -15.340439796447754, "step": 12984 }, { "epoch": 2.02, "learning_rate": 4.623609108356242e-06, "logits/chosen": -3.1772491931915283, "logits/rejected": -3.157364845275879, "logps/chosen": -123.36502075195312, "logps/rejected": -171.10206604003906, "loss": 0.5093, "rewards/accuracies": 0.5, "rewards/chosen": -6.209597587585449, "rewards/margins": 3.999190092086792, "rewards/rejected": -10.20878791809082, "step": 12985 }, { "epoch": 2.02, "learning_rate": 4.622875667825094e-06, "logits/chosen": -2.4046313762664795, "logits/rejected": -2.7115886211395264, "logps/chosen": -380.0838623046875, "logps/rejected": -319.59869384765625, "loss": 0.1002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3632922172546387, "rewards/margins": 4.710929870605469, "rewards/rejected": -8.074222564697266, "step": 12986 }, { "epoch": 2.02, "learning_rate": 4.622142227293947e-06, "logits/chosen": -2.993537664413452, "logits/rejected": -2.814676523208618, "logps/chosen": -271.32611083984375, "logps/rejected": -228.50765991210938, "loss": 0.7376, "rewards/accuracies": 0.5, "rewards/chosen": -3.7492644786834717, "rewards/margins": 4.503880977630615, "rewards/rejected": -8.253145217895508, "step": 12987 }, { "epoch": 2.02, "learning_rate": 4.621408786762799e-06, "logits/chosen": -2.94423508644104, "logits/rejected": -2.0622060298919678, "logps/chosen": -497.6864929199219, "logps/rejected": -365.1138000488281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.482751846313477, "rewards/margins": 9.186460494995117, "rewards/rejected": -15.669212341308594, "step": 12988 }, { "epoch": 2.02, "learning_rate": 4.6206753462316514e-06, "logits/chosen": -2.7845277786254883, "logits/rejected": -2.744438409805298, "logps/chosen": -460.8590087890625, "logps/rejected": -523.0912475585938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.927087783813477, "rewards/margins": 9.44013786315918, "rewards/rejected": -14.367225646972656, "step": 12989 }, { "epoch": 2.02, "learning_rate": 4.619941905700503e-06, "logits/chosen": -2.3047423362731934, "logits/rejected": -3.0943784713745117, "logps/chosen": -452.82611083984375, "logps/rejected": -696.0961303710938, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.359028339385986, "rewards/margins": 8.949360847473145, "rewards/rejected": -14.308389663696289, "step": 12990 }, { "epoch": 2.02, "learning_rate": 4.619208465169355e-06, "logits/chosen": -1.9547640085220337, "logits/rejected": -2.704317092895508, "logps/chosen": -126.08831024169922, "logps/rejected": -336.33087158203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.5127902030944824, "rewards/margins": 10.095691680908203, "rewards/rejected": -13.608481407165527, "step": 12991 }, { "epoch": 2.02, "learning_rate": 4.618475024638208e-06, "logits/chosen": -1.912043571472168, "logits/rejected": -2.7625648975372314, "logps/chosen": -150.5799560546875, "logps/rejected": -327.363037109375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.934167385101318, "rewards/margins": 7.00386905670166, "rewards/rejected": -14.93803596496582, "step": 12992 }, { "epoch": 2.02, "learning_rate": 4.61774158410706e-06, "logits/chosen": -1.9851874113082886, "logits/rejected": -2.676161050796509, "logps/chosen": -443.33123779296875, "logps/rejected": -490.02435302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.788266658782959, "rewards/margins": 11.412564277648926, "rewards/rejected": -18.200830459594727, "step": 12993 }, { "epoch": 2.02, "learning_rate": 4.617008143575912e-06, "logits/chosen": -2.24621319770813, "logits/rejected": -2.2197179794311523, "logps/chosen": -308.05218505859375, "logps/rejected": -433.1262512207031, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.3120803833007812, "rewards/margins": 9.599308967590332, "rewards/rejected": -12.911389350891113, "step": 12994 }, { "epoch": 2.02, "learning_rate": 4.6162747030447636e-06, "logits/chosen": -1.7912155389785767, "logits/rejected": -2.5553078651428223, "logps/chosen": -175.3570556640625, "logps/rejected": -329.53717041015625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.0120315551757812, "rewards/margins": 7.894629955291748, "rewards/rejected": -10.906661987304688, "step": 12995 }, { "epoch": 2.02, "learning_rate": 4.615541262513616e-06, "logits/chosen": -2.691586494445801, "logits/rejected": -1.9961907863616943, "logps/chosen": -269.22344970703125, "logps/rejected": -152.0263214111328, "loss": 0.919, "rewards/accuracies": 0.5, "rewards/chosen": -5.194116115570068, "rewards/margins": 3.0489754676818848, "rewards/rejected": -8.243091583251953, "step": 12996 }, { "epoch": 2.02, "learning_rate": 4.614807821982468e-06, "logits/chosen": -2.1439590454101562, "logits/rejected": -2.8908848762512207, "logps/chosen": -327.353271484375, "logps/rejected": -387.21734619140625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.309215068817139, "rewards/margins": 7.805126190185547, "rewards/rejected": -13.114340782165527, "step": 12997 }, { "epoch": 2.02, "learning_rate": 4.61407438145132e-06, "logits/chosen": -2.5226259231567383, "logits/rejected": -3.0288331508636475, "logps/chosen": -362.00946044921875, "logps/rejected": -560.93017578125, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -3.271084785461426, "rewards/margins": 6.5081467628479, "rewards/rejected": -9.779232025146484, "step": 12998 }, { "epoch": 2.02, "learning_rate": 4.613340940920172e-06, "logits/chosen": -2.047863721847534, "logits/rejected": -2.4170641899108887, "logps/chosen": -265.1141357421875, "logps/rejected": -475.3558349609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.35065221786499, "rewards/margins": 8.757673263549805, "rewards/rejected": -13.108325958251953, "step": 12999 }, { "epoch": 2.02, "learning_rate": 4.612607500389025e-06, "logits/chosen": -2.9783058166503906, "logits/rejected": -2.4275801181793213, "logps/chosen": -206.85641479492188, "logps/rejected": -163.5504913330078, "loss": 0.0249, "rewards/accuracies": 1.0, "rewards/chosen": -1.9952361583709717, "rewards/margins": 4.813137054443359, "rewards/rejected": -6.80837345123291, "step": 13000 }, { "epoch": 2.02, "learning_rate": 4.6118740598578765e-06, "logits/chosen": -1.9059016704559326, "logits/rejected": -2.770219087600708, "logps/chosen": -276.3377380371094, "logps/rejected": -628.3068237304688, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -6.252285003662109, "rewards/margins": 8.463399887084961, "rewards/rejected": -14.71568489074707, "step": 13001 }, { "epoch": 2.02, "learning_rate": 4.611140619326728e-06, "logits/chosen": -1.4172494411468506, "logits/rejected": -2.5200066566467285, "logps/chosen": -188.741455078125, "logps/rejected": -391.03741455078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.583954811096191, "rewards/margins": 8.872703552246094, "rewards/rejected": -13.456659317016602, "step": 13002 }, { "epoch": 2.02, "learning_rate": 4.61040717879558e-06, "logits/chosen": -1.7180516719818115, "logits/rejected": -2.7946581840515137, "logps/chosen": -117.86425018310547, "logps/rejected": -208.62875366210938, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -5.775010108947754, "rewards/margins": 4.938446521759033, "rewards/rejected": -10.713457107543945, "step": 13003 }, { "epoch": 2.02, "learning_rate": 4.609673738264432e-06, "logits/chosen": -2.2845349311828613, "logits/rejected": -2.2766525745391846, "logps/chosen": -318.9686584472656, "logps/rejected": -402.9321594238281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.363367557525635, "rewards/margins": 9.317469596862793, "rewards/rejected": -16.680837631225586, "step": 13004 }, { "epoch": 2.02, "learning_rate": 4.608940297733285e-06, "logits/chosen": -3.05665922164917, "logits/rejected": -3.0782558917999268, "logps/chosen": -276.5230712890625, "logps/rejected": -299.5467529296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.414600372314453, "rewards/margins": 9.07733154296875, "rewards/rejected": -12.491931915283203, "step": 13005 }, { "epoch": 2.02, "learning_rate": 4.608206857202138e-06, "logits/chosen": -2.7694764137268066, "logits/rejected": -3.131330728530884, "logps/chosen": -71.36004638671875, "logps/rejected": -228.38665771484375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.467604398727417, "rewards/margins": 6.4736857414245605, "rewards/rejected": -9.941289901733398, "step": 13006 }, { "epoch": 2.02, "learning_rate": 4.6074734166709895e-06, "logits/chosen": -2.1585946083068848, "logits/rejected": -3.061495065689087, "logps/chosen": -178.2674560546875, "logps/rejected": -1057.0294189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7962639331817627, "rewards/margins": 14.223551750183105, "rewards/rejected": -18.01981544494629, "step": 13007 }, { "epoch": 2.02, "learning_rate": 4.606739976139841e-06, "logits/chosen": -2.390507459640503, "logits/rejected": -3.0208468437194824, "logps/chosen": -388.2602844238281, "logps/rejected": -553.3817138671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.092120170593262, "rewards/margins": 7.390290260314941, "rewards/rejected": -13.482410430908203, "step": 13008 }, { "epoch": 2.02, "learning_rate": 4.606006535608694e-06, "logits/chosen": -2.224275827407837, "logits/rejected": -3.0347089767456055, "logps/chosen": -190.3714599609375, "logps/rejected": -426.10064697265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.844632863998413, "rewards/margins": 7.249597549438477, "rewards/rejected": -11.094230651855469, "step": 13009 }, { "epoch": 2.02, "learning_rate": 4.605273095077546e-06, "logits/chosen": -1.967038869857788, "logits/rejected": -3.0456626415252686, "logps/chosen": -128.51121520996094, "logps/rejected": -314.3148193359375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.740962982177734, "rewards/margins": 8.086666107177734, "rewards/rejected": -12.827629089355469, "step": 13010 }, { "epoch": 2.02, "learning_rate": 4.604539654546398e-06, "logits/chosen": -2.8086659908294678, "logits/rejected": -3.0761008262634277, "logps/chosen": -256.2168884277344, "logps/rejected": -350.312744140625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -2.9593236446380615, "rewards/margins": 6.363590717315674, "rewards/rejected": -9.322914123535156, "step": 13011 }, { "epoch": 2.02, "learning_rate": 4.60380621401525e-06, "logits/chosen": -2.5801825523376465, "logits/rejected": -1.8611313104629517, "logps/chosen": -471.6485290527344, "logps/rejected": -456.25531005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.0825066566467285, "rewards/margins": 9.99923324584961, "rewards/rejected": -16.08173942565918, "step": 13012 }, { "epoch": 2.02, "learning_rate": 4.603072773484102e-06, "logits/chosen": -1.905679702758789, "logits/rejected": -2.697268486022949, "logps/chosen": -112.84498596191406, "logps/rejected": -184.5125732421875, "loss": 0.7653, "rewards/accuracies": 0.5, "rewards/chosen": -8.472699165344238, "rewards/margins": 2.01322865486145, "rewards/rejected": -10.48592758178711, "step": 13013 }, { "epoch": 2.02, "learning_rate": 4.602339332952954e-06, "logits/chosen": -2.8304691314697266, "logits/rejected": -3.018693447113037, "logps/chosen": -169.35235595703125, "logps/rejected": -289.6496276855469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.3313910961151123, "rewards/margins": 7.918962478637695, "rewards/rejected": -11.250353813171387, "step": 13014 }, { "epoch": 2.02, "learning_rate": 4.601605892421806e-06, "logits/chosen": -2.6944100856781006, "logits/rejected": -2.8835878372192383, "logps/chosen": -135.61904907226562, "logps/rejected": -218.36717224121094, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": -2.9821839332580566, "rewards/margins": 6.0148420333862305, "rewards/rejected": -8.997026443481445, "step": 13015 }, { "epoch": 2.02, "learning_rate": 4.600872451890658e-06, "logits/chosen": -2.194126605987549, "logits/rejected": -2.8467369079589844, "logps/chosen": -310.22125244140625, "logps/rejected": -453.7373046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.372524261474609, "rewards/margins": 7.267852783203125, "rewards/rejected": -12.640377044677734, "step": 13016 }, { "epoch": 2.02, "learning_rate": 4.60013901135951e-06, "logits/chosen": -3.0900161266326904, "logits/rejected": -2.4656012058258057, "logps/chosen": -910.5665283203125, "logps/rejected": -451.881591796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.8281679153442383, "rewards/margins": 8.69760513305664, "rewards/rejected": -12.525772094726562, "step": 13017 }, { "epoch": 2.02, "learning_rate": 4.599405570828363e-06, "logits/chosen": -2.5714097023010254, "logits/rejected": -2.9388484954833984, "logps/chosen": -482.57501220703125, "logps/rejected": -509.6920166015625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.702342987060547, "rewards/margins": 7.856454372406006, "rewards/rejected": -12.558796882629395, "step": 13018 }, { "epoch": 2.02, "learning_rate": 4.5986721302972146e-06, "logits/chosen": -2.8193552494049072, "logits/rejected": -2.7002668380737305, "logps/chosen": -427.0555419921875, "logps/rejected": -449.72430419921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.581464767456055, "rewards/margins": 9.675749778747559, "rewards/rejected": -14.257214546203613, "step": 13019 }, { "epoch": 2.02, "learning_rate": 4.5979386897660665e-06, "logits/chosen": -0.917593777179718, "logits/rejected": -1.9977525472640991, "logps/chosen": -191.57281494140625, "logps/rejected": -525.9548950195312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.6164281368255615, "rewards/margins": 12.232502937316895, "rewards/rejected": -15.848930358886719, "step": 13020 }, { "epoch": 2.03, "learning_rate": 4.597205249234918e-06, "logits/chosen": -1.9616341590881348, "logits/rejected": -2.550752639770508, "logps/chosen": -433.14202880859375, "logps/rejected": -436.5763244628906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.6735734939575195, "rewards/margins": 9.349700927734375, "rewards/rejected": -15.023274421691895, "step": 13021 }, { "epoch": 2.03, "learning_rate": 4.596471808703771e-06, "logits/chosen": -3.190321922302246, "logits/rejected": -3.2238047122955322, "logps/chosen": -545.4700927734375, "logps/rejected": -805.8379516601562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8475730419158936, "rewards/margins": 11.992475509643555, "rewards/rejected": -14.840048789978027, "step": 13022 }, { "epoch": 2.03, "learning_rate": 4.595738368172624e-06, "logits/chosen": -1.082018256187439, "logits/rejected": -1.304256796836853, "logps/chosen": -466.6945495605469, "logps/rejected": -549.4498901367188, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -5.781805515289307, "rewards/margins": 7.922986030578613, "rewards/rejected": -13.704792022705078, "step": 13023 }, { "epoch": 2.03, "learning_rate": 4.595004927641476e-06, "logits/chosen": -2.3535969257354736, "logits/rejected": -2.875882148742676, "logps/chosen": -261.9547424316406, "logps/rejected": -446.61395263671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.08026123046875, "rewards/margins": 9.202436447143555, "rewards/rejected": -15.282697677612305, "step": 13024 }, { "epoch": 2.03, "learning_rate": 4.5942714871103275e-06, "logits/chosen": -3.1063435077667236, "logits/rejected": -3.415059804916382, "logps/chosen": -67.95352935791016, "logps/rejected": -232.76425170898438, "loss": 0.1475, "rewards/accuracies": 1.0, "rewards/chosen": -4.919317245483398, "rewards/margins": 5.813941955566406, "rewards/rejected": -10.733259201049805, "step": 13025 }, { "epoch": 2.03, "learning_rate": 4.593538046579179e-06, "logits/chosen": -0.7218107581138611, "logits/rejected": -2.855832099914551, "logps/chosen": -329.7518615722656, "logps/rejected": -505.79498291015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.747038841247559, "rewards/margins": 12.611974716186523, "rewards/rejected": -18.3590145111084, "step": 13026 }, { "epoch": 2.03, "learning_rate": 4.592804606048032e-06, "logits/chosen": -3.0017077922821045, "logits/rejected": -2.987499237060547, "logps/chosen": -206.24578857421875, "logps/rejected": -258.81927490234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.265097141265869, "rewards/margins": 7.376008033752441, "rewards/rejected": -9.641105651855469, "step": 13027 }, { "epoch": 2.03, "learning_rate": 4.592071165516884e-06, "logits/chosen": -1.9641128778457642, "logits/rejected": -2.774770498275757, "logps/chosen": -317.95294189453125, "logps/rejected": -521.409912109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.681823253631592, "rewards/margins": 8.315143585205078, "rewards/rejected": -13.996967315673828, "step": 13028 }, { "epoch": 2.03, "learning_rate": 4.591337724985736e-06, "logits/chosen": -2.5893070697784424, "logits/rejected": -3.135802745819092, "logps/chosen": -294.67828369140625, "logps/rejected": -475.87945556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.785418510437012, "rewards/margins": 9.241141319274902, "rewards/rejected": -15.026559829711914, "step": 13029 }, { "epoch": 2.03, "learning_rate": 4.590604284454588e-06, "logits/chosen": -2.99261212348938, "logits/rejected": -3.0287461280822754, "logps/chosen": -418.3370666503906, "logps/rejected": -394.65423583984375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -7.5584025382995605, "rewards/margins": 5.8641676902771, "rewards/rejected": -13.42257022857666, "step": 13030 }, { "epoch": 2.03, "learning_rate": 4.58987084392344e-06, "logits/chosen": -2.6395366191864014, "logits/rejected": -2.8564393520355225, "logps/chosen": -64.66478729248047, "logps/rejected": -180.167236328125, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.716334819793701, "rewards/margins": 5.366820812225342, "rewards/rejected": -9.083155632019043, "step": 13031 }, { "epoch": 2.03, "learning_rate": 4.589137403392292e-06, "logits/chosen": -1.1398507356643677, "logits/rejected": -1.5228157043457031, "logps/chosen": -234.22056579589844, "logps/rejected": -371.0140686035156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.917567729949951, "rewards/margins": 9.044830322265625, "rewards/rejected": -12.962398529052734, "step": 13032 }, { "epoch": 2.03, "learning_rate": 4.588403962861144e-06, "logits/chosen": -0.5950911045074463, "logits/rejected": -1.5934052467346191, "logps/chosen": -282.7522277832031, "logps/rejected": -661.80712890625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -7.898180961608887, "rewards/margins": 11.506282806396484, "rewards/rejected": -19.404462814331055, "step": 13033 }, { "epoch": 2.03, "learning_rate": 4.587670522329996e-06, "logits/chosen": -2.303760528564453, "logits/rejected": -2.8844945430755615, "logps/chosen": -152.69778442382812, "logps/rejected": -184.8734893798828, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -5.2115254402160645, "rewards/margins": 5.630542278289795, "rewards/rejected": -10.84206771850586, "step": 13034 }, { "epoch": 2.03, "learning_rate": 4.586937081798848e-06, "logits/chosen": -1.5460096597671509, "logits/rejected": -2.666039228439331, "logps/chosen": -119.13282012939453, "logps/rejected": -397.77557373046875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -8.12535285949707, "rewards/margins": 6.84075403213501, "rewards/rejected": -14.966106414794922, "step": 13035 }, { "epoch": 2.03, "learning_rate": 4.586203641267701e-06, "logits/chosen": -2.6836044788360596, "logits/rejected": -2.593886613845825, "logps/chosen": -466.496337890625, "logps/rejected": -587.3796997070312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.506047248840332, "rewards/margins": 13.692709922790527, "rewards/rejected": -19.19875717163086, "step": 13036 }, { "epoch": 2.03, "learning_rate": 4.585470200736553e-06, "logits/chosen": -2.389354705810547, "logits/rejected": -2.9110536575317383, "logps/chosen": -132.9025421142578, "logps/rejected": -346.55230712890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.35136604309082, "rewards/margins": 8.885272979736328, "rewards/rejected": -13.236639022827148, "step": 13037 }, { "epoch": 2.03, "learning_rate": 4.5847367602054045e-06, "logits/chosen": -2.8756766319274902, "logits/rejected": -2.968235731124878, "logps/chosen": -463.97662353515625, "logps/rejected": -548.3609008789062, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -4.412624359130859, "rewards/margins": 7.272066116333008, "rewards/rejected": -11.684690475463867, "step": 13038 }, { "epoch": 2.03, "learning_rate": 4.584003319674257e-06, "logits/chosen": -2.804621934890747, "logits/rejected": -1.803734540939331, "logps/chosen": -157.09884643554688, "logps/rejected": -271.45867919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.2145187854766846, "rewards/margins": 10.187276840209961, "rewards/rejected": -11.401795387268066, "step": 13039 }, { "epoch": 2.03, "learning_rate": 4.583269879143109e-06, "logits/chosen": -2.5825653076171875, "logits/rejected": -2.540396213531494, "logps/chosen": -164.07261657714844, "logps/rejected": -335.9671630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.48519229888916, "rewards/margins": 10.694665908813477, "rewards/rejected": -15.179859161376953, "step": 13040 }, { "epoch": 2.03, "learning_rate": 4.582536438611962e-06, "logits/chosen": -2.454604387283325, "logits/rejected": -2.2227697372436523, "logps/chosen": -166.41488647460938, "logps/rejected": -311.4783020019531, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.354811429977417, "rewards/margins": 9.205610275268555, "rewards/rejected": -11.56042194366455, "step": 13041 }, { "epoch": 2.03, "learning_rate": 4.581802998080814e-06, "logits/chosen": -2.791013240814209, "logits/rejected": -2.0425493717193604, "logps/chosen": -222.27810668945312, "logps/rejected": -168.024658203125, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -4.341592788696289, "rewards/margins": 5.196866512298584, "rewards/rejected": -9.538458824157715, "step": 13042 }, { "epoch": 2.03, "learning_rate": 4.581069557549666e-06, "logits/chosen": -2.7616961002349854, "logits/rejected": -1.9900566339492798, "logps/chosen": -644.4005126953125, "logps/rejected": -549.303955078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.063473701477051, "rewards/margins": 9.665574073791504, "rewards/rejected": -16.729047775268555, "step": 13043 }, { "epoch": 2.03, "learning_rate": 4.5803361170185175e-06, "logits/chosen": -1.9073094129562378, "logits/rejected": -2.6974799633026123, "logps/chosen": -155.41714477539062, "logps/rejected": -289.26519775390625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.29249382019043, "rewards/margins": 8.305620193481445, "rewards/rejected": -13.598114013671875, "step": 13044 }, { "epoch": 2.03, "learning_rate": 4.57960267648737e-06, "logits/chosen": -2.0768990516662598, "logits/rejected": -2.7897253036499023, "logps/chosen": -304.86322021484375, "logps/rejected": -463.37139892578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.1675543785095215, "rewards/margins": 8.082508087158203, "rewards/rejected": -10.250062942504883, "step": 13045 }, { "epoch": 2.03, "learning_rate": 4.578869235956222e-06, "logits/chosen": -2.403930425643921, "logits/rejected": -2.810781240463257, "logps/chosen": -234.3801727294922, "logps/rejected": -292.42303466796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.195733547210693, "rewards/margins": 9.294122695922852, "rewards/rejected": -13.489856719970703, "step": 13046 }, { "epoch": 2.03, "learning_rate": 4.578135795425074e-06, "logits/chosen": -1.8794819116592407, "logits/rejected": -2.5487959384918213, "logps/chosen": -451.21759033203125, "logps/rejected": -516.5501708984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.307002305984497, "rewards/margins": 12.03385066986084, "rewards/rejected": -15.340852737426758, "step": 13047 }, { "epoch": 2.03, "learning_rate": 4.577402354893926e-06, "logits/chosen": -1.1619216203689575, "logits/rejected": -2.3146512508392334, "logps/chosen": -203.216796875, "logps/rejected": -452.6986083984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.470770359039307, "rewards/margins": 11.050291061401367, "rewards/rejected": -17.521060943603516, "step": 13048 }, { "epoch": 2.03, "learning_rate": 4.5766689143627785e-06, "logits/chosen": -2.90034818649292, "logits/rejected": -2.694329261779785, "logps/chosen": -437.28875732421875, "logps/rejected": -445.33001708984375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.588482856750488, "rewards/margins": 7.520796775817871, "rewards/rejected": -12.10927963256836, "step": 13049 }, { "epoch": 2.03, "learning_rate": 4.57593547383163e-06, "logits/chosen": -2.580223560333252, "logits/rejected": -2.689544200897217, "logps/chosen": -284.4081115722656, "logps/rejected": -348.8992919921875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -5.216715335845947, "rewards/margins": 5.443553924560547, "rewards/rejected": -10.660268783569336, "step": 13050 }, { "epoch": 2.03, "learning_rate": 4.575202033300482e-06, "logits/chosen": -1.8546682596206665, "logits/rejected": -2.790921688079834, "logps/chosen": -231.08460998535156, "logps/rejected": -563.2572021484375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -6.505555152893066, "rewards/margins": 7.248934745788574, "rewards/rejected": -13.75448989868164, "step": 13051 }, { "epoch": 2.03, "learning_rate": 4.574468592769334e-06, "logits/chosen": -2.423011064529419, "logits/rejected": -2.9551875591278076, "logps/chosen": -231.3697052001953, "logps/rejected": -405.8414306640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.00194787979126, "rewards/margins": 7.048691272735596, "rewards/rejected": -14.050639152526855, "step": 13052 }, { "epoch": 2.03, "learning_rate": 4.573735152238186e-06, "logits/chosen": -2.0035061836242676, "logits/rejected": -2.2896461486816406, "logps/chosen": -223.50393676757812, "logps/rejected": -567.7257690429688, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.8841071128845215, "rewards/margins": 9.689881324768066, "rewards/rejected": -14.57398796081543, "step": 13053 }, { "epoch": 2.03, "learning_rate": 4.573001711707039e-06, "logits/chosen": -2.233487129211426, "logits/rejected": -2.7432777881622314, "logps/chosen": -121.033447265625, "logps/rejected": -203.52053833007812, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -4.9192914962768555, "rewards/margins": 6.35817289352417, "rewards/rejected": -11.277463912963867, "step": 13054 }, { "epoch": 2.03, "learning_rate": 4.572268271175891e-06, "logits/chosen": -2.7543022632598877, "logits/rejected": -2.4158194065093994, "logps/chosen": -119.77875518798828, "logps/rejected": -329.81610107421875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -1.082005262374878, "rewards/margins": 9.095880508422852, "rewards/rejected": -10.177886009216309, "step": 13055 }, { "epoch": 2.03, "learning_rate": 4.5715348306447425e-06, "logits/chosen": -2.658879518508911, "logits/rejected": -3.0385050773620605, "logps/chosen": -133.73019409179688, "logps/rejected": -359.11834716796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.549201965332031, "rewards/margins": 8.16317081451416, "rewards/rejected": -12.712372779846191, "step": 13056 }, { "epoch": 2.03, "learning_rate": 4.570801390113595e-06, "logits/chosen": -2.6141276359558105, "logits/rejected": -2.608856201171875, "logps/chosen": -651.2085571289062, "logps/rejected": -627.0008544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.617291450500488, "rewards/margins": 10.594099044799805, "rewards/rejected": -15.21139144897461, "step": 13057 }, { "epoch": 2.03, "learning_rate": 4.570067949582448e-06, "logits/chosen": -2.489104747772217, "logits/rejected": -2.918109655380249, "logps/chosen": -162.7547149658203, "logps/rejected": -288.76409912109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.619674205780029, "rewards/margins": 8.299877166748047, "rewards/rejected": -12.919551849365234, "step": 13058 }, { "epoch": 2.03, "learning_rate": 4.5693345090513e-06, "logits/chosen": -2.911412477493286, "logits/rejected": -2.2605555057525635, "logps/chosen": -329.390869140625, "logps/rejected": -225.28878784179688, "loss": 0.2122, "rewards/accuracies": 1.0, "rewards/chosen": -7.933352947235107, "rewards/margins": 4.51172399520874, "rewards/rejected": -12.445076942443848, "step": 13059 }, { "epoch": 2.03, "learning_rate": 4.568601068520152e-06, "logits/chosen": -2.927905321121216, "logits/rejected": -2.1227102279663086, "logps/chosen": -733.5101318359375, "logps/rejected": -433.955810546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -8.651947021484375, "rewards/margins": 7.259769439697266, "rewards/rejected": -15.91171646118164, "step": 13060 }, { "epoch": 2.03, "learning_rate": 4.567867627989004e-06, "logits/chosen": -2.7437570095062256, "logits/rejected": -2.932138204574585, "logps/chosen": -187.63040161132812, "logps/rejected": -348.4907531738281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.107938766479492, "rewards/margins": 10.169568061828613, "rewards/rejected": -15.277506828308105, "step": 13061 }, { "epoch": 2.03, "learning_rate": 4.5671341874578555e-06, "logits/chosen": -3.046203136444092, "logits/rejected": -2.851604700088501, "logps/chosen": -172.81114196777344, "logps/rejected": -125.20770263671875, "loss": 1.5705, "rewards/accuracies": 0.5, "rewards/chosen": -6.70327091217041, "rewards/margins": 1.529205560684204, "rewards/rejected": -8.232476234436035, "step": 13062 }, { "epoch": 2.03, "learning_rate": 4.566400746926708e-06, "logits/chosen": -2.9687142372131348, "logits/rejected": -1.8020235300064087, "logps/chosen": -869.9091796875, "logps/rejected": -589.7994384765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.340142726898193, "rewards/margins": 8.742186546325684, "rewards/rejected": -14.082328796386719, "step": 13063 }, { "epoch": 2.03, "learning_rate": 4.56566730639556e-06, "logits/chosen": -2.203544855117798, "logits/rejected": -2.9193267822265625, "logps/chosen": -62.409339904785156, "logps/rejected": -302.29180908203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.750575542449951, "rewards/margins": 9.159595489501953, "rewards/rejected": -12.910171508789062, "step": 13064 }, { "epoch": 2.03, "learning_rate": 4.564933865864412e-06, "logits/chosen": -2.7730565071105957, "logits/rejected": -2.8829095363616943, "logps/chosen": -100.38818359375, "logps/rejected": -124.73854064941406, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -4.554227352142334, "rewards/margins": 4.746097564697266, "rewards/rejected": -9.300324440002441, "step": 13065 }, { "epoch": 2.03, "learning_rate": 4.564200425333264e-06, "logits/chosen": -2.606928586959839, "logits/rejected": -2.1261000633239746, "logps/chosen": -209.57090759277344, "logps/rejected": -180.17633056640625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -0.9671688079833984, "rewards/margins": 5.989912986755371, "rewards/rejected": -6.9570817947387695, "step": 13066 }, { "epoch": 2.03, "learning_rate": 4.563466984802117e-06, "logits/chosen": -1.417398452758789, "logits/rejected": -2.580631971359253, "logps/chosen": -149.94647216796875, "logps/rejected": -525.358642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.653250217437744, "rewards/margins": 14.59730339050293, "rewards/rejected": -19.250553131103516, "step": 13067 }, { "epoch": 2.03, "learning_rate": 4.5627335442709685e-06, "logits/chosen": -2.1770942211151123, "logits/rejected": -2.669971227645874, "logps/chosen": -206.85293579101562, "logps/rejected": -302.4178466796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.6166090965271, "rewards/margins": 7.0858001708984375, "rewards/rejected": -11.702409744262695, "step": 13068 }, { "epoch": 2.03, "learning_rate": 4.56200010373982e-06, "logits/chosen": -3.1710801124572754, "logits/rejected": -2.9997212886810303, "logps/chosen": -661.9568481445312, "logps/rejected": -530.4669189453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.053154945373535, "rewards/margins": 7.2944841384887695, "rewards/rejected": -12.347639083862305, "step": 13069 }, { "epoch": 2.03, "learning_rate": 4.561266663208672e-06, "logits/chosen": -2.6092135906219482, "logits/rejected": -1.8966954946517944, "logps/chosen": -821.8653564453125, "logps/rejected": -498.9631652832031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.407292366027832, "rewards/margins": 11.665691375732422, "rewards/rejected": -14.072982788085938, "step": 13070 }, { "epoch": 2.03, "learning_rate": 4.560533222677524e-06, "logits/chosen": -2.300708055496216, "logits/rejected": -2.263683319091797, "logps/chosen": -254.6228790283203, "logps/rejected": -476.05303955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1333229541778564, "rewards/margins": 12.992814064025879, "rewards/rejected": -16.126136779785156, "step": 13071 }, { "epoch": 2.03, "learning_rate": 4.559799782146377e-06, "logits/chosen": -0.8504396677017212, "logits/rejected": -2.2873222827911377, "logps/chosen": -175.2730712890625, "logps/rejected": -629.5035400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.843365669250488, "rewards/margins": 12.883623123168945, "rewards/rejected": -17.72698974609375, "step": 13072 }, { "epoch": 2.03, "learning_rate": 4.559066341615229e-06, "logits/chosen": -1.7655938863754272, "logits/rejected": -2.9466583728790283, "logps/chosen": -205.53753662109375, "logps/rejected": -516.96728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.5214524269104, "rewards/margins": 11.198572158813477, "rewards/rejected": -15.720024108886719, "step": 13073 }, { "epoch": 2.03, "learning_rate": 4.5583329010840814e-06, "logits/chosen": -2.3771631717681885, "logits/rejected": -2.782789945602417, "logps/chosen": -146.07757568359375, "logps/rejected": -375.3595275878906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.5022506713867188, "rewards/margins": 8.832910537719727, "rewards/rejected": -12.335161209106445, "step": 13074 }, { "epoch": 2.03, "learning_rate": 4.557599460552933e-06, "logits/chosen": -2.728149175643921, "logits/rejected": -3.2541863918304443, "logps/chosen": -182.81552124023438, "logps/rejected": -321.3690185546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.9166393280029297, "rewards/margins": 8.382624626159668, "rewards/rejected": -11.299263954162598, "step": 13075 }, { "epoch": 2.03, "learning_rate": 4.556866020021786e-06, "logits/chosen": -2.585923194885254, "logits/rejected": -3.019366502761841, "logps/chosen": -74.9531478881836, "logps/rejected": -355.8121643066406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.829552173614502, "rewards/margins": 8.479484558105469, "rewards/rejected": -13.309037208557129, "step": 13076 }, { "epoch": 2.03, "learning_rate": 4.556132579490638e-06, "logits/chosen": -2.787202835083008, "logits/rejected": -2.712712526321411, "logps/chosen": -138.11044311523438, "logps/rejected": -139.85643005371094, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -3.686131000518799, "rewards/margins": 4.294795036315918, "rewards/rejected": -7.980926036834717, "step": 13077 }, { "epoch": 2.03, "learning_rate": 4.55539913895949e-06, "logits/chosen": -1.8097901344299316, "logits/rejected": -3.0054771900177, "logps/chosen": -218.52444458007812, "logps/rejected": -686.6749267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.248423099517822, "rewards/margins": 12.747753143310547, "rewards/rejected": -16.996177673339844, "step": 13078 }, { "epoch": 2.03, "learning_rate": 4.554665698428342e-06, "logits/chosen": -2.586430311203003, "logits/rejected": -2.6463730335235596, "logps/chosen": -616.716552734375, "logps/rejected": -674.6375732421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.299760818481445, "rewards/margins": 9.332338333129883, "rewards/rejected": -16.632099151611328, "step": 13079 }, { "epoch": 2.03, "learning_rate": 4.5539322578971935e-06, "logits/chosen": -2.291863203048706, "logits/rejected": -2.82108211517334, "logps/chosen": -153.33567810058594, "logps/rejected": -380.9483947753906, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.041290283203125, "rewards/margins": 9.697423934936523, "rewards/rejected": -13.738714218139648, "step": 13080 }, { "epoch": 2.03, "learning_rate": 4.553198817366046e-06, "logits/chosen": -2.266613721847534, "logits/rejected": -2.2774908542633057, "logps/chosen": -598.65380859375, "logps/rejected": -413.76214599609375, "loss": 1.9188, "rewards/accuracies": 0.5, "rewards/chosen": -4.4500837326049805, "rewards/margins": 3.9026851654052734, "rewards/rejected": -8.352768898010254, "step": 13081 }, { "epoch": 2.03, "learning_rate": 4.552465376834898e-06, "logits/chosen": -3.0071637630462646, "logits/rejected": -2.870582103729248, "logps/chosen": -196.81875610351562, "logps/rejected": -477.8551025390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.551621198654175, "rewards/margins": 10.066559791564941, "rewards/rejected": -12.618181228637695, "step": 13082 }, { "epoch": 2.03, "learning_rate": 4.55173193630375e-06, "logits/chosen": -2.740349292755127, "logits/rejected": -2.8485822677612305, "logps/chosen": -94.93013763427734, "logps/rejected": -337.48577880859375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.2520744800567627, "rewards/margins": 6.8439531326293945, "rewards/rejected": -10.096027374267578, "step": 13083 }, { "epoch": 2.03, "learning_rate": 4.550998495772602e-06, "logits/chosen": -2.7359838485717773, "logits/rejected": -2.2289557456970215, "logps/chosen": -514.381591796875, "logps/rejected": -451.076171875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.228817939758301, "rewards/margins": 8.629831314086914, "rewards/rejected": -11.858648300170898, "step": 13084 }, { "epoch": 2.03, "learning_rate": 4.550265055241455e-06, "logits/chosen": -2.1244537830352783, "logits/rejected": -2.739459753036499, "logps/chosen": -100.15686798095703, "logps/rejected": -457.0737609863281, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.5491878986358643, "rewards/margins": 7.062409400939941, "rewards/rejected": -10.611597061157227, "step": 13085 }, { "epoch": 2.04, "learning_rate": 4.5495316147103065e-06, "logits/chosen": -2.450505018234253, "logits/rejected": -2.8248653411865234, "logps/chosen": -151.52320861816406, "logps/rejected": -406.1151123046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.74229621887207, "rewards/margins": 12.05386734008789, "rewards/rejected": -16.79616355895996, "step": 13086 }, { "epoch": 2.04, "learning_rate": 4.548798174179158e-06, "logits/chosen": -2.8435490131378174, "logits/rejected": -3.0576770305633545, "logps/chosen": -650.72412109375, "logps/rejected": -679.5335693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.670694351196289, "rewards/margins": 9.77749252319336, "rewards/rejected": -15.448185920715332, "step": 13087 }, { "epoch": 2.04, "learning_rate": 4.54806473364801e-06, "logits/chosen": -2.6489953994750977, "logits/rejected": -1.7474234104156494, "logps/chosen": -362.4753112792969, "logps/rejected": -292.30950927734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.4997878074646, "rewards/margins": 7.572137832641602, "rewards/rejected": -12.07192611694336, "step": 13088 }, { "epoch": 2.04, "learning_rate": 4.547331293116863e-06, "logits/chosen": -2.629879951477051, "logits/rejected": -2.1880836486816406, "logps/chosen": -373.7381286621094, "logps/rejected": -245.6351318359375, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -2.4951934814453125, "rewards/margins": 6.328847885131836, "rewards/rejected": -8.824041366577148, "step": 13089 }, { "epoch": 2.04, "learning_rate": 4.546597852585715e-06, "logits/chosen": -2.1708662509918213, "logits/rejected": -3.1745877265930176, "logps/chosen": -234.45501708984375, "logps/rejected": -592.578369140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.856812477111816, "rewards/margins": 8.527714729309082, "rewards/rejected": -14.384527206420898, "step": 13090 }, { "epoch": 2.04, "learning_rate": 4.545864412054568e-06, "logits/chosen": -2.8360097408294678, "logits/rejected": -2.8404455184936523, "logps/chosen": -368.250732421875, "logps/rejected": -433.9995422363281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.335598945617676, "rewards/margins": 8.714691162109375, "rewards/rejected": -12.05029010772705, "step": 13091 }, { "epoch": 2.04, "learning_rate": 4.5451309715234195e-06, "logits/chosen": -3.0371251106262207, "logits/rejected": -3.0597920417785645, "logps/chosen": -212.4116973876953, "logps/rejected": -238.8060302734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.573575019836426, "rewards/margins": 7.7510528564453125, "rewards/rejected": -10.324627876281738, "step": 13092 }, { "epoch": 2.04, "learning_rate": 4.544397530992271e-06, "logits/chosen": -2.5484790802001953, "logits/rejected": -2.5822646617889404, "logps/chosen": -260.39788818359375, "logps/rejected": -443.9990234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.785174369812012, "rewards/margins": 10.926797866821289, "rewards/rejected": -15.711973190307617, "step": 13093 }, { "epoch": 2.04, "learning_rate": 4.543664090461124e-06, "logits/chosen": -2.747143268585205, "logits/rejected": -1.6568667888641357, "logps/chosen": -300.12200927734375, "logps/rejected": -214.33901977539062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.122110366821289, "rewards/margins": 7.607662200927734, "rewards/rejected": -10.729772567749023, "step": 13094 }, { "epoch": 2.04, "learning_rate": 4.542930649929976e-06, "logits/chosen": -2.954176187515259, "logits/rejected": -2.561189889907837, "logps/chosen": -305.2401428222656, "logps/rejected": -308.14013671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.7535468339920044, "rewards/margins": 7.472225189208984, "rewards/rejected": -9.2257719039917, "step": 13095 }, { "epoch": 2.04, "learning_rate": 4.542197209398828e-06, "logits/chosen": -2.5378658771514893, "logits/rejected": -2.864879608154297, "logps/chosen": -159.87503051757812, "logps/rejected": -316.427734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.751807451248169, "rewards/margins": 9.641457557678223, "rewards/rejected": -13.393264770507812, "step": 13096 }, { "epoch": 2.04, "learning_rate": 4.54146376886768e-06, "logits/chosen": -2.597579002380371, "logits/rejected": -3.029081344604492, "logps/chosen": -228.56887817382812, "logps/rejected": -271.3377990722656, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -4.738236904144287, "rewards/margins": 5.75508451461792, "rewards/rejected": -10.493321418762207, "step": 13097 }, { "epoch": 2.04, "learning_rate": 4.5407303283365324e-06, "logits/chosen": -2.1552505493164062, "logits/rejected": -3.105083465576172, "logps/chosen": -62.15610122680664, "logps/rejected": -295.671630859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.736125946044922, "rewards/margins": 6.976391792297363, "rewards/rejected": -11.712517738342285, "step": 13098 }, { "epoch": 2.04, "learning_rate": 4.539996887805384e-06, "logits/chosen": -2.577178478240967, "logits/rejected": -3.1110262870788574, "logps/chosen": -133.6053466796875, "logps/rejected": -297.8387145996094, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -4.968287467956543, "rewards/margins": 4.802756309509277, "rewards/rejected": -9.77104377746582, "step": 13099 }, { "epoch": 2.04, "learning_rate": 4.539263447274236e-06, "logits/chosen": -0.6519309282302856, "logits/rejected": -1.7950690984725952, "logps/chosen": -52.82810974121094, "logps/rejected": -374.661865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.301670551300049, "rewards/margins": 10.085830688476562, "rewards/rejected": -13.387500762939453, "step": 13100 }, { "epoch": 2.04, "learning_rate": 4.538530006743088e-06, "logits/chosen": -1.0214247703552246, "logits/rejected": -2.909994602203369, "logps/chosen": -148.70103454589844, "logps/rejected": -577.435302734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.551459789276123, "rewards/margins": 11.433626174926758, "rewards/rejected": -15.985086441040039, "step": 13101 }, { "epoch": 2.04, "learning_rate": 4.53779656621194e-06, "logits/chosen": -2.2074172496795654, "logits/rejected": -2.8121254444122314, "logps/chosen": -345.2628173828125, "logps/rejected": -442.78558349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.667926788330078, "rewards/margins": 9.2569580078125, "rewards/rejected": -14.924884796142578, "step": 13102 }, { "epoch": 2.04, "learning_rate": 4.537063125680793e-06, "logits/chosen": -1.8356788158416748, "logits/rejected": -2.0864269733428955, "logps/chosen": -406.4254150390625, "logps/rejected": -492.47039794921875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.3831787109375, "rewards/margins": 9.845640182495117, "rewards/rejected": -15.228818893432617, "step": 13103 }, { "epoch": 2.04, "learning_rate": 4.5363296851496446e-06, "logits/chosen": -1.761820673942566, "logits/rejected": -1.9241299629211426, "logps/chosen": -224.02943420410156, "logps/rejected": -300.1705322265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.774304151535034, "rewards/margins": 8.675003051757812, "rewards/rejected": -11.449307441711426, "step": 13104 }, { "epoch": 2.04, "learning_rate": 4.5355962446184964e-06, "logits/chosen": -2.7798924446105957, "logits/rejected": -2.217475175857544, "logps/chosen": -277.0754699707031, "logps/rejected": -390.4510498046875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.350353240966797, "rewards/margins": 8.926395416259766, "rewards/rejected": -14.276748657226562, "step": 13105 }, { "epoch": 2.04, "learning_rate": 4.534862804087348e-06, "logits/chosen": -2.80881667137146, "logits/rejected": -3.08534574508667, "logps/chosen": -62.160091400146484, "logps/rejected": -414.82635498046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.1045563220977783, "rewards/margins": 9.305610656738281, "rewards/rejected": -12.410167694091797, "step": 13106 }, { "epoch": 2.04, "learning_rate": 4.534129363556201e-06, "logits/chosen": -2.4705448150634766, "logits/rejected": -2.8625376224517822, "logps/chosen": -213.67002868652344, "logps/rejected": -269.92120361328125, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/chosen": -1.0840202569961548, "rewards/margins": 5.853037357330322, "rewards/rejected": -6.9370574951171875, "step": 13107 }, { "epoch": 2.04, "learning_rate": 4.533395923025054e-06, "logits/chosen": -2.6000189781188965, "logits/rejected": -2.5848026275634766, "logps/chosen": -125.96630859375, "logps/rejected": -250.20050048828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.368947982788086, "rewards/margins": 6.710068702697754, "rewards/rejected": -11.079017639160156, "step": 13108 }, { "epoch": 2.04, "learning_rate": 4.532662482493906e-06, "logits/chosen": -2.975252628326416, "logits/rejected": -2.4176042079925537, "logps/chosen": -464.6798095703125, "logps/rejected": -339.5843811035156, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.0665955543518066, "rewards/margins": 7.824273109436035, "rewards/rejected": -9.890869140625, "step": 13109 }, { "epoch": 2.04, "learning_rate": 4.5319290419627575e-06, "logits/chosen": -1.3355661630630493, "logits/rejected": -2.5515589714050293, "logps/chosen": -201.94898986816406, "logps/rejected": -607.1046752929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1181559562683105, "rewards/margins": 12.01539421081543, "rewards/rejected": -19.133548736572266, "step": 13110 }, { "epoch": 2.04, "learning_rate": 4.531195601431609e-06, "logits/chosen": -2.10614013671875, "logits/rejected": -2.3778440952301025, "logps/chosen": -210.1673583984375, "logps/rejected": -274.61529541015625, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -2.9439544677734375, "rewards/margins": 7.562865734100342, "rewards/rejected": -10.506820678710938, "step": 13111 }, { "epoch": 2.04, "learning_rate": 4.530462160900462e-06, "logits/chosen": -2.6808643341064453, "logits/rejected": -3.094681978225708, "logps/chosen": -423.5186767578125, "logps/rejected": -546.456787109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.61793851852417, "rewards/margins": 7.152999401092529, "rewards/rejected": -10.7709379196167, "step": 13112 }, { "epoch": 2.04, "learning_rate": 4.529728720369314e-06, "logits/chosen": -2.5296242237091064, "logits/rejected": -3.0437397956848145, "logps/chosen": -112.71577453613281, "logps/rejected": -339.8130187988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7587368488311768, "rewards/margins": 10.131143569946289, "rewards/rejected": -12.889881134033203, "step": 13113 }, { "epoch": 2.04, "learning_rate": 4.528995279838166e-06, "logits/chosen": -2.904367685317993, "logits/rejected": -3.073122501373291, "logps/chosen": -140.4625701904297, "logps/rejected": -247.1812744140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.347184658050537, "rewards/margins": 7.851363182067871, "rewards/rejected": -11.19854736328125, "step": 13114 }, { "epoch": 2.04, "learning_rate": 4.528261839307018e-06, "logits/chosen": -2.065943717956543, "logits/rejected": -2.9655861854553223, "logps/chosen": -180.73291015625, "logps/rejected": -324.2261047363281, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -4.909289836883545, "rewards/margins": 6.930631160736084, "rewards/rejected": -11.839920997619629, "step": 13115 }, { "epoch": 2.04, "learning_rate": 4.5275283987758705e-06, "logits/chosen": -1.6136534214019775, "logits/rejected": -2.51945424079895, "logps/chosen": -161.88275146484375, "logps/rejected": -311.12091064453125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -4.149127006530762, "rewards/margins": 6.572539329528809, "rewards/rejected": -10.72166633605957, "step": 13116 }, { "epoch": 2.04, "learning_rate": 4.526794958244722e-06, "logits/chosen": -2.8642303943634033, "logits/rejected": -2.9249982833862305, "logps/chosen": -249.81661987304688, "logps/rejected": -310.6278381347656, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.299232006072998, "rewards/margins": 7.824104309082031, "rewards/rejected": -13.123336791992188, "step": 13117 }, { "epoch": 2.04, "learning_rate": 4.526061517713574e-06, "logits/chosen": -2.4694719314575195, "logits/rejected": -3.11051869392395, "logps/chosen": -166.07205200195312, "logps/rejected": -453.88714599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.450026988983154, "rewards/margins": 12.750982284545898, "rewards/rejected": -17.201007843017578, "step": 13118 }, { "epoch": 2.04, "learning_rate": 4.525328077182426e-06, "logits/chosen": -2.841400146484375, "logits/rejected": -2.6729507446289062, "logps/chosen": -368.7127685546875, "logps/rejected": -635.0308227539062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.4660370349884033, "rewards/margins": 7.6052045822143555, "rewards/rejected": -11.07124137878418, "step": 13119 }, { "epoch": 2.04, "learning_rate": 4.524594636651278e-06, "logits/chosen": -3.131136894226074, "logits/rejected": -2.9051880836486816, "logps/chosen": -258.38653564453125, "logps/rejected": -305.8333740234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.533501625061035, "rewards/margins": 8.429633140563965, "rewards/rejected": -10.963134765625, "step": 13120 }, { "epoch": 2.04, "learning_rate": 4.523861196120131e-06, "logits/chosen": -3.0727713108062744, "logits/rejected": -2.2296924591064453, "logps/chosen": -349.8759765625, "logps/rejected": -238.32281494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.20704804360866547, "rewards/margins": 11.222375869750977, "rewards/rejected": -11.015327453613281, "step": 13121 }, { "epoch": 2.04, "learning_rate": 4.523127755588983e-06, "logits/chosen": -1.9165680408477783, "logits/rejected": -2.556464433670044, "logps/chosen": -449.76904296875, "logps/rejected": -523.2676391601562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.0762112140655518, "rewards/margins": 8.757719039916992, "rewards/rejected": -11.833930015563965, "step": 13122 }, { "epoch": 2.04, "learning_rate": 4.5223943150578345e-06, "logits/chosen": -2.1008689403533936, "logits/rejected": -2.781968355178833, "logps/chosen": -231.36109924316406, "logps/rejected": -367.8438720703125, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -5.101253509521484, "rewards/margins": 8.777458190917969, "rewards/rejected": -13.878711700439453, "step": 13123 }, { "epoch": 2.04, "learning_rate": 4.521660874526687e-06, "logits/chosen": -2.062394857406616, "logits/rejected": -2.837315797805786, "logps/chosen": -296.70318603515625, "logps/rejected": -494.92437744140625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.412128448486328, "rewards/margins": 7.741762161254883, "rewards/rejected": -13.153890609741211, "step": 13124 }, { "epoch": 2.04, "learning_rate": 4.52092743399554e-06, "logits/chosen": -2.559985399246216, "logits/rejected": -2.197420835494995, "logps/chosen": -521.3798828125, "logps/rejected": -490.5948791503906, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.204948425292969, "rewards/margins": 6.8214311599731445, "rewards/rejected": -11.026379585266113, "step": 13125 }, { "epoch": 2.04, "learning_rate": 4.520193993464392e-06, "logits/chosen": -2.975722074508667, "logits/rejected": -2.2892301082611084, "logps/chosen": -184.11639404296875, "logps/rejected": -344.0382080078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.871699094772339, "rewards/margins": 12.434270858764648, "rewards/rejected": -16.30596923828125, "step": 13126 }, { "epoch": 2.04, "learning_rate": 4.519460552933244e-06, "logits/chosen": -2.2482705116271973, "logits/rejected": -3.094048500061035, "logps/chosen": -324.01593017578125, "logps/rejected": -485.43096923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.581709861755371, "rewards/margins": 10.969770431518555, "rewards/rejected": -14.55147933959961, "step": 13127 }, { "epoch": 2.04, "learning_rate": 4.5187271124020956e-06, "logits/chosen": -3.000382661819458, "logits/rejected": -3.048265218734741, "logps/chosen": -56.42254638671875, "logps/rejected": -162.53558349609375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.674534320831299, "rewards/margins": 7.605052947998047, "rewards/rejected": -11.279586791992188, "step": 13128 }, { "epoch": 2.04, "learning_rate": 4.5179936718709474e-06, "logits/chosen": -2.5368950366973877, "logits/rejected": -2.8084845542907715, "logps/chosen": -239.3680419921875, "logps/rejected": -293.9556884765625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -5.222192764282227, "rewards/margins": 5.905169486999512, "rewards/rejected": -11.127361297607422, "step": 13129 }, { "epoch": 2.04, "learning_rate": 4.5172602313398e-06, "logits/chosen": -2.933894634246826, "logits/rejected": -2.1462197303771973, "logps/chosen": -740.1866455078125, "logps/rejected": -479.7706604003906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.231789588928223, "rewards/margins": 8.861125946044922, "rewards/rejected": -17.092914581298828, "step": 13130 }, { "epoch": 2.04, "learning_rate": 4.516526790808652e-06, "logits/chosen": -2.7760555744171143, "logits/rejected": -2.5245261192321777, "logps/chosen": -144.23870849609375, "logps/rejected": -313.34613037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9101390838623047, "rewards/margins": 10.358185768127441, "rewards/rejected": -13.268325805664062, "step": 13131 }, { "epoch": 2.04, "learning_rate": 4.515793350277504e-06, "logits/chosen": -2.589181661605835, "logits/rejected": -2.711787700653076, "logps/chosen": -259.1112060546875, "logps/rejected": -330.6531066894531, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -3.1856689453125, "rewards/margins": 8.094781875610352, "rewards/rejected": -11.280450820922852, "step": 13132 }, { "epoch": 2.04, "learning_rate": 4.515059909746356e-06, "logits/chosen": -1.995021104812622, "logits/rejected": -2.312981128692627, "logps/chosen": -133.15667724609375, "logps/rejected": -263.86090087890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.555027723312378, "rewards/margins": 11.538534164428711, "rewards/rejected": -13.093561172485352, "step": 13133 }, { "epoch": 2.04, "learning_rate": 4.5143264692152085e-06, "logits/chosen": -1.6915110349655151, "logits/rejected": -2.3798811435699463, "logps/chosen": -93.4893798828125, "logps/rejected": -326.13433837890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.1316540241241455, "rewards/margins": 10.314023971557617, "rewards/rejected": -13.4456787109375, "step": 13134 }, { "epoch": 2.04, "learning_rate": 4.51359302868406e-06, "logits/chosen": -1.4702107906341553, "logits/rejected": -2.6961874961853027, "logps/chosen": -179.83920288085938, "logps/rejected": -430.4862365722656, "loss": 0.2175, "rewards/accuracies": 1.0, "rewards/chosen": -8.668050765991211, "rewards/margins": 6.895999908447266, "rewards/rejected": -15.564050674438477, "step": 13135 }, { "epoch": 2.04, "learning_rate": 4.512859588152912e-06, "logits/chosen": -2.363588809967041, "logits/rejected": -2.085523843765259, "logps/chosen": -723.9110717773438, "logps/rejected": -661.95703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.345494270324707, "rewards/margins": 10.167346954345703, "rewards/rejected": -17.512842178344727, "step": 13136 }, { "epoch": 2.04, "learning_rate": 4.512126147621764e-06, "logits/chosen": -2.8098647594451904, "logits/rejected": -2.096555709838867, "logps/chosen": -403.1907958984375, "logps/rejected": -210.58953857421875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -6.148418426513672, "rewards/margins": 4.834502220153809, "rewards/rejected": -10.982921600341797, "step": 13137 }, { "epoch": 2.04, "learning_rate": 4.511392707090617e-06, "logits/chosen": -1.091346025466919, "logits/rejected": -2.3271727561950684, "logps/chosen": -112.94178771972656, "logps/rejected": -241.13462829589844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.267482757568359, "rewards/margins": 7.060410499572754, "rewards/rejected": -11.327893257141113, "step": 13138 }, { "epoch": 2.04, "learning_rate": 4.510659266559469e-06, "logits/chosen": -2.8556625843048096, "logits/rejected": -2.7750751972198486, "logps/chosen": -212.54440307617188, "logps/rejected": -281.30078125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.135342597961426, "rewards/margins": 7.35592794418335, "rewards/rejected": -13.491270065307617, "step": 13139 }, { "epoch": 2.04, "learning_rate": 4.509925826028321e-06, "logits/chosen": -2.8886568546295166, "logits/rejected": -2.383605718612671, "logps/chosen": -463.3144836425781, "logps/rejected": -217.3350830078125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -4.871877670288086, "rewards/margins": 8.441915512084961, "rewards/rejected": -13.313793182373047, "step": 13140 }, { "epoch": 2.04, "learning_rate": 4.509192385497173e-06, "logits/chosen": -3.1143500804901123, "logits/rejected": -2.8362441062927246, "logps/chosen": -521.9884033203125, "logps/rejected": -593.2960205078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.183917045593262, "rewards/margins": 8.19897747039795, "rewards/rejected": -14.382894515991211, "step": 13141 }, { "epoch": 2.04, "learning_rate": 4.508458944966025e-06, "logits/chosen": -2.50887131690979, "logits/rejected": -2.4547922611236572, "logps/chosen": -253.3047332763672, "logps/rejected": -328.6051025390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.617800235748291, "rewards/margins": 8.697158813476562, "rewards/rejected": -12.314958572387695, "step": 13142 }, { "epoch": 2.04, "learning_rate": 4.507725504434878e-06, "logits/chosen": -1.9205795526504517, "logits/rejected": -3.069467306137085, "logps/chosen": -193.1961212158203, "logps/rejected": -492.5340576171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.526141166687012, "rewards/margins": 10.66454792022705, "rewards/rejected": -18.190689086914062, "step": 13143 }, { "epoch": 2.04, "learning_rate": 4.50699206390373e-06, "logits/chosen": -3.110656499862671, "logits/rejected": -3.101576328277588, "logps/chosen": -163.68194580078125, "logps/rejected": -349.05511474609375, "loss": 0.2661, "rewards/accuracies": 1.0, "rewards/chosen": -6.315781593322754, "rewards/margins": 4.741766452789307, "rewards/rejected": -11.057548522949219, "step": 13144 }, { "epoch": 2.04, "learning_rate": 4.506258623372582e-06, "logits/chosen": -2.456226110458374, "logits/rejected": -3.114946126937866, "logps/chosen": -85.96243286132812, "logps/rejected": -284.171875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -4.613459587097168, "rewards/margins": 7.337104797363281, "rewards/rejected": -11.95056438446045, "step": 13145 }, { "epoch": 2.04, "learning_rate": 4.505525182841434e-06, "logits/chosen": -0.8140876889228821, "logits/rejected": -2.411433219909668, "logps/chosen": -136.95782470703125, "logps/rejected": -510.70916748046875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -6.8453688621521, "rewards/margins": 9.589410781860352, "rewards/rejected": -16.43478012084961, "step": 13146 }, { "epoch": 2.04, "learning_rate": 4.504791742310286e-06, "logits/chosen": -1.3827345371246338, "logits/rejected": -2.936819076538086, "logps/chosen": -113.99845886230469, "logps/rejected": -479.11346435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.484248161315918, "rewards/margins": 10.620869636535645, "rewards/rejected": -18.105117797851562, "step": 13147 }, { "epoch": 2.04, "learning_rate": 4.504058301779138e-06, "logits/chosen": -2.4414448738098145, "logits/rejected": -3.202901601791382, "logps/chosen": -126.83891296386719, "logps/rejected": -317.4295349121094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.024552345275879, "rewards/margins": 10.590999603271484, "rewards/rejected": -14.615551948547363, "step": 13148 }, { "epoch": 2.04, "learning_rate": 4.50332486124799e-06, "logits/chosen": -1.584285855293274, "logits/rejected": -2.4331448078155518, "logps/chosen": -206.40476989746094, "logps/rejected": -425.90875244140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.099289894104004, "rewards/margins": 10.648643493652344, "rewards/rejected": -13.747933387756348, "step": 13149 }, { "epoch": 2.05, "learning_rate": 4.502591420716842e-06, "logits/chosen": -1.8704415559768677, "logits/rejected": -2.6187145709991455, "logps/chosen": -201.89724731445312, "logps/rejected": -431.6150817871094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.677994251251221, "rewards/margins": 9.633134841918945, "rewards/rejected": -14.311128616333008, "step": 13150 }, { "epoch": 2.05, "learning_rate": 4.501857980185694e-06, "logits/chosen": -2.7207515239715576, "logits/rejected": -2.4873828887939453, "logps/chosen": -316.1004333496094, "logps/rejected": -468.38330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2018256187438965, "rewards/margins": 13.999902725219727, "rewards/rejected": -16.20172882080078, "step": 13151 }, { "epoch": 2.05, "learning_rate": 4.5011245396545466e-06, "logits/chosen": -1.1821626424789429, "logits/rejected": -2.768552541732788, "logps/chosen": -86.96507263183594, "logps/rejected": -387.3347473144531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.7716176509857178, "rewards/margins": 11.213835716247559, "rewards/rejected": -14.985452651977539, "step": 13152 }, { "epoch": 2.05, "learning_rate": 4.5003910991233984e-06, "logits/chosen": -2.885765552520752, "logits/rejected": -2.975945234298706, "logps/chosen": -610.6563720703125, "logps/rejected": -632.4088745117188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.553483009338379, "rewards/margins": 9.170745849609375, "rewards/rejected": -14.72422981262207, "step": 13153 }, { "epoch": 2.05, "learning_rate": 4.49965765859225e-06, "logits/chosen": -1.8689440488815308, "logits/rejected": -3.0037314891815186, "logps/chosen": -180.73435974121094, "logps/rejected": -491.09649658203125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -5.028560638427734, "rewards/margins": 8.203522682189941, "rewards/rejected": -13.232083320617676, "step": 13154 }, { "epoch": 2.05, "learning_rate": 4.498924218061102e-06, "logits/chosen": -2.7960610389709473, "logits/rejected": -2.8208882808685303, "logps/chosen": -224.12564086914062, "logps/rejected": -390.91143798828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.126307964324951, "rewards/margins": 9.90044116973877, "rewards/rejected": -13.026748657226562, "step": 13155 }, { "epoch": 2.05, "learning_rate": 4.498190777529955e-06, "logits/chosen": -2.681462049484253, "logits/rejected": -3.2029218673706055, "logps/chosen": -168.70916748046875, "logps/rejected": -419.55426025390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.615612506866455, "rewards/margins": 8.713733673095703, "rewards/rejected": -14.329345703125, "step": 13156 }, { "epoch": 2.05, "learning_rate": 4.497457336998807e-06, "logits/chosen": -3.0521481037139893, "logits/rejected": -3.097691297531128, "logps/chosen": -633.24951171875, "logps/rejected": -565.18115234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.749307632446289, "rewards/margins": 7.692439079284668, "rewards/rejected": -13.441746711730957, "step": 13157 }, { "epoch": 2.05, "learning_rate": 4.4967238964676595e-06, "logits/chosen": -2.7963240146636963, "logits/rejected": -3.10141921043396, "logps/chosen": -175.11703491210938, "logps/rejected": -215.897216796875, "loss": 0.0825, "rewards/accuracies": 1.0, "rewards/chosen": -5.444841384887695, "rewards/margins": 5.460259437561035, "rewards/rejected": -10.90510082244873, "step": 13158 }, { "epoch": 2.05, "learning_rate": 4.495990455936511e-06, "logits/chosen": -2.631364107131958, "logits/rejected": -2.8321735858917236, "logps/chosen": -117.86251831054688, "logps/rejected": -247.83160400390625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.997330665588379, "rewards/margins": 7.9641313552856445, "rewards/rejected": -11.961462020874023, "step": 13159 }, { "epoch": 2.05, "learning_rate": 4.495257015405363e-06, "logits/chosen": -2.8093836307525635, "logits/rejected": -1.842786192893982, "logps/chosen": -512.1812133789062, "logps/rejected": -487.08416748046875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -9.235664367675781, "rewards/margins": 7.46587610244751, "rewards/rejected": -16.701539993286133, "step": 13160 }, { "epoch": 2.05, "learning_rate": 4.494523574874216e-06, "logits/chosen": -2.283496141433716, "logits/rejected": -2.8080551624298096, "logps/chosen": -190.31463623046875, "logps/rejected": -394.93646240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2367196083068848, "rewards/margins": 10.282142639160156, "rewards/rejected": -12.518861770629883, "step": 13161 }, { "epoch": 2.05, "learning_rate": 4.493790134343068e-06, "logits/chosen": -2.0570592880249023, "logits/rejected": -2.765127658843994, "logps/chosen": -524.6214599609375, "logps/rejected": -580.3817749023438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.85822868347168, "rewards/margins": 10.778339385986328, "rewards/rejected": -17.636568069458008, "step": 13162 }, { "epoch": 2.05, "learning_rate": 4.49305669381192e-06, "logits/chosen": -2.027254581451416, "logits/rejected": -2.919210195541382, "logps/chosen": -176.46092224121094, "logps/rejected": -447.20574951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.009291172027588, "rewards/margins": 13.679511070251465, "rewards/rejected": -20.68880271911621, "step": 13163 }, { "epoch": 2.05, "learning_rate": 4.492323253280772e-06, "logits/chosen": -2.845519542694092, "logits/rejected": -2.5703635215759277, "logps/chosen": -224.8077392578125, "logps/rejected": -342.3556823730469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.498629570007324, "rewards/margins": 7.7710371017456055, "rewards/rejected": -13.26966667175293, "step": 13164 }, { "epoch": 2.05, "learning_rate": 4.491589812749624e-06, "logits/chosen": -2.721060037612915, "logits/rejected": -3.075443983078003, "logps/chosen": -169.33128356933594, "logps/rejected": -315.6705322265625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.91013765335083, "rewards/margins": 5.710606575012207, "rewards/rejected": -10.620744705200195, "step": 13165 }, { "epoch": 2.05, "learning_rate": 4.490856372218476e-06, "logits/chosen": -2.782064199447632, "logits/rejected": -1.488699197769165, "logps/chosen": -401.0975036621094, "logps/rejected": -367.96173095703125, "loss": 0.1165, "rewards/accuracies": 1.0, "rewards/chosen": -2.8222527503967285, "rewards/margins": 6.384096145629883, "rewards/rejected": -9.20634937286377, "step": 13166 }, { "epoch": 2.05, "learning_rate": 4.490122931687328e-06, "logits/chosen": -2.6683449745178223, "logits/rejected": -2.995021343231201, "logps/chosen": -337.2350158691406, "logps/rejected": -433.61279296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.5148348808288574, "rewards/margins": 7.838666915893555, "rewards/rejected": -11.35350227355957, "step": 13167 }, { "epoch": 2.05, "learning_rate": 4.48938949115618e-06, "logits/chosen": -1.9832594394683838, "logits/rejected": -3.1996376514434814, "logps/chosen": -138.73297119140625, "logps/rejected": -452.98895263671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.503800392150879, "rewards/margins": 8.402563095092773, "rewards/rejected": -13.906362533569336, "step": 13168 }, { "epoch": 2.05, "learning_rate": 4.488656050625032e-06, "logits/chosen": -0.9373947978019714, "logits/rejected": -2.0571916103363037, "logps/chosen": -661.6394653320312, "logps/rejected": -425.26092529296875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -8.686666488647461, "rewards/margins": 6.104926586151123, "rewards/rejected": -14.791593551635742, "step": 13169 }, { "epoch": 2.05, "learning_rate": 4.487922610093885e-06, "logits/chosen": -2.085305690765381, "logits/rejected": -3.028412103652954, "logps/chosen": -111.4159164428711, "logps/rejected": -368.6023254394531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.388771057128906, "rewards/margins": 8.558755874633789, "rewards/rejected": -16.947526931762695, "step": 13170 }, { "epoch": 2.05, "learning_rate": 4.4871891695627365e-06, "logits/chosen": -2.768259048461914, "logits/rejected": -3.1380844116210938, "logps/chosen": -76.37283325195312, "logps/rejected": -219.86692810058594, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -4.730884552001953, "rewards/margins": 4.431905746459961, "rewards/rejected": -9.162790298461914, "step": 13171 }, { "epoch": 2.05, "learning_rate": 4.486455729031588e-06, "logits/chosen": -2.697071075439453, "logits/rejected": -2.86775803565979, "logps/chosen": -83.63973999023438, "logps/rejected": -268.694091796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.758979320526123, "rewards/margins": 8.58357048034668, "rewards/rejected": -13.342550277709961, "step": 13172 }, { "epoch": 2.05, "learning_rate": 4.48572228850044e-06, "logits/chosen": -1.5245230197906494, "logits/rejected": -2.589365005493164, "logps/chosen": -147.3374786376953, "logps/rejected": -306.65191650390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.6255884170532227, "rewards/margins": 9.511018753051758, "rewards/rejected": -13.136606216430664, "step": 13173 }, { "epoch": 2.05, "learning_rate": 4.484988847969293e-06, "logits/chosen": -1.1829502582550049, "logits/rejected": -2.460644245147705, "logps/chosen": -195.05221557617188, "logps/rejected": -595.9351806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.266937494277954, "rewards/margins": 15.205757141113281, "rewards/rejected": -18.472694396972656, "step": 13174 }, { "epoch": 2.05, "learning_rate": 4.484255407438146e-06, "logits/chosen": -2.855046033859253, "logits/rejected": -3.148070812225342, "logps/chosen": -116.55364990234375, "logps/rejected": -390.09478759765625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.59619140625, "rewards/margins": 11.523219108581543, "rewards/rejected": -17.119409561157227, "step": 13175 }, { "epoch": 2.05, "learning_rate": 4.483521966906998e-06, "logits/chosen": -2.8276774883270264, "logits/rejected": -2.947007417678833, "logps/chosen": -187.45614624023438, "logps/rejected": -269.75238037109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2160093784332275, "rewards/margins": 8.482149124145508, "rewards/rejected": -11.698159217834473, "step": 13176 }, { "epoch": 2.05, "learning_rate": 4.4827885263758495e-06, "logits/chosen": -2.46445369720459, "logits/rejected": -2.696927785873413, "logps/chosen": -240.44442749023438, "logps/rejected": -439.8583679199219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.757124423980713, "rewards/margins": 11.517656326293945, "rewards/rejected": -15.2747802734375, "step": 13177 }, { "epoch": 2.05, "learning_rate": 4.482055085844701e-06, "logits/chosen": -1.9035052061080933, "logits/rejected": -2.7980408668518066, "logps/chosen": -121.81924438476562, "logps/rejected": -333.67236328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.377110004425049, "rewards/margins": 9.266474723815918, "rewards/rejected": -12.643585205078125, "step": 13178 }, { "epoch": 2.05, "learning_rate": 4.481321645313554e-06, "logits/chosen": -2.7814407348632812, "logits/rejected": -2.9108524322509766, "logps/chosen": -375.2622985839844, "logps/rejected": -463.2973327636719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.567779541015625, "rewards/margins": 8.310079574584961, "rewards/rejected": -14.877859115600586, "step": 13179 }, { "epoch": 2.05, "learning_rate": 4.480588204782406e-06, "logits/chosen": -2.802534341812134, "logits/rejected": -2.9516894817352295, "logps/chosen": -279.0059814453125, "logps/rejected": -239.85562133789062, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.917961597442627, "rewards/margins": 7.307632923126221, "rewards/rejected": -13.225594520568848, "step": 13180 }, { "epoch": 2.05, "learning_rate": 4.479854764251258e-06, "logits/chosen": -2.421398162841797, "logits/rejected": -2.7662267684936523, "logps/chosen": -182.90586853027344, "logps/rejected": -395.9930725097656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.523260116577148, "rewards/margins": 9.944313049316406, "rewards/rejected": -14.467573165893555, "step": 13181 }, { "epoch": 2.05, "learning_rate": 4.47912132372011e-06, "logits/chosen": -1.5044068098068237, "logits/rejected": -2.4338457584381104, "logps/chosen": -591.16796875, "logps/rejected": -905.430419921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.6679182052612305, "rewards/margins": 12.381874084472656, "rewards/rejected": -18.049793243408203, "step": 13182 }, { "epoch": 2.05, "learning_rate": 4.478387883188962e-06, "logits/chosen": -1.9402774572372437, "logits/rejected": -2.686156988143921, "logps/chosen": -248.74298095703125, "logps/rejected": -301.8236083984375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -5.15753173828125, "rewards/margins": 4.841139316558838, "rewards/rejected": -9.99867057800293, "step": 13183 }, { "epoch": 2.05, "learning_rate": 4.477654442657814e-06, "logits/chosen": -2.4159297943115234, "logits/rejected": -2.9447736740112305, "logps/chosen": -255.23219299316406, "logps/rejected": -477.9452209472656, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.068007946014404, "rewards/margins": 7.912356376647949, "rewards/rejected": -11.980363845825195, "step": 13184 }, { "epoch": 2.05, "learning_rate": 4.476921002126666e-06, "logits/chosen": -2.406937837600708, "logits/rejected": -2.6895933151245117, "logps/chosen": -109.5800552368164, "logps/rejected": -245.89544677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.337250709533691, "rewards/margins": 9.948746681213379, "rewards/rejected": -14.28599739074707, "step": 13185 }, { "epoch": 2.05, "learning_rate": 4.476187561595518e-06, "logits/chosen": -2.5361580848693848, "logits/rejected": -2.4074184894561768, "logps/chosen": -494.5538635253906, "logps/rejected": -432.70733642578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.345176696777344, "rewards/margins": 7.674983024597168, "rewards/rejected": -13.020159721374512, "step": 13186 }, { "epoch": 2.05, "learning_rate": 4.475454121064371e-06, "logits/chosen": -1.9833756685256958, "logits/rejected": -2.811277151107788, "logps/chosen": -317.8720397949219, "logps/rejected": -621.0948486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.45409870147705, "rewards/margins": 11.79200267791748, "rewards/rejected": -20.24610137939453, "step": 13187 }, { "epoch": 2.05, "learning_rate": 4.474720680533223e-06, "logits/chosen": -0.42567604780197144, "logits/rejected": -1.5568464994430542, "logps/chosen": -173.8468017578125, "logps/rejected": -277.6087951660156, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.061941146850586, "rewards/margins": 8.538166046142578, "rewards/rejected": -14.600107192993164, "step": 13188 }, { "epoch": 2.05, "learning_rate": 4.4739872400020745e-06, "logits/chosen": -0.8527665138244629, "logits/rejected": -2.8112621307373047, "logps/chosen": -134.27572631835938, "logps/rejected": -490.4647216796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.583733558654785, "rewards/margins": 9.628215789794922, "rewards/rejected": -15.211949348449707, "step": 13189 }, { "epoch": 2.05, "learning_rate": 4.473253799470926e-06, "logits/chosen": -2.426551580429077, "logits/rejected": -2.854653835296631, "logps/chosen": -97.24105834960938, "logps/rejected": -265.76007080078125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.915947914123535, "rewards/margins": 7.359025955200195, "rewards/rejected": -14.27497386932373, "step": 13190 }, { "epoch": 2.05, "learning_rate": 4.472520358939779e-06, "logits/chosen": -2.7117767333984375, "logits/rejected": -2.4471020698547363, "logps/chosen": -184.000244140625, "logps/rejected": -266.9930725097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8945884704589844, "rewards/margins": 11.52879524230957, "rewards/rejected": -15.423383712768555, "step": 13191 }, { "epoch": 2.05, "learning_rate": 4.471786918408632e-06, "logits/chosen": -2.7690517902374268, "logits/rejected": -2.1100027561187744, "logps/chosen": -349.85076904296875, "logps/rejected": -409.255859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.9391894340515137, "rewards/margins": 8.708593368530273, "rewards/rejected": -12.647783279418945, "step": 13192 }, { "epoch": 2.05, "learning_rate": 4.471053477877484e-06, "logits/chosen": -2.191824197769165, "logits/rejected": -2.7835700511932373, "logps/chosen": -478.13885498046875, "logps/rejected": -622.6883544921875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.771758079528809, "rewards/margins": 8.599184036254883, "rewards/rejected": -17.370941162109375, "step": 13193 }, { "epoch": 2.05, "learning_rate": 4.470320037346336e-06, "logits/chosen": -2.0441458225250244, "logits/rejected": -2.7738888263702393, "logps/chosen": -61.26488494873047, "logps/rejected": -507.2955322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.676234245300293, "rewards/margins": 17.456806182861328, "rewards/rejected": -20.133041381835938, "step": 13194 }, { "epoch": 2.05, "learning_rate": 4.4695865968151875e-06, "logits/chosen": -1.5237064361572266, "logits/rejected": -2.8313772678375244, "logps/chosen": -171.04396057128906, "logps/rejected": -348.4976806640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.437559604644775, "rewards/margins": 8.023147583007812, "rewards/rejected": -13.460707664489746, "step": 13195 }, { "epoch": 2.05, "learning_rate": 4.46885315628404e-06, "logits/chosen": -2.17647123336792, "logits/rejected": -2.9915847778320312, "logps/chosen": -108.29808044433594, "logps/rejected": -256.5277099609375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.0922086238861084, "rewards/margins": 8.039271354675293, "rewards/rejected": -11.13148021697998, "step": 13196 }, { "epoch": 2.05, "learning_rate": 4.468119715752892e-06, "logits/chosen": -2.7842798233032227, "logits/rejected": -2.163515090942383, "logps/chosen": -417.56011962890625, "logps/rejected": -241.51329040527344, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -4.824326515197754, "rewards/margins": 7.027161121368408, "rewards/rejected": -11.85148811340332, "step": 13197 }, { "epoch": 2.05, "learning_rate": 4.467386275221744e-06, "logits/chosen": -2.4926364421844482, "logits/rejected": -2.852534532546997, "logps/chosen": -193.9264678955078, "logps/rejected": -376.893310546875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -6.504761695861816, "rewards/margins": 8.23161792755127, "rewards/rejected": -14.736379623413086, "step": 13198 }, { "epoch": 2.05, "learning_rate": 4.466652834690596e-06, "logits/chosen": -2.8719022274017334, "logits/rejected": -2.8287739753723145, "logps/chosen": -321.16522216796875, "logps/rejected": -321.4408264160156, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -4.441815376281738, "rewards/margins": 5.062324523925781, "rewards/rejected": -9.50413990020752, "step": 13199 }, { "epoch": 2.05, "learning_rate": 4.465919394159448e-06, "logits/chosen": -2.733490467071533, "logits/rejected": -2.222752094268799, "logps/chosen": -944.716796875, "logps/rejected": -660.4242553710938, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.125669956207275, "rewards/margins": 8.772886276245117, "rewards/rejected": -12.89855670928955, "step": 13200 }, { "epoch": 2.05, "learning_rate": 4.4651859536283005e-06, "logits/chosen": -2.8711135387420654, "logits/rejected": -2.6512327194213867, "logps/chosen": -317.10638427734375, "logps/rejected": -345.31353759765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.897245407104492, "rewards/margins": 9.288097381591797, "rewards/rejected": -15.185342788696289, "step": 13201 }, { "epoch": 2.05, "learning_rate": 4.464452513097152e-06, "logits/chosen": -2.118694305419922, "logits/rejected": -2.8545281887054443, "logps/chosen": -145.95944213867188, "logps/rejected": -264.8254699707031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5664219856262207, "rewards/margins": 9.36098575592041, "rewards/rejected": -12.927408218383789, "step": 13202 }, { "epoch": 2.05, "learning_rate": 4.463719072566004e-06, "logits/chosen": -2.8185720443725586, "logits/rejected": -3.2766377925872803, "logps/chosen": -47.125526428222656, "logps/rejected": -232.32862854003906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.294050693511963, "rewards/margins": 9.606164932250977, "rewards/rejected": -12.900216102600098, "step": 13203 }, { "epoch": 2.05, "learning_rate": 4.462985632034856e-06, "logits/chosen": -2.5878822803497314, "logits/rejected": -3.1506288051605225, "logps/chosen": -252.86279296875, "logps/rejected": -493.4004211425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9549641609191895, "rewards/margins": 10.921981811523438, "rewards/rejected": -14.876945495605469, "step": 13204 }, { "epoch": 2.05, "learning_rate": 4.462252191503709e-06, "logits/chosen": -2.608989953994751, "logits/rejected": -2.6889865398406982, "logps/chosen": -58.660369873046875, "logps/rejected": -268.0491638183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0782241821289062, "rewards/margins": 12.264394760131836, "rewards/rejected": -15.342618942260742, "step": 13205 }, { "epoch": 2.05, "learning_rate": 4.461518750972561e-06, "logits/chosen": -2.576427936553955, "logits/rejected": -2.450681686401367, "logps/chosen": -156.7483367919922, "logps/rejected": -246.5684814453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.543243885040283, "rewards/margins": 10.274344444274902, "rewards/rejected": -13.817588806152344, "step": 13206 }, { "epoch": 2.05, "learning_rate": 4.460785310441413e-06, "logits/chosen": -2.695542335510254, "logits/rejected": -1.5353233814239502, "logps/chosen": -463.43829345703125, "logps/rejected": -429.7354736328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.381014347076416, "rewards/margins": 10.908382415771484, "rewards/rejected": -15.289396286010742, "step": 13207 }, { "epoch": 2.05, "learning_rate": 4.460051869910265e-06, "logits/chosen": -2.891719341278076, "logits/rejected": -3.069401979446411, "logps/chosen": -136.34759521484375, "logps/rejected": -233.42442321777344, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.252921104431152, "rewards/margins": 8.58067798614502, "rewards/rejected": -14.833599090576172, "step": 13208 }, { "epoch": 2.05, "learning_rate": 4.459318429379117e-06, "logits/chosen": -2.4064321517944336, "logits/rejected": -2.7510759830474854, "logps/chosen": -606.194091796875, "logps/rejected": -690.4584350585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.791168212890625, "rewards/margins": 12.57154369354248, "rewards/rejected": -19.362712860107422, "step": 13209 }, { "epoch": 2.05, "learning_rate": 4.45858498884797e-06, "logits/chosen": -1.8209993839263916, "logits/rejected": -3.0286402702331543, "logps/chosen": -146.31541442871094, "logps/rejected": -430.7939147949219, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.817685127258301, "rewards/margins": 8.720199584960938, "rewards/rejected": -14.537883758544922, "step": 13210 }, { "epoch": 2.05, "learning_rate": 4.457851548316822e-06, "logits/chosen": -2.07527756690979, "logits/rejected": -2.624363899230957, "logps/chosen": -323.65374755859375, "logps/rejected": -409.1258239746094, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.770268201828003, "rewards/margins": 7.8903656005859375, "rewards/rejected": -11.66063404083252, "step": 13211 }, { "epoch": 2.05, "learning_rate": 4.457118107785674e-06, "logits/chosen": -1.8985475301742554, "logits/rejected": -2.832512140274048, "logps/chosen": -190.19863891601562, "logps/rejected": -454.0069580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.7633240222930908, "rewards/margins": 13.702896118164062, "rewards/rejected": -15.466218948364258, "step": 13212 }, { "epoch": 2.05, "learning_rate": 4.4563846672545255e-06, "logits/chosen": -1.5074509382247925, "logits/rejected": -2.3920376300811768, "logps/chosen": -105.72600555419922, "logps/rejected": -328.537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7808117866516113, "rewards/margins": 11.829944610595703, "rewards/rejected": -14.610755920410156, "step": 13213 }, { "epoch": 2.06, "learning_rate": 4.455651226723378e-06, "logits/chosen": -3.1478431224823, "logits/rejected": -2.6555845737457275, "logps/chosen": -146.98666381835938, "logps/rejected": -188.28585815429688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5336520671844482, "rewards/margins": 9.040264129638672, "rewards/rejected": -11.5739164352417, "step": 13214 }, { "epoch": 2.06, "learning_rate": 4.45491778619223e-06, "logits/chosen": -2.5927460193634033, "logits/rejected": -2.2985517978668213, "logps/chosen": -579.042724609375, "logps/rejected": -546.1854248046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.323915481567383, "rewards/margins": 7.990106105804443, "rewards/rejected": -13.314022064208984, "step": 13215 }, { "epoch": 2.06, "learning_rate": 4.454184345661082e-06, "logits/chosen": -2.6743342876434326, "logits/rejected": -1.9634257555007935, "logps/chosen": -230.88217163085938, "logps/rejected": -375.533447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.904515266418457, "rewards/margins": 9.593780517578125, "rewards/rejected": -14.498295783996582, "step": 13216 }, { "epoch": 2.06, "learning_rate": 4.453450905129934e-06, "logits/chosen": -2.4887261390686035, "logits/rejected": -2.981980562210083, "logps/chosen": -50.27831268310547, "logps/rejected": -223.1390838623047, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1826369762420654, "rewards/margins": 10.17227554321289, "rewards/rejected": -13.354911804199219, "step": 13217 }, { "epoch": 2.06, "learning_rate": 4.452717464598786e-06, "logits/chosen": -1.140702247619629, "logits/rejected": -2.328436851501465, "logps/chosen": -122.38691711425781, "logps/rejected": -325.28790283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.153656959533691, "rewards/margins": 9.557038307189941, "rewards/rejected": -15.710695266723633, "step": 13218 }, { "epoch": 2.06, "learning_rate": 4.4519840240676385e-06, "logits/chosen": -1.7089864015579224, "logits/rejected": -2.3160524368286133, "logps/chosen": -185.00570678710938, "logps/rejected": -394.9667663574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6723222732543945, "rewards/margins": 10.236870765686035, "rewards/rejected": -16.90919303894043, "step": 13219 }, { "epoch": 2.06, "learning_rate": 4.45125058353649e-06, "logits/chosen": -2.2290163040161133, "logits/rejected": -1.601804256439209, "logps/chosen": -401.3679504394531, "logps/rejected": -404.7285461425781, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -5.613162994384766, "rewards/margins": 11.332273483276367, "rewards/rejected": -16.945436477661133, "step": 13220 }, { "epoch": 2.06, "learning_rate": 4.450517143005342e-06, "logits/chosen": -2.1335017681121826, "logits/rejected": -2.7106359004974365, "logps/chosen": -389.74102783203125, "logps/rejected": -445.1252136230469, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.675887107849121, "rewards/margins": 7.166563987731934, "rewards/rejected": -14.842451095581055, "step": 13221 }, { "epoch": 2.06, "learning_rate": 4.449783702474194e-06, "logits/chosen": -2.4791407585144043, "logits/rejected": -1.8316348791122437, "logps/chosen": -253.38223266601562, "logps/rejected": -426.734130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1545982360839844, "rewards/margins": 12.34019660949707, "rewards/rejected": -14.494794845581055, "step": 13222 }, { "epoch": 2.06, "learning_rate": 4.449050261943047e-06, "logits/chosen": -3.09976863861084, "logits/rejected": -3.0690481662750244, "logps/chosen": -208.08663940429688, "logps/rejected": -391.2997741699219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.312643527984619, "rewards/margins": 10.684671401977539, "rewards/rejected": -14.997315406799316, "step": 13223 }, { "epoch": 2.06, "learning_rate": 4.448316821411899e-06, "logits/chosen": -2.7898366451263428, "logits/rejected": -2.317021608352661, "logps/chosen": -181.8533935546875, "logps/rejected": -242.81240844726562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.043572187423706, "rewards/margins": 9.791504859924316, "rewards/rejected": -11.835077285766602, "step": 13224 }, { "epoch": 2.06, "learning_rate": 4.4475833808807515e-06, "logits/chosen": -2.6753647327423096, "logits/rejected": -2.9843804836273193, "logps/chosen": -158.39102172851562, "logps/rejected": -380.30047607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.935458660125732, "rewards/margins": 11.609254837036133, "rewards/rejected": -18.544713973999023, "step": 13225 }, { "epoch": 2.06, "learning_rate": 4.446849940349603e-06, "logits/chosen": -2.3469438552856445, "logits/rejected": -2.991793155670166, "logps/chosen": -389.24176025390625, "logps/rejected": -555.9341430664062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.269728183746338, "rewards/margins": 11.157825469970703, "rewards/rejected": -14.427553176879883, "step": 13226 }, { "epoch": 2.06, "learning_rate": 4.446116499818455e-06, "logits/chosen": -2.307041645050049, "logits/rejected": -2.6185030937194824, "logps/chosen": -673.40771484375, "logps/rejected": -643.3526000976562, "loss": 0.0839, "rewards/accuracies": 1.0, "rewards/chosen": -9.861944198608398, "rewards/margins": 4.750708103179932, "rewards/rejected": -14.612651824951172, "step": 13227 }, { "epoch": 2.06, "learning_rate": 4.445383059287308e-06, "logits/chosen": -2.290585994720459, "logits/rejected": -0.9147248864173889, "logps/chosen": -366.4515686035156, "logps/rejected": -334.6167907714844, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.164227485656738, "rewards/margins": 7.438096046447754, "rewards/rejected": -13.602323532104492, "step": 13228 }, { "epoch": 2.06, "learning_rate": 4.44464961875616e-06, "logits/chosen": -2.5313234329223633, "logits/rejected": -3.1420140266418457, "logps/chosen": -754.800537109375, "logps/rejected": -1070.8126220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.486850261688232, "rewards/margins": 11.895525932312012, "rewards/rejected": -18.382375717163086, "step": 13229 }, { "epoch": 2.06, "learning_rate": 4.443916178225012e-06, "logits/chosen": -2.57906436920166, "logits/rejected": -2.417171001434326, "logps/chosen": -361.481689453125, "logps/rejected": -429.6507873535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.924323081970215, "rewards/margins": 10.843158721923828, "rewards/rejected": -18.767480850219727, "step": 13230 }, { "epoch": 2.06, "learning_rate": 4.443182737693864e-06, "logits/chosen": -2.245713472366333, "logits/rejected": -2.6646697521209717, "logps/chosen": -367.7352294921875, "logps/rejected": -448.77362060546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.531694412231445, "rewards/margins": 9.006563186645508, "rewards/rejected": -14.538257598876953, "step": 13231 }, { "epoch": 2.06, "learning_rate": 4.442449297162716e-06, "logits/chosen": -2.2811317443847656, "logits/rejected": -2.852586030960083, "logps/chosen": -333.2784729003906, "logps/rejected": -536.343994140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.606858730316162, "rewards/margins": 9.236421585083008, "rewards/rejected": -13.843280792236328, "step": 13232 }, { "epoch": 2.06, "learning_rate": 4.441715856631568e-06, "logits/chosen": -2.7066702842712402, "logits/rejected": -2.8319618701934814, "logps/chosen": -241.45892333984375, "logps/rejected": -410.0126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.196693420410156, "rewards/margins": 10.01015853881836, "rewards/rejected": -15.206851959228516, "step": 13233 }, { "epoch": 2.06, "learning_rate": 4.44098241610042e-06, "logits/chosen": -2.6772994995117188, "logits/rejected": -1.7731006145477295, "logps/chosen": -478.0679626464844, "logps/rejected": -366.23126220703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5700771808624268, "rewards/margins": 10.072484970092773, "rewards/rejected": -13.642561912536621, "step": 13234 }, { "epoch": 2.06, "learning_rate": 4.440248975569272e-06, "logits/chosen": -0.6515629291534424, "logits/rejected": -2.309102773666382, "logps/chosen": -114.2645263671875, "logps/rejected": -514.5536499023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.884108543395996, "rewards/margins": 11.770380020141602, "rewards/rejected": -18.65448760986328, "step": 13235 }, { "epoch": 2.06, "learning_rate": 4.439515535038125e-06, "logits/chosen": -2.5424294471740723, "logits/rejected": -2.1112475395202637, "logps/chosen": -186.6905517578125, "logps/rejected": -380.86181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1774725914001465, "rewards/margins": 11.9072904586792, "rewards/rejected": -18.084762573242188, "step": 13236 }, { "epoch": 2.06, "learning_rate": 4.4387820945069766e-06, "logits/chosen": -2.3335671424865723, "logits/rejected": -3.1036922931671143, "logps/chosen": -534.4352416992188, "logps/rejected": -601.2430419921875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -5.613210678100586, "rewards/margins": 7.008548736572266, "rewards/rejected": -12.621759414672852, "step": 13237 }, { "epoch": 2.06, "learning_rate": 4.4380486539758284e-06, "logits/chosen": -2.764522075653076, "logits/rejected": -2.837563991546631, "logps/chosen": -89.78666687011719, "logps/rejected": -247.8369140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.320594310760498, "rewards/margins": 10.661468505859375, "rewards/rejected": -16.98206329345703, "step": 13238 }, { "epoch": 2.06, "learning_rate": 4.43731521344468e-06, "logits/chosen": -2.8552324771881104, "logits/rejected": -2.057915449142456, "logps/chosen": -136.99884033203125, "logps/rejected": -182.1707763671875, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -4.156671524047852, "rewards/margins": 5.857019424438477, "rewards/rejected": -10.013690948486328, "step": 13239 }, { "epoch": 2.06, "learning_rate": 4.436581772913532e-06, "logits/chosen": -2.8192524909973145, "logits/rejected": -1.6865026950836182, "logps/chosen": -249.85447692871094, "logps/rejected": -149.9447021484375, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -5.334476470947266, "rewards/margins": 5.062859535217285, "rewards/rejected": -10.39733600616455, "step": 13240 }, { "epoch": 2.06, "learning_rate": 4.435848332382385e-06, "logits/chosen": -2.8620588779449463, "logits/rejected": -2.1082489490509033, "logps/chosen": -437.2303466796875, "logps/rejected": -383.8321838378906, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -8.03023910522461, "rewards/margins": 8.909305572509766, "rewards/rejected": -16.939544677734375, "step": 13241 }, { "epoch": 2.06, "learning_rate": 4.435114891851238e-06, "logits/chosen": -2.742298126220703, "logits/rejected": -3.01908278465271, "logps/chosen": -125.6468734741211, "logps/rejected": -274.0059814453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.48557710647583, "rewards/margins": 9.459415435791016, "rewards/rejected": -14.944993019104004, "step": 13242 }, { "epoch": 2.06, "learning_rate": 4.4343814513200895e-06, "logits/chosen": -2.7837727069854736, "logits/rejected": -2.6290290355682373, "logps/chosen": -326.403564453125, "logps/rejected": -189.87506103515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.0898056030273438, "rewards/margins": 6.5653510093688965, "rewards/rejected": -9.655157089233398, "step": 13243 }, { "epoch": 2.06, "learning_rate": 4.433648010788941e-06, "logits/chosen": -1.2237756252288818, "logits/rejected": -2.529784679412842, "logps/chosen": -278.7891845703125, "logps/rejected": -605.5053100585938, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.807443618774414, "rewards/margins": 8.496319770812988, "rewards/rejected": -17.303762435913086, "step": 13244 }, { "epoch": 2.06, "learning_rate": 4.432914570257794e-06, "logits/chosen": -2.557044267654419, "logits/rejected": -2.815361976623535, "logps/chosen": -178.6457977294922, "logps/rejected": -307.14605712890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.607261657714844, "rewards/margins": 10.095396041870117, "rewards/rejected": -15.702657699584961, "step": 13245 }, { "epoch": 2.06, "learning_rate": 4.432181129726646e-06, "logits/chosen": -1.6380449533462524, "logits/rejected": -2.8477163314819336, "logps/chosen": -237.4364776611328, "logps/rejected": -638.3119506835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.077677249908447, "rewards/margins": 17.63298225402832, "rewards/rejected": -21.71065902709961, "step": 13246 }, { "epoch": 2.06, "learning_rate": 4.431447689195498e-06, "logits/chosen": -2.6398403644561768, "logits/rejected": -2.9893643856048584, "logps/chosen": -175.6627960205078, "logps/rejected": -342.23828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.560662269592285, "rewards/margins": 9.427391052246094, "rewards/rejected": -15.988054275512695, "step": 13247 }, { "epoch": 2.06, "learning_rate": 4.43071424866435e-06, "logits/chosen": -2.3849411010742188, "logits/rejected": -2.5476858615875244, "logps/chosen": -262.8792419433594, "logps/rejected": -306.72100830078125, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/chosen": -9.182416915893555, "rewards/margins": 5.302563190460205, "rewards/rejected": -14.484979629516602, "step": 13248 }, { "epoch": 2.06, "learning_rate": 4.429980808133202e-06, "logits/chosen": -2.549093246459961, "logits/rejected": -3.008913040161133, "logps/chosen": -109.68753051757812, "logps/rejected": -372.1266174316406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.220928192138672, "rewards/margins": 8.097590446472168, "rewards/rejected": -14.318519592285156, "step": 13249 }, { "epoch": 2.06, "learning_rate": 4.429247367602054e-06, "logits/chosen": -2.736307144165039, "logits/rejected": -2.6173105239868164, "logps/chosen": -282.20819091796875, "logps/rejected": -325.0258483886719, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.084308624267578, "rewards/margins": 7.801826000213623, "rewards/rejected": -13.88613510131836, "step": 13250 }, { "epoch": 2.06, "learning_rate": 4.428513927070906e-06, "logits/chosen": -1.648371934890747, "logits/rejected": -1.7955586910247803, "logps/chosen": -459.8871765136719, "logps/rejected": -543.48681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2028305530548096, "rewards/margins": 14.155044555664062, "rewards/rejected": -17.35787582397461, "step": 13251 }, { "epoch": 2.06, "learning_rate": 4.427780486539758e-06, "logits/chosen": -2.29290509223938, "logits/rejected": -2.9599506855010986, "logps/chosen": -94.03321838378906, "logps/rejected": -250.11500549316406, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -5.4100141525268555, "rewards/margins": 5.250211715698242, "rewards/rejected": -10.660226821899414, "step": 13252 }, { "epoch": 2.06, "learning_rate": 4.42704704600861e-06, "logits/chosen": -3.0863897800445557, "logits/rejected": -3.019270420074463, "logps/chosen": -167.5628662109375, "logps/rejected": -371.62799072265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.934049129486084, "rewards/margins": 9.745450973510742, "rewards/rejected": -14.679499626159668, "step": 13253 }, { "epoch": 2.06, "learning_rate": 4.426313605477463e-06, "logits/chosen": -2.4423325061798096, "logits/rejected": -3.0676229000091553, "logps/chosen": -182.9773712158203, "logps/rejected": -368.7017822265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.859156608581543, "rewards/margins": 8.005069732666016, "rewards/rejected": -13.864225387573242, "step": 13254 }, { "epoch": 2.06, "learning_rate": 4.425580164946315e-06, "logits/chosen": -1.0725913047790527, "logits/rejected": -2.464388847351074, "logps/chosen": -104.3733139038086, "logps/rejected": -340.118896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.243780136108398, "rewards/margins": 11.503646850585938, "rewards/rejected": -16.747426986694336, "step": 13255 }, { "epoch": 2.06, "learning_rate": 4.4248467244151665e-06, "logits/chosen": -1.4086068868637085, "logits/rejected": -2.953456401824951, "logps/chosen": -104.93699645996094, "logps/rejected": -483.83648681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5761258602142334, "rewards/margins": 10.88226318359375, "rewards/rejected": -14.458389282226562, "step": 13256 }, { "epoch": 2.06, "learning_rate": 4.424113283884018e-06, "logits/chosen": -1.2381337881088257, "logits/rejected": -2.679279088973999, "logps/chosen": -151.7708740234375, "logps/rejected": -377.2311096191406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.755584239959717, "rewards/margins": 6.0665669441223145, "rewards/rejected": -10.822151184082031, "step": 13257 }, { "epoch": 2.06, "learning_rate": 4.423379843352871e-06, "logits/chosen": -1.4758386611938477, "logits/rejected": -2.7944936752319336, "logps/chosen": -143.78912353515625, "logps/rejected": -429.7008972167969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.5896406173706055, "rewards/margins": 8.674592018127441, "rewards/rejected": -15.264232635498047, "step": 13258 }, { "epoch": 2.06, "learning_rate": 4.422646402821724e-06, "logits/chosen": -2.689707040786743, "logits/rejected": -2.2203714847564697, "logps/chosen": -469.7981872558594, "logps/rejected": -405.88446044921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.565669059753418, "rewards/margins": 9.39255428314209, "rewards/rejected": -12.958223342895508, "step": 13259 }, { "epoch": 2.06, "learning_rate": 4.421912962290576e-06, "logits/chosen": -2.0125033855438232, "logits/rejected": -2.8342814445495605, "logps/chosen": -211.98805236816406, "logps/rejected": -531.2384033203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.170615196228027, "rewards/margins": 10.28480339050293, "rewards/rejected": -17.455419540405273, "step": 13260 }, { "epoch": 2.06, "learning_rate": 4.4211795217594276e-06, "logits/chosen": -2.6060609817504883, "logits/rejected": -2.5423851013183594, "logps/chosen": -188.193359375, "logps/rejected": -347.933837890625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.115255355834961, "rewards/margins": 10.937766075134277, "rewards/rejected": -15.053020477294922, "step": 13261 }, { "epoch": 2.06, "learning_rate": 4.4204460812282794e-06, "logits/chosen": -2.6757442951202393, "logits/rejected": -2.2667596340179443, "logps/chosen": -259.47381591796875, "logps/rejected": -317.59600830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.2693891525268555, "rewards/margins": 9.531692504882812, "rewards/rejected": -15.801082611083984, "step": 13262 }, { "epoch": 2.06, "learning_rate": 4.419712640697132e-06, "logits/chosen": -2.7324652671813965, "logits/rejected": -2.8154494762420654, "logps/chosen": -108.0767822265625, "logps/rejected": -142.37496948242188, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.260215759277344, "rewards/margins": 5.9728498458862305, "rewards/rejected": -10.23306655883789, "step": 13263 }, { "epoch": 2.06, "learning_rate": 4.418979200165984e-06, "logits/chosen": -2.971065044403076, "logits/rejected": -2.363887071609497, "logps/chosen": -363.5382995605469, "logps/rejected": -275.2691650390625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -7.6585798263549805, "rewards/margins": 6.721988677978516, "rewards/rejected": -14.38056755065918, "step": 13264 }, { "epoch": 2.06, "learning_rate": 4.418245759634836e-06, "logits/chosen": -0.5823631882667542, "logits/rejected": -2.8241233825683594, "logps/chosen": -150.46051025390625, "logps/rejected": -603.664306640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.134225845336914, "rewards/margins": 9.479631423950195, "rewards/rejected": -16.61385726928711, "step": 13265 }, { "epoch": 2.06, "learning_rate": 4.417512319103688e-06, "logits/chosen": -2.036921739578247, "logits/rejected": -2.9317715167999268, "logps/chosen": -102.82193756103516, "logps/rejected": -342.5827941894531, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.2238898277282715, "rewards/margins": 8.873477935791016, "rewards/rejected": -13.097368240356445, "step": 13266 }, { "epoch": 2.06, "learning_rate": 4.41677887857254e-06, "logits/chosen": -2.5275094509124756, "logits/rejected": -2.671440601348877, "logps/chosen": -401.80303955078125, "logps/rejected": -524.29150390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1911818981170654, "rewards/margins": 11.645851135253906, "rewards/rejected": -13.83703327178955, "step": 13267 }, { "epoch": 2.06, "learning_rate": 4.416045438041392e-06, "logits/chosen": -0.8573737144470215, "logits/rejected": -2.378425121307373, "logps/chosen": -127.33101654052734, "logps/rejected": -429.5651550292969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.310512065887451, "rewards/margins": 11.745926856994629, "rewards/rejected": -15.056438446044922, "step": 13268 }, { "epoch": 2.06, "learning_rate": 4.415311997510244e-06, "logits/chosen": -2.4924027919769287, "logits/rejected": -2.763822317123413, "logps/chosen": -94.6699447631836, "logps/rejected": -256.07049560546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.637174129486084, "rewards/margins": 9.033374786376953, "rewards/rejected": -14.670549392700195, "step": 13269 }, { "epoch": 2.06, "learning_rate": 4.414578556979096e-06, "logits/chosen": -2.8730552196502686, "logits/rejected": -2.9658336639404297, "logps/chosen": -279.0936584472656, "logps/rejected": -454.8175048828125, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -5.077586650848389, "rewards/margins": 6.642662048339844, "rewards/rejected": -11.72024917602539, "step": 13270 }, { "epoch": 2.06, "learning_rate": 4.413845116447948e-06, "logits/chosen": -2.735459089279175, "logits/rejected": -2.650719404220581, "logps/chosen": -367.302734375, "logps/rejected": -452.00592041015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.712764263153076, "rewards/margins": 10.84211254119873, "rewards/rejected": -16.55487632751465, "step": 13271 }, { "epoch": 2.06, "learning_rate": 4.413111675916801e-06, "logits/chosen": -2.582362651824951, "logits/rejected": -2.699281692504883, "logps/chosen": -247.27383422851562, "logps/rejected": -314.48529052734375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -6.6644086837768555, "rewards/margins": 9.536869049072266, "rewards/rejected": -16.201278686523438, "step": 13272 }, { "epoch": 2.06, "learning_rate": 4.412378235385653e-06, "logits/chosen": -2.857917070388794, "logits/rejected": -2.0783443450927734, "logps/chosen": -248.2894744873047, "logps/rejected": -215.74363708496094, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -6.351372241973877, "rewards/margins": 5.91143798828125, "rewards/rejected": -12.262809753417969, "step": 13273 }, { "epoch": 2.06, "learning_rate": 4.4116447948545045e-06, "logits/chosen": -2.899327278137207, "logits/rejected": -3.219407320022583, "logps/chosen": -198.04998779296875, "logps/rejected": -375.654541015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.399624824523926, "rewards/margins": 10.442819595336914, "rewards/rejected": -14.84244441986084, "step": 13274 }, { "epoch": 2.06, "learning_rate": 4.410911354323357e-06, "logits/chosen": -2.4022886753082275, "logits/rejected": -2.9652974605560303, "logps/chosen": -408.23114013671875, "logps/rejected": -671.23388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.426508903503418, "rewards/margins": 8.87618637084961, "rewards/rejected": -15.302694320678711, "step": 13275 }, { "epoch": 2.06, "learning_rate": 4.410177913792209e-06, "logits/chosen": -2.397752046585083, "logits/rejected": -2.824770450592041, "logps/chosen": -214.23684692382812, "logps/rejected": -671.07763671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.384025573730469, "rewards/margins": 14.218456268310547, "rewards/rejected": -20.602481842041016, "step": 13276 }, { "epoch": 2.06, "learning_rate": 4.409444473261062e-06, "logits/chosen": -1.9451336860656738, "logits/rejected": -2.7674965858459473, "logps/chosen": -225.92025756835938, "logps/rejected": -432.2974853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.794448137283325, "rewards/margins": 11.927550315856934, "rewards/rejected": -15.72199821472168, "step": 13277 }, { "epoch": 2.07, "learning_rate": 4.408711032729914e-06, "logits/chosen": -2.8361759185791016, "logits/rejected": -2.5086913108825684, "logps/chosen": -518.4713134765625, "logps/rejected": -455.93231201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.273999214172363, "rewards/margins": 15.046612739562988, "rewards/rejected": -21.32061195373535, "step": 13278 }, { "epoch": 2.07, "learning_rate": 4.407977592198766e-06, "logits/chosen": -3.119046926498413, "logits/rejected": -1.8982974290847778, "logps/chosen": -574.0276489257812, "logps/rejected": -361.065673828125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -3.9516663551330566, "rewards/margins": 7.76120662689209, "rewards/rejected": -11.712873458862305, "step": 13279 }, { "epoch": 2.07, "learning_rate": 4.4072441516676175e-06, "logits/chosen": -2.8984854221343994, "logits/rejected": -1.1809282302856445, "logps/chosen": -347.01507568359375, "logps/rejected": -258.7550048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.252193927764893, "rewards/margins": 9.253227233886719, "rewards/rejected": -13.50542163848877, "step": 13280 }, { "epoch": 2.07, "learning_rate": 4.40651071113647e-06, "logits/chosen": -2.5108869075775146, "logits/rejected": -2.7961761951446533, "logps/chosen": -187.8694610595703, "logps/rejected": -437.19268798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.152693271636963, "rewards/margins": 9.996716499328613, "rewards/rejected": -14.149410247802734, "step": 13281 }, { "epoch": 2.07, "learning_rate": 4.405777270605322e-06, "logits/chosen": -2.7203783988952637, "logits/rejected": -2.7833380699157715, "logps/chosen": -697.8692016601562, "logps/rejected": -603.2636108398438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.545135498046875, "rewards/margins": 9.46044635772705, "rewards/rejected": -13.005581855773926, "step": 13282 }, { "epoch": 2.07, "learning_rate": 4.405043830074174e-06, "logits/chosen": -1.7094439268112183, "logits/rejected": -2.787611961364746, "logps/chosen": -139.22674560546875, "logps/rejected": -512.0609741210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.862453460693359, "rewards/margins": 14.862195014953613, "rewards/rejected": -20.724647521972656, "step": 13283 }, { "epoch": 2.07, "learning_rate": 4.404310389543026e-06, "logits/chosen": -0.9818867444992065, "logits/rejected": -1.9284635782241821, "logps/chosen": -142.29034423828125, "logps/rejected": -302.805419921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.063366889953613, "rewards/margins": 8.856551170349121, "rewards/rejected": -15.919918060302734, "step": 13284 }, { "epoch": 2.07, "learning_rate": 4.4035769490118786e-06, "logits/chosen": -2.522922992706299, "logits/rejected": -2.2629685401916504, "logps/chosen": -392.1839904785156, "logps/rejected": -344.6968688964844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.108168840408325, "rewards/margins": 9.55698013305664, "rewards/rejected": -11.665149688720703, "step": 13285 }, { "epoch": 2.07, "learning_rate": 4.4028435084807304e-06, "logits/chosen": -2.5945820808410645, "logits/rejected": -2.9603569507598877, "logps/chosen": -109.2759780883789, "logps/rejected": -445.14483642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.050752639770508, "rewards/margins": 11.494993209838867, "rewards/rejected": -15.545745849609375, "step": 13286 }, { "epoch": 2.07, "learning_rate": 4.402110067949582e-06, "logits/chosen": -2.8305163383483887, "logits/rejected": -2.6767690181732178, "logps/chosen": -175.09725952148438, "logps/rejected": -221.73880004882812, "loss": 0.0894, "rewards/accuracies": 1.0, "rewards/chosen": -5.921694755554199, "rewards/margins": 5.759746074676514, "rewards/rejected": -11.681441307067871, "step": 13287 }, { "epoch": 2.07, "learning_rate": 4.401376627418434e-06, "logits/chosen": -1.659837245941162, "logits/rejected": -2.667078971862793, "logps/chosen": -186.20062255859375, "logps/rejected": -241.73033142089844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.739288330078125, "rewards/margins": 9.33505916595459, "rewards/rejected": -12.074347496032715, "step": 13288 }, { "epoch": 2.07, "learning_rate": 4.400643186887286e-06, "logits/chosen": -3.06547212600708, "logits/rejected": -2.9963901042938232, "logps/chosen": -149.2191619873047, "logps/rejected": -224.0523223876953, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.763818264007568, "rewards/margins": 7.346111297607422, "rewards/rejected": -14.109930038452148, "step": 13289 }, { "epoch": 2.07, "learning_rate": 4.399909746356139e-06, "logits/chosen": -2.39913272857666, "logits/rejected": -2.70473313331604, "logps/chosen": -384.5618591308594, "logps/rejected": -376.9236755371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.388038635253906, "rewards/margins": 9.328145980834961, "rewards/rejected": -13.716184616088867, "step": 13290 }, { "epoch": 2.07, "learning_rate": 4.399176305824991e-06, "logits/chosen": -2.354707717895508, "logits/rejected": -3.352776527404785, "logps/chosen": -208.56234741210938, "logps/rejected": -430.92742919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6263937950134277, "rewards/margins": 9.79793930053711, "rewards/rejected": -13.424333572387695, "step": 13291 }, { "epoch": 2.07, "learning_rate": 4.398442865293843e-06, "logits/chosen": -1.7960103750228882, "logits/rejected": -2.458047866821289, "logps/chosen": -247.95411682128906, "logps/rejected": -395.16668701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.646585702896118, "rewards/margins": 10.61246109008789, "rewards/rejected": -13.25904655456543, "step": 13292 }, { "epoch": 2.07, "learning_rate": 4.397709424762695e-06, "logits/chosen": -2.7535290718078613, "logits/rejected": -2.5652687549591064, "logps/chosen": -277.436767578125, "logps/rejected": -437.56512451171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.614967346191406, "rewards/margins": 11.148290634155273, "rewards/rejected": -15.76325798034668, "step": 13293 }, { "epoch": 2.07, "learning_rate": 4.396975984231548e-06, "logits/chosen": -2.910945415496826, "logits/rejected": -2.3311989307403564, "logps/chosen": -267.398193359375, "logps/rejected": -260.13067626953125, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -5.512874603271484, "rewards/margins": 7.119367599487305, "rewards/rejected": -12.632242202758789, "step": 13294 }, { "epoch": 2.07, "learning_rate": 4.3962425437004e-06, "logits/chosen": -2.548095703125, "logits/rejected": -2.0221168994903564, "logps/chosen": -180.7567138671875, "logps/rejected": -309.2314758300781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.225980758666992, "rewards/margins": 9.418346405029297, "rewards/rejected": -15.644327163696289, "step": 13295 }, { "epoch": 2.07, "learning_rate": 4.395509103169252e-06, "logits/chosen": -2.471726655960083, "logits/rejected": -1.1423583030700684, "logps/chosen": -278.2261962890625, "logps/rejected": -380.2459716796875, "loss": 0.0478, "rewards/accuracies": 1.0, "rewards/chosen": -4.050147533416748, "rewards/margins": 8.499749183654785, "rewards/rejected": -12.549897193908691, "step": 13296 }, { "epoch": 2.07, "learning_rate": 4.394775662638104e-06, "logits/chosen": -2.623857259750366, "logits/rejected": -2.9366261959075928, "logps/chosen": -108.31798553466797, "logps/rejected": -181.9493408203125, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -4.142892837524414, "rewards/margins": 7.498749732971191, "rewards/rejected": -11.641642570495605, "step": 13297 }, { "epoch": 2.07, "learning_rate": 4.3940422221069555e-06, "logits/chosen": -2.5627050399780273, "logits/rejected": -1.989268183708191, "logps/chosen": -259.9652404785156, "logps/rejected": -260.7695617675781, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.1713547706604, "rewards/margins": 7.412502765655518, "rewards/rejected": -13.583857536315918, "step": 13298 }, { "epoch": 2.07, "learning_rate": 4.393308781575808e-06, "logits/chosen": -3.185540199279785, "logits/rejected": -2.8441030979156494, "logps/chosen": -147.5638427734375, "logps/rejected": -253.97967529296875, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -4.291720390319824, "rewards/margins": 8.070451736450195, "rewards/rejected": -12.362171173095703, "step": 13299 }, { "epoch": 2.07, "learning_rate": 4.39257534104466e-06, "logits/chosen": -1.2015583515167236, "logits/rejected": -2.814147472381592, "logps/chosen": -80.82826232910156, "logps/rejected": -379.47869873046875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -5.893884658813477, "rewards/margins": 8.85777759552002, "rewards/rejected": -14.751663208007812, "step": 13300 }, { "epoch": 2.07, "learning_rate": 4.391841900513512e-06, "logits/chosen": -2.3954808712005615, "logits/rejected": -1.934065341949463, "logps/chosen": -210.413818359375, "logps/rejected": -316.9447326660156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.481531620025635, "rewards/margins": 8.19343376159668, "rewards/rejected": -14.674964904785156, "step": 13301 }, { "epoch": 2.07, "learning_rate": 4.391108459982364e-06, "logits/chosen": -2.677553653717041, "logits/rejected": -2.467306613922119, "logps/chosen": -315.14154052734375, "logps/rejected": -373.302490234375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -7.112892150878906, "rewards/margins": 6.196505546569824, "rewards/rejected": -13.30939769744873, "step": 13302 }, { "epoch": 2.07, "learning_rate": 4.390375019451217e-06, "logits/chosen": -2.608450174331665, "logits/rejected": -2.360161542892456, "logps/chosen": -207.99539184570312, "logps/rejected": -287.35546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0013394355773926, "rewards/margins": 10.44448471069336, "rewards/rejected": -13.44582462310791, "step": 13303 }, { "epoch": 2.07, "learning_rate": 4.3896415789200685e-06, "logits/chosen": -2.313938856124878, "logits/rejected": -3.0873467922210693, "logps/chosen": -88.92094421386719, "logps/rejected": -263.6864013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.788848876953125, "rewards/margins": 9.09244441986084, "rewards/rejected": -13.881292343139648, "step": 13304 }, { "epoch": 2.07, "learning_rate": 4.38890813838892e-06, "logits/chosen": -2.916607618331909, "logits/rejected": -3.367386817932129, "logps/chosen": -130.50375366210938, "logps/rejected": -325.136962890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.456725120544434, "rewards/margins": 7.493934631347656, "rewards/rejected": -11.950658798217773, "step": 13305 }, { "epoch": 2.07, "learning_rate": 4.388174697857772e-06, "logits/chosen": -2.61555552482605, "logits/rejected": -2.725369453430176, "logps/chosen": -88.62403106689453, "logps/rejected": -199.0581817626953, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.7831549644470215, "rewards/margins": 8.405848503112793, "rewards/rejected": -13.189002990722656, "step": 13306 }, { "epoch": 2.07, "learning_rate": 4.387441257326624e-06, "logits/chosen": -1.7994621992111206, "logits/rejected": -2.854424476623535, "logps/chosen": -87.43048095703125, "logps/rejected": -360.4412841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.5775477886199951, "rewards/margins": 10.0972261428833, "rewards/rejected": -11.674774169921875, "step": 13307 }, { "epoch": 2.07, "learning_rate": 4.386707816795477e-06, "logits/chosen": -1.7751015424728394, "logits/rejected": -3.0298070907592773, "logps/chosen": -119.66143798828125, "logps/rejected": -372.5904541015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.078617095947266, "rewards/margins": 6.751450538635254, "rewards/rejected": -11.83006763458252, "step": 13308 }, { "epoch": 2.07, "learning_rate": 4.38597437626433e-06, "logits/chosen": -1.791469931602478, "logits/rejected": -2.5871636867523193, "logps/chosen": -184.62777709960938, "logps/rejected": -306.56427001953125, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -7.247561454772949, "rewards/margins": 5.4458208084106445, "rewards/rejected": -12.693382263183594, "step": 13309 }, { "epoch": 2.07, "learning_rate": 4.3852409357331815e-06, "logits/chosen": -2.683138847351074, "logits/rejected": -3.2094128131866455, "logps/chosen": -220.81634521484375, "logps/rejected": -437.0494079589844, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.789583206176758, "rewards/margins": 8.595035552978516, "rewards/rejected": -14.384618759155273, "step": 13310 }, { "epoch": 2.07, "learning_rate": 4.384507495202033e-06, "logits/chosen": -2.635596752166748, "logits/rejected": -3.0592362880706787, "logps/chosen": -227.28541564941406, "logps/rejected": -401.78131103515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.943609237670898, "rewards/margins": 10.37657356262207, "rewards/rejected": -16.32018280029297, "step": 13311 }, { "epoch": 2.07, "learning_rate": 4.383774054670886e-06, "logits/chosen": -2.3276264667510986, "logits/rejected": -2.811535120010376, "logps/chosen": -85.18148803710938, "logps/rejected": -200.2264404296875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.651843547821045, "rewards/margins": 7.185794830322266, "rewards/rejected": -10.837638854980469, "step": 13312 }, { "epoch": 2.07, "learning_rate": 4.383040614139738e-06, "logits/chosen": -2.6908349990844727, "logits/rejected": -3.0533876419067383, "logps/chosen": -573.6183471679688, "logps/rejected": -731.663330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.183680534362793, "rewards/margins": 12.747271537780762, "rewards/rejected": -16.930952072143555, "step": 13313 }, { "epoch": 2.07, "learning_rate": 4.38230717360859e-06, "logits/chosen": -2.422008514404297, "logits/rejected": -2.071061134338379, "logps/chosen": -316.5093994140625, "logps/rejected": -302.88763427734375, "loss": 0.0852, "rewards/accuracies": 1.0, "rewards/chosen": -6.119498252868652, "rewards/margins": 3.1767380237579346, "rewards/rejected": -9.296236038208008, "step": 13314 }, { "epoch": 2.07, "learning_rate": 4.381573733077442e-06, "logits/chosen": -2.808809518814087, "logits/rejected": -2.6375672817230225, "logps/chosen": -192.700927734375, "logps/rejected": -283.369873046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.0765910148620605, "rewards/margins": 8.09405517578125, "rewards/rejected": -12.170646667480469, "step": 13315 }, { "epoch": 2.07, "learning_rate": 4.3808402925462936e-06, "logits/chosen": -2.659231185913086, "logits/rejected": -2.0648751258850098, "logps/chosen": -538.1298217773438, "logps/rejected": -504.0595703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.967237949371338, "rewards/margins": 7.004876136779785, "rewards/rejected": -11.972114562988281, "step": 13316 }, { "epoch": 2.07, "learning_rate": 4.380106852015146e-06, "logits/chosen": -1.7819970846176147, "logits/rejected": -2.7687699794769287, "logps/chosen": -152.96554565429688, "logps/rejected": -175.3188018798828, "loss": 1.924, "rewards/accuracies": 0.5, "rewards/chosen": -5.953847408294678, "rewards/margins": 2.5716373920440674, "rewards/rejected": -8.525485038757324, "step": 13317 }, { "epoch": 2.07, "learning_rate": 4.379373411483998e-06, "logits/chosen": -2.9733364582061768, "logits/rejected": -2.6854562759399414, "logps/chosen": -352.755615234375, "logps/rejected": -611.84326171875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.7750637531280518, "rewards/margins": 7.95604133605957, "rewards/rejected": -11.73110580444336, "step": 13318 }, { "epoch": 2.07, "learning_rate": 4.37863997095285e-06, "logits/chosen": -2.7421786785125732, "logits/rejected": -2.890578269958496, "logps/chosen": -296.2743835449219, "logps/rejected": -363.52301025390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.493989944458008, "rewards/margins": 9.470111846923828, "rewards/rejected": -16.964101791381836, "step": 13319 }, { "epoch": 2.07, "learning_rate": 4.377906530421702e-06, "logits/chosen": -2.6560511589050293, "logits/rejected": -2.8927371501922607, "logps/chosen": -85.07273864746094, "logps/rejected": -392.0223388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.701233386993408, "rewards/margins": 14.339261054992676, "rewards/rejected": -17.04049301147461, "step": 13320 }, { "epoch": 2.07, "learning_rate": 4.377173089890555e-06, "logits/chosen": -0.7900184392929077, "logits/rejected": -2.7463629245758057, "logps/chosen": -76.98692321777344, "logps/rejected": -358.0535888671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.869062423706055, "rewards/margins": 8.825576782226562, "rewards/rejected": -13.6946382522583, "step": 13321 }, { "epoch": 2.07, "learning_rate": 4.3764396493594065e-06, "logits/chosen": -1.1611416339874268, "logits/rejected": -2.3468141555786133, "logps/chosen": -81.77171325683594, "logps/rejected": -327.5384826660156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.5376152992248535, "rewards/margins": 10.297259330749512, "rewards/rejected": -15.834875106811523, "step": 13322 }, { "epoch": 2.07, "learning_rate": 4.375706208828258e-06, "logits/chosen": -2.599151849746704, "logits/rejected": -2.5846385955810547, "logps/chosen": -127.18783569335938, "logps/rejected": -233.47340393066406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.5521159172058105, "rewards/margins": 10.290924072265625, "rewards/rejected": -12.843040466308594, "step": 13323 }, { "epoch": 2.07, "learning_rate": 4.37497276829711e-06, "logits/chosen": -1.8053654432296753, "logits/rejected": -2.732879638671875, "logps/chosen": -220.01287841796875, "logps/rejected": -529.1927490234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.543785572052002, "rewards/margins": 9.492712020874023, "rewards/rejected": -15.036497116088867, "step": 13324 }, { "epoch": 2.07, "learning_rate": 4.374239327765963e-06, "logits/chosen": -2.5618340969085693, "logits/rejected": -1.6172728538513184, "logps/chosen": -219.331787109375, "logps/rejected": -241.61151123046875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -7.368157863616943, "rewards/margins": 6.078591346740723, "rewards/rejected": -13.446748733520508, "step": 13325 }, { "epoch": 2.07, "learning_rate": 4.373505887234816e-06, "logits/chosen": -2.0292181968688965, "logits/rejected": -2.510659694671631, "logps/chosen": -403.3954162597656, "logps/rejected": -490.80987548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.061243057250977, "rewards/margins": 13.038437843322754, "rewards/rejected": -18.099679946899414, "step": 13326 }, { "epoch": 2.07, "learning_rate": 4.372772446703668e-06, "logits/chosen": -2.487870454788208, "logits/rejected": -2.904499053955078, "logps/chosen": -260.848876953125, "logps/rejected": -429.1562194824219, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.195945739746094, "rewards/margins": 10.953826904296875, "rewards/rejected": -19.14977264404297, "step": 13327 }, { "epoch": 2.07, "learning_rate": 4.3720390061725195e-06, "logits/chosen": -1.3360241651535034, "logits/rejected": -2.115781784057617, "logps/chosen": -249.86842346191406, "logps/rejected": -374.14251708984375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.471945762634277, "rewards/margins": 7.896425247192383, "rewards/rejected": -13.368371963500977, "step": 13328 }, { "epoch": 2.07, "learning_rate": 4.371305565641371e-06, "logits/chosen": -2.13689923286438, "logits/rejected": -2.420255422592163, "logps/chosen": -237.68560791015625, "logps/rejected": -390.28240966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.86488151550293, "rewards/margins": 12.111056327819824, "rewards/rejected": -16.97593879699707, "step": 13329 }, { "epoch": 2.07, "learning_rate": 4.370572125110224e-06, "logits/chosen": -1.9998629093170166, "logits/rejected": -2.7972536087036133, "logps/chosen": -239.16412353515625, "logps/rejected": -589.9019165039062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.775775909423828, "rewards/margins": 9.009541511535645, "rewards/rejected": -14.785318374633789, "step": 13330 }, { "epoch": 2.07, "learning_rate": 4.369838684579076e-06, "logits/chosen": -1.1296765804290771, "logits/rejected": -2.3643274307250977, "logps/chosen": -245.52316284179688, "logps/rejected": -532.2679443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.80194091796875, "rewards/margins": 8.851591110229492, "rewards/rejected": -15.653532028198242, "step": 13331 }, { "epoch": 2.07, "learning_rate": 4.369105244047928e-06, "logits/chosen": -2.9426565170288086, "logits/rejected": -2.8790640830993652, "logps/chosen": -89.9985122680664, "logps/rejected": -152.26333618164062, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/chosen": -5.150174140930176, "rewards/margins": 5.180621147155762, "rewards/rejected": -10.330795288085938, "step": 13332 }, { "epoch": 2.07, "learning_rate": 4.36837180351678e-06, "logits/chosen": -1.8255853652954102, "logits/rejected": -2.935115337371826, "logps/chosen": -162.835205078125, "logps/rejected": -445.02655029296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.9932384490966797, "rewards/margins": 8.527350425720215, "rewards/rejected": -12.520588874816895, "step": 13333 }, { "epoch": 2.07, "learning_rate": 4.3676383629856325e-06, "logits/chosen": -1.1263530254364014, "logits/rejected": -2.3073527812957764, "logps/chosen": -88.74136352539062, "logps/rejected": -316.5045166015625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.100045204162598, "rewards/margins": 7.876220703125, "rewards/rejected": -11.976265907287598, "step": 13334 }, { "epoch": 2.07, "learning_rate": 4.366904922454484e-06, "logits/chosen": -2.9628379344940186, "logits/rejected": -2.816873788833618, "logps/chosen": -296.13616943359375, "logps/rejected": -281.123046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.51406192779541, "rewards/margins": 8.405923843383789, "rewards/rejected": -14.919986724853516, "step": 13335 }, { "epoch": 2.07, "learning_rate": 4.366171481923336e-06, "logits/chosen": -2.8272433280944824, "logits/rejected": -3.0586700439453125, "logps/chosen": -72.31796264648438, "logps/rejected": -215.2166748046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.966217041015625, "rewards/margins": 7.794656753540039, "rewards/rejected": -11.760873794555664, "step": 13336 }, { "epoch": 2.07, "learning_rate": 4.365438041392188e-06, "logits/chosen": -1.8243170976638794, "logits/rejected": -2.822904348373413, "logps/chosen": -169.0576629638672, "logps/rejected": -386.31195068359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.075522422790527, "rewards/margins": 8.957728385925293, "rewards/rejected": -14.03325080871582, "step": 13337 }, { "epoch": 2.07, "learning_rate": 4.36470460086104e-06, "logits/chosen": -2.5963821411132812, "logits/rejected": -3.204770803451538, "logps/chosen": -56.053375244140625, "logps/rejected": -269.35760498046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.1634039878845215, "rewards/margins": 9.065105438232422, "rewards/rejected": -13.228509902954102, "step": 13338 }, { "epoch": 2.07, "learning_rate": 4.363971160329893e-06, "logits/chosen": -2.982996940612793, "logits/rejected": -3.1077828407287598, "logps/chosen": -440.87249755859375, "logps/rejected": -588.2071533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8138680458068848, "rewards/margins": 11.044454574584961, "rewards/rejected": -14.858322143554688, "step": 13339 }, { "epoch": 2.07, "learning_rate": 4.363237719798745e-06, "logits/chosen": -1.637048602104187, "logits/rejected": -2.9062745571136475, "logps/chosen": -313.97601318359375, "logps/rejected": -677.65673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.453121185302734, "rewards/margins": 11.118036270141602, "rewards/rejected": -16.57115936279297, "step": 13340 }, { "epoch": 2.07, "learning_rate": 4.3625042792675965e-06, "logits/chosen": -2.713809013366699, "logits/rejected": -2.6264500617980957, "logps/chosen": -669.0938110351562, "logps/rejected": -720.3248901367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7532522678375244, "rewards/margins": 12.859485626220703, "rewards/rejected": -16.61273956298828, "step": 13341 }, { "epoch": 2.07, "learning_rate": 4.361770838736449e-06, "logits/chosen": -2.69254732131958, "logits/rejected": -2.649449586868286, "logps/chosen": -144.99290466308594, "logps/rejected": -258.4045715332031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.080108165740967, "rewards/margins": 7.406728744506836, "rewards/rejected": -11.486837387084961, "step": 13342 }, { "epoch": 2.08, "learning_rate": 4.361037398205302e-06, "logits/chosen": -2.9212818145751953, "logits/rejected": -2.287153720855713, "logps/chosen": -321.1471862792969, "logps/rejected": -227.40997314453125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -5.311000347137451, "rewards/margins": 5.836697101593018, "rewards/rejected": -11.147697448730469, "step": 13343 }, { "epoch": 2.08, "learning_rate": 4.360303957674154e-06, "logits/chosen": -2.8410003185272217, "logits/rejected": -3.2093725204467773, "logps/chosen": -64.95454406738281, "logps/rejected": -364.8042297363281, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8997905254364014, "rewards/margins": 11.149490356445312, "rewards/rejected": -14.049280166625977, "step": 13344 }, { "epoch": 2.08, "learning_rate": 4.359570517143006e-06, "logits/chosen": -1.595870018005371, "logits/rejected": -2.7371394634246826, "logps/chosen": -148.87954711914062, "logps/rejected": -359.60369873046875, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": -7.2758026123046875, "rewards/margins": 4.978792190551758, "rewards/rejected": -12.254594802856445, "step": 13345 }, { "epoch": 2.08, "learning_rate": 4.3588370766118575e-06, "logits/chosen": -2.511629581451416, "logits/rejected": -2.842233896255493, "logps/chosen": -55.099525451660156, "logps/rejected": -224.27944946289062, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.2019853591918945, "rewards/margins": 9.19662094116211, "rewards/rejected": -12.398605346679688, "step": 13346 }, { "epoch": 2.08, "learning_rate": 4.3581036360807094e-06, "logits/chosen": -2.782073736190796, "logits/rejected": -2.742743968963623, "logps/chosen": -223.02256774902344, "logps/rejected": -260.624755859375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -4.196382999420166, "rewards/margins": 8.11640739440918, "rewards/rejected": -12.312790870666504, "step": 13347 }, { "epoch": 2.08, "learning_rate": 4.357370195549562e-06, "logits/chosen": -3.1672422885894775, "logits/rejected": -2.827035665512085, "logps/chosen": -545.7274169921875, "logps/rejected": -343.2598571777344, "loss": 0.7427, "rewards/accuracies": 0.5, "rewards/chosen": -5.342279434204102, "rewards/margins": 2.5265464782714844, "rewards/rejected": -7.868825912475586, "step": 13348 }, { "epoch": 2.08, "learning_rate": 4.356636755018414e-06, "logits/chosen": -2.493112325668335, "logits/rejected": -2.7355899810791016, "logps/chosen": -160.19349670410156, "logps/rejected": -294.26324462890625, "loss": 0.1425, "rewards/accuracies": 1.0, "rewards/chosen": -3.1316776275634766, "rewards/margins": 5.683017253875732, "rewards/rejected": -8.814695358276367, "step": 13349 }, { "epoch": 2.08, "learning_rate": 4.355903314487266e-06, "logits/chosen": -2.0841963291168213, "logits/rejected": -3.040388822555542, "logps/chosen": -122.03611755371094, "logps/rejected": -488.0447692871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.777015686035156, "rewards/margins": 13.472655296325684, "rewards/rejected": -19.249671936035156, "step": 13350 }, { "epoch": 2.08, "learning_rate": 4.355169873956118e-06, "logits/chosen": -2.502293586730957, "logits/rejected": -2.8713080883026123, "logps/chosen": -111.3565444946289, "logps/rejected": -330.41278076171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.590999603271484, "rewards/margins": 8.943524360656738, "rewards/rejected": -14.534523963928223, "step": 13351 }, { "epoch": 2.08, "learning_rate": 4.3544364334249705e-06, "logits/chosen": -1.3605233430862427, "logits/rejected": -2.653686761856079, "logps/chosen": -192.51519775390625, "logps/rejected": -545.045166015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.5995476245880127, "rewards/margins": 14.150450706481934, "rewards/rejected": -15.749998092651367, "step": 13352 }, { "epoch": 2.08, "learning_rate": 4.353702992893822e-06, "logits/chosen": -2.7065482139587402, "logits/rejected": -3.176088333129883, "logps/chosen": -85.80882263183594, "logps/rejected": -273.1292724609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.297563552856445, "rewards/margins": 7.210121154785156, "rewards/rejected": -13.507684707641602, "step": 13353 }, { "epoch": 2.08, "learning_rate": 4.352969552362674e-06, "logits/chosen": -2.783154249191284, "logits/rejected": -2.939150333404541, "logps/chosen": -159.96437072753906, "logps/rejected": -357.3312683105469, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -6.110410213470459, "rewards/margins": 6.638889789581299, "rewards/rejected": -12.749300003051758, "step": 13354 }, { "epoch": 2.08, "learning_rate": 4.352236111831526e-06, "logits/chosen": -1.8277442455291748, "logits/rejected": -2.9679551124572754, "logps/chosen": -327.1280517578125, "logps/rejected": -811.956787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4517502784729004, "rewards/margins": 15.59019947052002, "rewards/rejected": -19.041950225830078, "step": 13355 }, { "epoch": 2.08, "learning_rate": 4.351502671300378e-06, "logits/chosen": -2.1735496520996094, "logits/rejected": -2.888278007507324, "logps/chosen": -226.64955139160156, "logps/rejected": -299.76287841796875, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/chosen": -4.825214385986328, "rewards/margins": 4.711513996124268, "rewards/rejected": -9.536727905273438, "step": 13356 }, { "epoch": 2.08, "learning_rate": 4.350769230769231e-06, "logits/chosen": -2.174250364303589, "logits/rejected": -3.012359857559204, "logps/chosen": -124.31348419189453, "logps/rejected": -403.8790283203125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -8.462063789367676, "rewards/margins": 6.826174736022949, "rewards/rejected": -15.288238525390625, "step": 13357 }, { "epoch": 2.08, "learning_rate": 4.350035790238083e-06, "logits/chosen": -1.31660795211792, "logits/rejected": -2.680150270462036, "logps/chosen": -112.61465454101562, "logps/rejected": -473.9992980957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2207508087158203, "rewards/margins": 10.979331016540527, "rewards/rejected": -12.200081825256348, "step": 13358 }, { "epoch": 2.08, "learning_rate": 4.349302349706935e-06, "logits/chosen": -2.048068046569824, "logits/rejected": -2.888209581375122, "logps/chosen": -265.12567138671875, "logps/rejected": -634.13037109375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -6.472214698791504, "rewards/margins": 8.521917343139648, "rewards/rejected": -14.994132041931152, "step": 13359 }, { "epoch": 2.08, "learning_rate": 4.348568909175787e-06, "logits/chosen": -1.5252175331115723, "logits/rejected": -2.4577078819274902, "logps/chosen": -245.94915771484375, "logps/rejected": -470.10137939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2392401695251465, "rewards/margins": 11.280624389648438, "rewards/rejected": -14.519865036010742, "step": 13360 }, { "epoch": 2.08, "learning_rate": 4.34783546864464e-06, "logits/chosen": -2.2602145671844482, "logits/rejected": -2.9120142459869385, "logps/chosen": -181.28814697265625, "logps/rejected": -294.416748046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.33429217338562, "rewards/margins": 9.61773681640625, "rewards/rejected": -12.952028274536133, "step": 13361 }, { "epoch": 2.08, "learning_rate": 4.347102028113492e-06, "logits/chosen": -2.152526617050171, "logits/rejected": -3.1272456645965576, "logps/chosen": -215.79913330078125, "logps/rejected": -465.8928527832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.043527603149414, "rewards/margins": 12.239436149597168, "rewards/rejected": -16.282962799072266, "step": 13362 }, { "epoch": 2.08, "learning_rate": 4.346368587582344e-06, "logits/chosen": -2.7173094749450684, "logits/rejected": -2.1713478565216064, "logps/chosen": -772.04296875, "logps/rejected": -462.3790283203125, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -3.750932216644287, "rewards/margins": 9.144474983215332, "rewards/rejected": -12.895407676696777, "step": 13363 }, { "epoch": 2.08, "learning_rate": 4.345635147051196e-06, "logits/chosen": -2.0597083568573, "logits/rejected": -2.9204728603363037, "logps/chosen": -97.59367370605469, "logps/rejected": -485.45318603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.265018939971924, "rewards/margins": 15.420601844787598, "rewards/rejected": -19.68562126159668, "step": 13364 }, { "epoch": 2.08, "learning_rate": 4.3449017065200475e-06, "logits/chosen": -2.4937963485717773, "logits/rejected": -1.9514089822769165, "logps/chosen": -154.4613800048828, "logps/rejected": -193.1737518310547, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.995343208312988, "rewards/margins": 6.919336318969727, "rewards/rejected": -11.914680480957031, "step": 13365 }, { "epoch": 2.08, "learning_rate": 4.3441682659889e-06, "logits/chosen": -1.3053282499313354, "logits/rejected": -1.9500528573989868, "logps/chosen": -677.8424682617188, "logps/rejected": -632.3433837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.417163372039795, "rewards/margins": 11.662335395812988, "rewards/rejected": -17.079498291015625, "step": 13366 }, { "epoch": 2.08, "learning_rate": 4.343434825457752e-06, "logits/chosen": -2.7542920112609863, "logits/rejected": -2.8064846992492676, "logps/chosen": -635.5701293945312, "logps/rejected": -610.3399047851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.38316535949707, "rewards/margins": 11.985843658447266, "rewards/rejected": -17.369009017944336, "step": 13367 }, { "epoch": 2.08, "learning_rate": 4.342701384926604e-06, "logits/chosen": -2.829045295715332, "logits/rejected": -3.0599822998046875, "logps/chosen": -296.73016357421875, "logps/rejected": -440.06427001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7239694595336914, "rewards/margins": 10.857666969299316, "rewards/rejected": -14.581636428833008, "step": 13368 }, { "epoch": 2.08, "learning_rate": 4.341967944395456e-06, "logits/chosen": -2.9159963130950928, "logits/rejected": -3.048659563064575, "logps/chosen": -241.5975341796875, "logps/rejected": -306.44659423828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.823370933532715, "rewards/margins": 6.829891204833984, "rewards/rejected": -12.6532621383667, "step": 13369 }, { "epoch": 2.08, "learning_rate": 4.3412345038643086e-06, "logits/chosen": -2.5763397216796875, "logits/rejected": -2.614851236343384, "logps/chosen": -487.22186279296875, "logps/rejected": -523.1019287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.099742412567139, "rewards/margins": 10.491724967956543, "rewards/rejected": -15.591466903686523, "step": 13370 }, { "epoch": 2.08, "learning_rate": 4.3405010633331604e-06, "logits/chosen": -2.7546496391296387, "logits/rejected": -2.530329465866089, "logps/chosen": -325.0602111816406, "logps/rejected": -499.9033203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.8004150390625, "rewards/margins": 11.400715827941895, "rewards/rejected": -17.20113182067871, "step": 13371 }, { "epoch": 2.08, "learning_rate": 4.339767622802012e-06, "logits/chosen": -2.728313684463501, "logits/rejected": -2.6953389644622803, "logps/chosen": -189.3504638671875, "logps/rejected": -254.53338623046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.26629638671875, "rewards/margins": 9.501222610473633, "rewards/rejected": -13.767518997192383, "step": 13372 }, { "epoch": 2.08, "learning_rate": 4.339034182270864e-06, "logits/chosen": -2.8341829776763916, "logits/rejected": -2.8693437576293945, "logps/chosen": -407.9178161621094, "logps/rejected": -311.51275634765625, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -7.405174255371094, "rewards/margins": 4.310817718505859, "rewards/rejected": -11.715991973876953, "step": 13373 }, { "epoch": 2.08, "learning_rate": 4.338300741739717e-06, "logits/chosen": -1.050126075744629, "logits/rejected": -2.217141628265381, "logps/chosen": -327.14459228515625, "logps/rejected": -485.58746337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9885644912719727, "rewards/margins": 12.031257629394531, "rewards/rejected": -16.01982307434082, "step": 13374 }, { "epoch": 2.08, "learning_rate": 4.337567301208569e-06, "logits/chosen": -1.152525544166565, "logits/rejected": -2.929464101791382, "logps/chosen": -89.757080078125, "logps/rejected": -557.742919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5699567794799805, "rewards/margins": 11.719581604003906, "rewards/rejected": -15.289539337158203, "step": 13375 }, { "epoch": 2.08, "learning_rate": 4.3368338606774215e-06, "logits/chosen": -2.672757625579834, "logits/rejected": -2.7917981147766113, "logps/chosen": -167.92330932617188, "logps/rejected": -286.37481689453125, "loss": 0.8016, "rewards/accuracies": 0.5, "rewards/chosen": -7.559739589691162, "rewards/margins": 4.379759788513184, "rewards/rejected": -11.939498901367188, "step": 13376 }, { "epoch": 2.08, "learning_rate": 4.336100420146273e-06, "logits/chosen": -3.066843032836914, "logits/rejected": -2.955500602722168, "logps/chosen": -508.59796142578125, "logps/rejected": -495.84423828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.191100120544434, "rewards/margins": 7.683177471160889, "rewards/rejected": -15.87427806854248, "step": 13377 }, { "epoch": 2.08, "learning_rate": 4.335366979615125e-06, "logits/chosen": -2.3725509643554688, "logits/rejected": -2.7069058418273926, "logps/chosen": -575.5570678710938, "logps/rejected": -545.4020385742188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.865105390548706, "rewards/margins": 8.326278686523438, "rewards/rejected": -11.191384315490723, "step": 13378 }, { "epoch": 2.08, "learning_rate": 4.334633539083978e-06, "logits/chosen": -2.4469106197357178, "logits/rejected": -2.6223649978637695, "logps/chosen": -448.4581604003906, "logps/rejected": -539.0073852539062, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.27176570892334, "rewards/margins": 12.770305633544922, "rewards/rejected": -15.042072296142578, "step": 13379 }, { "epoch": 2.08, "learning_rate": 4.33390009855283e-06, "logits/chosen": -2.0885894298553467, "logits/rejected": -2.4822564125061035, "logps/chosen": -586.13623046875, "logps/rejected": -541.5509033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9963746070861816, "rewards/margins": 14.86577033996582, "rewards/rejected": -17.862144470214844, "step": 13380 }, { "epoch": 2.08, "learning_rate": 4.333166658021682e-06, "logits/chosen": -3.1092336177825928, "logits/rejected": -2.6296627521514893, "logps/chosen": -167.824462890625, "logps/rejected": -303.25714111328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.105072498321533, "rewards/margins": 8.089444160461426, "rewards/rejected": -13.194517135620117, "step": 13381 }, { "epoch": 2.08, "learning_rate": 4.332433217490534e-06, "logits/chosen": -2.023254632949829, "logits/rejected": -2.795619249343872, "logps/chosen": -158.56155395507812, "logps/rejected": -309.67681884765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.948970079421997, "rewards/margins": 7.678752899169922, "rewards/rejected": -11.627723693847656, "step": 13382 }, { "epoch": 2.08, "learning_rate": 4.331699776959386e-06, "logits/chosen": -1.367998719215393, "logits/rejected": -2.9709248542785645, "logps/chosen": -99.033447265625, "logps/rejected": -539.2965698242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9848670959472656, "rewards/margins": 10.821910858154297, "rewards/rejected": -14.806777954101562, "step": 13383 }, { "epoch": 2.08, "learning_rate": 4.330966336428238e-06, "logits/chosen": -2.569697856903076, "logits/rejected": -2.705369472503662, "logps/chosen": -462.8632507324219, "logps/rejected": -687.0269775390625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -5.1957550048828125, "rewards/margins": 6.535252571105957, "rewards/rejected": -11.73100757598877, "step": 13384 }, { "epoch": 2.08, "learning_rate": 4.33023289589709e-06, "logits/chosen": -1.777685284614563, "logits/rejected": -2.8457539081573486, "logps/chosen": -292.34088134765625, "logps/rejected": -463.8239440917969, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.024682998657227, "rewards/margins": 10.010942459106445, "rewards/rejected": -16.035625457763672, "step": 13385 }, { "epoch": 2.08, "learning_rate": 4.329499455365942e-06, "logits/chosen": -1.9778120517730713, "logits/rejected": -2.8214480876922607, "logps/chosen": -163.57101440429688, "logps/rejected": -429.8543395996094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.533509254455566, "rewards/margins": 9.758779525756836, "rewards/rejected": -14.292288780212402, "step": 13386 }, { "epoch": 2.08, "learning_rate": 4.328766014834794e-06, "logits/chosen": -2.6910228729248047, "logits/rejected": -1.7617677450180054, "logps/chosen": -349.525634765625, "logps/rejected": -402.06390380859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.822493076324463, "rewards/margins": 11.515299797058105, "rewards/rejected": -15.337793350219727, "step": 13387 }, { "epoch": 2.08, "learning_rate": 4.328032574303647e-06, "logits/chosen": -2.699524402618408, "logits/rejected": -2.4000139236450195, "logps/chosen": -174.74847412109375, "logps/rejected": -293.74365234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.149568557739258, "rewards/margins": 7.483922958374023, "rewards/rejected": -11.633491516113281, "step": 13388 }, { "epoch": 2.08, "learning_rate": 4.3272991337724985e-06, "logits/chosen": -2.666900634765625, "logits/rejected": -2.661693572998047, "logps/chosen": -266.2069091796875, "logps/rejected": -247.88287353515625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -4.422192573547363, "rewards/margins": 7.180386066436768, "rewards/rejected": -11.602579116821289, "step": 13389 }, { "epoch": 2.08, "learning_rate": 4.32656569324135e-06, "logits/chosen": -2.3357832431793213, "logits/rejected": -3.0167455673217773, "logps/chosen": -142.2429962158203, "logps/rejected": -270.1146240234375, "loss": 3.2331, "rewards/accuracies": 0.5, "rewards/chosen": -9.963475227355957, "rewards/margins": 3.234715223312378, "rewards/rejected": -13.198190689086914, "step": 13390 }, { "epoch": 2.08, "learning_rate": 4.325832252710202e-06, "logits/chosen": -2.1484084129333496, "logits/rejected": -3.27897047996521, "logps/chosen": -262.6307067871094, "logps/rejected": -521.804443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.970407009124756, "rewards/margins": 10.963088035583496, "rewards/rejected": -14.933494567871094, "step": 13391 }, { "epoch": 2.08, "learning_rate": 4.325098812179055e-06, "logits/chosen": -2.868344783782959, "logits/rejected": -2.6398096084594727, "logps/chosen": -152.6928253173828, "logps/rejected": -152.2080078125, "loss": 0.2648, "rewards/accuracies": 1.0, "rewards/chosen": -4.562203884124756, "rewards/margins": 6.346597194671631, "rewards/rejected": -10.908801078796387, "step": 13392 }, { "epoch": 2.08, "learning_rate": 4.324365371647908e-06, "logits/chosen": -2.4833579063415527, "logits/rejected": -2.728067398071289, "logps/chosen": -519.6817016601562, "logps/rejected": -465.01959228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.247705936431885, "rewards/margins": 14.792287826538086, "rewards/rejected": -19.039993286132812, "step": 13393 }, { "epoch": 2.08, "learning_rate": 4.3236319311167596e-06, "logits/chosen": -2.1974573135375977, "logits/rejected": -2.482923984527588, "logps/chosen": -111.75396728515625, "logps/rejected": -253.06930541992188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.331947326660156, "rewards/margins": 8.030208587646484, "rewards/rejected": -12.36215591430664, "step": 13394 }, { "epoch": 2.08, "learning_rate": 4.3228984905856114e-06, "logits/chosen": -2.137298107147217, "logits/rejected": -2.9052987098693848, "logps/chosen": -406.0015869140625, "logps/rejected": -576.121826171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.46101188659668, "rewards/margins": 10.345298767089844, "rewards/rejected": -19.806310653686523, "step": 13395 }, { "epoch": 2.08, "learning_rate": 4.322165050054463e-06, "logits/chosen": -1.6579177379608154, "logits/rejected": -2.294682502746582, "logps/chosen": -440.77264404296875, "logps/rejected": -753.7939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8715884685516357, "rewards/margins": 14.233781814575195, "rewards/rejected": -18.105371475219727, "step": 13396 }, { "epoch": 2.08, "learning_rate": 4.321431609523316e-06, "logits/chosen": -2.71445631980896, "logits/rejected": -2.98193621635437, "logps/chosen": -110.5567398071289, "logps/rejected": -245.4111328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.7692527770996094, "rewards/margins": 8.907044410705566, "rewards/rejected": -12.676297187805176, "step": 13397 }, { "epoch": 2.08, "learning_rate": 4.320698168992168e-06, "logits/chosen": -2.3128275871276855, "logits/rejected": -3.0842318534851074, "logps/chosen": -348.23577880859375, "logps/rejected": -331.07501220703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.849902153015137, "rewards/margins": 7.813453674316406, "rewards/rejected": -15.663355827331543, "step": 13398 }, { "epoch": 2.08, "learning_rate": 4.31996472846102e-06, "logits/chosen": -1.7144129276275635, "logits/rejected": -2.7357990741729736, "logps/chosen": -118.76419830322266, "logps/rejected": -278.0569763183594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.35477352142334, "rewards/margins": 8.310314178466797, "rewards/rejected": -13.66508674621582, "step": 13399 }, { "epoch": 2.08, "learning_rate": 4.319231287929872e-06, "logits/chosen": -2.3690783977508545, "logits/rejected": -3.0468013286590576, "logps/chosen": -182.7200469970703, "logps/rejected": -356.089111328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.493661403656006, "rewards/margins": 11.01488971710205, "rewards/rejected": -14.508550643920898, "step": 13400 }, { "epoch": 2.08, "learning_rate": 4.318497847398724e-06, "logits/chosen": -2.8517441749572754, "logits/rejected": -2.451754093170166, "logps/chosen": -293.61846923828125, "logps/rejected": -292.015869140625, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -6.674073219299316, "rewards/margins": 4.7061614990234375, "rewards/rejected": -11.380234718322754, "step": 13401 }, { "epoch": 2.08, "learning_rate": 4.317764406867576e-06, "logits/chosen": -2.685533046722412, "logits/rejected": -2.37075138092041, "logps/chosen": -372.84991455078125, "logps/rejected": -474.1282958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.594874620437622, "rewards/margins": 11.345173835754395, "rewards/rejected": -14.940048217773438, "step": 13402 }, { "epoch": 2.08, "learning_rate": 4.317030966336428e-06, "logits/chosen": -1.1482388973236084, "logits/rejected": -2.5443942546844482, "logps/chosen": -164.55909729003906, "logps/rejected": -256.6339416503906, "loss": 0.0437, "rewards/accuracies": 1.0, "rewards/chosen": -6.993095874786377, "rewards/margins": 4.096898078918457, "rewards/rejected": -11.089994430541992, "step": 13403 }, { "epoch": 2.08, "learning_rate": 4.31629752580528e-06, "logits/chosen": -1.9856127500534058, "logits/rejected": -2.914383888244629, "logps/chosen": -150.91249084472656, "logps/rejected": -499.93060302734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.7599663734436035, "rewards/margins": 12.968378067016602, "rewards/rejected": -18.728343963623047, "step": 13404 }, { "epoch": 2.08, "learning_rate": 4.315564085274132e-06, "logits/chosen": -1.9594957828521729, "logits/rejected": -2.681434392929077, "logps/chosen": -197.45172119140625, "logps/rejected": -552.6793212890625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.600259304046631, "rewards/margins": 10.080097198486328, "rewards/rejected": -16.680356979370117, "step": 13405 }, { "epoch": 2.08, "learning_rate": 4.314830644742985e-06, "logits/chosen": -2.2511818408966064, "logits/rejected": -3.0248305797576904, "logps/chosen": -149.0183868408203, "logps/rejected": -281.00244140625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.828650951385498, "rewards/margins": 7.532360076904297, "rewards/rejected": -13.361011505126953, "step": 13406 }, { "epoch": 2.09, "learning_rate": 4.3140972042118365e-06, "logits/chosen": -3.061201572418213, "logits/rejected": -2.921543836593628, "logps/chosen": -411.83709716796875, "logps/rejected": -621.5062255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.181562900543213, "rewards/margins": 10.509956359863281, "rewards/rejected": -16.69152069091797, "step": 13407 }, { "epoch": 2.09, "learning_rate": 4.313363763680688e-06, "logits/chosen": -2.229220390319824, "logits/rejected": -2.6592185497283936, "logps/chosen": -210.23291015625, "logps/rejected": -274.32891845703125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -6.084491729736328, "rewards/margins": 6.101103782653809, "rewards/rejected": -12.185596466064453, "step": 13408 }, { "epoch": 2.09, "learning_rate": 4.312630323149541e-06, "logits/chosen": -2.85799503326416, "logits/rejected": -2.7537686824798584, "logps/chosen": -331.5418395996094, "logps/rejected": -271.1285400390625, "loss": 1.4531, "rewards/accuracies": 0.5, "rewards/chosen": -7.787322521209717, "rewards/margins": 2.3190112113952637, "rewards/rejected": -10.10633373260498, "step": 13409 }, { "epoch": 2.09, "learning_rate": 4.311896882618394e-06, "logits/chosen": -1.393011212348938, "logits/rejected": -2.782843828201294, "logps/chosen": -129.5274658203125, "logps/rejected": -326.2398681640625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.165217399597168, "rewards/margins": 6.391387939453125, "rewards/rejected": -10.556605339050293, "step": 13410 }, { "epoch": 2.09, "learning_rate": 4.311163442087246e-06, "logits/chosen": -0.7864423990249634, "logits/rejected": -2.003742218017578, "logps/chosen": -283.59332275390625, "logps/rejected": -864.4901123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7508347034454346, "rewards/margins": 22.836193084716797, "rewards/rejected": -26.58702850341797, "step": 13411 }, { "epoch": 2.09, "learning_rate": 4.310430001556098e-06, "logits/chosen": -2.330401659011841, "logits/rejected": -2.713712692260742, "logps/chosen": -248.54627990722656, "logps/rejected": -341.72003173828125, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -7.242964744567871, "rewards/margins": 6.194118022918701, "rewards/rejected": -13.437082290649414, "step": 13412 }, { "epoch": 2.09, "learning_rate": 4.3096965610249495e-06, "logits/chosen": -1.6849733591079712, "logits/rejected": -2.6183581352233887, "logps/chosen": -187.93572998046875, "logps/rejected": -309.6422119140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.318680763244629, "rewards/margins": 8.920923233032227, "rewards/rejected": -15.239604949951172, "step": 13413 }, { "epoch": 2.09, "learning_rate": 4.308963120493801e-06, "logits/chosen": -2.596933364868164, "logits/rejected": -2.5972797870635986, "logps/chosen": -484.35577392578125, "logps/rejected": -551.2106323242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.171760559082031, "rewards/margins": 12.762630462646484, "rewards/rejected": -16.934391021728516, "step": 13414 }, { "epoch": 2.09, "learning_rate": 4.308229679962654e-06, "logits/chosen": -1.9921724796295166, "logits/rejected": -2.818138837814331, "logps/chosen": -325.9378662109375, "logps/rejected": -522.010986328125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -8.473429679870605, "rewards/margins": 7.175322532653809, "rewards/rejected": -15.648752212524414, "step": 13415 }, { "epoch": 2.09, "learning_rate": 4.307496239431506e-06, "logits/chosen": -2.477771043777466, "logits/rejected": -3.2965381145477295, "logps/chosen": -103.2857666015625, "logps/rejected": -349.77099609375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -3.7942748069763184, "rewards/margins": 6.080163955688477, "rewards/rejected": -9.874439239501953, "step": 13416 }, { "epoch": 2.09, "learning_rate": 4.306762798900358e-06, "logits/chosen": -2.1644134521484375, "logits/rejected": -2.824265241622925, "logps/chosen": -150.3385772705078, "logps/rejected": -312.1175537109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.655064344406128, "rewards/margins": 7.968326568603516, "rewards/rejected": -10.623391151428223, "step": 13417 }, { "epoch": 2.09, "learning_rate": 4.30602935836921e-06, "logits/chosen": -3.0495312213897705, "logits/rejected": -2.4472081661224365, "logps/chosen": -195.010009765625, "logps/rejected": -172.88626098632812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -1.4567383527755737, "rewards/margins": 7.651051044464111, "rewards/rejected": -9.107789993286133, "step": 13418 }, { "epoch": 2.09, "learning_rate": 4.3052959178380624e-06, "logits/chosen": -3.040437936782837, "logits/rejected": -2.889488935470581, "logps/chosen": -642.3216552734375, "logps/rejected": -682.9656982421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.005912780761719, "rewards/margins": 12.450645446777344, "rewards/rejected": -16.456558227539062, "step": 13419 }, { "epoch": 2.09, "learning_rate": 4.304562477306914e-06, "logits/chosen": -2.966191053390503, "logits/rejected": -3.1432385444641113, "logps/chosen": -597.9181518554688, "logps/rejected": -715.9784545898438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.694406509399414, "rewards/margins": 12.063894271850586, "rewards/rejected": -16.75830078125, "step": 13420 }, { "epoch": 2.09, "learning_rate": 4.303829036775766e-06, "logits/chosen": -3.0707499980926514, "logits/rejected": -2.743525505065918, "logps/chosen": -693.4447021484375, "logps/rejected": -449.8568115234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.700313568115234, "rewards/margins": 8.691915512084961, "rewards/rejected": -14.392229080200195, "step": 13421 }, { "epoch": 2.09, "learning_rate": 4.303095596244618e-06, "logits/chosen": -2.5832834243774414, "logits/rejected": -3.1419830322265625, "logps/chosen": -87.42538452148438, "logps/rejected": -253.62721252441406, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -6.229890823364258, "rewards/margins": 6.069397926330566, "rewards/rejected": -12.299288749694824, "step": 13422 }, { "epoch": 2.09, "learning_rate": 4.302362155713471e-06, "logits/chosen": -1.1363565921783447, "logits/rejected": -2.378875970840454, "logps/chosen": -127.49473571777344, "logps/rejected": -658.1114501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.93087100982666, "rewards/margins": 14.601712226867676, "rewards/rejected": -20.532583236694336, "step": 13423 }, { "epoch": 2.09, "learning_rate": 4.301628715182323e-06, "logits/chosen": -2.7823216915130615, "logits/rejected": -2.6655585765838623, "logps/chosen": -550.6420288085938, "logps/rejected": -500.82733154296875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.007109642028809, "rewards/margins": 8.579482078552246, "rewards/rejected": -13.586591720581055, "step": 13424 }, { "epoch": 2.09, "learning_rate": 4.3008952746511746e-06, "logits/chosen": -2.2090659141540527, "logits/rejected": -2.7967724800109863, "logps/chosen": -134.2376708984375, "logps/rejected": -291.65057373046875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -4.138495922088623, "rewards/margins": 7.374670028686523, "rewards/rejected": -11.513166427612305, "step": 13425 }, { "epoch": 2.09, "learning_rate": 4.300161834120027e-06, "logits/chosen": -2.0400140285491943, "logits/rejected": -2.6534533500671387, "logps/chosen": -157.67245483398438, "logps/rejected": -333.1781921386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2481441497802734, "rewards/margins": 10.371540069580078, "rewards/rejected": -13.619684219360352, "step": 13426 }, { "epoch": 2.09, "learning_rate": 4.299428393588879e-06, "logits/chosen": -3.089754104614258, "logits/rejected": -3.271320104598999, "logps/chosen": -76.49830627441406, "logps/rejected": -230.78720092773438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.679407119750977, "rewards/margins": 9.002546310424805, "rewards/rejected": -13.681954383850098, "step": 13427 }, { "epoch": 2.09, "learning_rate": 4.298694953057732e-06, "logits/chosen": -1.2994630336761475, "logits/rejected": -2.443917751312256, "logps/chosen": -126.45753479003906, "logps/rejected": -320.42559814453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.584117889404297, "rewards/margins": 8.714035034179688, "rewards/rejected": -11.298152923583984, "step": 13428 }, { "epoch": 2.09, "learning_rate": 4.297961512526584e-06, "logits/chosen": -2.336215019226074, "logits/rejected": -2.827669382095337, "logps/chosen": -220.3921356201172, "logps/rejected": -452.42547607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.67976188659668, "rewards/margins": 10.200380325317383, "rewards/rejected": -14.880142211914062, "step": 13429 }, { "epoch": 2.09, "learning_rate": 4.297228071995436e-06, "logits/chosen": -2.6065073013305664, "logits/rejected": -2.831669807434082, "logps/chosen": -115.02978515625, "logps/rejected": -261.1167297363281, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -8.372265815734863, "rewards/margins": 7.877645015716553, "rewards/rejected": -16.249910354614258, "step": 13430 }, { "epoch": 2.09, "learning_rate": 4.2964946314642875e-06, "logits/chosen": -2.6194725036621094, "logits/rejected": -3.110985040664673, "logps/chosen": -462.6063232421875, "logps/rejected": -633.8760375976562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.601264953613281, "rewards/margins": 11.197525024414062, "rewards/rejected": -17.798789978027344, "step": 13431 }, { "epoch": 2.09, "learning_rate": 4.29576119093314e-06, "logits/chosen": -0.9084821343421936, "logits/rejected": -2.232680559158325, "logps/chosen": -168.0418701171875, "logps/rejected": -507.391845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.680330753326416, "rewards/margins": 12.015082359313965, "rewards/rejected": -17.695411682128906, "step": 13432 }, { "epoch": 2.09, "learning_rate": 4.295027750401992e-06, "logits/chosen": -2.688163995742798, "logits/rejected": -2.041752576828003, "logps/chosen": -214.3472900390625, "logps/rejected": -254.8531951904297, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -4.578770160675049, "rewards/margins": 5.057002544403076, "rewards/rejected": -9.635772705078125, "step": 13433 }, { "epoch": 2.09, "learning_rate": 4.294294309870844e-06, "logits/chosen": -1.9382976293563843, "logits/rejected": -2.872225522994995, "logps/chosen": -135.48634338378906, "logps/rejected": -500.99920654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.424932956695557, "rewards/margins": 13.062143325805664, "rewards/rejected": -18.487075805664062, "step": 13434 }, { "epoch": 2.09, "learning_rate": 4.293560869339696e-06, "logits/chosen": -1.4857046604156494, "logits/rejected": -2.7190592288970947, "logps/chosen": -129.4575653076172, "logps/rejected": -353.3897705078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.6284990310668945, "rewards/margins": 8.168455123901367, "rewards/rejected": -12.796953201293945, "step": 13435 }, { "epoch": 2.09, "learning_rate": 4.292827428808548e-06, "logits/chosen": -2.1719861030578613, "logits/rejected": -2.49845814704895, "logps/chosen": -150.14060974121094, "logps/rejected": -302.8919372558594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.375922679901123, "rewards/margins": 7.531410217285156, "rewards/rejected": -11.907332420349121, "step": 13436 }, { "epoch": 2.09, "learning_rate": 4.2920939882774005e-06, "logits/chosen": -2.740039348602295, "logits/rejected": -2.882956027984619, "logps/chosen": -147.9054718017578, "logps/rejected": -294.8170471191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0731499195098877, "rewards/margins": 11.164474487304688, "rewards/rejected": -13.237624168395996, "step": 13437 }, { "epoch": 2.09, "learning_rate": 4.291360547746252e-06, "logits/chosen": -2.6553664207458496, "logits/rejected": -2.6970181465148926, "logps/chosen": -386.65228271484375, "logps/rejected": -470.1986389160156, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.984737396240234, "rewards/margins": 6.274811744689941, "rewards/rejected": -12.259549140930176, "step": 13438 }, { "epoch": 2.09, "learning_rate": 4.290627107215104e-06, "logits/chosen": -2.1237974166870117, "logits/rejected": -2.73028826713562, "logps/chosen": -132.68264770507812, "logps/rejected": -214.00051879882812, "loss": 0.0608, "rewards/accuracies": 1.0, "rewards/chosen": -4.457813739776611, "rewards/margins": 5.775093078613281, "rewards/rejected": -10.232906341552734, "step": 13439 }, { "epoch": 2.09, "learning_rate": 4.289893666683956e-06, "logits/chosen": -2.455960750579834, "logits/rejected": -2.729536533355713, "logps/chosen": -237.64016723632812, "logps/rejected": -371.4710693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.697150230407715, "rewards/margins": 9.298171997070312, "rewards/rejected": -15.995323181152344, "step": 13440 }, { "epoch": 2.09, "learning_rate": 4.289160226152809e-06, "logits/chosen": -2.8845274448394775, "logits/rejected": -2.622957944869995, "logps/chosen": -226.35865783691406, "logps/rejected": -294.4079895019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5161827206611633, "rewards/margins": 10.586862564086914, "rewards/rejected": -11.103044509887695, "step": 13441 }, { "epoch": 2.09, "learning_rate": 4.288426785621661e-06, "logits/chosen": -2.2309837341308594, "logits/rejected": -2.9385251998901367, "logps/chosen": -185.37306213378906, "logps/rejected": -379.9483642578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.090726137161255, "rewards/margins": 6.6059722900390625, "rewards/rejected": -9.696698188781738, "step": 13442 }, { "epoch": 2.09, "learning_rate": 4.287693345090513e-06, "logits/chosen": -3.0000925064086914, "logits/rejected": -2.945221185684204, "logps/chosen": -214.6302490234375, "logps/rejected": -222.84706115722656, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -4.854013919830322, "rewards/margins": 5.66741418838501, "rewards/rejected": -10.521428108215332, "step": 13443 }, { "epoch": 2.09, "learning_rate": 4.286959904559365e-06, "logits/chosen": -2.95082426071167, "logits/rejected": -2.2111189365386963, "logps/chosen": -249.0408172607422, "logps/rejected": -269.7311706542969, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.533026695251465, "rewards/margins": 6.790755748748779, "rewards/rejected": -11.323781967163086, "step": 13444 }, { "epoch": 2.09, "learning_rate": 4.286226464028217e-06, "logits/chosen": -2.5450758934020996, "logits/rejected": -1.8655704259872437, "logps/chosen": -695.4517211914062, "logps/rejected": -571.6106567382812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.071530342102051, "rewards/margins": 12.990878105163574, "rewards/rejected": -18.062408447265625, "step": 13445 }, { "epoch": 2.09, "learning_rate": 4.28549302349707e-06, "logits/chosen": -2.6661031246185303, "logits/rejected": -2.7562081813812256, "logps/chosen": -79.77099609375, "logps/rejected": -274.77545166015625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.8036744594573975, "rewards/margins": 9.934144020080566, "rewards/rejected": -13.737817764282227, "step": 13446 }, { "epoch": 2.09, "learning_rate": 4.284759582965922e-06, "logits/chosen": -0.6425809264183044, "logits/rejected": -2.4160385131835938, "logps/chosen": -150.9534912109375, "logps/rejected": -436.81268310546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.704758644104004, "rewards/margins": 21.654184341430664, "rewards/rejected": -25.358943939208984, "step": 13447 }, { "epoch": 2.09, "learning_rate": 4.284026142434774e-06, "logits/chosen": -2.4372642040252686, "logits/rejected": -1.0794540643692017, "logps/chosen": -335.3633728027344, "logps/rejected": -155.0264129638672, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/chosen": -3.5018982887268066, "rewards/margins": 5.762509346008301, "rewards/rejected": -9.264408111572266, "step": 13448 }, { "epoch": 2.09, "learning_rate": 4.2832927019036256e-06, "logits/chosen": -1.8215893507003784, "logits/rejected": -3.0183329582214355, "logps/chosen": -89.03271484375, "logps/rejected": -370.72021484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.4114837646484375, "rewards/margins": 10.508295059204102, "rewards/rejected": -15.919778823852539, "step": 13449 }, { "epoch": 2.09, "learning_rate": 4.282559261372478e-06, "logits/chosen": -1.7405368089675903, "logits/rejected": -2.656243324279785, "logps/chosen": -157.1578369140625, "logps/rejected": -381.3482666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.177818298339844, "rewards/margins": 11.212772369384766, "rewards/rejected": -18.39059066772461, "step": 13450 }, { "epoch": 2.09, "learning_rate": 4.28182582084133e-06, "logits/chosen": -2.2917704582214355, "logits/rejected": -3.0799028873443604, "logps/chosen": -80.41694641113281, "logps/rejected": -468.573486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.250617980957031, "rewards/margins": 13.633454322814941, "rewards/rejected": -17.884071350097656, "step": 13451 }, { "epoch": 2.09, "learning_rate": 4.281092380310182e-06, "logits/chosen": -2.110585927963257, "logits/rejected": -2.79024338722229, "logps/chosen": -412.02362060546875, "logps/rejected": -456.755126953125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.79387092590332, "rewards/margins": 7.288177013397217, "rewards/rejected": -13.082048416137695, "step": 13452 }, { "epoch": 2.09, "learning_rate": 4.280358939779034e-06, "logits/chosen": -2.7439610958099365, "logits/rejected": -2.6522302627563477, "logps/chosen": -141.464111328125, "logps/rejected": -267.67010498046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.1505478620529175, "rewards/margins": 7.580322742462158, "rewards/rejected": -8.730870246887207, "step": 13453 }, { "epoch": 2.09, "learning_rate": 4.279625499247886e-06, "logits/chosen": -1.2321789264678955, "logits/rejected": -2.828639268875122, "logps/chosen": -91.92523193359375, "logps/rejected": -308.6322021484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.206395149230957, "rewards/margins": 8.180635452270508, "rewards/rejected": -12.387030601501465, "step": 13454 }, { "epoch": 2.09, "learning_rate": 4.2788920587167385e-06, "logits/chosen": -2.4672958850860596, "logits/rejected": -2.8302290439605713, "logps/chosen": -129.58595275878906, "logps/rejected": -394.23089599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.576972961425781, "rewards/margins": 12.914999961853027, "rewards/rejected": -18.491973876953125, "step": 13455 }, { "epoch": 2.09, "learning_rate": 4.27815861818559e-06, "logits/chosen": -2.3136281967163086, "logits/rejected": -2.483327865600586, "logps/chosen": -161.97940063476562, "logps/rejected": -360.14361572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3443779945373535, "rewards/margins": 12.709905624389648, "rewards/rejected": -15.054283142089844, "step": 13456 }, { "epoch": 2.09, "learning_rate": 4.277425177654442e-06, "logits/chosen": -2.4209768772125244, "logits/rejected": -2.49928617477417, "logps/chosen": -316.2004089355469, "logps/rejected": -526.9609985351562, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.05968713760376, "rewards/margins": 8.007953643798828, "rewards/rejected": -14.06764030456543, "step": 13457 }, { "epoch": 2.09, "learning_rate": 4.276691737123294e-06, "logits/chosen": -2.6505701541900635, "logits/rejected": -2.9378135204315186, "logps/chosen": -132.68182373046875, "logps/rejected": -359.26007080078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.509786128997803, "rewards/margins": 9.882888793945312, "rewards/rejected": -14.392675399780273, "step": 13458 }, { "epoch": 2.09, "learning_rate": 4.275958296592147e-06, "logits/chosen": -2.3402183055877686, "logits/rejected": -2.76279354095459, "logps/chosen": -328.1082763671875, "logps/rejected": -289.68414306640625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -8.284295082092285, "rewards/margins": 5.287273406982422, "rewards/rejected": -13.571568489074707, "step": 13459 }, { "epoch": 2.09, "learning_rate": 4.275224856060999e-06, "logits/chosen": -2.615825653076172, "logits/rejected": -2.1708357334136963, "logps/chosen": -555.69189453125, "logps/rejected": -536.2179565429688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.569530487060547, "rewards/margins": 7.500986099243164, "rewards/rejected": -15.070516586303711, "step": 13460 }, { "epoch": 2.09, "learning_rate": 4.2744914155298515e-06, "logits/chosen": -2.2722644805908203, "logits/rejected": -2.9738168716430664, "logps/chosen": -152.01437377929688, "logps/rejected": -290.14422607421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -2.9497318267822266, "rewards/margins": 9.493130683898926, "rewards/rejected": -12.442861557006836, "step": 13461 }, { "epoch": 2.09, "learning_rate": 4.273757974998703e-06, "logits/chosen": -2.053764581680298, "logits/rejected": -2.9777305126190186, "logps/chosen": -315.24676513671875, "logps/rejected": -416.89959716796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.959012746810913, "rewards/margins": 8.009981155395508, "rewards/rejected": -11.968994140625, "step": 13462 }, { "epoch": 2.09, "learning_rate": 4.273024534467555e-06, "logits/chosen": -2.350050687789917, "logits/rejected": -2.670246124267578, "logps/chosen": -149.2418670654297, "logps/rejected": -350.65771484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.689934730529785, "rewards/margins": 9.474905967712402, "rewards/rejected": -14.164840698242188, "step": 13463 }, { "epoch": 2.09, "learning_rate": 4.272291093936408e-06, "logits/chosen": -2.765850067138672, "logits/rejected": -2.681603193283081, "logps/chosen": -425.02545166015625, "logps/rejected": -279.9478759765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.675950527191162, "rewards/margins": 8.212265968322754, "rewards/rejected": -13.888216018676758, "step": 13464 }, { "epoch": 2.09, "learning_rate": 4.27155765340526e-06, "logits/chosen": -2.6525349617004395, "logits/rejected": -2.8376190662384033, "logps/chosen": -109.69454956054688, "logps/rejected": -265.1019592285156, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.4380269050598145, "rewards/margins": 6.324337482452393, "rewards/rejected": -10.762364387512207, "step": 13465 }, { "epoch": 2.09, "learning_rate": 4.270824212874112e-06, "logits/chosen": -1.4017727375030518, "logits/rejected": -2.3829357624053955, "logps/chosen": -190.21136474609375, "logps/rejected": -494.53009033203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.038651466369629, "rewards/margins": 10.958978652954102, "rewards/rejected": -14.997631072998047, "step": 13466 }, { "epoch": 2.09, "learning_rate": 4.270090772342964e-06, "logits/chosen": -1.9898884296417236, "logits/rejected": -2.7712395191192627, "logps/chosen": -175.77932739257812, "logps/rejected": -383.7653503417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.962944984436035, "rewards/margins": 12.210939407348633, "rewards/rejected": -17.173885345458984, "step": 13467 }, { "epoch": 2.09, "learning_rate": 4.269357331811816e-06, "logits/chosen": -2.5251922607421875, "logits/rejected": -2.7586963176727295, "logps/chosen": -57.560455322265625, "logps/rejected": -216.22711181640625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.162381172180176, "rewards/margins": 7.801504135131836, "rewards/rejected": -11.963885307312012, "step": 13468 }, { "epoch": 2.09, "learning_rate": 4.268623891280668e-06, "logits/chosen": -2.3398752212524414, "logits/rejected": -2.7595674991607666, "logps/chosen": -125.08794403076172, "logps/rejected": -211.87533569335938, "loss": 0.2887, "rewards/accuracies": 1.0, "rewards/chosen": -6.558241844177246, "rewards/margins": 3.968407154083252, "rewards/rejected": -10.526649475097656, "step": 13469 }, { "epoch": 2.09, "learning_rate": 4.26789045074952e-06, "logits/chosen": -1.725466012954712, "logits/rejected": -2.9247899055480957, "logps/chosen": -136.28616333007812, "logps/rejected": -362.8371887207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1559019088745117, "rewards/margins": 10.816827774047852, "rewards/rejected": -13.97273063659668, "step": 13470 }, { "epoch": 2.1, "learning_rate": 4.267157010218372e-06, "logits/chosen": -1.5554441213607788, "logits/rejected": -2.944678544998169, "logps/chosen": -87.43070983886719, "logps/rejected": -439.9387512207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.134422302246094, "rewards/margins": 9.513792037963867, "rewards/rejected": -13.648214340209961, "step": 13471 }, { "epoch": 2.1, "learning_rate": 4.266423569687225e-06, "logits/chosen": -2.895953893661499, "logits/rejected": -2.380523920059204, "logps/chosen": -145.67547607421875, "logps/rejected": -156.11669921875, "loss": 0.2673, "rewards/accuracies": 1.0, "rewards/chosen": -3.9274964332580566, "rewards/margins": 3.368248224258423, "rewards/rejected": -7.2957444190979, "step": 13472 }, { "epoch": 2.1, "learning_rate": 4.265690129156077e-06, "logits/chosen": -1.908883810043335, "logits/rejected": -2.926225185394287, "logps/chosen": -207.54861450195312, "logps/rejected": -394.04254150390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.276361465454102, "rewards/margins": 7.781377792358398, "rewards/rejected": -15.0577392578125, "step": 13473 }, { "epoch": 2.1, "learning_rate": 4.2649566886249285e-06, "logits/chosen": -2.019284725189209, "logits/rejected": -3.037160634994507, "logps/chosen": -118.98504638671875, "logps/rejected": -546.9116821289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.152336597442627, "rewards/margins": 12.705469131469727, "rewards/rejected": -16.857805252075195, "step": 13474 }, { "epoch": 2.1, "learning_rate": 4.26422324809378e-06, "logits/chosen": -2.7192893028259277, "logits/rejected": -2.2975172996520996, "logps/chosen": -291.5489196777344, "logps/rejected": -281.8553771972656, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.4310102462768555, "rewards/margins": 7.204222679138184, "rewards/rejected": -11.635232925415039, "step": 13475 }, { "epoch": 2.1, "learning_rate": 4.263489807562632e-06, "logits/chosen": -2.1540868282318115, "logits/rejected": -2.7614800930023193, "logps/chosen": -97.1854248046875, "logps/rejected": -288.7610168457031, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.363124370574951, "rewards/margins": 8.920341491699219, "rewards/rejected": -12.283466339111328, "step": 13476 }, { "epoch": 2.1, "learning_rate": 4.262756367031485e-06, "logits/chosen": -2.3099679946899414, "logits/rejected": -2.9015183448791504, "logps/chosen": -200.46490478515625, "logps/rejected": -413.49237060546875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.65058708190918, "rewards/margins": 7.7500481605529785, "rewards/rejected": -14.400634765625, "step": 13477 }, { "epoch": 2.1, "learning_rate": 4.262022926500338e-06, "logits/chosen": -2.974038600921631, "logits/rejected": -3.1782314777374268, "logps/chosen": -86.39920043945312, "logps/rejected": -165.2060546875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -6.44062614440918, "rewards/margins": 5.1206560134887695, "rewards/rejected": -11.561283111572266, "step": 13478 }, { "epoch": 2.1, "learning_rate": 4.2612894859691895e-06, "logits/chosen": -2.5297658443450928, "logits/rejected": -2.571829319000244, "logps/chosen": -243.31834411621094, "logps/rejected": -510.61505126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.264333724975586, "rewards/margins": 11.522932052612305, "rewards/rejected": -16.78726577758789, "step": 13479 }, { "epoch": 2.1, "learning_rate": 4.2605560454380414e-06, "logits/chosen": -1.8781551122665405, "logits/rejected": -2.837193012237549, "logps/chosen": -88.37045288085938, "logps/rejected": -299.6123046875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.858246326446533, "rewards/margins": 8.368656158447266, "rewards/rejected": -14.22690200805664, "step": 13480 }, { "epoch": 2.1, "learning_rate": 4.259822604906894e-06, "logits/chosen": -1.1647922992706299, "logits/rejected": -2.903454303741455, "logps/chosen": -68.84872436523438, "logps/rejected": -422.32464599609375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.909013748168945, "rewards/margins": 6.937350273132324, "rewards/rejected": -11.84636402130127, "step": 13481 }, { "epoch": 2.1, "learning_rate": 4.259089164375746e-06, "logits/chosen": -2.1645150184631348, "logits/rejected": -2.8214049339294434, "logps/chosen": -150.30296325683594, "logps/rejected": -258.05572509765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -0.8835238218307495, "rewards/margins": 10.738128662109375, "rewards/rejected": -11.621652603149414, "step": 13482 }, { "epoch": 2.1, "learning_rate": 4.258355723844598e-06, "logits/chosen": -2.4178335666656494, "logits/rejected": -2.9994242191314697, "logps/chosen": -67.29660034179688, "logps/rejected": -283.63525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7341456413269043, "rewards/margins": 11.949185371398926, "rewards/rejected": -15.683330535888672, "step": 13483 }, { "epoch": 2.1, "learning_rate": 4.25762228331345e-06, "logits/chosen": -2.7530555725097656, "logits/rejected": -2.252026081085205, "logps/chosen": -448.71923828125, "logps/rejected": -453.4024658203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.034367561340332, "rewards/margins": 9.564438819885254, "rewards/rejected": -16.598806381225586, "step": 13484 }, { "epoch": 2.1, "learning_rate": 4.256888842782302e-06, "logits/chosen": -1.5473477840423584, "logits/rejected": -2.750401735305786, "logps/chosen": -80.69149017333984, "logps/rejected": -390.837646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.793771982192993, "rewards/margins": 9.903280258178711, "rewards/rejected": -12.697052955627441, "step": 13485 }, { "epoch": 2.1, "learning_rate": 4.256155402251154e-06, "logits/chosen": -2.5047333240509033, "logits/rejected": -2.4056859016418457, "logps/chosen": -152.01947021484375, "logps/rejected": -216.56838989257812, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.7399444580078125, "rewards/margins": 5.6319475173950195, "rewards/rejected": -11.371891975402832, "step": 13486 }, { "epoch": 2.1, "learning_rate": 4.255421961720006e-06, "logits/chosen": -0.8350979089736938, "logits/rejected": -2.81219744682312, "logps/chosen": -142.51705932617188, "logps/rejected": -715.4886474609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.809796333312988, "rewards/margins": 7.477801322937012, "rewards/rejected": -14.28759765625, "step": 13487 }, { "epoch": 2.1, "learning_rate": 4.254688521188858e-06, "logits/chosen": -2.4813315868377686, "logits/rejected": -2.8593926429748535, "logps/chosen": -304.4574890136719, "logps/rejected": -578.3688354492188, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.155921936035156, "rewards/margins": 9.076115608215332, "rewards/rejected": -13.232038497924805, "step": 13488 }, { "epoch": 2.1, "learning_rate": 4.25395508065771e-06, "logits/chosen": -2.1263391971588135, "logits/rejected": -2.5119802951812744, "logps/chosen": -432.29107666015625, "logps/rejected": -559.0291748046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -9.457559585571289, "rewards/margins": 11.80996322631836, "rewards/rejected": -21.26752471923828, "step": 13489 }, { "epoch": 2.1, "learning_rate": 4.253221640126563e-06, "logits/chosen": -2.1175644397735596, "logits/rejected": -2.780324697494507, "logps/chosen": -167.2679443359375, "logps/rejected": -641.7108154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.01207160949707, "rewards/margins": 15.279040336608887, "rewards/rejected": -19.29111099243164, "step": 13490 }, { "epoch": 2.1, "learning_rate": 4.252488199595415e-06, "logits/chosen": -2.1210741996765137, "logits/rejected": -3.14202618598938, "logps/chosen": -205.51263427734375, "logps/rejected": -519.1502685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.77292537689209, "rewards/margins": 13.073506355285645, "rewards/rejected": -17.846431732177734, "step": 13491 }, { "epoch": 2.1, "learning_rate": 4.2517547590642665e-06, "logits/chosen": -2.831871747970581, "logits/rejected": -1.5773404836654663, "logps/chosen": -219.17086791992188, "logps/rejected": -241.74227905273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1108298301696777, "rewards/margins": 11.189611434936523, "rewards/rejected": -13.30044174194336, "step": 13492 }, { "epoch": 2.1, "learning_rate": 4.251021318533118e-06, "logits/chosen": -1.327409267425537, "logits/rejected": -2.166232109069824, "logps/chosen": -339.9345703125, "logps/rejected": -847.477783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.72265625, "rewards/margins": 15.991260528564453, "rewards/rejected": -23.713916778564453, "step": 13493 }, { "epoch": 2.1, "learning_rate": 4.250287878001971e-06, "logits/chosen": -2.4532365798950195, "logits/rejected": -2.974012851715088, "logps/chosen": -242.83921813964844, "logps/rejected": -272.46295166015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.693403720855713, "rewards/margins": 8.065767288208008, "rewards/rejected": -11.759170532226562, "step": 13494 }, { "epoch": 2.1, "learning_rate": 4.249554437470824e-06, "logits/chosen": -2.1129133701324463, "logits/rejected": -2.733938455581665, "logps/chosen": -205.708740234375, "logps/rejected": -420.574951171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.839052438735962, "rewards/margins": 8.30397891998291, "rewards/rejected": -12.143031120300293, "step": 13495 }, { "epoch": 2.1, "learning_rate": 4.248820996939676e-06, "logits/chosen": -1.3726462125778198, "logits/rejected": -2.748900890350342, "logps/chosen": -193.60247802734375, "logps/rejected": -414.9609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.810097694396973, "rewards/margins": 7.838184356689453, "rewards/rejected": -14.648282051086426, "step": 13496 }, { "epoch": 2.1, "learning_rate": 4.248087556408528e-06, "logits/chosen": -1.9422413110733032, "logits/rejected": -2.871791124343872, "logps/chosen": -76.3807601928711, "logps/rejected": -315.0894775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7610630989074707, "rewards/margins": 10.011358261108398, "rewards/rejected": -13.772421836853027, "step": 13497 }, { "epoch": 2.1, "learning_rate": 4.2473541158773795e-06, "logits/chosen": -1.8865717649459839, "logits/rejected": -2.7884163856506348, "logps/chosen": -159.3729705810547, "logps/rejected": -379.66143798828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.9714536666870117, "rewards/margins": 9.03597640991211, "rewards/rejected": -13.007429122924805, "step": 13498 }, { "epoch": 2.1, "learning_rate": 4.246620675346232e-06, "logits/chosen": -1.9505990743637085, "logits/rejected": -3.081510543823242, "logps/chosen": -145.42787170410156, "logps/rejected": -529.0933227539062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.427197456359863, "rewards/margins": 14.015913009643555, "rewards/rejected": -21.443111419677734, "step": 13499 }, { "epoch": 2.1, "learning_rate": 4.245887234815084e-06, "logits/chosen": -1.8416106700897217, "logits/rejected": -2.715404748916626, "logps/chosen": -128.67996215820312, "logps/rejected": -370.3341064453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.8810954093933105, "rewards/margins": 7.498676300048828, "rewards/rejected": -11.379772186279297, "step": 13500 }, { "epoch": 2.1, "learning_rate": 4.245153794283936e-06, "logits/chosen": -2.6289212703704834, "logits/rejected": -2.3312768936157227, "logps/chosen": -172.9761199951172, "logps/rejected": -278.08074951171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.5643930435180664, "rewards/margins": 7.590516090393066, "rewards/rejected": -11.154909133911133, "step": 13501 }, { "epoch": 2.1, "learning_rate": 4.244420353752788e-06, "logits/chosen": -2.8992292881011963, "logits/rejected": -3.3198630809783936, "logps/chosen": -81.98236083984375, "logps/rejected": -359.2540283203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.040091514587402, "rewards/margins": 7.733409881591797, "rewards/rejected": -12.773502349853516, "step": 13502 }, { "epoch": 2.1, "learning_rate": 4.24368691322164e-06, "logits/chosen": -2.3885891437530518, "logits/rejected": -3.1160459518432617, "logps/chosen": -302.8427734375, "logps/rejected": -412.9934997558594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1987173557281494, "rewards/margins": 9.999346733093262, "rewards/rejected": -13.198063850402832, "step": 13503 }, { "epoch": 2.1, "learning_rate": 4.2429534726904924e-06, "logits/chosen": -2.711463451385498, "logits/rejected": -2.837759494781494, "logps/chosen": -136.8081817626953, "logps/rejected": -384.9345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.320711135864258, "rewards/margins": 10.968241691589355, "rewards/rejected": -17.288951873779297, "step": 13504 }, { "epoch": 2.1, "learning_rate": 4.242220032159344e-06, "logits/chosen": -2.384467601776123, "logits/rejected": -3.219634532928467, "logps/chosen": -241.1177978515625, "logps/rejected": -484.237548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.471775531768799, "rewards/margins": 9.461816787719727, "rewards/rejected": -13.933592796325684, "step": 13505 }, { "epoch": 2.1, "learning_rate": 4.241486591628196e-06, "logits/chosen": -2.896043062210083, "logits/rejected": -3.108367681503296, "logps/chosen": -245.51869201660156, "logps/rejected": -425.18804931640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.810547828674316, "rewards/margins": 7.681201457977295, "rewards/rejected": -13.491748809814453, "step": 13506 }, { "epoch": 2.1, "learning_rate": 4.240753151097048e-06, "logits/chosen": -2.5443527698516846, "logits/rejected": -2.526188611984253, "logps/chosen": -241.15341186523438, "logps/rejected": -304.13360595703125, "loss": 1.0102, "rewards/accuracies": 0.5, "rewards/chosen": -5.941763877868652, "rewards/margins": 4.98431396484375, "rewards/rejected": -10.926077842712402, "step": 13507 }, { "epoch": 2.1, "learning_rate": 4.240019710565901e-06, "logits/chosen": -2.654355049133301, "logits/rejected": -2.8092620372772217, "logps/chosen": -145.97457885742188, "logps/rejected": -291.797119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.404325485229492, "rewards/margins": 9.412763595581055, "rewards/rejected": -13.817089080810547, "step": 13508 }, { "epoch": 2.1, "learning_rate": 4.239286270034753e-06, "logits/chosen": -1.7784637212753296, "logits/rejected": -2.4919776916503906, "logps/chosen": -112.64315795898438, "logps/rejected": -417.8624267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.792149066925049, "rewards/margins": 11.51157283782959, "rewards/rejected": -16.303722381591797, "step": 13509 }, { "epoch": 2.1, "learning_rate": 4.2385528295036045e-06, "logits/chosen": -2.679305076599121, "logits/rejected": -2.8792366981506348, "logps/chosen": -180.48724365234375, "logps/rejected": -184.39035034179688, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -3.1329338550567627, "rewards/margins": 4.7719316482543945, "rewards/rejected": -7.904865264892578, "step": 13510 }, { "epoch": 2.1, "learning_rate": 4.237819388972457e-06, "logits/chosen": -2.3239028453826904, "logits/rejected": -2.7284162044525146, "logps/chosen": -254.52157592773438, "logps/rejected": -335.1444091796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4029908180236816, "rewards/margins": 10.539968490600586, "rewards/rejected": -13.94295883178711, "step": 13511 }, { "epoch": 2.1, "learning_rate": 4.23708594844131e-06, "logits/chosen": -2.2705183029174805, "logits/rejected": -2.6062822341918945, "logps/chosen": -621.247802734375, "logps/rejected": -571.1990966796875, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/chosen": -9.076305389404297, "rewards/margins": 6.303478717803955, "rewards/rejected": -15.379783630371094, "step": 13512 }, { "epoch": 2.1, "learning_rate": 4.236352507910162e-06, "logits/chosen": -1.8472652435302734, "logits/rejected": -3.0015299320220947, "logps/chosen": -213.8438720703125, "logps/rejected": -602.6864013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.498502731323242, "rewards/margins": 9.703873634338379, "rewards/rejected": -17.202375411987305, "step": 13513 }, { "epoch": 2.1, "learning_rate": 4.235619067379014e-06, "logits/chosen": -2.7263295650482178, "logits/rejected": -2.715888738632202, "logps/chosen": -304.52423095703125, "logps/rejected": -291.3600158691406, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -9.71662425994873, "rewards/margins": 6.8681793212890625, "rewards/rejected": -16.58480453491211, "step": 13514 }, { "epoch": 2.1, "learning_rate": 4.234885626847866e-06, "logits/chosen": -2.999959945678711, "logits/rejected": -2.6541383266448975, "logps/chosen": -464.54547119140625, "logps/rejected": -393.6825256347656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.143972873687744, "rewards/margins": 9.085596084594727, "rewards/rejected": -12.229568481445312, "step": 13515 }, { "epoch": 2.1, "learning_rate": 4.2341521863167175e-06, "logits/chosen": -2.8820199966430664, "logits/rejected": -3.148176670074463, "logps/chosen": -107.75896453857422, "logps/rejected": -179.3193359375, "loss": 0.1258, "rewards/accuracies": 1.0, "rewards/chosen": -3.6432485580444336, "rewards/margins": 5.435942649841309, "rewards/rejected": -9.079191207885742, "step": 13516 }, { "epoch": 2.1, "learning_rate": 4.23341874578557e-06, "logits/chosen": -2.783510684967041, "logits/rejected": -2.532627582550049, "logps/chosen": -169.12799072265625, "logps/rejected": -242.947509765625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -6.354820728302002, "rewards/margins": 7.3324384689331055, "rewards/rejected": -13.687259674072266, "step": 13517 }, { "epoch": 2.1, "learning_rate": 4.232685305254422e-06, "logits/chosen": -2.111966371536255, "logits/rejected": -2.184030294418335, "logps/chosen": -559.13330078125, "logps/rejected": -483.6651611328125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -7.366305351257324, "rewards/margins": 7.10548734664917, "rewards/rejected": -14.471793174743652, "step": 13518 }, { "epoch": 2.1, "learning_rate": 4.231951864723274e-06, "logits/chosen": -2.2184255123138428, "logits/rejected": -2.85756778717041, "logps/chosen": -131.00949096679688, "logps/rejected": -279.83721923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.100193977355957, "rewards/margins": 12.243021965026855, "rewards/rejected": -16.343215942382812, "step": 13519 }, { "epoch": 2.1, "learning_rate": 4.231218424192126e-06, "logits/chosen": -2.749993085861206, "logits/rejected": -1.675581932067871, "logps/chosen": -321.848388671875, "logps/rejected": -298.56890869140625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.007933139801025, "rewards/margins": 7.799406051635742, "rewards/rejected": -13.80733871459961, "step": 13520 }, { "epoch": 2.1, "learning_rate": 4.230484983660979e-06, "logits/chosen": -2.0343587398529053, "logits/rejected": -2.7754571437835693, "logps/chosen": -213.81793212890625, "logps/rejected": -378.75970458984375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -6.431405067443848, "rewards/margins": 9.192705154418945, "rewards/rejected": -15.62411117553711, "step": 13521 }, { "epoch": 2.1, "learning_rate": 4.2297515431298305e-06, "logits/chosen": -2.219792604446411, "logits/rejected": -2.5911335945129395, "logps/chosen": -334.25811767578125, "logps/rejected": -412.866455078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.9102020263671875, "rewards/margins": 7.85622501373291, "rewards/rejected": -13.766426086425781, "step": 13522 }, { "epoch": 2.1, "learning_rate": 4.229018102598682e-06, "logits/chosen": -1.4830387830734253, "logits/rejected": -2.5989668369293213, "logps/chosen": -137.0104217529297, "logps/rejected": -484.3755187988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.301265716552734, "rewards/margins": 12.027850151062012, "rewards/rejected": -19.329116821289062, "step": 13523 }, { "epoch": 2.1, "learning_rate": 4.228284662067534e-06, "logits/chosen": -1.5659806728363037, "logits/rejected": -2.766317129135132, "logps/chosen": -156.42578125, "logps/rejected": -397.56317138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0777430534362793, "rewards/margins": 9.618824005126953, "rewards/rejected": -12.69656753540039, "step": 13524 }, { "epoch": 2.1, "learning_rate": 4.227551221536386e-06, "logits/chosen": -2.248627185821533, "logits/rejected": -2.6308813095092773, "logps/chosen": -227.43572998046875, "logps/rejected": -269.067626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0766830444335938, "rewards/margins": 10.230504989624023, "rewards/rejected": -13.307188034057617, "step": 13525 }, { "epoch": 2.1, "learning_rate": 4.226817781005239e-06, "logits/chosen": -2.389874219894409, "logits/rejected": -2.621452569961548, "logps/chosen": -118.23385620117188, "logps/rejected": -344.2039794921875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.998078346252441, "rewards/margins": 6.5122551918029785, "rewards/rejected": -11.510334014892578, "step": 13526 }, { "epoch": 2.1, "learning_rate": 4.226084340474091e-06, "logits/chosen": -2.371198892593384, "logits/rejected": -2.696213722229004, "logps/chosen": -157.17477416992188, "logps/rejected": -443.1480407714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9062609672546387, "rewards/margins": 15.395161628723145, "rewards/rejected": -18.301422119140625, "step": 13527 }, { "epoch": 2.1, "learning_rate": 4.2253508999429434e-06, "logits/chosen": -2.8713276386260986, "logits/rejected": -2.78580904006958, "logps/chosen": -369.79388427734375, "logps/rejected": -393.4813232421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.782771587371826, "rewards/margins": 8.579000473022461, "rewards/rejected": -14.361772537231445, "step": 13528 }, { "epoch": 2.1, "learning_rate": 4.224617459411795e-06, "logits/chosen": -2.6713147163391113, "logits/rejected": -1.6850764751434326, "logps/chosen": -651.9496459960938, "logps/rejected": -371.92291259765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.174129009246826, "rewards/margins": 7.747251510620117, "rewards/rejected": -10.921380996704102, "step": 13529 }, { "epoch": 2.1, "learning_rate": 4.223884018880648e-06, "logits/chosen": -2.775641679763794, "logits/rejected": -2.3055570125579834, "logps/chosen": -328.7310485839844, "logps/rejected": -316.63385009765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.207256317138672, "rewards/margins": 9.405370712280273, "rewards/rejected": -12.612627029418945, "step": 13530 }, { "epoch": 2.1, "learning_rate": 4.2231505783495e-06, "logits/chosen": -2.7015936374664307, "logits/rejected": -2.8780949115753174, "logps/chosen": -207.76182556152344, "logps/rejected": -380.21185302734375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -6.294029235839844, "rewards/margins": 6.532654762268066, "rewards/rejected": -12.82668399810791, "step": 13531 }, { "epoch": 2.1, "learning_rate": 4.222417137818352e-06, "logits/chosen": -2.1539525985717773, "logits/rejected": -3.0408706665039062, "logps/chosen": -106.68557739257812, "logps/rejected": -351.0980224609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.7454395294189453, "rewards/margins": 9.466241836547852, "rewards/rejected": -12.211681365966797, "step": 13532 }, { "epoch": 2.1, "learning_rate": 4.221683697287204e-06, "logits/chosen": -1.9833998680114746, "logits/rejected": -2.8480265140533447, "logps/chosen": -174.445068359375, "logps/rejected": -635.61962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.059144020080566, "rewards/margins": 12.169960975646973, "rewards/rejected": -18.22910499572754, "step": 13533 }, { "epoch": 2.1, "learning_rate": 4.2209502567560556e-06, "logits/chosen": -1.2722429037094116, "logits/rejected": -2.5813965797424316, "logps/chosen": -95.87496185302734, "logps/rejected": -293.6050109863281, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.076533317565918, "rewards/margins": 6.468437194824219, "rewards/rejected": -12.544970512390137, "step": 13534 }, { "epoch": 2.1, "learning_rate": 4.220216816224908e-06, "logits/chosen": -2.557349681854248, "logits/rejected": -2.297562837600708, "logps/chosen": -448.67047119140625, "logps/rejected": -475.0109558105469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.186372756958008, "rewards/margins": 9.494501113891602, "rewards/rejected": -14.68087387084961, "step": 13535 }, { "epoch": 2.11, "learning_rate": 4.21948337569376e-06, "logits/chosen": -2.195484161376953, "logits/rejected": -3.1579177379608154, "logps/chosen": -147.08734130859375, "logps/rejected": -398.103759765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.568181991577148, "rewards/margins": 7.300994873046875, "rewards/rejected": -15.869176864624023, "step": 13536 }, { "epoch": 2.11, "learning_rate": 4.218749935162612e-06, "logits/chosen": -2.7806403636932373, "logits/rejected": -2.877113103866577, "logps/chosen": -218.32579040527344, "logps/rejected": -476.5221252441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.392763614654541, "rewards/margins": 13.123515129089355, "rewards/rejected": -14.516279220581055, "step": 13537 }, { "epoch": 2.11, "learning_rate": 4.218016494631464e-06, "logits/chosen": -1.9350838661193848, "logits/rejected": -2.0874698162078857, "logps/chosen": -494.86761474609375, "logps/rejected": -650.112060546875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -9.089313507080078, "rewards/margins": 8.544872283935547, "rewards/rejected": -17.634185791015625, "step": 13538 }, { "epoch": 2.11, "learning_rate": 4.217283054100317e-06, "logits/chosen": -2.7855284214019775, "logits/rejected": -3.0321695804595947, "logps/chosen": -196.8699493408203, "logps/rejected": -387.0317077636719, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.049540042877197, "rewards/margins": 7.088255882263184, "rewards/rejected": -11.137796401977539, "step": 13539 }, { "epoch": 2.11, "learning_rate": 4.2165496135691685e-06, "logits/chosen": -3.1143264770507812, "logits/rejected": -2.6597869396209717, "logps/chosen": -246.25946044921875, "logps/rejected": -238.38223266601562, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.79387366771698, "rewards/margins": 7.379054069519043, "rewards/rejected": -9.172927856445312, "step": 13540 }, { "epoch": 2.11, "learning_rate": 4.21581617303802e-06, "logits/chosen": -2.571420192718506, "logits/rejected": -1.43413245677948, "logps/chosen": -475.2622985839844, "logps/rejected": -273.32244873046875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -3.8283209800720215, "rewards/margins": 5.006773471832275, "rewards/rejected": -8.835094451904297, "step": 13541 }, { "epoch": 2.11, "learning_rate": 4.215082732506872e-06, "logits/chosen": -2.752373456954956, "logits/rejected": -2.1251654624938965, "logps/chosen": -257.80596923828125, "logps/rejected": -277.9384460449219, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.233001708984375, "rewards/margins": 8.680562973022461, "rewards/rejected": -13.913564682006836, "step": 13542 }, { "epoch": 2.11, "learning_rate": 4.214349291975724e-06, "logits/chosen": -2.9054224491119385, "logits/rejected": -2.9516713619232178, "logps/chosen": -168.4684600830078, "logps/rejected": -313.99127197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.160920143127441, "rewards/margins": 9.534307479858398, "rewards/rejected": -13.69522762298584, "step": 13543 }, { "epoch": 2.11, "learning_rate": 4.213615851444577e-06, "logits/chosen": -3.092880964279175, "logits/rejected": -3.208142042160034, "logps/chosen": -116.41240692138672, "logps/rejected": -278.1838684082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.794609069824219, "rewards/margins": 10.493656158447266, "rewards/rejected": -15.288264274597168, "step": 13544 }, { "epoch": 2.11, "learning_rate": 4.21288241091343e-06, "logits/chosen": -2.8391611576080322, "logits/rejected": -3.1393356323242188, "logps/chosen": -62.524627685546875, "logps/rejected": -240.60760498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7985243797302246, "rewards/margins": 10.974501609802246, "rewards/rejected": -13.773025512695312, "step": 13545 }, { "epoch": 2.11, "learning_rate": 4.2121489703822815e-06, "logits/chosen": -2.7320306301116943, "logits/rejected": -2.737663984298706, "logps/chosen": -258.5797119140625, "logps/rejected": -337.165771484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.5186805725097656, "rewards/margins": 8.302556991577148, "rewards/rejected": -10.821237564086914, "step": 13546 }, { "epoch": 2.11, "learning_rate": 4.211415529851133e-06, "logits/chosen": -2.739079475402832, "logits/rejected": -2.629176139831543, "logps/chosen": -174.75045776367188, "logps/rejected": -172.72027587890625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -6.228855133056641, "rewards/margins": 5.97105598449707, "rewards/rejected": -12.199911117553711, "step": 13547 }, { "epoch": 2.11, "learning_rate": 4.210682089319986e-06, "logits/chosen": -0.8769434094429016, "logits/rejected": -2.2379963397979736, "logps/chosen": -104.24298095703125, "logps/rejected": -311.505126953125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -6.559091091156006, "rewards/margins": 8.322378158569336, "rewards/rejected": -14.8814697265625, "step": 13548 }, { "epoch": 2.11, "learning_rate": 4.209948648788838e-06, "logits/chosen": -1.913258671760559, "logits/rejected": -2.716905355453491, "logps/chosen": -565.6351928710938, "logps/rejected": -780.851318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.348617553710938, "rewards/margins": 10.926109313964844, "rewards/rejected": -19.27472686767578, "step": 13549 }, { "epoch": 2.11, "learning_rate": 4.20921520825769e-06, "logits/chosen": -2.7922239303588867, "logits/rejected": -2.7143893241882324, "logps/chosen": -464.529052734375, "logps/rejected": -508.99725341796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2282142639160156, "rewards/margins": 9.278785705566406, "rewards/rejected": -12.507000923156738, "step": 13550 }, { "epoch": 2.11, "learning_rate": 4.208481767726542e-06, "logits/chosen": -2.746812105178833, "logits/rejected": -2.3520328998565674, "logps/chosen": -475.7890930175781, "logps/rejected": -464.06549072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.334549903869629, "rewards/margins": 11.925565719604492, "rewards/rejected": -16.260116577148438, "step": 13551 }, { "epoch": 2.11, "learning_rate": 4.207748327195394e-06, "logits/chosen": -2.56142258644104, "logits/rejected": -2.747164249420166, "logps/chosen": -647.7473754882812, "logps/rejected": -641.1309204101562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.09417200088501, "rewards/margins": 8.267294883728027, "rewards/rejected": -13.361467361450195, "step": 13552 }, { "epoch": 2.11, "learning_rate": 4.207014886664246e-06, "logits/chosen": -3.1027166843414307, "logits/rejected": -2.45758056640625, "logps/chosen": -295.76348876953125, "logps/rejected": -301.04425048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.451602935791016, "rewards/margins": 10.408027648925781, "rewards/rejected": -14.859630584716797, "step": 13553 }, { "epoch": 2.11, "learning_rate": 4.206281446133098e-06, "logits/chosen": -2.6831836700439453, "logits/rejected": -2.3600687980651855, "logps/chosen": -502.70068359375, "logps/rejected": -452.0151062011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.284597873687744, "rewards/margins": 11.124883651733398, "rewards/rejected": -15.409481048583984, "step": 13554 }, { "epoch": 2.11, "learning_rate": 4.20554800560195e-06, "logits/chosen": -2.2319860458374023, "logits/rejected": -2.80668306350708, "logps/chosen": -164.15313720703125, "logps/rejected": -317.8746643066406, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.325841903686523, "rewards/margins": 8.028205871582031, "rewards/rejected": -13.354047775268555, "step": 13555 }, { "epoch": 2.11, "learning_rate": 4.204814565070802e-06, "logits/chosen": -0.8321822881698608, "logits/rejected": -2.815326452255249, "logps/chosen": -118.59522247314453, "logps/rejected": -665.4625854492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.081142425537109, "rewards/margins": 11.32618236541748, "rewards/rejected": -16.407325744628906, "step": 13556 }, { "epoch": 2.11, "learning_rate": 4.204081124539655e-06, "logits/chosen": -0.9097719788551331, "logits/rejected": -2.642080545425415, "logps/chosen": -176.0261688232422, "logps/rejected": -694.66064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.835960865020752, "rewards/margins": 12.47392463684082, "rewards/rejected": -18.309885025024414, "step": 13557 }, { "epoch": 2.11, "learning_rate": 4.2033476840085066e-06, "logits/chosen": -2.9938154220581055, "logits/rejected": -3.0031166076660156, "logps/chosen": -344.708740234375, "logps/rejected": -435.4491882324219, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.769436359405518, "rewards/margins": 6.679923057556152, "rewards/rejected": -13.449359893798828, "step": 13558 }, { "epoch": 2.11, "learning_rate": 4.2026142434773584e-06, "logits/chosen": -2.687239646911621, "logits/rejected": -2.9126713275909424, "logps/chosen": -225.96481323242188, "logps/rejected": -451.8332214355469, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -3.586949348449707, "rewards/margins": 11.742927551269531, "rewards/rejected": -15.329877853393555, "step": 13559 }, { "epoch": 2.11, "learning_rate": 4.20188080294621e-06, "logits/chosen": -2.8595635890960693, "logits/rejected": -2.938328742980957, "logps/chosen": -123.29913330078125, "logps/rejected": -239.64744567871094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.741501569747925, "rewards/margins": 9.903999328613281, "rewards/rejected": -13.645501136779785, "step": 13560 }, { "epoch": 2.11, "learning_rate": 4.201147362415063e-06, "logits/chosen": -2.864241361618042, "logits/rejected": -3.2632029056549072, "logps/chosen": -51.31946563720703, "logps/rejected": -200.6617889404297, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5602874755859375, "rewards/margins": 9.804193496704102, "rewards/rejected": -13.364480972290039, "step": 13561 }, { "epoch": 2.11, "learning_rate": 4.200413921883916e-06, "logits/chosen": -1.929704189300537, "logits/rejected": -2.7731049060821533, "logps/chosen": -323.64849853515625, "logps/rejected": -500.12255859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.092877388000488, "rewards/margins": 9.185078620910645, "rewards/rejected": -16.277956008911133, "step": 13562 }, { "epoch": 2.11, "learning_rate": 4.199680481352768e-06, "logits/chosen": -2.0029001235961914, "logits/rejected": -3.0120551586151123, "logps/chosen": -172.62716674804688, "logps/rejected": -379.070068359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.471466064453125, "rewards/margins": 8.507526397705078, "rewards/rejected": -12.978992462158203, "step": 13563 }, { "epoch": 2.11, "learning_rate": 4.1989470408216195e-06, "logits/chosen": -1.90931236743927, "logits/rejected": -2.9229440689086914, "logps/chosen": -83.69754028320312, "logps/rejected": -277.317626953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.8930768966674805, "rewards/margins": 9.324945449829102, "rewards/rejected": -13.218022346496582, "step": 13564 }, { "epoch": 2.11, "learning_rate": 4.198213600290471e-06, "logits/chosen": -1.54816472530365, "logits/rejected": -2.898123025894165, "logps/chosen": -210.767333984375, "logps/rejected": -495.63714599609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.300832748413086, "rewards/margins": 10.45492935180664, "rewards/rejected": -15.755762100219727, "step": 13565 }, { "epoch": 2.11, "learning_rate": 4.197480159759324e-06, "logits/chosen": -2.843773126602173, "logits/rejected": -1.734005093574524, "logps/chosen": -241.27488708496094, "logps/rejected": -172.9115753173828, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.19463849067688, "rewards/margins": 9.991434097290039, "rewards/rejected": -12.186073303222656, "step": 13566 }, { "epoch": 2.11, "learning_rate": 4.196746719228176e-06, "logits/chosen": -3.063703775405884, "logits/rejected": -3.08258318901062, "logps/chosen": -153.8013153076172, "logps/rejected": -147.85858154296875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -2.509139060974121, "rewards/margins": 6.665914058685303, "rewards/rejected": -9.175052642822266, "step": 13567 }, { "epoch": 2.11, "learning_rate": 4.196013278697028e-06, "logits/chosen": -2.398333787918091, "logits/rejected": -2.86708664894104, "logps/chosen": -135.8287353515625, "logps/rejected": -373.569580078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.114350318908691, "rewards/margins": 9.618671417236328, "rewards/rejected": -14.73302173614502, "step": 13568 }, { "epoch": 2.11, "learning_rate": 4.19527983816588e-06, "logits/chosen": -1.9494261741638184, "logits/rejected": -2.9788360595703125, "logps/chosen": -128.37477111816406, "logps/rejected": -442.398681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.927654504776001, "rewards/margins": 10.242136001586914, "rewards/rejected": -13.169790267944336, "step": 13569 }, { "epoch": 2.11, "learning_rate": 4.1945463976347325e-06, "logits/chosen": -2.318680763244629, "logits/rejected": -3.119072675704956, "logps/chosen": -71.32272338867188, "logps/rejected": -219.49154663085938, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.27876615524292, "rewards/margins": 6.508139133453369, "rewards/rejected": -11.786905288696289, "step": 13570 }, { "epoch": 2.11, "learning_rate": 4.193812957103584e-06, "logits/chosen": -2.0682432651519775, "logits/rejected": -3.082040548324585, "logps/chosen": -161.85540771484375, "logps/rejected": -296.147705078125, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -8.557901382446289, "rewards/margins": 5.076432228088379, "rewards/rejected": -13.634333610534668, "step": 13571 }, { "epoch": 2.11, "learning_rate": 4.193079516572436e-06, "logits/chosen": -1.40012526512146, "logits/rejected": -2.2583680152893066, "logps/chosen": -100.49640655517578, "logps/rejected": -266.74688720703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.775134086608887, "rewards/margins": 8.622400283813477, "rewards/rejected": -13.397533416748047, "step": 13572 }, { "epoch": 2.11, "learning_rate": 4.192346076041288e-06, "logits/chosen": -1.596740484237671, "logits/rejected": -2.904066562652588, "logps/chosen": -307.71380615234375, "logps/rejected": -575.828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.868840217590332, "rewards/margins": 10.793891906738281, "rewards/rejected": -15.662732124328613, "step": 13573 }, { "epoch": 2.11, "learning_rate": 4.19161263551014e-06, "logits/chosen": -2.6715221405029297, "logits/rejected": -2.9631617069244385, "logps/chosen": -162.09811401367188, "logps/rejected": -431.59100341796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.909319877624512, "rewards/margins": 10.278584480285645, "rewards/rejected": -15.187904357910156, "step": 13574 }, { "epoch": 2.11, "learning_rate": 4.190879194978993e-06, "logits/chosen": -2.729243040084839, "logits/rejected": -2.7610456943511963, "logps/chosen": -53.107669830322266, "logps/rejected": -171.13165283203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.657442331314087, "rewards/margins": 8.448339462280273, "rewards/rejected": -12.105781555175781, "step": 13575 }, { "epoch": 2.11, "learning_rate": 4.190145754447845e-06, "logits/chosen": -2.4481475353240967, "logits/rejected": -1.2052593231201172, "logps/chosen": -221.2113037109375, "logps/rejected": -193.9552001953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -3.0042724609375, "rewards/margins": 8.873735427856445, "rewards/rejected": -11.878007888793945, "step": 13576 }, { "epoch": 2.11, "learning_rate": 4.1894123139166965e-06, "logits/chosen": -2.561403274536133, "logits/rejected": -2.7486484050750732, "logps/chosen": -70.27684020996094, "logps/rejected": -235.4003143310547, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.310417652130127, "rewards/margins": 9.528388977050781, "rewards/rejected": -12.83880615234375, "step": 13577 }, { "epoch": 2.11, "learning_rate": 4.188678873385549e-06, "logits/chosen": -1.879111647605896, "logits/rejected": -2.9116103649139404, "logps/chosen": -161.20919799804688, "logps/rejected": -505.64678955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.527719497680664, "rewards/margins": 14.212564468383789, "rewards/rejected": -20.740283966064453, "step": 13578 }, { "epoch": 2.11, "learning_rate": 4.187945432854402e-06, "logits/chosen": -2.8231325149536133, "logits/rejected": -2.7607340812683105, "logps/chosen": -274.50341796875, "logps/rejected": -414.4183044433594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -7.934708595275879, "rewards/margins": 8.603148460388184, "rewards/rejected": -16.537857055664062, "step": 13579 }, { "epoch": 2.11, "learning_rate": 4.187211992323254e-06, "logits/chosen": -0.3027937710285187, "logits/rejected": -2.194469451904297, "logps/chosen": -161.00308227539062, "logps/rejected": -941.725341796875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.585367202758789, "rewards/margins": 13.989869117736816, "rewards/rejected": -19.57523536682129, "step": 13580 }, { "epoch": 2.11, "learning_rate": 4.186478551792106e-06, "logits/chosen": -2.175811290740967, "logits/rejected": -2.599738359451294, "logps/chosen": -214.73236083984375, "logps/rejected": -356.4143981933594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.804075717926025, "rewards/margins": 9.643616676330566, "rewards/rejected": -14.44769287109375, "step": 13581 }, { "epoch": 2.11, "learning_rate": 4.1857451112609576e-06, "logits/chosen": -1.42951238155365, "logits/rejected": -2.877859592437744, "logps/chosen": -209.7368621826172, "logps/rejected": -813.0926513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.524024963378906, "rewards/margins": 11.21567153930664, "rewards/rejected": -18.739696502685547, "step": 13582 }, { "epoch": 2.11, "learning_rate": 4.1850116707298095e-06, "logits/chosen": -2.399289608001709, "logits/rejected": -2.586639404296875, "logps/chosen": -136.20375061035156, "logps/rejected": -287.34527587890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.165456771850586, "rewards/margins": 11.639948844909668, "rewards/rejected": -16.805404663085938, "step": 13583 }, { "epoch": 2.11, "learning_rate": 4.184278230198662e-06, "logits/chosen": -1.8071280717849731, "logits/rejected": -2.4715487957000732, "logps/chosen": -287.98419189453125, "logps/rejected": -445.5350341796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.076852798461914, "rewards/margins": 8.804601669311523, "rewards/rejected": -15.881454467773438, "step": 13584 }, { "epoch": 2.11, "learning_rate": 4.183544789667514e-06, "logits/chosen": -1.7551888227462769, "logits/rejected": -2.714836597442627, "logps/chosen": -153.42555236816406, "logps/rejected": -376.8917236328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.808535099029541, "rewards/margins": 8.908395767211914, "rewards/rejected": -12.716930389404297, "step": 13585 }, { "epoch": 2.11, "learning_rate": 4.182811349136366e-06, "logits/chosen": -2.7105519771575928, "logits/rejected": -1.8143353462219238, "logps/chosen": -164.12362670898438, "logps/rejected": -320.0522766113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.08101749420166, "rewards/margins": 11.442634582519531, "rewards/rejected": -14.523652076721191, "step": 13586 }, { "epoch": 2.11, "learning_rate": 4.182077908605218e-06, "logits/chosen": -2.1448142528533936, "logits/rejected": -2.4930756092071533, "logps/chosen": -172.26296997070312, "logps/rejected": -209.29379272460938, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -2.9934287071228027, "rewards/margins": 5.567809104919434, "rewards/rejected": -8.561238288879395, "step": 13587 }, { "epoch": 2.11, "learning_rate": 4.1813444680740705e-06, "logits/chosen": -2.7556986808776855, "logits/rejected": -2.925048351287842, "logps/chosen": -144.16737365722656, "logps/rejected": -368.8032531738281, "loss": 0.832, "rewards/accuracies": 0.5, "rewards/chosen": -8.856738090515137, "rewards/margins": 5.242983341217041, "rewards/rejected": -14.099721908569336, "step": 13588 }, { "epoch": 2.11, "learning_rate": 4.180611027542922e-06, "logits/chosen": -2.5660181045532227, "logits/rejected": -2.7291038036346436, "logps/chosen": -141.16403198242188, "logps/rejected": -318.24334716796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -2.454437255859375, "rewards/margins": 7.15067195892334, "rewards/rejected": -9.605109214782715, "step": 13589 }, { "epoch": 2.11, "learning_rate": 4.179877587011774e-06, "logits/chosen": -2.5543041229248047, "logits/rejected": -2.886395215988159, "logps/chosen": -75.60689544677734, "logps/rejected": -554.5498657226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.700366020202637, "rewards/margins": 14.259531021118164, "rewards/rejected": -18.959897994995117, "step": 13590 }, { "epoch": 2.11, "learning_rate": 4.179144146480626e-06, "logits/chosen": -3.2502150535583496, "logits/rejected": -2.82692813873291, "logps/chosen": -431.7590637207031, "logps/rejected": -346.1333923339844, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.5991902351379395, "rewards/margins": 7.506697177886963, "rewards/rejected": -12.105887413024902, "step": 13591 }, { "epoch": 2.11, "learning_rate": 4.178410705949478e-06, "logits/chosen": -2.8657288551330566, "logits/rejected": -3.1983389854431152, "logps/chosen": -159.98715209960938, "logps/rejected": -230.62625122070312, "loss": 0.0706, "rewards/accuracies": 1.0, "rewards/chosen": -7.419272422790527, "rewards/margins": 4.730401039123535, "rewards/rejected": -12.149673461914062, "step": 13592 }, { "epoch": 2.11, "learning_rate": 4.177677265418331e-06, "logits/chosen": -2.7040345668792725, "logits/rejected": -1.8512264490127563, "logps/chosen": -382.2913513183594, "logps/rejected": -441.8742980957031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.257294654846191, "rewards/margins": 7.850613594055176, "rewards/rejected": -17.107908248901367, "step": 13593 }, { "epoch": 2.11, "learning_rate": 4.176943824887183e-06, "logits/chosen": -2.500824213027954, "logits/rejected": -2.7679035663604736, "logps/chosen": -325.90460205078125, "logps/rejected": -382.64654541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.589986801147461, "rewards/margins": 12.275300025939941, "rewards/rejected": -16.86528778076172, "step": 13594 }, { "epoch": 2.11, "learning_rate": 4.176210384356035e-06, "logits/chosen": -2.4103317260742188, "logits/rejected": -2.86147403717041, "logps/chosen": -76.57377624511719, "logps/rejected": -381.98162841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.016592979431152, "rewards/margins": 10.901822090148926, "rewards/rejected": -15.918415069580078, "step": 13595 }, { "epoch": 2.11, "learning_rate": 4.175476943824887e-06, "logits/chosen": -2.4337430000305176, "logits/rejected": -2.637749195098877, "logps/chosen": -167.7761688232422, "logps/rejected": -356.72650146484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.700273036956787, "rewards/margins": 10.269760131835938, "rewards/rejected": -13.970033645629883, "step": 13596 }, { "epoch": 2.11, "learning_rate": 4.17474350329374e-06, "logits/chosen": -1.814091682434082, "logits/rejected": -2.930002450942993, "logps/chosen": -197.51805114746094, "logps/rejected": -326.71728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.669487476348877, "rewards/margins": 9.698781967163086, "rewards/rejected": -14.368268966674805, "step": 13597 }, { "epoch": 2.11, "learning_rate": 4.174010062762592e-06, "logits/chosen": -2.339111089706421, "logits/rejected": -2.7174370288848877, "logps/chosen": -243.13351440429688, "logps/rejected": -403.04840087890625, "loss": 0.1042, "rewards/accuracies": 1.0, "rewards/chosen": -9.099796295166016, "rewards/margins": 3.439375877380371, "rewards/rejected": -12.539173126220703, "step": 13598 }, { "epoch": 2.11, "learning_rate": 4.173276622231444e-06, "logits/chosen": -2.7181105613708496, "logits/rejected": -2.980476140975952, "logps/chosen": -134.73178100585938, "logps/rejected": -301.12823486328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.227797746658325, "rewards/margins": 7.731581687927246, "rewards/rejected": -9.959379196166992, "step": 13599 }, { "epoch": 2.12, "learning_rate": 4.172543181700296e-06, "logits/chosen": -2.3464813232421875, "logits/rejected": -3.0033576488494873, "logps/chosen": -143.1990966796875, "logps/rejected": -407.3475646972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.794647216796875, "rewards/margins": 11.53536605834961, "rewards/rejected": -15.330013275146484, "step": 13600 }, { "epoch": 2.12, "learning_rate": 4.1718097411691475e-06, "logits/chosen": -2.230973720550537, "logits/rejected": -2.8444292545318604, "logps/chosen": -284.53253173828125, "logps/rejected": -433.9889221191406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.055702209472656, "rewards/margins": 7.987321853637695, "rewards/rejected": -13.043024063110352, "step": 13601 }, { "epoch": 2.12, "learning_rate": 4.171076300638e-06, "logits/chosen": -2.3562562465667725, "logits/rejected": -2.664882183074951, "logps/chosen": -179.06246948242188, "logps/rejected": -278.2996826171875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -9.429376602172852, "rewards/margins": 6.145920276641846, "rewards/rejected": -15.575296401977539, "step": 13602 }, { "epoch": 2.12, "learning_rate": 4.170342860106852e-06, "logits/chosen": -1.8912771940231323, "logits/rejected": -2.2990126609802246, "logps/chosen": -115.67546081542969, "logps/rejected": -271.2598876953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.499215602874756, "rewards/margins": 9.364092826843262, "rewards/rejected": -13.863308906555176, "step": 13603 }, { "epoch": 2.12, "learning_rate": 4.169609419575704e-06, "logits/chosen": -1.8446369171142578, "logits/rejected": -2.4607720375061035, "logps/chosen": -218.561279296875, "logps/rejected": -510.06622314453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.438677787780762, "rewards/margins": 12.742700576782227, "rewards/rejected": -21.181377410888672, "step": 13604 }, { "epoch": 2.12, "learning_rate": 4.168875979044556e-06, "logits/chosen": -2.282503366470337, "logits/rejected": -2.575971841812134, "logps/chosen": -232.60899353027344, "logps/rejected": -206.87255859375, "loss": 0.0825, "rewards/accuracies": 1.0, "rewards/chosen": -4.088125705718994, "rewards/margins": 4.019896507263184, "rewards/rejected": -8.108022689819336, "step": 13605 }, { "epoch": 2.12, "learning_rate": 4.168142538513409e-06, "logits/chosen": -2.6235315799713135, "logits/rejected": -2.3687338829040527, "logps/chosen": -215.79949951171875, "logps/rejected": -361.7232360839844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.722895622253418, "rewards/margins": 10.836791038513184, "rewards/rejected": -18.5596866607666, "step": 13606 }, { "epoch": 2.12, "learning_rate": 4.1674090979822605e-06, "logits/chosen": -2.867520570755005, "logits/rejected": -1.354482889175415, "logps/chosen": -454.2115173339844, "logps/rejected": -222.98068237304688, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.483376502990723, "rewards/margins": 7.500594139099121, "rewards/rejected": -13.983970642089844, "step": 13607 }, { "epoch": 2.12, "learning_rate": 4.166675657451112e-06, "logits/chosen": -2.1264257431030273, "logits/rejected": -2.822211503982544, "logps/chosen": -219.22113037109375, "logps/rejected": -361.60394287109375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.727939605712891, "rewards/margins": 8.665512084960938, "rewards/rejected": -13.393451690673828, "step": 13608 }, { "epoch": 2.12, "learning_rate": 4.165942216919964e-06, "logits/chosen": -1.3207993507385254, "logits/rejected": -2.504690647125244, "logps/chosen": -175.3983612060547, "logps/rejected": -508.1679992675781, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -3.712336540222168, "rewards/margins": 8.128154754638672, "rewards/rejected": -11.840490341186523, "step": 13609 }, { "epoch": 2.12, "learning_rate": 4.165208776388817e-06, "logits/chosen": -2.607550859451294, "logits/rejected": -2.933581590652466, "logps/chosen": -104.13957977294922, "logps/rejected": -196.80126953125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.5092806816101074, "rewards/margins": 6.425483226776123, "rewards/rejected": -9.93476390838623, "step": 13610 }, { "epoch": 2.12, "learning_rate": 4.164475335857669e-06, "logits/chosen": -2.8386971950531006, "logits/rejected": -1.968061923980713, "logps/chosen": -500.5518493652344, "logps/rejected": -291.2356262207031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1473114490509033, "rewards/margins": 11.398087501525879, "rewards/rejected": -13.545398712158203, "step": 13611 }, { "epoch": 2.12, "learning_rate": 4.1637418953265215e-06, "logits/chosen": -2.0989673137664795, "logits/rejected": -2.6749770641326904, "logps/chosen": -812.595458984375, "logps/rejected": -796.0543823242188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.584189414978027, "rewards/margins": 8.066122055053711, "rewards/rejected": -17.650310516357422, "step": 13612 }, { "epoch": 2.12, "learning_rate": 4.163008454795373e-06, "logits/chosen": -2.173384666442871, "logits/rejected": -2.797064781188965, "logps/chosen": -55.80155944824219, "logps/rejected": -234.36578369140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.839246988296509, "rewards/margins": 8.052047729492188, "rewards/rejected": -10.891295433044434, "step": 13613 }, { "epoch": 2.12, "learning_rate": 4.162275014264225e-06, "logits/chosen": -2.420442581176758, "logits/rejected": -2.792963981628418, "logps/chosen": -506.3865051269531, "logps/rejected": -672.304931640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.453926086425781, "rewards/margins": 8.91338062286377, "rewards/rejected": -15.36730670928955, "step": 13614 }, { "epoch": 2.12, "learning_rate": 4.161541573733078e-06, "logits/chosen": -2.8785336017608643, "logits/rejected": -3.1605801582336426, "logps/chosen": -416.8171081542969, "logps/rejected": -557.3226928710938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.955196380615234, "rewards/margins": 10.514049530029297, "rewards/rejected": -16.46924591064453, "step": 13615 }, { "epoch": 2.12, "learning_rate": 4.16080813320193e-06, "logits/chosen": -3.003570079803467, "logits/rejected": -2.8348450660705566, "logps/chosen": -146.6183319091797, "logps/rejected": -179.3968505859375, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -8.202245712280273, "rewards/margins": 4.585751056671143, "rewards/rejected": -12.787996292114258, "step": 13616 }, { "epoch": 2.12, "learning_rate": 4.160074692670782e-06, "logits/chosen": -2.9227073192596436, "logits/rejected": -2.12304425239563, "logps/chosen": -581.739501953125, "logps/rejected": -329.1575927734375, "loss": 0.0515, "rewards/accuracies": 1.0, "rewards/chosen": -5.675132751464844, "rewards/margins": 5.1320905685424805, "rewards/rejected": -10.807223320007324, "step": 13617 }, { "epoch": 2.12, "learning_rate": 4.159341252139634e-06, "logits/chosen": -2.1570513248443604, "logits/rejected": -2.861112356185913, "logps/chosen": -409.20263671875, "logps/rejected": -417.6105651855469, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.184669494628906, "rewards/margins": 6.906966686248779, "rewards/rejected": -12.091636657714844, "step": 13618 }, { "epoch": 2.12, "learning_rate": 4.158607811608486e-06, "logits/chosen": -1.0374934673309326, "logits/rejected": -2.323371171951294, "logps/chosen": -68.47783660888672, "logps/rejected": -270.37530517578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0937676429748535, "rewards/margins": 10.219878196716309, "rewards/rejected": -13.31364631652832, "step": 13619 }, { "epoch": 2.12, "learning_rate": 4.157874371077338e-06, "logits/chosen": -1.2632089853286743, "logits/rejected": -2.826505422592163, "logps/chosen": -194.06298828125, "logps/rejected": -482.50872802734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.224099636077881, "rewards/margins": 9.966719627380371, "rewards/rejected": -14.190818786621094, "step": 13620 }, { "epoch": 2.12, "learning_rate": 4.15714093054619e-06, "logits/chosen": -2.1464946269989014, "logits/rejected": -2.7202372550964355, "logps/chosen": -141.4044189453125, "logps/rejected": -351.9840087890625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.7892894744873047, "rewards/margins": 9.496973037719727, "rewards/rejected": -13.286262512207031, "step": 13621 }, { "epoch": 2.12, "learning_rate": 4.156407490015042e-06, "logits/chosen": -2.781867504119873, "logits/rejected": -2.9621076583862305, "logps/chosen": -248.168701171875, "logps/rejected": -363.8928527832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.511871337890625, "rewards/margins": 10.514557838439941, "rewards/rejected": -18.02642822265625, "step": 13622 }, { "epoch": 2.12, "learning_rate": 4.155674049483894e-06, "logits/chosen": -1.8272247314453125, "logits/rejected": -2.9066812992095947, "logps/chosen": -101.96104431152344, "logps/rejected": -395.7464599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.071157455444336, "rewards/margins": 12.787137031555176, "rewards/rejected": -16.858295440673828, "step": 13623 }, { "epoch": 2.12, "learning_rate": 4.154940608952747e-06, "logits/chosen": -3.0575971603393555, "logits/rejected": -2.970139741897583, "logps/chosen": -256.5260009765625, "logps/rejected": -301.37469482421875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.852327823638916, "rewards/margins": 7.628006458282471, "rewards/rejected": -11.480334281921387, "step": 13624 }, { "epoch": 2.12, "learning_rate": 4.1542071684215985e-06, "logits/chosen": -2.323620557785034, "logits/rejected": -2.616746664047241, "logps/chosen": -182.1069793701172, "logps/rejected": -313.5383605957031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.093539714813232, "rewards/margins": 8.941919326782227, "rewards/rejected": -13.0354585647583, "step": 13625 }, { "epoch": 2.12, "learning_rate": 4.15347372789045e-06, "logits/chosen": -2.792137384414673, "logits/rejected": -2.691314935684204, "logps/chosen": -377.90301513671875, "logps/rejected": -423.77001953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.403858184814453, "rewards/margins": 10.010828018188477, "rewards/rejected": -16.41468620300293, "step": 13626 }, { "epoch": 2.12, "learning_rate": 4.152740287359302e-06, "logits/chosen": -2.6467208862304688, "logits/rejected": -2.5937135219573975, "logps/chosen": -339.49847412109375, "logps/rejected": -322.983154296875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.553147315979004, "rewards/margins": 6.975218772888184, "rewards/rejected": -11.528366088867188, "step": 13627 }, { "epoch": 2.12, "learning_rate": 4.152006846828155e-06, "logits/chosen": -1.5596566200256348, "logits/rejected": -2.52626633644104, "logps/chosen": -135.33154296875, "logps/rejected": -550.3268432617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.478747367858887, "rewards/margins": 13.343753814697266, "rewards/rejected": -18.82250213623047, "step": 13628 }, { "epoch": 2.12, "learning_rate": 4.151273406297008e-06, "logits/chosen": -2.8010945320129395, "logits/rejected": -2.808749198913574, "logps/chosen": -180.7218780517578, "logps/rejected": -292.6268310546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.993147373199463, "rewards/margins": 7.741358757019043, "rewards/rejected": -11.734506607055664, "step": 13629 }, { "epoch": 2.12, "learning_rate": 4.15053996576586e-06, "logits/chosen": -2.7474489212036133, "logits/rejected": -2.7330286502838135, "logps/chosen": -275.3608703613281, "logps/rejected": -488.70159912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.408710479736328, "rewards/margins": 14.436478614807129, "rewards/rejected": -20.84518814086914, "step": 13630 }, { "epoch": 2.12, "learning_rate": 4.1498065252347115e-06, "logits/chosen": -2.5172367095947266, "logits/rejected": -2.8952767848968506, "logps/chosen": -425.23895263671875, "logps/rejected": -532.060302734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.105359077453613, "rewards/margins": 9.015525817871094, "rewards/rejected": -17.12088394165039, "step": 13631 }, { "epoch": 2.12, "learning_rate": 4.149073084703563e-06, "logits/chosen": -1.187276005744934, "logits/rejected": -2.6618642807006836, "logps/chosen": -194.09715270996094, "logps/rejected": -445.8139343261719, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.48939847946167, "rewards/margins": 7.170475959777832, "rewards/rejected": -13.659873962402344, "step": 13632 }, { "epoch": 2.12, "learning_rate": 4.148339644172416e-06, "logits/chosen": -2.5203938484191895, "logits/rejected": -1.6108185052871704, "logps/chosen": -160.02745056152344, "logps/rejected": -249.662841796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.476102590560913, "rewards/margins": 8.824199676513672, "rewards/rejected": -12.300301551818848, "step": 13633 }, { "epoch": 2.12, "learning_rate": 4.147606203641268e-06, "logits/chosen": -2.4984333515167236, "logits/rejected": -2.281632661819458, "logps/chosen": -234.84573364257812, "logps/rejected": -305.7332458496094, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -10.357500076293945, "rewards/margins": 7.637272357940674, "rewards/rejected": -17.994773864746094, "step": 13634 }, { "epoch": 2.12, "learning_rate": 4.14687276311012e-06, "logits/chosen": -2.606060743331909, "logits/rejected": -2.1597414016723633, "logps/chosen": -205.558349609375, "logps/rejected": -228.861328125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -6.007315158843994, "rewards/margins": 6.0759148597717285, "rewards/rejected": -12.083230018615723, "step": 13635 }, { "epoch": 2.12, "learning_rate": 4.146139322578972e-06, "logits/chosen": -2.9548094272613525, "logits/rejected": -1.9913054704666138, "logps/chosen": -427.08685302734375, "logps/rejected": -323.50323486328125, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -4.497351169586182, "rewards/margins": 5.543735027313232, "rewards/rejected": -10.041086196899414, "step": 13636 }, { "epoch": 2.12, "learning_rate": 4.1454058820478244e-06, "logits/chosen": -1.893280029296875, "logits/rejected": -2.8848390579223633, "logps/chosen": -84.09542083740234, "logps/rejected": -422.089599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.5441662073135376, "rewards/margins": 13.200906753540039, "rewards/rejected": -13.745073318481445, "step": 13637 }, { "epoch": 2.12, "learning_rate": 4.144672441516676e-06, "logits/chosen": -1.947982668876648, "logits/rejected": -2.9045658111572266, "logps/chosen": -192.2812957763672, "logps/rejected": -591.34228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4141969680786133, "rewards/margins": 14.637662887573242, "rewards/rejected": -18.051860809326172, "step": 13638 }, { "epoch": 2.12, "learning_rate": 4.143939000985528e-06, "logits/chosen": -2.7611782550811768, "logits/rejected": -2.5533177852630615, "logps/chosen": -350.42755126953125, "logps/rejected": -387.77935791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.648698568344116, "rewards/margins": 10.937541961669922, "rewards/rejected": -14.5862398147583, "step": 13639 }, { "epoch": 2.12, "learning_rate": 4.14320556045438e-06, "logits/chosen": -2.634364128112793, "logits/rejected": -0.8354650139808655, "logps/chosen": -377.48345947265625, "logps/rejected": -110.99600219726562, "loss": 0.1124, "rewards/accuracies": 1.0, "rewards/chosen": -0.959459662437439, "rewards/margins": 5.7447075843811035, "rewards/rejected": -6.704167366027832, "step": 13640 }, { "epoch": 2.12, "learning_rate": 4.142472119923232e-06, "logits/chosen": -2.4010324478149414, "logits/rejected": -2.891336679458618, "logps/chosen": -106.76497650146484, "logps/rejected": -380.16387939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.130368232727051, "rewards/margins": 11.235462188720703, "rewards/rejected": -15.36583137512207, "step": 13641 }, { "epoch": 2.12, "learning_rate": 4.141738679392085e-06, "logits/chosen": -2.114949941635132, "logits/rejected": -2.903442859649658, "logps/chosen": -545.0433349609375, "logps/rejected": -676.2111206054688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.636590957641602, "rewards/margins": 8.434768676757812, "rewards/rejected": -16.071359634399414, "step": 13642 }, { "epoch": 2.12, "learning_rate": 4.1410052388609365e-06, "logits/chosen": -1.832013487815857, "logits/rejected": -2.650847911834717, "logps/chosen": -124.05803680419922, "logps/rejected": -312.1311340332031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.1325531005859375, "rewards/margins": 8.954580307006836, "rewards/rejected": -13.08713436126709, "step": 13643 }, { "epoch": 2.12, "learning_rate": 4.1402717983297884e-06, "logits/chosen": -2.521594285964966, "logits/rejected": -2.599738121032715, "logps/chosen": -163.9622802734375, "logps/rejected": -261.3514099121094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.791787147521973, "rewards/margins": 7.884736061096191, "rewards/rejected": -13.676523208618164, "step": 13644 }, { "epoch": 2.12, "learning_rate": 4.139538357798641e-06, "logits/chosen": -2.351757287979126, "logits/rejected": -2.8972485065460205, "logps/chosen": -83.80758666992188, "logps/rejected": -234.81246948242188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.1162333488464355, "rewards/margins": 8.058263778686523, "rewards/rejected": -12.174497604370117, "step": 13645 }, { "epoch": 2.12, "learning_rate": 4.138804917267494e-06, "logits/chosen": -2.689405679702759, "logits/rejected": -2.8191800117492676, "logps/chosen": -326.3908386230469, "logps/rejected": -330.2706298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.166696548461914, "rewards/margins": 13.732149124145508, "rewards/rejected": -19.898845672607422, "step": 13646 }, { "epoch": 2.12, "learning_rate": 4.138071476736346e-06, "logits/chosen": -1.7674154043197632, "logits/rejected": -2.925236701965332, "logps/chosen": -177.6007080078125, "logps/rejected": -537.248291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.522437334060669, "rewards/margins": 10.872539520263672, "rewards/rejected": -13.394977569580078, "step": 13647 }, { "epoch": 2.12, "learning_rate": 4.137338036205198e-06, "logits/chosen": -2.6276090145111084, "logits/rejected": -2.46152663230896, "logps/chosen": -385.59912109375, "logps/rejected": -437.40802001953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.762864589691162, "rewards/margins": 9.28688907623291, "rewards/rejected": -13.049753189086914, "step": 13648 }, { "epoch": 2.12, "learning_rate": 4.1366045956740495e-06, "logits/chosen": -2.7553904056549072, "logits/rejected": -1.924770712852478, "logps/chosen": -287.3602600097656, "logps/rejected": -348.1402282714844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.6183271408081055, "rewards/margins": 9.134590148925781, "rewards/rejected": -14.752917289733887, "step": 13649 }, { "epoch": 2.12, "learning_rate": 4.135871155142901e-06, "logits/chosen": -2.114320993423462, "logits/rejected": -2.5456647872924805, "logps/chosen": -118.0638427734375, "logps/rejected": -237.1377410888672, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.603428840637207, "rewards/margins": 11.661651611328125, "rewards/rejected": -14.265081405639648, "step": 13650 }, { "epoch": 2.12, "learning_rate": 4.135137714611754e-06, "logits/chosen": -2.7187340259552, "logits/rejected": -1.117196798324585, "logps/chosen": -230.1384735107422, "logps/rejected": -169.4002685546875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.521111488342285, "rewards/margins": 7.3320841789245605, "rewards/rejected": -12.853195190429688, "step": 13651 }, { "epoch": 2.12, "learning_rate": 4.134404274080606e-06, "logits/chosen": -2.7009706497192383, "logits/rejected": -3.024217128753662, "logps/chosen": -121.30025482177734, "logps/rejected": -292.8539123535156, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.915158271789551, "rewards/margins": 6.889398574829102, "rewards/rejected": -12.804557800292969, "step": 13652 }, { "epoch": 2.12, "learning_rate": 4.133670833549458e-06, "logits/chosen": -2.243492841720581, "logits/rejected": -2.693138360977173, "logps/chosen": -338.0848693847656, "logps/rejected": -467.70233154296875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -8.254526138305664, "rewards/margins": 8.54696273803711, "rewards/rejected": -16.801488876342773, "step": 13653 }, { "epoch": 2.12, "learning_rate": 4.13293739301831e-06, "logits/chosen": -2.093132495880127, "logits/rejected": -2.7143285274505615, "logps/chosen": -715.8695068359375, "logps/rejected": -805.877197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.974291801452637, "rewards/margins": 17.378524780273438, "rewards/rejected": -25.35281753540039, "step": 13654 }, { "epoch": 2.12, "learning_rate": 4.1322039524871625e-06, "logits/chosen": -2.2758030891418457, "logits/rejected": -2.7953577041625977, "logps/chosen": -113.95011901855469, "logps/rejected": -292.8326721191406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.192995071411133, "rewards/margins": 8.843976974487305, "rewards/rejected": -16.036972045898438, "step": 13655 }, { "epoch": 2.12, "learning_rate": 4.131470511956014e-06, "logits/chosen": -1.6954526901245117, "logits/rejected": -2.3014602661132812, "logps/chosen": -159.90377807617188, "logps/rejected": -308.56573486328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -2.4863553047180176, "rewards/margins": 7.967560768127441, "rewards/rejected": -10.4539155960083, "step": 13656 }, { "epoch": 2.12, "learning_rate": 4.130737071424866e-06, "logits/chosen": -3.16821026802063, "logits/rejected": -3.2872989177703857, "logps/chosen": -63.30924987792969, "logps/rejected": -191.57623291015625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.673689365386963, "rewards/margins": 8.632954597473145, "rewards/rejected": -13.306644439697266, "step": 13657 }, { "epoch": 2.12, "learning_rate": 4.130003630893718e-06, "logits/chosen": -1.9407176971435547, "logits/rejected": -2.8479974269866943, "logps/chosen": -186.211669921875, "logps/rejected": -404.92413330078125, "loss": 1.1201, "rewards/accuracies": 0.5, "rewards/chosen": -7.260339736938477, "rewards/margins": 6.336097240447998, "rewards/rejected": -13.596437454223633, "step": 13658 }, { "epoch": 2.12, "learning_rate": 4.129270190362571e-06, "logits/chosen": -0.7084012031555176, "logits/rejected": -2.7529332637786865, "logps/chosen": -231.27130126953125, "logps/rejected": -655.3966064453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.7398881912231445, "rewards/margins": 9.753132820129395, "rewards/rejected": -17.49302101135254, "step": 13659 }, { "epoch": 2.12, "learning_rate": 4.128536749831423e-06, "logits/chosen": -2.5175156593322754, "logits/rejected": -2.070671796798706, "logps/chosen": -220.7743682861328, "logps/rejected": -269.0495300292969, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -8.750883102416992, "rewards/margins": 7.2697248458862305, "rewards/rejected": -16.020606994628906, "step": 13660 }, { "epoch": 2.12, "learning_rate": 4.127803309300275e-06, "logits/chosen": -1.795868158340454, "logits/rejected": -2.7628438472747803, "logps/chosen": -154.20599365234375, "logps/rejected": -369.6261291503906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.185176849365234, "rewards/margins": 8.433460235595703, "rewards/rejected": -14.618637084960938, "step": 13661 }, { "epoch": 2.12, "learning_rate": 4.127069868769127e-06, "logits/chosen": -2.899562358856201, "logits/rejected": -2.425642490386963, "logps/chosen": -200.2908935546875, "logps/rejected": -235.68099975585938, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -4.716814994812012, "rewards/margins": 8.560002326965332, "rewards/rejected": -13.276817321777344, "step": 13662 }, { "epoch": 2.12, "learning_rate": 4.126336428237979e-06, "logits/chosen": -2.8699395656585693, "logits/rejected": -2.3138182163238525, "logps/chosen": -330.2472839355469, "logps/rejected": -433.96026611328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.414657115936279, "rewards/margins": 8.766438484191895, "rewards/rejected": -13.181096076965332, "step": 13663 }, { "epoch": 2.13, "learning_rate": 4.125602987706832e-06, "logits/chosen": -2.0194778442382812, "logits/rejected": -3.0129616260528564, "logps/chosen": -187.8645477294922, "logps/rejected": -404.7711181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.5402421951293945, "rewards/margins": 9.317251205444336, "rewards/rejected": -15.857494354248047, "step": 13664 }, { "epoch": 2.13, "learning_rate": 4.124869547175684e-06, "logits/chosen": -2.6630635261535645, "logits/rejected": -2.247976541519165, "logps/chosen": -262.60321044921875, "logps/rejected": -285.76800537109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.907293319702148, "rewards/margins": 10.87417984008789, "rewards/rejected": -17.78147315979004, "step": 13665 }, { "epoch": 2.13, "learning_rate": 4.124136106644536e-06, "logits/chosen": -2.847682476043701, "logits/rejected": -2.4877781867980957, "logps/chosen": -479.009521484375, "logps/rejected": -400.2464294433594, "loss": 1.0778, "rewards/accuracies": 0.5, "rewards/chosen": -10.456323623657227, "rewards/margins": 1.9062459468841553, "rewards/rejected": -12.362569808959961, "step": 13666 }, { "epoch": 2.13, "learning_rate": 4.1234026661133876e-06, "logits/chosen": -1.691208004951477, "logits/rejected": -2.191663980484009, "logps/chosen": -101.67967987060547, "logps/rejected": -247.50904846191406, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -6.514252185821533, "rewards/margins": 5.550375938415527, "rewards/rejected": -12.064628601074219, "step": 13667 }, { "epoch": 2.13, "learning_rate": 4.12266922558224e-06, "logits/chosen": -3.0009262561798096, "logits/rejected": -2.6913087368011475, "logps/chosen": -453.361083984375, "logps/rejected": -443.19378662109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -1.624307632446289, "rewards/margins": 11.851741790771484, "rewards/rejected": -13.476049423217773, "step": 13668 }, { "epoch": 2.13, "learning_rate": 4.121935785051092e-06, "logits/chosen": -2.8441524505615234, "logits/rejected": -1.9244166612625122, "logps/chosen": -203.22549438476562, "logps/rejected": -153.43807983398438, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": -5.146240234375, "rewards/margins": 5.356081008911133, "rewards/rejected": -10.502320289611816, "step": 13669 }, { "epoch": 2.13, "learning_rate": 4.121202344519944e-06, "logits/chosen": -2.522852659225464, "logits/rejected": -2.681579828262329, "logps/chosen": -564.2761840820312, "logps/rejected": -726.0074462890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.217681884765625, "rewards/margins": 15.78873348236084, "rewards/rejected": -22.00641441345215, "step": 13670 }, { "epoch": 2.13, "learning_rate": 4.120468903988796e-06, "logits/chosen": -2.8056211471557617, "logits/rejected": -2.4460887908935547, "logps/chosen": -190.25665283203125, "logps/rejected": -197.69615173339844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -8.175397872924805, "rewards/margins": 6.618405342102051, "rewards/rejected": -14.793804168701172, "step": 13671 }, { "epoch": 2.13, "learning_rate": 4.119735463457648e-06, "logits/chosen": -2.674720287322998, "logits/rejected": -2.7153570652008057, "logps/chosen": -242.73489379882812, "logps/rejected": -328.5911560058594, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -5.954365253448486, "rewards/margins": 4.5426225662231445, "rewards/rejected": -10.496988296508789, "step": 13672 }, { "epoch": 2.13, "learning_rate": 4.1190020229265005e-06, "logits/chosen": -2.2266647815704346, "logits/rejected": -2.772209644317627, "logps/chosen": -105.39875793457031, "logps/rejected": -323.13226318359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.575568675994873, "rewards/margins": 8.993790626525879, "rewards/rejected": -13.569358825683594, "step": 13673 }, { "epoch": 2.13, "learning_rate": 4.118268582395352e-06, "logits/chosen": -2.502079963684082, "logits/rejected": -2.24222469329834, "logps/chosen": -233.16744995117188, "logps/rejected": -250.80606079101562, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -5.28917932510376, "rewards/margins": 7.102757930755615, "rewards/rejected": -12.391937255859375, "step": 13674 }, { "epoch": 2.13, "learning_rate": 4.117535141864204e-06, "logits/chosen": -1.3411328792572021, "logits/rejected": -2.6156165599823, "logps/chosen": -93.59422302246094, "logps/rejected": -329.68829345703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.095635414123535, "rewards/margins": 7.646001815795898, "rewards/rejected": -13.741637229919434, "step": 13675 }, { "epoch": 2.13, "learning_rate": 4.116801701333056e-06, "logits/chosen": -2.834296464920044, "logits/rejected": -2.7681171894073486, "logps/chosen": -214.03399658203125, "logps/rejected": -211.660888671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.077133655548096, "rewards/margins": 7.410329818725586, "rewards/rejected": -12.487462997436523, "step": 13676 }, { "epoch": 2.13, "learning_rate": 4.116068260801909e-06, "logits/chosen": -2.1491832733154297, "logits/rejected": -3.0088748931884766, "logps/chosen": -99.10753631591797, "logps/rejected": -330.0824890136719, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -6.84428071975708, "rewards/margins": 6.111917018890381, "rewards/rejected": -12.956197738647461, "step": 13677 }, { "epoch": 2.13, "learning_rate": 4.115334820270761e-06, "logits/chosen": -2.72148060798645, "logits/rejected": -2.473618507385254, "logps/chosen": -199.88882446289062, "logps/rejected": -274.5994873046875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -2.4820778369903564, "rewards/margins": 6.257199287414551, "rewards/rejected": -8.739276885986328, "step": 13678 }, { "epoch": 2.13, "learning_rate": 4.1146013797396135e-06, "logits/chosen": -1.6143181324005127, "logits/rejected": -2.553489923477173, "logps/chosen": -230.4324493408203, "logps/rejected": -439.88934326171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.210864543914795, "rewards/margins": 9.331673622131348, "rewards/rejected": -15.542537689208984, "step": 13679 }, { "epoch": 2.13, "learning_rate": 4.113867939208465e-06, "logits/chosen": -3.0076606273651123, "logits/rejected": -2.305302143096924, "logps/chosen": -303.81024169921875, "logps/rejected": -272.62408447265625, "loss": 0.0817, "rewards/accuracies": 1.0, "rewards/chosen": -4.732564926147461, "rewards/margins": 4.90354061126709, "rewards/rejected": -9.636104583740234, "step": 13680 }, { "epoch": 2.13, "learning_rate": 4.113134498677317e-06, "logits/chosen": -2.9473021030426025, "logits/rejected": -3.1628940105438232, "logps/chosen": -100.13333129882812, "logps/rejected": -240.9875946044922, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.5466151237487793, "rewards/margins": 7.635079860687256, "rewards/rejected": -11.181694984436035, "step": 13681 }, { "epoch": 2.13, "learning_rate": 4.11240105814617e-06, "logits/chosen": -2.2226788997650146, "logits/rejected": -2.8891773223876953, "logps/chosen": -202.24954223632812, "logps/rejected": -366.31878662109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.6126701831817627, "rewards/margins": 8.776975631713867, "rewards/rejected": -12.389646530151367, "step": 13682 }, { "epoch": 2.13, "learning_rate": 4.111667617615022e-06, "logits/chosen": -2.230555534362793, "logits/rejected": -2.7489912509918213, "logps/chosen": -78.13272857666016, "logps/rejected": -220.8714141845703, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.089669227600098, "rewards/margins": 7.329413890838623, "rewards/rejected": -12.419082641601562, "step": 13683 }, { "epoch": 2.13, "learning_rate": 4.110934177083874e-06, "logits/chosen": -2.0929408073425293, "logits/rejected": -2.6072049140930176, "logps/chosen": -473.13128662109375, "logps/rejected": -592.85595703125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.945089340209961, "rewards/margins": 6.969786643981934, "rewards/rejected": -15.914875984191895, "step": 13684 }, { "epoch": 2.13, "learning_rate": 4.110200736552726e-06, "logits/chosen": -2.3106982707977295, "logits/rejected": -2.488525629043579, "logps/chosen": -188.20924377441406, "logps/rejected": -282.47381591796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.228916168212891, "rewards/margins": 9.568859100341797, "rewards/rejected": -16.797775268554688, "step": 13685 }, { "epoch": 2.13, "learning_rate": 4.109467296021578e-06, "logits/chosen": -2.1865899562835693, "logits/rejected": -2.9917683601379395, "logps/chosen": -437.63531494140625, "logps/rejected": -609.5645141601562, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -6.4669036865234375, "rewards/margins": 5.787332057952881, "rewards/rejected": -12.254236221313477, "step": 13686 }, { "epoch": 2.13, "learning_rate": 4.10873385549043e-06, "logits/chosen": -2.5587940216064453, "logits/rejected": -2.9660513401031494, "logps/chosen": -222.15573120117188, "logps/rejected": -379.283203125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -5.039463996887207, "rewards/margins": 7.482831954956055, "rewards/rejected": -12.522296905517578, "step": 13687 }, { "epoch": 2.13, "learning_rate": 4.108000414959282e-06, "logits/chosen": -2.7349233627319336, "logits/rejected": -3.142885208129883, "logps/chosen": -324.35137939453125, "logps/rejected": -424.0714416503906, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.811507225036621, "rewards/margins": 6.310052871704102, "rewards/rejected": -11.121560096740723, "step": 13688 }, { "epoch": 2.13, "learning_rate": 4.107266974428134e-06, "logits/chosen": -2.587470054626465, "logits/rejected": -2.761249303817749, "logps/chosen": -245.37405395507812, "logps/rejected": -283.7286682128906, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.720358371734619, "rewards/margins": 11.290952682495117, "rewards/rejected": -16.011310577392578, "step": 13689 }, { "epoch": 2.13, "learning_rate": 4.106533533896986e-06, "logits/chosen": -2.5539238452911377, "logits/rejected": -2.8197991847991943, "logps/chosen": -85.779052734375, "logps/rejected": -272.04986572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.5194008350372314, "rewards/margins": 9.920831680297852, "rewards/rejected": -12.44023323059082, "step": 13690 }, { "epoch": 2.13, "learning_rate": 4.1058000933658386e-06, "logits/chosen": -2.8200674057006836, "logits/rejected": -2.838184356689453, "logps/chosen": -211.61514282226562, "logps/rejected": -322.88983154296875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -2.8065476417541504, "rewards/margins": 7.113858699798584, "rewards/rejected": -9.920406341552734, "step": 13691 }, { "epoch": 2.13, "learning_rate": 4.1050666528346904e-06, "logits/chosen": -1.7913318872451782, "logits/rejected": -2.949683427810669, "logps/chosen": -133.80096435546875, "logps/rejected": -523.1612548828125, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -6.97360372543335, "rewards/margins": 5.745217800140381, "rewards/rejected": -12.71882152557373, "step": 13692 }, { "epoch": 2.13, "learning_rate": 4.104333212303542e-06, "logits/chosen": -2.095828056335449, "logits/rejected": -2.7844221591949463, "logps/chosen": -529.994384765625, "logps/rejected": -796.1181640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.257106781005859, "rewards/margins": 10.684804916381836, "rewards/rejected": -14.941911697387695, "step": 13693 }, { "epoch": 2.13, "learning_rate": 4.103599771772394e-06, "logits/chosen": -2.6994547843933105, "logits/rejected": -2.4218804836273193, "logps/chosen": -587.2327270507812, "logps/rejected": -473.0831298828125, "loss": 0.1963, "rewards/accuracies": 1.0, "rewards/chosen": -4.657068729400635, "rewards/margins": 8.53913688659668, "rewards/rejected": -13.196205139160156, "step": 13694 }, { "epoch": 2.13, "learning_rate": 4.102866331241247e-06, "logits/chosen": -2.548621654510498, "logits/rejected": -2.7286922931671143, "logps/chosen": -119.61953735351562, "logps/rejected": -335.5385437011719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.817649841308594, "rewards/margins": 10.043956756591797, "rewards/rejected": -15.86160659790039, "step": 13695 }, { "epoch": 2.13, "learning_rate": 4.1021328907101e-06, "logits/chosen": -2.5411558151245117, "logits/rejected": -3.2315170764923096, "logps/chosen": -60.3658332824707, "logps/rejected": -249.07196044921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.647516250610352, "rewards/margins": 7.767290115356445, "rewards/rejected": -12.414806365966797, "step": 13696 }, { "epoch": 2.13, "learning_rate": 4.1013994501789515e-06, "logits/chosen": -2.0920519828796387, "logits/rejected": -2.7471182346343994, "logps/chosen": -142.9272003173828, "logps/rejected": -184.18267822265625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -4.229030132293701, "rewards/margins": 5.683633327484131, "rewards/rejected": -9.912663459777832, "step": 13697 }, { "epoch": 2.13, "learning_rate": 4.100666009647803e-06, "logits/chosen": -1.8829110860824585, "logits/rejected": -2.8456149101257324, "logps/chosen": -418.5245361328125, "logps/rejected": -613.70654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.961091041564941, "rewards/margins": 13.258293151855469, "rewards/rejected": -20.219385147094727, "step": 13698 }, { "epoch": 2.13, "learning_rate": 4.099932569116656e-06, "logits/chosen": -2.6374824047088623, "logits/rejected": -2.5872020721435547, "logps/chosen": -490.2726135253906, "logps/rejected": -546.7847290039062, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.3197526931762695, "rewards/margins": 7.418087005615234, "rewards/rejected": -13.737838745117188, "step": 13699 }, { "epoch": 2.13, "learning_rate": 4.099199128585508e-06, "logits/chosen": -2.608535051345825, "logits/rejected": -2.06050968170166, "logps/chosen": -215.30355834960938, "logps/rejected": -282.9027099609375, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -5.735726356506348, "rewards/margins": 4.397659778594971, "rewards/rejected": -10.133386611938477, "step": 13700 }, { "epoch": 2.13, "learning_rate": 4.09846568805436e-06, "logits/chosen": -2.7928810119628906, "logits/rejected": -2.5695013999938965, "logps/chosen": -126.10863494873047, "logps/rejected": -165.34744262695312, "loss": 0.3783, "rewards/accuracies": 0.5, "rewards/chosen": -6.074990272521973, "rewards/margins": 5.520351409912109, "rewards/rejected": -11.595341682434082, "step": 13701 }, { "epoch": 2.13, "learning_rate": 4.097732247523212e-06, "logits/chosen": -1.853752851486206, "logits/rejected": -2.8500988483428955, "logps/chosen": -227.23736572265625, "logps/rejected": -437.28021240234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.0977888107299805, "rewards/margins": 7.876336097717285, "rewards/rejected": -13.974124908447266, "step": 13702 }, { "epoch": 2.13, "learning_rate": 4.096998806992064e-06, "logits/chosen": -2.420428514480591, "logits/rejected": -2.8652141094207764, "logps/chosen": -162.72384643554688, "logps/rejected": -327.561279296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.695059776306152, "rewards/margins": 7.684433937072754, "rewards/rejected": -15.379493713378906, "step": 13703 }, { "epoch": 2.13, "learning_rate": 4.096265366460916e-06, "logits/chosen": -2.5670247077941895, "logits/rejected": -2.793048143386841, "logps/chosen": -114.87443542480469, "logps/rejected": -288.78326416015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.374122619628906, "rewards/margins": 9.529742240905762, "rewards/rejected": -15.903864860534668, "step": 13704 }, { "epoch": 2.13, "learning_rate": 4.095531925929768e-06, "logits/chosen": -2.642970561981201, "logits/rejected": -2.93503999710083, "logps/chosen": -155.03396606445312, "logps/rejected": -407.857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2278828620910645, "rewards/margins": 10.691034317016602, "rewards/rejected": -15.918917655944824, "step": 13705 }, { "epoch": 2.13, "learning_rate": 4.09479848539862e-06, "logits/chosen": -2.8024909496307373, "logits/rejected": -1.7545653581619263, "logps/chosen": -407.6239929199219, "logps/rejected": -279.80535888671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.190420627593994, "rewards/margins": 10.01743221282959, "rewards/rejected": -12.207853317260742, "step": 13706 }, { "epoch": 2.13, "learning_rate": 4.094065044867472e-06, "logits/chosen": -1.7375469207763672, "logits/rejected": -2.731228828430176, "logps/chosen": -244.1476593017578, "logps/rejected": -562.4638061523438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.51426100730896, "rewards/margins": 11.492756843566895, "rewards/rejected": -15.007017135620117, "step": 13707 }, { "epoch": 2.13, "learning_rate": 4.093331604336325e-06, "logits/chosen": -2.743048906326294, "logits/rejected": -2.169426202774048, "logps/chosen": -565.0469970703125, "logps/rejected": -453.22857666015625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -6.1473708152771, "rewards/margins": 6.719783782958984, "rewards/rejected": -12.867155075073242, "step": 13708 }, { "epoch": 2.13, "learning_rate": 4.092598163805177e-06, "logits/chosen": -2.7594470977783203, "logits/rejected": -2.6433961391448975, "logps/chosen": -555.6323852539062, "logps/rejected": -560.221435546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.180865287780762, "rewards/margins": 11.106729507446289, "rewards/rejected": -16.287595748901367, "step": 13709 }, { "epoch": 2.13, "learning_rate": 4.0918647232740285e-06, "logits/chosen": -2.6764893531799316, "logits/rejected": -1.687517523765564, "logps/chosen": -542.635498046875, "logps/rejected": -402.34466552734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.564244270324707, "rewards/margins": 7.285243988037109, "rewards/rejected": -12.8494873046875, "step": 13710 }, { "epoch": 2.13, "learning_rate": 4.09113128274288e-06, "logits/chosen": -1.5043309926986694, "logits/rejected": -2.2083349227905273, "logps/chosen": -239.70953369140625, "logps/rejected": -483.40966796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.7899675369262695, "rewards/margins": 11.330707550048828, "rewards/rejected": -15.120676040649414, "step": 13711 }, { "epoch": 2.13, "learning_rate": 4.090397842211733e-06, "logits/chosen": -2.854641914367676, "logits/rejected": -2.8928816318511963, "logps/chosen": -161.61099243164062, "logps/rejected": -445.30523681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.441083908081055, "rewards/margins": 11.472015380859375, "rewards/rejected": -15.91309928894043, "step": 13712 }, { "epoch": 2.13, "learning_rate": 4.089664401680586e-06, "logits/chosen": -2.5408248901367188, "logits/rejected": -2.7600016593933105, "logps/chosen": -282.068359375, "logps/rejected": -477.75653076171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.466494560241699, "rewards/margins": 11.675468444824219, "rewards/rejected": -16.141963958740234, "step": 13713 }, { "epoch": 2.13, "learning_rate": 4.088930961149438e-06, "logits/chosen": -1.5780606269836426, "logits/rejected": -2.866619348526001, "logps/chosen": -186.57952880859375, "logps/rejected": -447.13037109375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.92044734954834, "rewards/margins": 8.019646644592285, "rewards/rejected": -12.940093994140625, "step": 13714 }, { "epoch": 2.13, "learning_rate": 4.0881975206182896e-06, "logits/chosen": -2.737051486968994, "logits/rejected": -2.5667054653167725, "logps/chosen": -104.92145538330078, "logps/rejected": -239.9977264404297, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7397096157073975, "rewards/margins": 12.785572052001953, "rewards/rejected": -15.52528190612793, "step": 13715 }, { "epoch": 2.13, "learning_rate": 4.0874640800871415e-06, "logits/chosen": -1.5548415184020996, "logits/rejected": -2.824700355529785, "logps/chosen": -345.3916320800781, "logps/rejected": -535.0867309570312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.133901596069336, "rewards/margins": 9.034120559692383, "rewards/rejected": -17.16802215576172, "step": 13716 }, { "epoch": 2.13, "learning_rate": 4.086730639555994e-06, "logits/chosen": -2.3135159015655518, "logits/rejected": -2.557629108428955, "logps/chosen": -166.26206970214844, "logps/rejected": -210.1938018798828, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": -5.835227012634277, "rewards/margins": 4.648995399475098, "rewards/rejected": -10.484222412109375, "step": 13717 }, { "epoch": 2.13, "learning_rate": 4.085997199024846e-06, "logits/chosen": -2.792937994003296, "logits/rejected": -2.0258560180664062, "logps/chosen": -603.353515625, "logps/rejected": -396.1375732421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.192365646362305, "rewards/margins": 6.778068542480469, "rewards/rejected": -14.970434188842773, "step": 13718 }, { "epoch": 2.13, "learning_rate": 4.085263758493698e-06, "logits/chosen": -1.955031156539917, "logits/rejected": -2.79809308052063, "logps/chosen": -162.0751190185547, "logps/rejected": -524.8887939453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.455588340759277, "rewards/margins": 8.639321327209473, "rewards/rejected": -14.09490966796875, "step": 13719 }, { "epoch": 2.13, "learning_rate": 4.08453031796255e-06, "logits/chosen": -2.1737899780273438, "logits/rejected": -3.0869338512420654, "logps/chosen": -309.9947509765625, "logps/rejected": -657.2815551757812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.834546089172363, "rewards/margins": 10.345417022705078, "rewards/rejected": -17.179962158203125, "step": 13720 }, { "epoch": 2.13, "learning_rate": 4.083796877431402e-06, "logits/chosen": -1.9649293422698975, "logits/rejected": -2.7559220790863037, "logps/chosen": -136.3239288330078, "logps/rejected": -481.7355651855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.330893516540527, "rewards/margins": 10.9814453125, "rewards/rejected": -19.312339782714844, "step": 13721 }, { "epoch": 2.13, "learning_rate": 4.083063436900254e-06, "logits/chosen": -2.3811728954315186, "logits/rejected": -2.958980083465576, "logps/chosen": -163.1407470703125, "logps/rejected": -416.80828857421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.5184197425842285, "rewards/margins": 10.026946067810059, "rewards/rejected": -14.545366287231445, "step": 13722 }, { "epoch": 2.13, "learning_rate": 4.082329996369106e-06, "logits/chosen": -2.4527158737182617, "logits/rejected": -2.974433660507202, "logps/chosen": -167.74832153320312, "logps/rejected": -357.13525390625, "loss": 0.0502, "rewards/accuracies": 1.0, "rewards/chosen": -6.417706489562988, "rewards/margins": 5.333400249481201, "rewards/rejected": -11.751106262207031, "step": 13723 }, { "epoch": 2.13, "learning_rate": 4.081596555837958e-06, "logits/chosen": -2.6206305027008057, "logits/rejected": -2.9875524044036865, "logps/chosen": -154.4298095703125, "logps/rejected": -541.7840576171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.809823989868164, "rewards/margins": 9.279881477355957, "rewards/rejected": -17.089706420898438, "step": 13724 }, { "epoch": 2.13, "learning_rate": 4.08086311530681e-06, "logits/chosen": -1.8180131912231445, "logits/rejected": -2.6852288246154785, "logps/chosen": -202.09971618652344, "logps/rejected": -392.1807861328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.01435661315918, "rewards/margins": 7.790922164916992, "rewards/rejected": -13.805278778076172, "step": 13725 }, { "epoch": 2.13, "learning_rate": 4.080129674775663e-06, "logits/chosen": -2.5966978073120117, "logits/rejected": -1.5130888223648071, "logps/chosen": -332.83184814453125, "logps/rejected": -285.4932556152344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.0967445373535156, "rewards/margins": 9.45545482635498, "rewards/rejected": -10.552199363708496, "step": 13726 }, { "epoch": 2.13, "learning_rate": 4.079396234244515e-06, "logits/chosen": -2.2754719257354736, "logits/rejected": -2.1417477130889893, "logps/chosen": -700.3211669921875, "logps/rejected": -448.15155029296875, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -8.272720336914062, "rewards/margins": 5.071920871734619, "rewards/rejected": -13.344640731811523, "step": 13727 }, { "epoch": 2.13, "learning_rate": 4.0786627937133665e-06, "logits/chosen": -1.9143589735031128, "logits/rejected": -2.9081177711486816, "logps/chosen": -280.311279296875, "logps/rejected": -427.58221435546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.411988735198975, "rewards/margins": 10.384037971496582, "rewards/rejected": -14.796026229858398, "step": 13728 }, { "epoch": 2.14, "learning_rate": 4.077929353182219e-06, "logits/chosen": -2.499878168106079, "logits/rejected": -1.8641055822372437, "logps/chosen": -562.9071655273438, "logps/rejected": -513.7421264648438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.886460304260254, "rewards/margins": 13.561725616455078, "rewards/rejected": -17.448184967041016, "step": 13729 }, { "epoch": 2.14, "learning_rate": 4.077195912651071e-06, "logits/chosen": -2.919907569885254, "logits/rejected": -3.120288610458374, "logps/chosen": -90.36299133300781, "logps/rejected": -177.05364990234375, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/chosen": -6.865422248840332, "rewards/margins": 4.313100337982178, "rewards/rejected": -11.178522109985352, "step": 13730 }, { "epoch": 2.14, "learning_rate": 4.076462472119924e-06, "logits/chosen": -2.782750368118286, "logits/rejected": -1.5563410520553589, "logps/chosen": -249.30416870117188, "logps/rejected": -373.81005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.131312370300293, "rewards/margins": 10.450490951538086, "rewards/rejected": -15.581803321838379, "step": 13731 }, { "epoch": 2.14, "learning_rate": 4.075729031588776e-06, "logits/chosen": -2.9013071060180664, "logits/rejected": -2.494541645050049, "logps/chosen": -608.9998779296875, "logps/rejected": -518.1951904296875, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -6.723031997680664, "rewards/margins": 6.847973346710205, "rewards/rejected": -13.571004867553711, "step": 13732 }, { "epoch": 2.14, "learning_rate": 4.074995591057628e-06, "logits/chosen": -1.6188666820526123, "logits/rejected": -2.9332613945007324, "logps/chosen": -189.516845703125, "logps/rejected": -379.4144287109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.744675636291504, "rewards/margins": 8.466540336608887, "rewards/rejected": -11.21121597290039, "step": 13733 }, { "epoch": 2.14, "learning_rate": 4.0742621505264795e-06, "logits/chosen": -2.8545732498168945, "logits/rejected": -2.7947425842285156, "logps/chosen": -347.943359375, "logps/rejected": -420.3659973144531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.356088161468506, "rewards/margins": 9.618202209472656, "rewards/rejected": -15.97429084777832, "step": 13734 }, { "epoch": 2.14, "learning_rate": 4.073528709995332e-06, "logits/chosen": -2.5612473487854004, "logits/rejected": -3.0484628677368164, "logps/chosen": -170.5655517578125, "logps/rejected": -336.4124755859375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.7297773361206055, "rewards/margins": 7.484235763549805, "rewards/rejected": -15.21401309967041, "step": 13735 }, { "epoch": 2.14, "learning_rate": 4.072795269464184e-06, "logits/chosen": -2.3655130863189697, "logits/rejected": -2.688753604888916, "logps/chosen": -120.27174377441406, "logps/rejected": -249.1231689453125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.043529510498047, "rewards/margins": 6.928201675415039, "rewards/rejected": -11.971731185913086, "step": 13736 }, { "epoch": 2.14, "learning_rate": 4.072061828933036e-06, "logits/chosen": -2.5906732082366943, "logits/rejected": -2.9508519172668457, "logps/chosen": -143.4080810546875, "logps/rejected": -432.7310791015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9672155380249023, "rewards/margins": 10.72488784790039, "rewards/rejected": -13.692102432250977, "step": 13737 }, { "epoch": 2.14, "learning_rate": 4.071328388401888e-06, "logits/chosen": -1.983168363571167, "logits/rejected": -1.6795026063919067, "logps/chosen": -627.9930419921875, "logps/rejected": -587.3114013671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.882131576538086, "rewards/margins": 14.211647033691406, "rewards/rejected": -19.093778610229492, "step": 13738 }, { "epoch": 2.14, "learning_rate": 4.07059494787074e-06, "logits/chosen": -2.5609254837036133, "logits/rejected": -2.1266767978668213, "logps/chosen": -412.60357666015625, "logps/rejected": -464.796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.757436752319336, "rewards/margins": 11.535764694213867, "rewards/rejected": -15.293201446533203, "step": 13739 }, { "epoch": 2.14, "learning_rate": 4.0698615073395925e-06, "logits/chosen": -1.9772981405258179, "logits/rejected": -3.0605247020721436, "logps/chosen": -133.64382934570312, "logps/rejected": -479.09710693359375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.919643878936768, "rewards/margins": 8.87910270690918, "rewards/rejected": -15.798746109008789, "step": 13740 }, { "epoch": 2.14, "learning_rate": 4.069128066808444e-06, "logits/chosen": -2.6416661739349365, "logits/rejected": -1.9408776760101318, "logps/chosen": -387.359130859375, "logps/rejected": -384.15887451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.814011573791504, "rewards/margins": 10.13803482055664, "rewards/rejected": -18.95204734802246, "step": 13741 }, { "epoch": 2.14, "learning_rate": 4.068394626277296e-06, "logits/chosen": -2.505882501602173, "logits/rejected": -1.8623528480529785, "logps/chosen": -182.01974487304688, "logps/rejected": -216.77976989746094, "loss": 0.0321, "rewards/accuracies": 1.0, "rewards/chosen": -4.45009708404541, "rewards/margins": 8.90000057220459, "rewards/rejected": -13.35009765625, "step": 13742 }, { "epoch": 2.14, "learning_rate": 4.067661185746148e-06, "logits/chosen": -1.3313981294631958, "logits/rejected": -2.7432100772857666, "logps/chosen": -193.1180877685547, "logps/rejected": -543.5632934570312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.2442522048950195, "rewards/margins": 10.829663276672363, "rewards/rejected": -16.073915481567383, "step": 13743 }, { "epoch": 2.14, "learning_rate": 4.066927745215001e-06, "logits/chosen": -2.9846644401550293, "logits/rejected": -3.0086302757263184, "logps/chosen": -190.79344177246094, "logps/rejected": -365.240478515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.2512588500976562, "rewards/margins": 10.91618537902832, "rewards/rejected": -13.167444229125977, "step": 13744 }, { "epoch": 2.14, "learning_rate": 4.066194304683853e-06, "logits/chosen": -2.166360855102539, "logits/rejected": -2.6547038555145264, "logps/chosen": -250.600341796875, "logps/rejected": -460.7273254394531, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.132045745849609, "rewards/margins": 10.426787376403809, "rewards/rejected": -16.558834075927734, "step": 13745 }, { "epoch": 2.14, "learning_rate": 4.065460864152705e-06, "logits/chosen": -0.7104806303977966, "logits/rejected": -2.3196020126342773, "logps/chosen": -268.83038330078125, "logps/rejected": -764.9122314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.305222988128662, "rewards/margins": 18.917959213256836, "rewards/rejected": -22.223182678222656, "step": 13746 }, { "epoch": 2.14, "learning_rate": 4.064727423621557e-06, "logits/chosen": -2.631755828857422, "logits/rejected": -2.7777698040008545, "logps/chosen": -107.23668670654297, "logps/rejected": -293.30126953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.8327531814575195, "rewards/margins": 6.721249580383301, "rewards/rejected": -14.55400276184082, "step": 13747 }, { "epoch": 2.14, "learning_rate": 4.06399398309041e-06, "logits/chosen": -2.7533533573150635, "logits/rejected": -1.8156358003616333, "logps/chosen": -569.2152099609375, "logps/rejected": -505.93505859375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.838818550109863, "rewards/margins": 12.819579124450684, "rewards/rejected": -18.658397674560547, "step": 13748 }, { "epoch": 2.14, "learning_rate": 4.063260542559262e-06, "logits/chosen": -2.779975175857544, "logits/rejected": -2.8710615634918213, "logps/chosen": -124.66205596923828, "logps/rejected": -277.45709228515625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -6.59211540222168, "rewards/margins": 8.666860580444336, "rewards/rejected": -15.258975982666016, "step": 13749 }, { "epoch": 2.14, "learning_rate": 4.062527102028114e-06, "logits/chosen": -2.377840995788574, "logits/rejected": -2.5666165351867676, "logps/chosen": -252.410400390625, "logps/rejected": -457.96368408203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.728377342224121, "rewards/margins": 7.540085792541504, "rewards/rejected": -15.268463134765625, "step": 13750 }, { "epoch": 2.14, "learning_rate": 4.061793661496966e-06, "logits/chosen": -2.6000423431396484, "logits/rejected": -1.7441540956497192, "logps/chosen": -563.9710693359375, "logps/rejected": -750.3466796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.499320030212402, "rewards/margins": 9.320816040039062, "rewards/rejected": -17.82013702392578, "step": 13751 }, { "epoch": 2.14, "learning_rate": 4.0610602209658175e-06, "logits/chosen": -2.710691213607788, "logits/rejected": -1.5922220945358276, "logps/chosen": -226.65264892578125, "logps/rejected": -228.85833740234375, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -4.8957061767578125, "rewards/margins": 7.430456161499023, "rewards/rejected": -12.326162338256836, "step": 13752 }, { "epoch": 2.14, "learning_rate": 4.06032678043467e-06, "logits/chosen": -2.4410057067871094, "logits/rejected": -2.8817222118377686, "logps/chosen": -392.7728271484375, "logps/rejected": -534.66650390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.719364166259766, "rewards/margins": 12.337982177734375, "rewards/rejected": -17.05734634399414, "step": 13753 }, { "epoch": 2.14, "learning_rate": 4.059593339903522e-06, "logits/chosen": -1.7258275747299194, "logits/rejected": -2.7742581367492676, "logps/chosen": -166.235107421875, "logps/rejected": -456.037353515625, "loss": 0.3205, "rewards/accuracies": 1.0, "rewards/chosen": -9.630837440490723, "rewards/margins": 3.1384027004241943, "rewards/rejected": -12.76923942565918, "step": 13754 }, { "epoch": 2.14, "learning_rate": 4.058859899372374e-06, "logits/chosen": -3.1870572566986084, "logits/rejected": -3.0094406604766846, "logps/chosen": -418.91595458984375, "logps/rejected": -253.48947143554688, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -6.635901927947998, "rewards/margins": 8.358345985412598, "rewards/rejected": -14.994248390197754, "step": 13755 }, { "epoch": 2.14, "learning_rate": 4.058126458841226e-06, "logits/chosen": -3.126237392425537, "logits/rejected": -1.0428009033203125, "logps/chosen": -617.3875732421875, "logps/rejected": -317.9306640625, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -5.9474778175354, "rewards/margins": 7.663708686828613, "rewards/rejected": -13.611186981201172, "step": 13756 }, { "epoch": 2.14, "learning_rate": 4.057393018310079e-06, "logits/chosen": -2.2541213035583496, "logits/rejected": -2.4174373149871826, "logps/chosen": -179.7427520751953, "logps/rejected": -399.3213195800781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.706541061401367, "rewards/margins": 8.77103042602539, "rewards/rejected": -19.477571487426758, "step": 13757 }, { "epoch": 2.14, "learning_rate": 4.0566595777789305e-06, "logits/chosen": -2.8411173820495605, "logits/rejected": -2.7323102951049805, "logps/chosen": -293.4375, "logps/rejected": -297.69207763671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.534597873687744, "rewards/margins": 7.663568496704102, "rewards/rejected": -11.198166847229004, "step": 13758 }, { "epoch": 2.14, "learning_rate": 4.055926137247782e-06, "logits/chosen": -2.1176838874816895, "logits/rejected": -2.6498830318450928, "logps/chosen": -93.90071105957031, "logps/rejected": -230.30767822265625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.549544334411621, "rewards/margins": 6.263580322265625, "rewards/rejected": -11.813124656677246, "step": 13759 }, { "epoch": 2.14, "learning_rate": 4.055192696716634e-06, "logits/chosen": -2.9344635009765625, "logits/rejected": -3.2974889278411865, "logps/chosen": -135.47023010253906, "logps/rejected": -356.677978515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.25752067565918, "rewards/margins": 8.024858474731445, "rewards/rejected": -12.282379150390625, "step": 13760 }, { "epoch": 2.14, "learning_rate": 4.054459256185486e-06, "logits/chosen": -2.60019850730896, "logits/rejected": -2.8423962593078613, "logps/chosen": -172.10443115234375, "logps/rejected": -298.2863464355469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.7856645584106445, "rewards/margins": 8.203460693359375, "rewards/rejected": -14.989124298095703, "step": 13761 }, { "epoch": 2.14, "learning_rate": 4.053725815654339e-06, "logits/chosen": -1.6560931205749512, "logits/rejected": -2.657252073287964, "logps/chosen": -140.79360961914062, "logps/rejected": -391.59564208984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.996941089630127, "rewards/margins": 12.948345184326172, "rewards/rejected": -16.94528579711914, "step": 13762 }, { "epoch": 2.14, "learning_rate": 4.052992375123192e-06, "logits/chosen": -2.9601175785064697, "logits/rejected": -3.001819133758545, "logps/chosen": -116.9952392578125, "logps/rejected": -236.73611450195312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.757171154022217, "rewards/margins": 8.843618392944336, "rewards/rejected": -13.600790023803711, "step": 13763 }, { "epoch": 2.14, "learning_rate": 4.0522589345920435e-06, "logits/chosen": -2.534960985183716, "logits/rejected": -2.7830381393432617, "logps/chosen": -172.37612915039062, "logps/rejected": -251.8964385986328, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.924855709075928, "rewards/margins": 8.011524200439453, "rewards/rejected": -12.936379432678223, "step": 13764 }, { "epoch": 2.14, "learning_rate": 4.051525494060895e-06, "logits/chosen": -1.1916347742080688, "logits/rejected": -2.5841009616851807, "logps/chosen": -150.15484619140625, "logps/rejected": -444.28790283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.757545471191406, "rewards/margins": 10.245488166809082, "rewards/rejected": -15.003033638000488, "step": 13765 }, { "epoch": 2.14, "learning_rate": 4.050792053529748e-06, "logits/chosen": -2.300006866455078, "logits/rejected": -2.820599317550659, "logps/chosen": -198.72561645507812, "logps/rejected": -451.9366455078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.615756988525391, "rewards/margins": 10.283496856689453, "rewards/rejected": -15.899253845214844, "step": 13766 }, { "epoch": 2.14, "learning_rate": 4.0500586129986e-06, "logits/chosen": -1.3975250720977783, "logits/rejected": -2.6345200538635254, "logps/chosen": -579.44189453125, "logps/rejected": -811.2117919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.691555976867676, "rewards/margins": 14.52468204498291, "rewards/rejected": -21.216238021850586, "step": 13767 }, { "epoch": 2.14, "learning_rate": 4.049325172467452e-06, "logits/chosen": -2.240025520324707, "logits/rejected": -2.1904332637786865, "logps/chosen": -162.51483154296875, "logps/rejected": -209.52853393554688, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -8.44872760772705, "rewards/margins": 5.702483177185059, "rewards/rejected": -14.15121078491211, "step": 13768 }, { "epoch": 2.14, "learning_rate": 4.048591731936304e-06, "logits/chosen": -0.5526634454727173, "logits/rejected": -2.693185567855835, "logps/chosen": -99.14092254638672, "logps/rejected": -574.5208740234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.81038236618042, "rewards/margins": 11.538885116577148, "rewards/rejected": -18.349267959594727, "step": 13769 }, { "epoch": 2.14, "learning_rate": 4.047858291405156e-06, "logits/chosen": -2.3832976818084717, "logits/rejected": -2.418628692626953, "logps/chosen": -631.454833984375, "logps/rejected": -670.0533447265625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -7.218172550201416, "rewards/margins": 14.624626159667969, "rewards/rejected": -21.84280014038086, "step": 13770 }, { "epoch": 2.14, "learning_rate": 4.047124850874008e-06, "logits/chosen": -2.3981223106384277, "logits/rejected": -2.8689002990722656, "logps/chosen": -170.88290405273438, "logps/rejected": -384.0285949707031, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/chosen": -8.542194366455078, "rewards/margins": 5.654444694519043, "rewards/rejected": -14.196638107299805, "step": 13771 }, { "epoch": 2.14, "learning_rate": 4.04639141034286e-06, "logits/chosen": -2.2111570835113525, "logits/rejected": -2.7356150150299072, "logps/chosen": -173.0273895263672, "logps/rejected": -345.73822021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4210267066955566, "rewards/margins": 11.590620040893555, "rewards/rejected": -14.011646270751953, "step": 13772 }, { "epoch": 2.14, "learning_rate": 4.045657969811712e-06, "logits/chosen": -0.15728794038295746, "logits/rejected": -2.7435808181762695, "logps/chosen": -128.55755615234375, "logps/rejected": -359.3206787109375, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -7.3182830810546875, "rewards/margins": 10.426815032958984, "rewards/rejected": -17.745098114013672, "step": 13773 }, { "epoch": 2.14, "learning_rate": 4.044924529280564e-06, "logits/chosen": -2.1974120140075684, "logits/rejected": -2.7241156101226807, "logps/chosen": -283.0848388671875, "logps/rejected": -502.44281005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.392975807189941, "rewards/margins": 11.44255542755127, "rewards/rejected": -17.83553123474121, "step": 13774 }, { "epoch": 2.14, "learning_rate": 4.044191088749417e-06, "logits/chosen": -2.658889055252075, "logits/rejected": -2.061049461364746, "logps/chosen": -157.34072875976562, "logps/rejected": -252.497802734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.227655410766602, "rewards/margins": 10.108948707580566, "rewards/rejected": -15.336604118347168, "step": 13775 }, { "epoch": 2.14, "learning_rate": 4.0434576482182685e-06, "logits/chosen": -2.758082866668701, "logits/rejected": -2.300506353378296, "logps/chosen": -187.21466064453125, "logps/rejected": -215.50680541992188, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -5.276371479034424, "rewards/margins": 6.480669975280762, "rewards/rejected": -11.757041931152344, "step": 13776 }, { "epoch": 2.14, "learning_rate": 4.0427242076871204e-06, "logits/chosen": -1.5161550045013428, "logits/rejected": -2.827234983444214, "logps/chosen": -242.45530700683594, "logps/rejected": -493.0555114746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.037803649902344, "rewards/margins": 10.479381561279297, "rewards/rejected": -18.51718521118164, "step": 13777 }, { "epoch": 2.14, "learning_rate": 4.041990767155972e-06, "logits/chosen": -1.7561700344085693, "logits/rejected": -2.3183958530426025, "logps/chosen": -133.3843994140625, "logps/rejected": -374.0107727050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.377381324768066, "rewards/margins": 16.02393341064453, "rewards/rejected": -22.401315689086914, "step": 13778 }, { "epoch": 2.14, "learning_rate": 4.041257326624825e-06, "logits/chosen": -3.0987050533294678, "logits/rejected": -2.8200466632843018, "logps/chosen": -724.3070678710938, "logps/rejected": -652.3699951171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.25401496887207, "rewards/margins": 9.828323364257812, "rewards/rejected": -15.082338333129883, "step": 13779 }, { "epoch": 2.14, "learning_rate": 4.040523886093678e-06, "logits/chosen": -1.9044842720031738, "logits/rejected": -2.816387891769409, "logps/chosen": -162.1346435546875, "logps/rejected": -587.74462890625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.462987899780273, "rewards/margins": 10.51972770690918, "rewards/rejected": -18.982715606689453, "step": 13780 }, { "epoch": 2.14, "learning_rate": 4.03979044556253e-06, "logits/chosen": -2.739208698272705, "logits/rejected": -2.885842800140381, "logps/chosen": -212.41757202148438, "logps/rejected": -454.13470458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.442559242248535, "rewards/margins": 12.238515853881836, "rewards/rejected": -19.681076049804688, "step": 13781 }, { "epoch": 2.14, "learning_rate": 4.0390570050313815e-06, "logits/chosen": -2.79445219039917, "logits/rejected": -2.853085517883301, "logps/chosen": -96.25274658203125, "logps/rejected": -176.76495361328125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.313372850418091, "rewards/margins": 6.516304016113281, "rewards/rejected": -9.82967758178711, "step": 13782 }, { "epoch": 2.14, "learning_rate": 4.038323564500233e-06, "logits/chosen": -2.376601219177246, "logits/rejected": -2.727524518966675, "logps/chosen": -186.2405242919922, "logps/rejected": -649.3836059570312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.698474884033203, "rewards/margins": 10.425613403320312, "rewards/rejected": -19.124088287353516, "step": 13783 }, { "epoch": 2.14, "learning_rate": 4.037590123969086e-06, "logits/chosen": -2.9596107006073, "logits/rejected": -2.453604221343994, "logps/chosen": -538.4330444335938, "logps/rejected": -458.32171630859375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -7.624786376953125, "rewards/margins": 8.371583938598633, "rewards/rejected": -15.996370315551758, "step": 13784 }, { "epoch": 2.14, "learning_rate": 4.036856683437938e-06, "logits/chosen": -2.226966619491577, "logits/rejected": -2.8634774684906006, "logps/chosen": -82.55565643310547, "logps/rejected": -465.5758361816406, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -6.218318939208984, "rewards/margins": 12.379312515258789, "rewards/rejected": -18.597631454467773, "step": 13785 }, { "epoch": 2.14, "learning_rate": 4.03612324290679e-06, "logits/chosen": -2.5331056118011475, "logits/rejected": -1.2914382219314575, "logps/chosen": -530.9613037109375, "logps/rejected": -524.284423828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.756470680236816, "rewards/margins": 8.388843536376953, "rewards/rejected": -15.14531421661377, "step": 13786 }, { "epoch": 2.14, "learning_rate": 4.035389802375642e-06, "logits/chosen": -2.426938056945801, "logits/rejected": -2.9801876544952393, "logps/chosen": -94.75790405273438, "logps/rejected": -389.36297607421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.000125885009766, "rewards/margins": 11.616769790649414, "rewards/rejected": -17.616897583007812, "step": 13787 }, { "epoch": 2.14, "learning_rate": 4.034656361844494e-06, "logits/chosen": -2.840691328048706, "logits/rejected": -2.4141080379486084, "logps/chosen": -222.70486450195312, "logps/rejected": -235.775146484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.664051055908203, "rewards/margins": 8.651047706604004, "rewards/rejected": -12.31509780883789, "step": 13788 }, { "epoch": 2.14, "learning_rate": 4.033922921313346e-06, "logits/chosen": -2.605329990386963, "logits/rejected": -2.9305028915405273, "logps/chosen": -363.0617980957031, "logps/rejected": -448.64788818359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.332557678222656, "rewards/margins": 10.78872013092041, "rewards/rejected": -16.121278762817383, "step": 13789 }, { "epoch": 2.14, "learning_rate": 4.033189480782198e-06, "logits/chosen": -2.7322635650634766, "logits/rejected": -2.771141290664673, "logps/chosen": -388.26580810546875, "logps/rejected": -536.5324096679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.937771797180176, "rewards/margins": 12.607295036315918, "rewards/rejected": -18.545066833496094, "step": 13790 }, { "epoch": 2.14, "learning_rate": 4.03245604025105e-06, "logits/chosen": -2.3173108100891113, "logits/rejected": -1.0692250728607178, "logps/chosen": -536.0477905273438, "logps/rejected": -387.1189880371094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -9.199446678161621, "rewards/margins": 7.352865219116211, "rewards/rejected": -16.552310943603516, "step": 13791 }, { "epoch": 2.14, "learning_rate": 4.031722599719902e-06, "logits/chosen": -1.383528709411621, "logits/rejected": -2.8054463863372803, "logps/chosen": -135.20904541015625, "logps/rejected": -566.1742553710938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.136477470397949, "rewards/margins": 12.310416221618652, "rewards/rejected": -16.4468936920166, "step": 13792 }, { "epoch": 2.15, "learning_rate": 4.030989159188755e-06, "logits/chosen": -2.5697884559631348, "logits/rejected": -2.5487513542175293, "logps/chosen": -469.7576599121094, "logps/rejected": -501.66741943359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.130618572235107, "rewards/margins": 9.425727844238281, "rewards/rejected": -16.556346893310547, "step": 13793 }, { "epoch": 2.15, "learning_rate": 4.030255718657607e-06, "logits/chosen": -1.8703854084014893, "logits/rejected": -2.9099528789520264, "logps/chosen": -187.253173828125, "logps/rejected": -478.593994140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.764317512512207, "rewards/margins": 12.422906875610352, "rewards/rejected": -20.187225341796875, "step": 13794 }, { "epoch": 2.15, "learning_rate": 4.0295222781264585e-06, "logits/chosen": -2.4176840782165527, "logits/rejected": -2.658703565597534, "logps/chosen": -267.3076477050781, "logps/rejected": -258.47259521484375, "loss": 1.0038, "rewards/accuracies": 0.5, "rewards/chosen": -11.245664596557617, "rewards/margins": 4.505331516265869, "rewards/rejected": -15.750995635986328, "step": 13795 }, { "epoch": 2.15, "learning_rate": 4.028788837595311e-06, "logits/chosen": -2.4178407192230225, "logits/rejected": -2.428051710128784, "logps/chosen": -97.91655731201172, "logps/rejected": -278.0256042480469, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -5.2472381591796875, "rewards/margins": 5.08643913269043, "rewards/rejected": -10.333677291870117, "step": 13796 }, { "epoch": 2.15, "learning_rate": 4.028055397064164e-06, "logits/chosen": -2.491440773010254, "logits/rejected": -2.8243212699890137, "logps/chosen": -160.50518798828125, "logps/rejected": -346.0138854980469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.714163303375244, "rewards/margins": 10.499407768249512, "rewards/rejected": -15.213571548461914, "step": 13797 }, { "epoch": 2.15, "learning_rate": 4.027321956533016e-06, "logits/chosen": -2.3637197017669678, "logits/rejected": -1.9916903972625732, "logps/chosen": -163.68673706054688, "logps/rejected": -210.86074829101562, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.209733963012695, "rewards/margins": 6.520806312561035, "rewards/rejected": -11.73054027557373, "step": 13798 }, { "epoch": 2.15, "learning_rate": 4.026588516001868e-06, "logits/chosen": -2.7069122791290283, "logits/rejected": -2.4112136363983154, "logps/chosen": -531.2076416015625, "logps/rejected": -624.5704956054688, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -8.048165321350098, "rewards/margins": 6.692424774169922, "rewards/rejected": -14.74059009552002, "step": 13799 }, { "epoch": 2.15, "learning_rate": 4.0258550754707196e-06, "logits/chosen": -2.6136035919189453, "logits/rejected": -2.7554702758789062, "logps/chosen": -871.91259765625, "logps/rejected": -643.1863403320312, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.460545539855957, "rewards/margins": 6.668002128601074, "rewards/rejected": -14.128547668457031, "step": 13800 }, { "epoch": 2.15, "learning_rate": 4.0251216349395714e-06, "logits/chosen": -2.0918612480163574, "logits/rejected": -3.0319621562957764, "logps/chosen": -246.82662963867188, "logps/rejected": -438.2117004394531, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -8.093921661376953, "rewards/margins": 7.208972930908203, "rewards/rejected": -15.302894592285156, "step": 13801 }, { "epoch": 2.15, "learning_rate": 4.024388194408424e-06, "logits/chosen": -1.215832233428955, "logits/rejected": -2.6656785011291504, "logps/chosen": -163.77520751953125, "logps/rejected": -318.4613342285156, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.089971542358398, "rewards/margins": 7.57280158996582, "rewards/rejected": -14.662773132324219, "step": 13802 }, { "epoch": 2.15, "learning_rate": 4.023654753877276e-06, "logits/chosen": -2.5458877086639404, "logits/rejected": -1.830907940864563, "logps/chosen": -252.0303955078125, "logps/rejected": -308.6524353027344, "loss": 1.5946, "rewards/accuracies": 0.5, "rewards/chosen": -8.218297958374023, "rewards/margins": 5.823417663574219, "rewards/rejected": -14.041715621948242, "step": 13803 }, { "epoch": 2.15, "learning_rate": 4.022921313346128e-06, "logits/chosen": -2.8941268920898438, "logits/rejected": -2.0724263191223145, "logps/chosen": -285.8766174316406, "logps/rejected": -206.57644653320312, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -6.157841682434082, "rewards/margins": 4.345523357391357, "rewards/rejected": -10.503364562988281, "step": 13804 }, { "epoch": 2.15, "learning_rate": 4.02218787281498e-06, "logits/chosen": -2.6696760654449463, "logits/rejected": -0.9975183010101318, "logps/chosen": -900.443115234375, "logps/rejected": -571.5736083984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -9.479792594909668, "rewards/margins": 10.398435592651367, "rewards/rejected": -19.87822723388672, "step": 13805 }, { "epoch": 2.15, "learning_rate": 4.0214544322838325e-06, "logits/chosen": -2.2389767169952393, "logits/rejected": -2.721224546432495, "logps/chosen": -208.7153778076172, "logps/rejected": -409.9448547363281, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -10.66029167175293, "rewards/margins": 5.085499286651611, "rewards/rejected": -15.7457914352417, "step": 13806 }, { "epoch": 2.15, "learning_rate": 4.020720991752684e-06, "logits/chosen": -2.0539968013763428, "logits/rejected": -2.6901233196258545, "logps/chosen": -99.92318725585938, "logps/rejected": -376.4246520996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.111418724060059, "rewards/margins": 14.413156509399414, "rewards/rejected": -18.524574279785156, "step": 13807 }, { "epoch": 2.15, "learning_rate": 4.019987551221536e-06, "logits/chosen": -2.858388662338257, "logits/rejected": -2.1276628971099854, "logps/chosen": -560.2939453125, "logps/rejected": -466.03533935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.499765396118164, "rewards/margins": 11.343997955322266, "rewards/rejected": -17.843761444091797, "step": 13808 }, { "epoch": 2.15, "learning_rate": 4.019254110690388e-06, "logits/chosen": -1.4180375337600708, "logits/rejected": -2.230182409286499, "logps/chosen": -187.08480834960938, "logps/rejected": -448.12060546875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -6.96706485748291, "rewards/margins": 13.87433910369873, "rewards/rejected": -20.84140396118164, "step": 13809 }, { "epoch": 2.15, "learning_rate": 4.01852067015924e-06, "logits/chosen": -2.619211435317993, "logits/rejected": -2.8431248664855957, "logps/chosen": -402.46954345703125, "logps/rejected": -615.6378784179688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.276006698608398, "rewards/margins": 11.334819793701172, "rewards/rejected": -18.61082649230957, "step": 13810 }, { "epoch": 2.15, "learning_rate": 4.017787229628093e-06, "logits/chosen": -2.429924964904785, "logits/rejected": -2.9526782035827637, "logps/chosen": -340.4298400878906, "logps/rejected": -429.75823974609375, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -9.632465362548828, "rewards/margins": 7.105255603790283, "rewards/rejected": -16.737720489501953, "step": 13811 }, { "epoch": 2.15, "learning_rate": 4.017053789096945e-06, "logits/chosen": -2.55023455619812, "logits/rejected": -2.9301340579986572, "logps/chosen": -145.81138610839844, "logps/rejected": -432.79095458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.293527126312256, "rewards/margins": 13.51270580291748, "rewards/rejected": -19.806232452392578, "step": 13812 }, { "epoch": 2.15, "learning_rate": 4.0163203485657965e-06, "logits/chosen": -2.6633849143981934, "logits/rejected": -2.607132911682129, "logps/chosen": -136.90379333496094, "logps/rejected": -297.873291015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.948470115661621, "rewards/margins": 8.152360916137695, "rewards/rejected": -14.100831031799316, "step": 13813 }, { "epoch": 2.15, "learning_rate": 4.015586908034649e-06, "logits/chosen": -2.262577772140503, "logits/rejected": -2.8769285678863525, "logps/chosen": -96.21804809570312, "logps/rejected": -388.0960998535156, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -7.9710283279418945, "rewards/margins": 9.482717514038086, "rewards/rejected": -17.453746795654297, "step": 13814 }, { "epoch": 2.15, "learning_rate": 4.014853467503502e-06, "logits/chosen": -2.7457170486450195, "logits/rejected": -2.4851837158203125, "logps/chosen": -202.29354858398438, "logps/rejected": -224.85928344726562, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -5.795154571533203, "rewards/margins": 3.139043092727661, "rewards/rejected": -8.934198379516602, "step": 13815 }, { "epoch": 2.15, "learning_rate": 4.014120026972354e-06, "logits/chosen": -2.80094313621521, "logits/rejected": -1.768042802810669, "logps/chosen": -298.0548095703125, "logps/rejected": -433.16583251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.229569435119629, "rewards/margins": 14.108548164367676, "rewards/rejected": -20.338117599487305, "step": 13816 }, { "epoch": 2.15, "learning_rate": 4.013386586441206e-06, "logits/chosen": -2.295870542526245, "logits/rejected": -2.9116857051849365, "logps/chosen": -115.10115051269531, "logps/rejected": -275.9493713378906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -8.561326026916504, "rewards/margins": 6.517727851867676, "rewards/rejected": -15.07905387878418, "step": 13817 }, { "epoch": 2.15, "learning_rate": 4.012653145910058e-06, "logits/chosen": -2.3666887283325195, "logits/rejected": -2.862570285797119, "logps/chosen": -124.59004974365234, "logps/rejected": -417.17852783203125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -7.681624412536621, "rewards/margins": 8.0918607711792, "rewards/rejected": -15.77348518371582, "step": 13818 }, { "epoch": 2.15, "learning_rate": 4.0119197053789095e-06, "logits/chosen": -2.383392810821533, "logits/rejected": -2.6001434326171875, "logps/chosen": -296.30023193359375, "logps/rejected": -566.2874145507812, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -7.73541259765625, "rewards/margins": 9.565933227539062, "rewards/rejected": -17.301345825195312, "step": 13819 }, { "epoch": 2.15, "learning_rate": 4.011186264847762e-06, "logits/chosen": -2.6448962688446045, "logits/rejected": -1.3652782440185547, "logps/chosen": -532.7919921875, "logps/rejected": -317.16656494140625, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -5.865187168121338, "rewards/margins": 6.236666679382324, "rewards/rejected": -12.10185432434082, "step": 13820 }, { "epoch": 2.15, "learning_rate": 4.010452824316614e-06, "logits/chosen": -2.495906352996826, "logits/rejected": -2.896857261657715, "logps/chosen": -285.427978515625, "logps/rejected": -330.53277587890625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -7.148251533508301, "rewards/margins": 6.126163482666016, "rewards/rejected": -13.2744140625, "step": 13821 }, { "epoch": 2.15, "learning_rate": 4.009719383785466e-06, "logits/chosen": -2.650887966156006, "logits/rejected": -1.9883514642715454, "logps/chosen": -601.253662109375, "logps/rejected": -413.80126953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.16234016418457, "rewards/margins": 9.087076187133789, "rewards/rejected": -14.24941635131836, "step": 13822 }, { "epoch": 2.15, "learning_rate": 4.008985943254318e-06, "logits/chosen": -2.460587739944458, "logits/rejected": -2.7065887451171875, "logps/chosen": -264.1133728027344, "logps/rejected": -407.4773864746094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.59235954284668, "rewards/margins": 8.115253448486328, "rewards/rejected": -13.707612991333008, "step": 13823 }, { "epoch": 2.15, "learning_rate": 4.0082525027231706e-06, "logits/chosen": -2.944117546081543, "logits/rejected": -2.4506328105926514, "logps/chosen": -314.7548522949219, "logps/rejected": -257.6871032714844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.649773597717285, "rewards/margins": 8.335095405578613, "rewards/rejected": -11.984869003295898, "step": 13824 }, { "epoch": 2.15, "learning_rate": 4.0075190621920224e-06, "logits/chosen": -2.3726539611816406, "logits/rejected": -2.883481502532959, "logps/chosen": -128.8292694091797, "logps/rejected": -302.5375671386719, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -7.5984721183776855, "rewards/margins": 6.167605400085449, "rewards/rejected": -13.766077041625977, "step": 13825 }, { "epoch": 2.15, "learning_rate": 4.006785621660874e-06, "logits/chosen": -1.560347557067871, "logits/rejected": -2.590003728866577, "logps/chosen": -148.02398681640625, "logps/rejected": -449.136474609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.82834529876709, "rewards/margins": 10.370737075805664, "rewards/rejected": -18.199081420898438, "step": 13826 }, { "epoch": 2.15, "learning_rate": 4.006052181129726e-06, "logits/chosen": -3.0237057209014893, "logits/rejected": -2.3734958171844482, "logps/chosen": -211.4165802001953, "logps/rejected": -128.3549041748047, "loss": 0.0829, "rewards/accuracies": 1.0, "rewards/chosen": -4.769927024841309, "rewards/margins": 4.562819480895996, "rewards/rejected": -9.332746505737305, "step": 13827 }, { "epoch": 2.15, "learning_rate": 4.005318740598578e-06, "logits/chosen": -2.9323923587799072, "logits/rejected": -2.7928504943847656, "logps/chosen": -395.4092102050781, "logps/rejected": -484.8138732910156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.188747406005859, "rewards/margins": 8.936405181884766, "rewards/rejected": -13.125152587890625, "step": 13828 }, { "epoch": 2.15, "learning_rate": 4.004585300067431e-06, "logits/chosen": -2.842430830001831, "logits/rejected": -2.8704161643981934, "logps/chosen": -187.93453979492188, "logps/rejected": -193.07363891601562, "loss": 0.8944, "rewards/accuracies": 0.5, "rewards/chosen": -9.429691314697266, "rewards/margins": 3.108002185821533, "rewards/rejected": -12.53769302368164, "step": 13829 }, { "epoch": 2.15, "learning_rate": 4.003851859536283e-06, "logits/chosen": -2.609149932861328, "logits/rejected": -1.8024553060531616, "logps/chosen": -375.407470703125, "logps/rejected": -390.45172119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.230046272277832, "rewards/margins": 13.274723052978516, "rewards/rejected": -15.504768371582031, "step": 13830 }, { "epoch": 2.15, "learning_rate": 4.003118419005135e-06, "logits/chosen": -1.8279242515563965, "logits/rejected": -2.877873182296753, "logps/chosen": -141.513671875, "logps/rejected": -413.321533203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.56469202041626, "rewards/margins": 8.99606990814209, "rewards/rejected": -13.560762405395508, "step": 13831 }, { "epoch": 2.15, "learning_rate": 4.002384978473987e-06, "logits/chosen": -2.99240779876709, "logits/rejected": -2.098910331726074, "logps/chosen": -316.2773132324219, "logps/rejected": -172.85650634765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.809925079345703, "rewards/margins": 8.96769905090332, "rewards/rejected": -13.777624130249023, "step": 13832 }, { "epoch": 2.15, "learning_rate": 4.00165153794284e-06, "logits/chosen": -2.2425663471221924, "logits/rejected": -2.807156562805176, "logps/chosen": -126.76290130615234, "logps/rejected": -394.1404113769531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.742269515991211, "rewards/margins": 11.172767639160156, "rewards/rejected": -16.915035247802734, "step": 13833 }, { "epoch": 2.15, "learning_rate": 4.000918097411692e-06, "logits/chosen": -2.7616302967071533, "logits/rejected": -3.0597994327545166, "logps/chosen": -621.3570556640625, "logps/rejected": -617.1643676757812, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.488892078399658, "rewards/margins": 11.356523513793945, "rewards/rejected": -17.845417022705078, "step": 13834 }, { "epoch": 2.15, "learning_rate": 4.000184656880544e-06, "logits/chosen": -2.4102323055267334, "logits/rejected": -2.512796401977539, "logps/chosen": -62.89130401611328, "logps/rejected": -245.18910217285156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.590579032897949, "rewards/margins": 9.547030448913574, "rewards/rejected": -14.137609481811523, "step": 13835 }, { "epoch": 2.15, "learning_rate": 3.999451216349396e-06, "logits/chosen": -2.6368765830993652, "logits/rejected": -3.0517523288726807, "logps/chosen": -104.04622650146484, "logps/rejected": -223.75616455078125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -6.976451873779297, "rewards/margins": 7.627166748046875, "rewards/rejected": -14.603618621826172, "step": 13836 }, { "epoch": 2.15, "learning_rate": 3.9987177758182475e-06, "logits/chosen": -1.6844826936721802, "logits/rejected": -2.5409674644470215, "logps/chosen": -156.51661682128906, "logps/rejected": -397.471435546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.244635581970215, "rewards/margins": 10.774744033813477, "rewards/rejected": -17.019380569458008, "step": 13837 }, { "epoch": 2.15, "learning_rate": 3.9979843352871e-06, "logits/chosen": -2.0647151470184326, "logits/rejected": -2.4705896377563477, "logps/chosen": -225.23533630371094, "logps/rejected": -371.3041687011719, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.279191017150879, "rewards/margins": 10.314903259277344, "rewards/rejected": -15.594093322753906, "step": 13838 }, { "epoch": 2.15, "learning_rate": 3.997250894755952e-06, "logits/chosen": -2.327817916870117, "logits/rejected": -2.9877493381500244, "logps/chosen": -395.122802734375, "logps/rejected": -556.6549072265625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -9.381012916564941, "rewards/margins": 5.048019886016846, "rewards/rejected": -14.429033279418945, "step": 13839 }, { "epoch": 2.15, "learning_rate": 3.996517454224804e-06, "logits/chosen": -2.696444034576416, "logits/rejected": -2.0731422901153564, "logps/chosen": -289.877197265625, "logps/rejected": -242.82473754882812, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -7.141756534576416, "rewards/margins": 6.805619716644287, "rewards/rejected": -13.947376251220703, "step": 13840 }, { "epoch": 2.15, "learning_rate": 3.995784013693656e-06, "logits/chosen": -2.7415354251861572, "logits/rejected": -2.0192980766296387, "logps/chosen": -623.7545776367188, "logps/rejected": -504.9955749511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.206097602844238, "rewards/margins": 11.614313125610352, "rewards/rejected": -19.820409774780273, "step": 13841 }, { "epoch": 2.15, "learning_rate": 3.995050573162509e-06, "logits/chosen": -2.9103682041168213, "logits/rejected": -1.9537508487701416, "logps/chosen": -387.17205810546875, "logps/rejected": -298.0026550292969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.890535831451416, "rewards/margins": 9.203256607055664, "rewards/rejected": -15.093791961669922, "step": 13842 }, { "epoch": 2.15, "learning_rate": 3.9943171326313605e-06, "logits/chosen": -2.485381841659546, "logits/rejected": -2.5723674297332764, "logps/chosen": -140.72500610351562, "logps/rejected": -405.5775146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.628835678100586, "rewards/margins": 12.253131866455078, "rewards/rejected": -19.881967544555664, "step": 13843 }, { "epoch": 2.15, "learning_rate": 3.993583692100212e-06, "logits/chosen": -2.582061767578125, "logits/rejected": -2.8016817569732666, "logps/chosen": -261.1086730957031, "logps/rejected": -464.3691101074219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.828601360321045, "rewards/margins": 9.973968505859375, "rewards/rejected": -16.802570343017578, "step": 13844 }, { "epoch": 2.15, "learning_rate": 3.992850251569064e-06, "logits/chosen": -2.4986021518707275, "logits/rejected": -2.5602357387542725, "logps/chosen": -240.83169555664062, "logps/rejected": -408.1706848144531, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -8.197436332702637, "rewards/margins": 9.590002059936523, "rewards/rejected": -17.787437438964844, "step": 13845 }, { "epoch": 2.15, "learning_rate": 3.992116811037917e-06, "logits/chosen": -1.7738547325134277, "logits/rejected": -2.8695719242095947, "logps/chosen": -372.51971435546875, "logps/rejected": -719.6574096679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.332027435302734, "rewards/margins": 13.31765365600586, "rewards/rejected": -22.649681091308594, "step": 13846 }, { "epoch": 2.15, "learning_rate": 3.991383370506769e-06, "logits/chosen": -3.094531774520874, "logits/rejected": -2.335604667663574, "logps/chosen": -861.7052001953125, "logps/rejected": -426.06378173828125, "loss": 0.7732, "rewards/accuracies": 0.5, "rewards/chosen": -7.176059722900391, "rewards/margins": 5.9402289390563965, "rewards/rejected": -13.116288185119629, "step": 13847 }, { "epoch": 2.15, "learning_rate": 3.9906499299756216e-06, "logits/chosen": -2.4963953495025635, "logits/rejected": -2.8745527267456055, "logps/chosen": -269.15985107421875, "logps/rejected": -449.1974792480469, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -5.12161922454834, "rewards/margins": 9.079313278198242, "rewards/rejected": -14.200933456420898, "step": 13848 }, { "epoch": 2.15, "learning_rate": 3.9899164894444735e-06, "logits/chosen": -1.3621315956115723, "logits/rejected": -2.532766342163086, "logps/chosen": -157.85903930664062, "logps/rejected": -421.0860290527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1740784645080566, "rewards/margins": 12.68193531036377, "rewards/rejected": -15.856013298034668, "step": 13849 }, { "epoch": 2.15, "learning_rate": 3.989183048913325e-06, "logits/chosen": -2.452894926071167, "logits/rejected": -2.542515516281128, "logps/chosen": -368.9599914550781, "logps/rejected": -569.442138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.049419403076172, "rewards/margins": 16.62999153137207, "rewards/rejected": -21.679412841796875, "step": 13850 }, { "epoch": 2.15, "learning_rate": 3.988449608382178e-06, "logits/chosen": -2.6896302700042725, "logits/rejected": -2.454981565475464, "logps/chosen": -434.92657470703125, "logps/rejected": -385.2164001464844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.471798896789551, "rewards/margins": 6.724471092224121, "rewards/rejected": -11.196269989013672, "step": 13851 }, { "epoch": 2.15, "learning_rate": 3.98771616785103e-06, "logits/chosen": -2.618528127670288, "logits/rejected": -2.867017984390259, "logps/chosen": -167.90679931640625, "logps/rejected": -290.8005065917969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7795729637146, "rewards/margins": 10.290853500366211, "rewards/rejected": -16.07042694091797, "step": 13852 }, { "epoch": 2.15, "learning_rate": 3.986982727319882e-06, "logits/chosen": -2.4519829750061035, "logits/rejected": -1.5671701431274414, "logps/chosen": -194.96400451660156, "logps/rejected": -320.789794921875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.251631736755371, "rewards/margins": 10.724723815917969, "rewards/rejected": -15.976356506347656, "step": 13853 }, { "epoch": 2.15, "learning_rate": 3.986249286788734e-06, "logits/chosen": -2.949653148651123, "logits/rejected": -2.965388298034668, "logps/chosen": -454.7547607421875, "logps/rejected": -498.14410400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.277027130126953, "rewards/margins": 10.150630950927734, "rewards/rejected": -15.427657127380371, "step": 13854 }, { "epoch": 2.15, "learning_rate": 3.985515846257586e-06, "logits/chosen": -3.0965144634246826, "logits/rejected": -2.369091749191284, "logps/chosen": -372.224365234375, "logps/rejected": -251.31396484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.887981414794922, "rewards/margins": 7.259372711181641, "rewards/rejected": -10.147354125976562, "step": 13855 }, { "epoch": 2.15, "learning_rate": 3.984782405726438e-06, "logits/chosen": -2.251086473464966, "logits/rejected": -2.665398597717285, "logps/chosen": -275.2167663574219, "logps/rejected": -402.0352478027344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.209591388702393, "rewards/margins": 8.779654502868652, "rewards/rejected": -12.989246368408203, "step": 13856 }, { "epoch": 2.16, "learning_rate": 3.98404896519529e-06, "logits/chosen": -3.0439488887786865, "logits/rejected": -2.8008687496185303, "logps/chosen": -443.25555419921875, "logps/rejected": -413.04022216796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.631111145019531, "rewards/margins": 10.60653018951416, "rewards/rejected": -15.237640380859375, "step": 13857 }, { "epoch": 2.16, "learning_rate": 3.983315524664142e-06, "logits/chosen": -1.2247782945632935, "logits/rejected": -2.700420618057251, "logps/chosen": -118.45706176757812, "logps/rejected": -473.16888427734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.1891865730285645, "rewards/margins": 10.577791213989258, "rewards/rejected": -14.766977310180664, "step": 13858 }, { "epoch": 2.16, "learning_rate": 3.982582084132994e-06, "logits/chosen": -2.6938838958740234, "logits/rejected": -1.4456651210784912, "logps/chosen": -349.7181701660156, "logps/rejected": -265.61956787109375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -8.392831802368164, "rewards/margins": 8.27117919921875, "rewards/rejected": -16.664011001586914, "step": 13859 }, { "epoch": 2.16, "learning_rate": 3.981848643601847e-06, "logits/chosen": -2.2665059566497803, "logits/rejected": -2.9634008407592773, "logps/chosen": -260.9574279785156, "logps/rejected": -553.9649658203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.438673973083496, "rewards/margins": 8.584907531738281, "rewards/rejected": -16.023582458496094, "step": 13860 }, { "epoch": 2.16, "learning_rate": 3.9811152030706985e-06, "logits/chosen": -2.9031081199645996, "logits/rejected": -2.9401769638061523, "logps/chosen": -221.29974365234375, "logps/rejected": -232.3099822998047, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -8.036487579345703, "rewards/margins": 6.329007148742676, "rewards/rejected": -14.365494728088379, "step": 13861 }, { "epoch": 2.16, "learning_rate": 3.98038176253955e-06, "logits/chosen": -1.4842437505722046, "logits/rejected": -2.4607458114624023, "logps/chosen": -175.8710479736328, "logps/rejected": -519.09521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.111355781555176, "rewards/margins": 11.693490028381348, "rewards/rejected": -18.804845809936523, "step": 13862 }, { "epoch": 2.16, "learning_rate": 3.979648322008402e-06, "logits/chosen": -2.7357141971588135, "logits/rejected": -1.6528351306915283, "logps/chosen": -602.2099609375, "logps/rejected": -353.5411376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.536600112915039, "rewards/margins": 10.193894386291504, "rewards/rejected": -18.73049545288086, "step": 13863 }, { "epoch": 2.16, "learning_rate": 3.978914881477255e-06, "logits/chosen": -2.6434969902038574, "logits/rejected": -1.7911827564239502, "logps/chosen": -404.54071044921875, "logps/rejected": -310.6185302734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.671684265136719, "rewards/margins": 8.732510566711426, "rewards/rejected": -14.404194831848145, "step": 13864 }, { "epoch": 2.16, "learning_rate": 3.978181440946108e-06, "logits/chosen": -2.49241304397583, "logits/rejected": -2.8897018432617188, "logps/chosen": -264.53680419921875, "logps/rejected": -472.4082946777344, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -6.203059673309326, "rewards/margins": 9.366600036621094, "rewards/rejected": -15.569660186767578, "step": 13865 }, { "epoch": 2.16, "learning_rate": 3.97744800041496e-06, "logits/chosen": -3.0275583267211914, "logits/rejected": -2.7374558448791504, "logps/chosen": -225.7225799560547, "logps/rejected": -280.4368591308594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.832403182983398, "rewards/margins": 10.225824356079102, "rewards/rejected": -20.0582275390625, "step": 13866 }, { "epoch": 2.16, "learning_rate": 3.9767145598838115e-06, "logits/chosen": -2.7545957565307617, "logits/rejected": -2.9139339923858643, "logps/chosen": -649.3326416015625, "logps/rejected": -436.0196228027344, "loss": 0.1884, "rewards/accuracies": 1.0, "rewards/chosen": -6.691720485687256, "rewards/margins": 5.879374980926514, "rewards/rejected": -12.57109546661377, "step": 13867 }, { "epoch": 2.16, "learning_rate": 3.975981119352663e-06, "logits/chosen": -1.8126075267791748, "logits/rejected": -2.3165643215179443, "logps/chosen": -267.24395751953125, "logps/rejected": -448.6252136230469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.896501541137695, "rewards/margins": 10.539183616638184, "rewards/rejected": -18.435684204101562, "step": 13868 }, { "epoch": 2.16, "learning_rate": 3.975247678821516e-06, "logits/chosen": -2.442405939102173, "logits/rejected": -2.514427661895752, "logps/chosen": -711.5296020507812, "logps/rejected": -535.5577392578125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -6.15119743347168, "rewards/margins": 9.197508811950684, "rewards/rejected": -15.348706245422363, "step": 13869 }, { "epoch": 2.16, "learning_rate": 3.974514238290368e-06, "logits/chosen": -1.4236721992492676, "logits/rejected": -2.748549222946167, "logps/chosen": -365.18890380859375, "logps/rejected": -528.4286499023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.656332969665527, "rewards/margins": 12.061710357666016, "rewards/rejected": -18.71804428100586, "step": 13870 }, { "epoch": 2.16, "learning_rate": 3.97378079775922e-06, "logits/chosen": -2.6738338470458984, "logits/rejected": -3.0108139514923096, "logps/chosen": -530.8488159179688, "logps/rejected": -517.443603515625, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -8.3317289352417, "rewards/margins": 6.134445667266846, "rewards/rejected": -14.466175079345703, "step": 13871 }, { "epoch": 2.16, "learning_rate": 3.973047357228072e-06, "logits/chosen": -2.338351011276245, "logits/rejected": -2.9585375785827637, "logps/chosen": -180.25599670410156, "logps/rejected": -381.6407470703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.054685592651367, "rewards/margins": 7.678784370422363, "rewards/rejected": -12.733470916748047, "step": 13872 }, { "epoch": 2.16, "learning_rate": 3.9723139166969245e-06, "logits/chosen": -1.0862892866134644, "logits/rejected": -2.5817534923553467, "logps/chosen": -184.8106231689453, "logps/rejected": -431.4366760253906, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -6.4070539474487305, "rewards/margins": 7.720660209655762, "rewards/rejected": -14.127714157104492, "step": 13873 }, { "epoch": 2.16, "learning_rate": 3.971580476165776e-06, "logits/chosen": -1.573509931564331, "logits/rejected": -2.7123563289642334, "logps/chosen": -154.8477783203125, "logps/rejected": -567.27783203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.672602653503418, "rewards/margins": 7.994994640350342, "rewards/rejected": -15.667596817016602, "step": 13874 }, { "epoch": 2.16, "learning_rate": 3.970847035634628e-06, "logits/chosen": -2.756875991821289, "logits/rejected": -2.9009346961975098, "logps/chosen": -204.1781005859375, "logps/rejected": -337.885986328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.435608863830566, "rewards/margins": 7.551792144775391, "rewards/rejected": -12.987401008605957, "step": 13875 }, { "epoch": 2.16, "learning_rate": 3.97011359510348e-06, "logits/chosen": -1.9001573324203491, "logits/rejected": -2.8014638423919678, "logps/chosen": -186.84255981445312, "logps/rejected": -529.8117065429688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.557511329650879, "rewards/margins": 11.211854934692383, "rewards/rejected": -18.769367218017578, "step": 13876 }, { "epoch": 2.16, "learning_rate": 3.969380154572332e-06, "logits/chosen": -2.799563407897949, "logits/rejected": -1.9100462198257446, "logps/chosen": -334.65216064453125, "logps/rejected": -455.7379150390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.227364540100098, "rewards/margins": 10.96792221069336, "rewards/rejected": -17.195287704467773, "step": 13877 }, { "epoch": 2.16, "learning_rate": 3.968646714041185e-06, "logits/chosen": -2.182464599609375, "logits/rejected": -2.760690927505493, "logps/chosen": -222.2671661376953, "logps/rejected": -299.10662841796875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -7.380043029785156, "rewards/margins": 6.28475284576416, "rewards/rejected": -13.664795875549316, "step": 13878 }, { "epoch": 2.16, "learning_rate": 3.9679132735100366e-06, "logits/chosen": -2.3051834106445312, "logits/rejected": -2.881559371948242, "logps/chosen": -1019.458984375, "logps/rejected": -995.815185546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.7178316116333, "rewards/margins": 9.4541597366333, "rewards/rejected": -18.1719913482666, "step": 13879 }, { "epoch": 2.16, "learning_rate": 3.9671798329788885e-06, "logits/chosen": -2.591942548751831, "logits/rejected": -2.6172008514404297, "logps/chosen": -253.05796813964844, "logps/rejected": -273.4450988769531, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -8.431427955627441, "rewards/margins": 6.452218055725098, "rewards/rejected": -14.883646011352539, "step": 13880 }, { "epoch": 2.16, "learning_rate": 3.966446392447741e-06, "logits/chosen": -2.660494327545166, "logits/rejected": -2.6934406757354736, "logps/chosen": -260.5204772949219, "logps/rejected": -473.45001220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.830026626586914, "rewards/margins": 15.573601722717285, "rewards/rejected": -24.403629302978516, "step": 13881 }, { "epoch": 2.16, "learning_rate": 3.965712951916594e-06, "logits/chosen": -1.581885814666748, "logits/rejected": -2.642643928527832, "logps/chosen": -176.6285400390625, "logps/rejected": -400.01300048828125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -8.846330642700195, "rewards/margins": 7.77146053314209, "rewards/rejected": -16.61779022216797, "step": 13882 }, { "epoch": 2.16, "learning_rate": 3.964979511385446e-06, "logits/chosen": -2.62117862701416, "logits/rejected": -2.5774929523468018, "logps/chosen": -271.85260009765625, "logps/rejected": -311.4832763671875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.19912576675415, "rewards/margins": 6.949583053588867, "rewards/rejected": -11.14870834350586, "step": 13883 }, { "epoch": 2.16, "learning_rate": 3.964246070854298e-06, "logits/chosen": -2.0037472248077393, "logits/rejected": -2.6359190940856934, "logps/chosen": -254.7601318359375, "logps/rejected": -414.0537109375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -4.6979169845581055, "rewards/margins": 6.346760272979736, "rewards/rejected": -11.044677734375, "step": 13884 }, { "epoch": 2.16, "learning_rate": 3.9635126303231495e-06, "logits/chosen": -2.8411409854888916, "logits/rejected": -3.0146894454956055, "logps/chosen": -770.0177612304688, "logps/rejected": -763.707763671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -10.560409545898438, "rewards/margins": 6.952757835388184, "rewards/rejected": -17.513168334960938, "step": 13885 }, { "epoch": 2.16, "learning_rate": 3.962779189792002e-06, "logits/chosen": -1.3206589221954346, "logits/rejected": -2.5933187007904053, "logps/chosen": -178.46603393554688, "logps/rejected": -438.87225341796875, "loss": 0.1228, "rewards/accuracies": 1.0, "rewards/chosen": -8.532148361206055, "rewards/margins": 7.72760009765625, "rewards/rejected": -16.259748458862305, "step": 13886 }, { "epoch": 2.16, "learning_rate": 3.962045749260854e-06, "logits/chosen": -2.818092107772827, "logits/rejected": -2.2297890186309814, "logps/chosen": -813.2696533203125, "logps/rejected": -589.208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.792937278747559, "rewards/margins": 10.598630905151367, "rewards/rejected": -16.391569137573242, "step": 13887 }, { "epoch": 2.16, "learning_rate": 3.961312308729706e-06, "logits/chosen": -2.0886785984039307, "logits/rejected": -3.1799540519714355, "logps/chosen": -132.19473266601562, "logps/rejected": -515.701171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.728342533111572, "rewards/margins": 13.706719398498535, "rewards/rejected": -18.435062408447266, "step": 13888 }, { "epoch": 2.16, "learning_rate": 3.960578868198558e-06, "logits/chosen": -2.8538098335266113, "logits/rejected": -2.8771040439605713, "logps/chosen": -299.6394958496094, "logps/rejected": -548.89794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.555356502532959, "rewards/margins": 12.260019302368164, "rewards/rejected": -16.81537628173828, "step": 13889 }, { "epoch": 2.16, "learning_rate": 3.95984542766741e-06, "logits/chosen": -2.3586556911468506, "logits/rejected": -1.8889309167861938, "logps/chosen": -322.47320556640625, "logps/rejected": -349.31658935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.138816833496094, "rewards/margins": 10.044628143310547, "rewards/rejected": -17.18344497680664, "step": 13890 }, { "epoch": 2.16, "learning_rate": 3.9591119871362625e-06, "logits/chosen": -2.9135289192199707, "logits/rejected": -2.7022030353546143, "logps/chosen": -168.53811645507812, "logps/rejected": -243.47073364257812, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.434457778930664, "rewards/margins": 7.334836483001709, "rewards/rejected": -13.769294738769531, "step": 13891 }, { "epoch": 2.16, "learning_rate": 3.958378546605114e-06, "logits/chosen": -2.6372358798980713, "logits/rejected": -3.211793899536133, "logps/chosen": -207.53684997558594, "logps/rejected": -548.7518920898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.755996227264404, "rewards/margins": 14.586288452148438, "rewards/rejected": -19.34228515625, "step": 13892 }, { "epoch": 2.16, "learning_rate": 3.957645106073966e-06, "logits/chosen": -3.1881158351898193, "logits/rejected": -2.946192502975464, "logps/chosen": -117.88838958740234, "logps/rejected": -225.19961547851562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.762845754623413, "rewards/margins": 10.948674201965332, "rewards/rejected": -14.711519241333008, "step": 13893 }, { "epoch": 2.16, "learning_rate": 3.956911665542818e-06, "logits/chosen": -2.5099353790283203, "logits/rejected": -3.149585247039795, "logps/chosen": -87.15447998046875, "logps/rejected": -311.9560546875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.1617889404296875, "rewards/margins": 8.119277000427246, "rewards/rejected": -15.281065940856934, "step": 13894 }, { "epoch": 2.16, "learning_rate": 3.956178225011671e-06, "logits/chosen": -2.835655450820923, "logits/rejected": -2.095693349838257, "logps/chosen": -311.3836975097656, "logps/rejected": -292.4474182128906, "loss": 0.098, "rewards/accuracies": 1.0, "rewards/chosen": -6.48702335357666, "rewards/margins": 5.819309234619141, "rewards/rejected": -12.3063325881958, "step": 13895 }, { "epoch": 2.16, "learning_rate": 3.955444784480523e-06, "logits/chosen": -2.5447776317596436, "logits/rejected": -2.6793460845947266, "logps/chosen": -155.71621704101562, "logps/rejected": -303.1287536621094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.940435409545898, "rewards/margins": 8.206816673278809, "rewards/rejected": -16.14725112915039, "step": 13896 }, { "epoch": 2.16, "learning_rate": 3.954711343949375e-06, "logits/chosen": -3.1353907585144043, "logits/rejected": -2.211118459701538, "logps/chosen": -207.31369018554688, "logps/rejected": -198.36190795898438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.244908332824707, "rewards/margins": 8.397461891174316, "rewards/rejected": -11.642370223999023, "step": 13897 }, { "epoch": 2.16, "learning_rate": 3.953977903418227e-06, "logits/chosen": -2.799076557159424, "logits/rejected": -2.910984754562378, "logps/chosen": -62.86017608642578, "logps/rejected": -222.05523681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.5937347412109375, "rewards/margins": 11.070754051208496, "rewards/rejected": -15.664487838745117, "step": 13898 }, { "epoch": 2.16, "learning_rate": 3.953244462887079e-06, "logits/chosen": -2.6094470024108887, "logits/rejected": -2.267801284790039, "logps/chosen": -480.3792419433594, "logps/rejected": -415.24237060546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.145585060119629, "rewards/margins": 9.040117263793945, "rewards/rejected": -15.18570327758789, "step": 13899 }, { "epoch": 2.16, "learning_rate": 3.952511022355932e-06, "logits/chosen": -2.468630075454712, "logits/rejected": -2.9150171279907227, "logps/chosen": -216.8912353515625, "logps/rejected": -332.2292785644531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.912827491760254, "rewards/margins": 8.63483715057373, "rewards/rejected": -13.547664642333984, "step": 13900 }, { "epoch": 2.16, "learning_rate": 3.951777581824784e-06, "logits/chosen": -1.4709223508834839, "logits/rejected": -2.758280038833618, "logps/chosen": -147.88937377929688, "logps/rejected": -386.716064453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.33928108215332, "rewards/margins": 10.34939193725586, "rewards/rejected": -15.68867301940918, "step": 13901 }, { "epoch": 2.16, "learning_rate": 3.951044141293636e-06, "logits/chosen": -1.8327621221542358, "logits/rejected": -2.7770352363586426, "logps/chosen": -164.2095489501953, "logps/rejected": -332.3067321777344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.814371585845947, "rewards/margins": 8.002851486206055, "rewards/rejected": -15.817222595214844, "step": 13902 }, { "epoch": 2.16, "learning_rate": 3.950310700762488e-06, "logits/chosen": -2.882420301437378, "logits/rejected": -1.783262848854065, "logps/chosen": -1101.73193359375, "logps/rejected": -479.3472900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.548652648925781, "rewards/margins": 9.618413925170898, "rewards/rejected": -21.16706657409668, "step": 13903 }, { "epoch": 2.16, "learning_rate": 3.94957726023134e-06, "logits/chosen": -3.0208988189697266, "logits/rejected": -2.967834949493408, "logps/chosen": -378.11883544921875, "logps/rejected": -413.0569152832031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.522472381591797, "rewards/margins": 14.208174705505371, "rewards/rejected": -18.730648040771484, "step": 13904 }, { "epoch": 2.16, "learning_rate": 3.948843819700192e-06, "logits/chosen": -2.5622634887695312, "logits/rejected": -2.807588815689087, "logps/chosen": -246.7235870361328, "logps/rejected": -276.39044189453125, "loss": 1.6968, "rewards/accuracies": 0.5, "rewards/chosen": -8.929922103881836, "rewards/margins": 0.39279651641845703, "rewards/rejected": -9.32271957397461, "step": 13905 }, { "epoch": 2.16, "learning_rate": 3.948110379169044e-06, "logits/chosen": -2.468595266342163, "logits/rejected": -2.7468020915985107, "logps/chosen": -204.81515502929688, "logps/rejected": -446.07305908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.830973625183105, "rewards/margins": 10.278007507324219, "rewards/rejected": -19.108980178833008, "step": 13906 }, { "epoch": 2.16, "learning_rate": 3.947376938637896e-06, "logits/chosen": -2.7784433364868164, "logits/rejected": -1.9028551578521729, "logps/chosen": -727.6115112304688, "logps/rejected": -494.6007080078125, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -13.868518829345703, "rewards/margins": 6.908939838409424, "rewards/rejected": -20.77745819091797, "step": 13907 }, { "epoch": 2.16, "learning_rate": 3.946643498106748e-06, "logits/chosen": -2.0295486450195312, "logits/rejected": -2.728433847427368, "logps/chosen": -191.36544799804688, "logps/rejected": -358.9544677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.158381462097168, "rewards/margins": 8.978141784667969, "rewards/rejected": -15.136523246765137, "step": 13908 }, { "epoch": 2.16, "learning_rate": 3.9459100575756005e-06, "logits/chosen": -2.3878517150878906, "logits/rejected": -2.5449771881103516, "logps/chosen": -185.39971923828125, "logps/rejected": -379.03961181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.83444881439209, "rewards/margins": 12.372526168823242, "rewards/rejected": -20.206974029541016, "step": 13909 }, { "epoch": 2.16, "learning_rate": 3.9451766170444524e-06, "logits/chosen": -1.7334071397781372, "logits/rejected": -2.4524664878845215, "logps/chosen": -163.87965393066406, "logps/rejected": -230.54702758789062, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -7.837973594665527, "rewards/margins": 5.97134256362915, "rewards/rejected": -13.809316635131836, "step": 13910 }, { "epoch": 2.16, "learning_rate": 3.944443176513304e-06, "logits/chosen": -2.7062203884124756, "logits/rejected": -1.2244197130203247, "logps/chosen": -613.093505859375, "logps/rejected": -521.2728881835938, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.751152038574219, "rewards/margins": 8.500325202941895, "rewards/rejected": -14.251477241516113, "step": 13911 }, { "epoch": 2.16, "learning_rate": 3.943709735982156e-06, "logits/chosen": -2.5041539669036865, "logits/rejected": -2.4763028621673584, "logps/chosen": -143.6426239013672, "logps/rejected": -386.02679443359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.1386003494262695, "rewards/margins": 11.23441219329834, "rewards/rejected": -15.37301254272461, "step": 13912 }, { "epoch": 2.16, "learning_rate": 3.942976295451009e-06, "logits/chosen": -2.71419358253479, "logits/rejected": -1.7143200635910034, "logps/chosen": -264.1645202636719, "logps/rejected": -408.9949645996094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.481688499450684, "rewards/margins": 11.153825759887695, "rewards/rejected": -16.635515213012695, "step": 13913 }, { "epoch": 2.16, "learning_rate": 3.942242854919861e-06, "logits/chosen": -2.1300222873687744, "logits/rejected": -2.9577457904815674, "logps/chosen": -132.93080139160156, "logps/rejected": -530.2837524414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.061697006225586, "rewards/margins": 13.289295196533203, "rewards/rejected": -19.35099220275879, "step": 13914 }, { "epoch": 2.16, "learning_rate": 3.9415094143887135e-06, "logits/chosen": -2.083865165710449, "logits/rejected": -2.6897833347320557, "logps/chosen": -93.97763061523438, "logps/rejected": -331.3938293457031, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -7.792245388031006, "rewards/margins": 7.345171928405762, "rewards/rejected": -15.13741683959961, "step": 13915 }, { "epoch": 2.16, "learning_rate": 3.940775973857565e-06, "logits/chosen": -1.7880160808563232, "logits/rejected": -2.61444354057312, "logps/chosen": -155.78477478027344, "logps/rejected": -290.7333068847656, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -8.85837173461914, "rewards/margins": 5.18695068359375, "rewards/rejected": -14.04532241821289, "step": 13916 }, { "epoch": 2.16, "learning_rate": 3.940042533326417e-06, "logits/chosen": -2.007002353668213, "logits/rejected": -2.8603122234344482, "logps/chosen": -340.8520812988281, "logps/rejected": -646.5494384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.958876132965088, "rewards/margins": 12.715980529785156, "rewards/rejected": -18.674856185913086, "step": 13917 }, { "epoch": 2.16, "learning_rate": 3.93930909279527e-06, "logits/chosen": -2.9452733993530273, "logits/rejected": -2.624203681945801, "logps/chosen": -325.87518310546875, "logps/rejected": -273.923095703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.221111297607422, "rewards/margins": 7.915521621704102, "rewards/rejected": -12.136632919311523, "step": 13918 }, { "epoch": 2.16, "learning_rate": 3.938575652264122e-06, "logits/chosen": -2.293640613555908, "logits/rejected": -2.8487319946289062, "logps/chosen": -207.26556396484375, "logps/rejected": -557.5721435546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.8697052001953125, "rewards/margins": 8.58814811706543, "rewards/rejected": -14.457853317260742, "step": 13919 }, { "epoch": 2.16, "learning_rate": 3.937842211732974e-06, "logits/chosen": -1.512534737586975, "logits/rejected": -2.5941460132598877, "logps/chosen": -111.3852767944336, "logps/rejected": -266.8529968261719, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -9.034844398498535, "rewards/margins": 5.011463642120361, "rewards/rejected": -14.046308517456055, "step": 13920 }, { "epoch": 2.17, "learning_rate": 3.937108771201826e-06, "logits/chosen": -2.556973695755005, "logits/rejected": -2.7852132320404053, "logps/chosen": -97.5658950805664, "logps/rejected": -239.0222625732422, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.462700843811035, "rewards/margins": 9.423686981201172, "rewards/rejected": -15.88638687133789, "step": 13921 }, { "epoch": 2.17, "learning_rate": 3.936375330670678e-06, "logits/chosen": -2.4006001949310303, "logits/rejected": -3.0963151454925537, "logps/chosen": -78.36724853515625, "logps/rejected": -242.59347534179688, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.866787910461426, "rewards/margins": 8.009373664855957, "rewards/rejected": -12.876161575317383, "step": 13922 }, { "epoch": 2.17, "learning_rate": 3.93564189013953e-06, "logits/chosen": -2.4363224506378174, "logits/rejected": -2.7503182888031006, "logps/chosen": -140.96292114257812, "logps/rejected": -235.32806396484375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.918745517730713, "rewards/margins": 6.657721042633057, "rewards/rejected": -13.57646656036377, "step": 13923 }, { "epoch": 2.17, "learning_rate": 3.934908449608382e-06, "logits/chosen": -2.311190366744995, "logits/rejected": -2.8439149856567383, "logps/chosen": -98.37458038330078, "logps/rejected": -351.91424560546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.403838157653809, "rewards/margins": 8.858747482299805, "rewards/rejected": -14.26258659362793, "step": 13924 }, { "epoch": 2.17, "learning_rate": 3.934175009077234e-06, "logits/chosen": -2.222180128097534, "logits/rejected": -2.8338232040405273, "logps/chosen": -225.80426025390625, "logps/rejected": -389.017822265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.0567474365234375, "rewards/margins": 9.245931625366211, "rewards/rejected": -15.302679061889648, "step": 13925 }, { "epoch": 2.17, "learning_rate": 3.933441568546086e-06, "logits/chosen": -1.3213353157043457, "logits/rejected": -2.9676332473754883, "logps/chosen": -180.27642822265625, "logps/rejected": -585.610107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.950336456298828, "rewards/margins": 11.58621597290039, "rewards/rejected": -18.53655242919922, "step": 13926 }, { "epoch": 2.17, "learning_rate": 3.932708128014939e-06, "logits/chosen": -2.678884983062744, "logits/rejected": -1.0721484422683716, "logps/chosen": -341.4083557128906, "logps/rejected": -236.217041015625, "loss": 0.0794, "rewards/accuracies": 1.0, "rewards/chosen": -8.581981658935547, "rewards/margins": 3.0653228759765625, "rewards/rejected": -11.64730453491211, "step": 13927 }, { "epoch": 2.17, "learning_rate": 3.9319746874837905e-06, "logits/chosen": -1.374532699584961, "logits/rejected": -2.0010931491851807, "logps/chosen": -153.37399291992188, "logps/rejected": -333.4673156738281, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.371026992797852, "rewards/margins": 10.417655944824219, "rewards/rejected": -14.78868293762207, "step": 13928 }, { "epoch": 2.17, "learning_rate": 3.931241246952642e-06, "logits/chosen": -2.5727741718292236, "logits/rejected": -2.7414729595184326, "logps/chosen": -104.93114471435547, "logps/rejected": -329.27545166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.886683940887451, "rewards/margins": 11.763487815856934, "rewards/rejected": -16.650171279907227, "step": 13929 }, { "epoch": 2.17, "learning_rate": 3.930507806421494e-06, "logits/chosen": -2.569455623626709, "logits/rejected": -1.7167515754699707, "logps/chosen": -250.74765014648438, "logps/rejected": -213.67318725585938, "loss": 0.1414, "rewards/accuracies": 1.0, "rewards/chosen": -5.378134250640869, "rewards/margins": 5.175373554229736, "rewards/rejected": -10.553507804870605, "step": 13930 }, { "epoch": 2.17, "learning_rate": 3.929774365890347e-06, "logits/chosen": -2.8088200092315674, "logits/rejected": -2.575098991394043, "logps/chosen": -302.4921875, "logps/rejected": -299.1374816894531, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.356522560119629, "rewards/margins": 10.650785446166992, "rewards/rejected": -18.007307052612305, "step": 13931 }, { "epoch": 2.17, "learning_rate": 3.9290409253592e-06, "logits/chosen": -1.733174443244934, "logits/rejected": -2.9080235958099365, "logps/chosen": -171.85247802734375, "logps/rejected": -409.7164611816406, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.955378532409668, "rewards/margins": 6.682111740112305, "rewards/rejected": -12.637490272521973, "step": 13932 }, { "epoch": 2.17, "learning_rate": 3.9283074848280516e-06, "logits/chosen": -1.0257422924041748, "logits/rejected": -2.235496759414673, "logps/chosen": -102.09162902832031, "logps/rejected": -290.8108215332031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.386432647705078, "rewards/margins": 7.496450424194336, "rewards/rejected": -13.882883071899414, "step": 13933 }, { "epoch": 2.17, "learning_rate": 3.9275740442969034e-06, "logits/chosen": -1.5319340229034424, "logits/rejected": -3.0414369106292725, "logps/chosen": -185.58934020996094, "logps/rejected": -395.23828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.213170051574707, "rewards/margins": 7.995081901550293, "rewards/rejected": -13.208251953125, "step": 13934 }, { "epoch": 2.17, "learning_rate": 3.926840603765756e-06, "logits/chosen": -1.9729063510894775, "logits/rejected": -2.668959140777588, "logps/chosen": -497.8358154296875, "logps/rejected": -464.01361083984375, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -9.402565002441406, "rewards/margins": 5.887856960296631, "rewards/rejected": -15.290422439575195, "step": 13935 }, { "epoch": 2.17, "learning_rate": 3.926107163234608e-06, "logits/chosen": -1.6131752729415894, "logits/rejected": -2.7886874675750732, "logps/chosen": -79.03030395507812, "logps/rejected": -339.7523193359375, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -6.15380859375, "rewards/margins": 6.049517631530762, "rewards/rejected": -12.203327178955078, "step": 13936 }, { "epoch": 2.17, "learning_rate": 3.92537372270346e-06, "logits/chosen": -3.0284457206726074, "logits/rejected": -2.8914897441864014, "logps/chosen": -145.55517578125, "logps/rejected": -157.12826538085938, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.653090000152588, "rewards/margins": 8.11904525756836, "rewards/rejected": -11.772135734558105, "step": 13937 }, { "epoch": 2.17, "learning_rate": 3.924640282172312e-06, "logits/chosen": -3.0935585498809814, "logits/rejected": -3.1572561264038086, "logps/chosen": -466.8745422363281, "logps/rejected": -672.2098999023438, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.749830722808838, "rewards/margins": 8.252799987792969, "rewards/rejected": -12.002631187438965, "step": 13938 }, { "epoch": 2.17, "learning_rate": 3.923906841641164e-06, "logits/chosen": -2.4455113410949707, "logits/rejected": -2.6874046325683594, "logps/chosen": -176.4333953857422, "logps/rejected": -322.9122619628906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -3.6343700885772705, "rewards/margins": 10.461254119873047, "rewards/rejected": -14.095623016357422, "step": 13939 }, { "epoch": 2.17, "learning_rate": 3.923173401110016e-06, "logits/chosen": -2.8799147605895996, "logits/rejected": -2.222496509552002, "logps/chosen": -277.9179992675781, "logps/rejected": -227.20204162597656, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -7.433269500732422, "rewards/margins": 7.464479446411133, "rewards/rejected": -14.897748947143555, "step": 13940 }, { "epoch": 2.17, "learning_rate": 3.922439960578868e-06, "logits/chosen": -2.0689175128936768, "logits/rejected": -2.7335598468780518, "logps/chosen": -376.301025390625, "logps/rejected": -490.3126220703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.921821117401123, "rewards/margins": 10.78039264678955, "rewards/rejected": -16.702213287353516, "step": 13941 }, { "epoch": 2.17, "learning_rate": 3.92170652004772e-06, "logits/chosen": -2.7946524620056152, "logits/rejected": -2.993542432785034, "logps/chosen": -120.8643569946289, "logps/rejected": -328.452392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.090876340866089, "rewards/margins": 11.896383285522461, "rewards/rejected": -13.987258911132812, "step": 13942 }, { "epoch": 2.17, "learning_rate": 3.920973079516572e-06, "logits/chosen": -1.641669511795044, "logits/rejected": -2.3947415351867676, "logps/chosen": -605.9793701171875, "logps/rejected": -467.4411315917969, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -8.974250793457031, "rewards/margins": 8.005566596984863, "rewards/rejected": -16.979816436767578, "step": 13943 }, { "epoch": 2.17, "learning_rate": 3.920239638985425e-06, "logits/chosen": -1.8158050775527954, "logits/rejected": -2.4766340255737305, "logps/chosen": -173.26104736328125, "logps/rejected": -393.7852783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.568112373352051, "rewards/margins": 10.851036071777344, "rewards/rejected": -17.419147491455078, "step": 13944 }, { "epoch": 2.17, "learning_rate": 3.919506198454277e-06, "logits/chosen": -1.7560153007507324, "logits/rejected": -2.8780012130737305, "logps/chosen": -326.46722412109375, "logps/rejected": -832.4732666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.193193435668945, "rewards/margins": 12.584309577941895, "rewards/rejected": -19.777503967285156, "step": 13945 }, { "epoch": 2.17, "learning_rate": 3.9187727579231285e-06, "logits/chosen": -2.3846945762634277, "logits/rejected": -2.8494386672973633, "logps/chosen": -233.3976287841797, "logps/rejected": -388.6636657714844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.482919216156006, "rewards/margins": 9.138389587402344, "rewards/rejected": -15.621309280395508, "step": 13946 }, { "epoch": 2.17, "learning_rate": 3.91803931739198e-06, "logits/chosen": -1.3031690120697021, "logits/rejected": -2.413696765899658, "logps/chosen": -115.7134017944336, "logps/rejected": -410.6653137207031, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.401030540466309, "rewards/margins": 11.124499320983887, "rewards/rejected": -16.525529861450195, "step": 13947 }, { "epoch": 2.17, "learning_rate": 3.917305876860833e-06, "logits/chosen": -2.0206079483032227, "logits/rejected": -2.4900026321411133, "logps/chosen": -215.44107055664062, "logps/rejected": -299.91473388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.677565097808838, "rewards/margins": 10.113752365112305, "rewards/rejected": -13.791316986083984, "step": 13948 }, { "epoch": 2.17, "learning_rate": 3.916572436329686e-06, "logits/chosen": -2.57659649848938, "logits/rejected": -2.83335018157959, "logps/chosen": -95.90172576904297, "logps/rejected": -286.11834716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1713719367980957, "rewards/margins": 9.832112312316895, "rewards/rejected": -13.003483772277832, "step": 13949 }, { "epoch": 2.17, "learning_rate": 3.915838995798538e-06, "logits/chosen": -2.7023022174835205, "logits/rejected": -2.415727376937866, "logps/chosen": -290.99237060546875, "logps/rejected": -414.5643615722656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.115325927734375, "rewards/margins": 8.83210563659668, "rewards/rejected": -14.947431564331055, "step": 13950 }, { "epoch": 2.17, "learning_rate": 3.91510555526739e-06, "logits/chosen": -2.796811580657959, "logits/rejected": -2.435730457305908, "logps/chosen": -544.5462646484375, "logps/rejected": -390.4737243652344, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -9.271824836730957, "rewards/margins": 6.257946014404297, "rewards/rejected": -15.52977180480957, "step": 13951 }, { "epoch": 2.17, "learning_rate": 3.9143721147362415e-06, "logits/chosen": -0.9926833510398865, "logits/rejected": -2.6273353099823, "logps/chosen": -135.56419372558594, "logps/rejected": -261.08929443359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.922817230224609, "rewards/margins": 7.0866594314575195, "rewards/rejected": -13.009476661682129, "step": 13952 }, { "epoch": 2.17, "learning_rate": 3.913638674205094e-06, "logits/chosen": -2.6239173412323, "logits/rejected": -3.184080123901367, "logps/chosen": -85.4469985961914, "logps/rejected": -260.7763977050781, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.068672180175781, "rewards/margins": 7.285895347595215, "rewards/rejected": -13.354567527770996, "step": 13953 }, { "epoch": 2.17, "learning_rate": 3.912905233673946e-06, "logits/chosen": -2.9287662506103516, "logits/rejected": -2.5236973762512207, "logps/chosen": -375.3050537109375, "logps/rejected": -367.7389831542969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.865504741668701, "rewards/margins": 8.204668998718262, "rewards/rejected": -12.070173263549805, "step": 13954 }, { "epoch": 2.17, "learning_rate": 3.912171793142798e-06, "logits/chosen": -2.3373987674713135, "logits/rejected": -2.5984716415405273, "logps/chosen": -309.9001159667969, "logps/rejected": -339.81982421875, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -10.369440078735352, "rewards/margins": 5.950270652770996, "rewards/rejected": -16.319711685180664, "step": 13955 }, { "epoch": 2.17, "learning_rate": 3.91143835261165e-06, "logits/chosen": -1.8612605333328247, "logits/rejected": -2.640510082244873, "logps/chosen": -98.78852844238281, "logps/rejected": -251.1958770751953, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -6.613353729248047, "rewards/margins": 5.611087799072266, "rewards/rejected": -12.224441528320312, "step": 13956 }, { "epoch": 2.17, "learning_rate": 3.910704912080502e-06, "logits/chosen": -2.5164294242858887, "logits/rejected": -3.1702449321746826, "logps/chosen": -115.13050842285156, "logps/rejected": -427.2956237792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.942116737365723, "rewards/margins": 10.35879135131836, "rewards/rejected": -16.300907135009766, "step": 13957 }, { "epoch": 2.17, "learning_rate": 3.9099714715493544e-06, "logits/chosen": -2.27856183052063, "logits/rejected": -2.5950584411621094, "logps/chosen": -203.3585205078125, "logps/rejected": -336.055908203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.983753204345703, "rewards/margins": 9.058788299560547, "rewards/rejected": -15.04254150390625, "step": 13958 }, { "epoch": 2.17, "learning_rate": 3.909238031018206e-06, "logits/chosen": -0.8388583064079285, "logits/rejected": -2.10263729095459, "logps/chosen": -172.95132446289062, "logps/rejected": -576.9116821289062, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.500321865081787, "rewards/margins": 10.788424491882324, "rewards/rejected": -18.288745880126953, "step": 13959 }, { "epoch": 2.17, "learning_rate": 3.908504590487058e-06, "logits/chosen": -2.179154872894287, "logits/rejected": -2.900869369506836, "logps/chosen": -272.1064453125, "logps/rejected": -469.11639404296875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -3.8347530364990234, "rewards/margins": 9.377954483032227, "rewards/rejected": -13.21270751953125, "step": 13960 }, { "epoch": 2.17, "learning_rate": 3.90777114995591e-06, "logits/chosen": -2.447516679763794, "logits/rejected": -2.7687697410583496, "logps/chosen": -133.05699157714844, "logps/rejected": -279.65576171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.7178702354431152, "rewards/margins": 9.90093994140625, "rewards/rejected": -12.618810653686523, "step": 13961 }, { "epoch": 2.17, "learning_rate": 3.907037709424763e-06, "logits/chosen": -2.888697624206543, "logits/rejected": -2.8615269660949707, "logps/chosen": -394.6710205078125, "logps/rejected": -461.06378173828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.980532169342041, "rewards/margins": 8.829147338867188, "rewards/rejected": -13.80967903137207, "step": 13962 }, { "epoch": 2.17, "learning_rate": 3.906304268893615e-06, "logits/chosen": -1.8237978219985962, "logits/rejected": -2.957190990447998, "logps/chosen": -166.90200805664062, "logps/rejected": -463.72369384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.421695709228516, "rewards/margins": 9.985757827758789, "rewards/rejected": -15.407453536987305, "step": 13963 }, { "epoch": 2.17, "learning_rate": 3.9055708283624666e-06, "logits/chosen": -1.4434962272644043, "logits/rejected": -2.5192575454711914, "logps/chosen": -238.45999145507812, "logps/rejected": -666.85546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.085470676422119, "rewards/margins": 17.397418975830078, "rewards/rejected": -24.482891082763672, "step": 13964 }, { "epoch": 2.17, "learning_rate": 3.904837387831319e-06, "logits/chosen": -2.6657402515411377, "logits/rejected": -3.1041791439056396, "logps/chosen": -114.05657958984375, "logps/rejected": -412.10009765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.847440719604492, "rewards/margins": 9.081626892089844, "rewards/rejected": -16.929067611694336, "step": 13965 }, { "epoch": 2.17, "learning_rate": 3.904103947300171e-06, "logits/chosen": -1.6006759405136108, "logits/rejected": -2.4022152423858643, "logps/chosen": -323.61309814453125, "logps/rejected": -558.702392578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.074783325195312, "rewards/margins": 8.988901138305664, "rewards/rejected": -18.06368637084961, "step": 13966 }, { "epoch": 2.17, "learning_rate": 3.903370506769024e-06, "logits/chosen": -2.672334909439087, "logits/rejected": -2.0046639442443848, "logps/chosen": -589.6997680664062, "logps/rejected": -508.6366882324219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.428203582763672, "rewards/margins": 8.17494010925293, "rewards/rejected": -16.6031436920166, "step": 13967 }, { "epoch": 2.17, "learning_rate": 3.902637066237876e-06, "logits/chosen": -2.362459421157837, "logits/rejected": -2.5081987380981445, "logps/chosen": -195.16519165039062, "logps/rejected": -368.62872314453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.389300346374512, "rewards/margins": 9.90042495727539, "rewards/rejected": -18.28972625732422, "step": 13968 }, { "epoch": 2.17, "learning_rate": 3.901903625706728e-06, "logits/chosen": -1.9585328102111816, "logits/rejected": -3.0298993587493896, "logps/chosen": -175.389892578125, "logps/rejected": -384.6292724609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.217513084411621, "rewards/margins": 8.11242389678955, "rewards/rejected": -14.329936981201172, "step": 13969 }, { "epoch": 2.17, "learning_rate": 3.9011701851755795e-06, "logits/chosen": -2.287893056869507, "logits/rejected": -2.577683448791504, "logps/chosen": -333.978515625, "logps/rejected": -465.842041015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.235774993896484, "rewards/margins": 7.956316947937012, "rewards/rejected": -15.192091941833496, "step": 13970 }, { "epoch": 2.17, "learning_rate": 3.900436744644432e-06, "logits/chosen": -1.9847493171691895, "logits/rejected": -2.840252161026001, "logps/chosen": -194.46192932128906, "logps/rejected": -562.4617919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.935565948486328, "rewards/margins": 13.745365142822266, "rewards/rejected": -20.680931091308594, "step": 13971 }, { "epoch": 2.17, "learning_rate": 3.899703304113284e-06, "logits/chosen": -2.6272830963134766, "logits/rejected": -1.1809364557266235, "logps/chosen": -270.12841796875, "logps/rejected": -207.97801208496094, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -4.279370307922363, "rewards/margins": 5.165826797485352, "rewards/rejected": -9.445197105407715, "step": 13972 }, { "epoch": 2.17, "learning_rate": 3.898969863582136e-06, "logits/chosen": -2.2060492038726807, "logits/rejected": -2.6805286407470703, "logps/chosen": -415.676513671875, "logps/rejected": -466.13568115234375, "loss": 0.3243, "rewards/accuracies": 1.0, "rewards/chosen": -12.983176231384277, "rewards/margins": 2.6614770889282227, "rewards/rejected": -15.6446533203125, "step": 13973 }, { "epoch": 2.17, "learning_rate": 3.898236423050988e-06, "logits/chosen": -2.837183952331543, "logits/rejected": -3.1042752265930176, "logps/chosen": -272.9361267089844, "logps/rejected": -338.44970703125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -8.251167297363281, "rewards/margins": 6.3513569831848145, "rewards/rejected": -14.602523803710938, "step": 13974 }, { "epoch": 2.17, "learning_rate": 3.89750298251984e-06, "logits/chosen": -2.353257894515991, "logits/rejected": -2.9735965728759766, "logps/chosen": -138.23374938964844, "logps/rejected": -295.98516845703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.78128719329834, "rewards/margins": 8.859869003295898, "rewards/rejected": -14.641155242919922, "step": 13975 }, { "epoch": 2.17, "learning_rate": 3.8967695419886925e-06, "logits/chosen": -1.8637272119522095, "logits/rejected": -2.9771499633789062, "logps/chosen": -204.7347412109375, "logps/rejected": -525.0477294921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.111040115356445, "rewards/margins": 9.486629486083984, "rewards/rejected": -13.59766960144043, "step": 13976 }, { "epoch": 2.17, "learning_rate": 3.896036101457544e-06, "logits/chosen": -1.2546000480651855, "logits/rejected": -2.043663740158081, "logps/chosen": -169.77764892578125, "logps/rejected": -384.8434753417969, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -6.186098098754883, "rewards/margins": 9.223514556884766, "rewards/rejected": -15.409612655639648, "step": 13977 }, { "epoch": 2.17, "learning_rate": 3.895302660926396e-06, "logits/chosen": -2.6264572143554688, "logits/rejected": -2.217756986618042, "logps/chosen": -200.68490600585938, "logps/rejected": -314.2690734863281, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.704861164093018, "rewards/margins": 8.220556259155273, "rewards/rejected": -12.92541790008545, "step": 13978 }, { "epoch": 2.17, "learning_rate": 3.894569220395248e-06, "logits/chosen": -2.588226795196533, "logits/rejected": -2.364168643951416, "logps/chosen": -367.30224609375, "logps/rejected": -434.01873779296875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -5.783086776733398, "rewards/margins": 9.232633590698242, "rewards/rejected": -15.01572036743164, "step": 13979 }, { "epoch": 2.17, "learning_rate": 3.893835779864101e-06, "logits/chosen": -2.9002835750579834, "logits/rejected": -2.7274937629699707, "logps/chosen": -468.259521484375, "logps/rejected": -411.823974609375, "loss": 0.3615, "rewards/accuracies": 0.5, "rewards/chosen": -7.989398002624512, "rewards/margins": 3.1855850219726562, "rewards/rejected": -11.174983024597168, "step": 13980 }, { "epoch": 2.17, "learning_rate": 3.893102339332953e-06, "logits/chosen": -2.5252647399902344, "logits/rejected": -2.253760814666748, "logps/chosen": -122.38169860839844, "logps/rejected": -341.26885986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.857892036437988, "rewards/margins": 10.534141540527344, "rewards/rejected": -16.392032623291016, "step": 13981 }, { "epoch": 2.17, "learning_rate": 3.8923688988018054e-06, "logits/chosen": -1.8731311559677124, "logits/rejected": -2.5830836296081543, "logps/chosen": -167.7864532470703, "logps/rejected": -481.95135498046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.141523838043213, "rewards/margins": 9.094797134399414, "rewards/rejected": -15.236321449279785, "step": 13982 }, { "epoch": 2.17, "learning_rate": 3.891635458270657e-06, "logits/chosen": -2.5863137245178223, "logits/rejected": -2.879636764526367, "logps/chosen": -274.47698974609375, "logps/rejected": -341.6643981933594, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.514511585235596, "rewards/margins": 7.510582447052002, "rewards/rejected": -13.025094032287598, "step": 13983 }, { "epoch": 2.17, "learning_rate": 3.89090201773951e-06, "logits/chosen": -1.6652339696884155, "logits/rejected": -2.8844528198242188, "logps/chosen": -180.93099975585938, "logps/rejected": -705.8240356445312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.3911895751953125, "rewards/margins": 8.904529571533203, "rewards/rejected": -16.295719146728516, "step": 13984 }, { "epoch": 2.17, "learning_rate": 3.890168577208362e-06, "logits/chosen": -1.5319983959197998, "logits/rejected": -2.819793224334717, "logps/chosen": -115.39334106445312, "logps/rejected": -417.124755859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.760149955749512, "rewards/margins": 9.677021980285645, "rewards/rejected": -14.437171936035156, "step": 13985 }, { "epoch": 2.18, "learning_rate": 3.889435136677214e-06, "logits/chosen": -2.5135059356689453, "logits/rejected": -2.986078977584839, "logps/chosen": -359.6385192871094, "logps/rejected": -589.79345703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.05271053314209, "rewards/margins": 9.577353477478027, "rewards/rejected": -14.630064010620117, "step": 13986 }, { "epoch": 2.18, "learning_rate": 3.888701696146066e-06, "logits/chosen": -2.5104172229766846, "logits/rejected": -2.630704164505005, "logps/chosen": -485.62615966796875, "logps/rejected": -402.7576904296875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.810445785522461, "rewards/margins": 8.975674629211426, "rewards/rejected": -18.786121368408203, "step": 13987 }, { "epoch": 2.18, "learning_rate": 3.8879682556149176e-06, "logits/chosen": -2.8370227813720703, "logits/rejected": -2.216853380203247, "logps/chosen": -784.9298706054688, "logps/rejected": -706.23876953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.018705368041992, "rewards/margins": 11.725774765014648, "rewards/rejected": -20.74448013305664, "step": 13988 }, { "epoch": 2.18, "learning_rate": 3.88723481508377e-06, "logits/chosen": -2.732948064804077, "logits/rejected": -2.8138959407806396, "logps/chosen": -123.9085693359375, "logps/rejected": -309.7765808105469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.877462387084961, "rewards/margins": 11.453045845031738, "rewards/rejected": -17.330509185791016, "step": 13989 }, { "epoch": 2.18, "learning_rate": 3.886501374552622e-06, "logits/chosen": -2.061837673187256, "logits/rejected": -2.946241855621338, "logps/chosen": -176.56903076171875, "logps/rejected": -382.7369079589844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.510399341583252, "rewards/margins": 8.222071647644043, "rewards/rejected": -15.732471466064453, "step": 13990 }, { "epoch": 2.18, "learning_rate": 3.885767934021474e-06, "logits/chosen": -1.5945515632629395, "logits/rejected": -2.8492038249969482, "logps/chosen": -132.42987060546875, "logps/rejected": -396.4787292480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.260244846343994, "rewards/margins": 12.821627616882324, "rewards/rejected": -18.081872940063477, "step": 13991 }, { "epoch": 2.18, "learning_rate": 3.885034493490326e-06, "logits/chosen": -3.205566644668579, "logits/rejected": -2.462236166000366, "logps/chosen": -445.4878234863281, "logps/rejected": -441.69244384765625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -6.204785346984863, "rewards/margins": 7.023046016693115, "rewards/rejected": -13.22783088684082, "step": 13992 }, { "epoch": 2.18, "learning_rate": 3.884301052959179e-06, "logits/chosen": -2.7412400245666504, "logits/rejected": -3.0501067638397217, "logps/chosen": -134.75355529785156, "logps/rejected": -283.06878662109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.5129899978637695, "rewards/margins": 8.11131763458252, "rewards/rejected": -13.624307632446289, "step": 13993 }, { "epoch": 2.18, "learning_rate": 3.8835676124280305e-06, "logits/chosen": -2.0143606662750244, "logits/rejected": -2.7651758193969727, "logps/chosen": -194.03399658203125, "logps/rejected": -347.17822265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.040724277496338, "rewards/margins": 7.726354122161865, "rewards/rejected": -14.767078399658203, "step": 13994 }, { "epoch": 2.18, "learning_rate": 3.882834171896882e-06, "logits/chosen": -2.4760634899139404, "logits/rejected": -2.6373507976531982, "logps/chosen": -450.8157653808594, "logps/rejected": -763.5731201171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.067570209503174, "rewards/margins": 7.970102310180664, "rewards/rejected": -14.03767204284668, "step": 13995 }, { "epoch": 2.18, "learning_rate": 3.882100731365734e-06, "logits/chosen": -1.8763186931610107, "logits/rejected": -2.610661745071411, "logps/chosen": -237.16098022460938, "logps/rejected": -312.1591796875, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -6.7034149169921875, "rewards/margins": 7.532169818878174, "rewards/rejected": -14.23558521270752, "step": 13996 }, { "epoch": 2.18, "learning_rate": 3.881367290834586e-06, "logits/chosen": -2.490290880203247, "logits/rejected": -2.231466054916382, "logps/chosen": -151.41644287109375, "logps/rejected": -229.30645751953125, "loss": 0.3434, "rewards/accuracies": 1.0, "rewards/chosen": -10.079865455627441, "rewards/margins": 3.571345329284668, "rewards/rejected": -13.65121078491211, "step": 13997 }, { "epoch": 2.18, "learning_rate": 3.880633850303439e-06, "logits/chosen": -2.4220166206359863, "logits/rejected": -2.3782243728637695, "logps/chosen": -142.9626922607422, "logps/rejected": -272.2925720214844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -10.555898666381836, "rewards/margins": 7.5898356437683105, "rewards/rejected": -18.145734786987305, "step": 13998 }, { "epoch": 2.18, "learning_rate": 3.879900409772292e-06, "logits/chosen": -1.5498409271240234, "logits/rejected": -2.622967004776001, "logps/chosen": -234.33187866210938, "logps/rejected": -458.9405822753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.664906978607178, "rewards/margins": 11.037805557250977, "rewards/rejected": -15.702712059020996, "step": 13999 }, { "epoch": 2.18, "learning_rate": 3.8791669692411435e-06, "logits/chosen": -1.9208828210830688, "logits/rejected": -2.6165270805358887, "logps/chosen": -69.52999877929688, "logps/rejected": -206.30545043945312, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -5.654012680053711, "rewards/margins": 7.46363639831543, "rewards/rejected": -13.11764907836914, "step": 14000 }, { "epoch": 2.18, "learning_rate": 3.878433528709995e-06, "logits/chosen": -1.6774977445602417, "logits/rejected": -2.6607627868652344, "logps/chosen": -86.85639953613281, "logps/rejected": -315.6868591308594, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -6.934041976928711, "rewards/margins": 7.514070510864258, "rewards/rejected": -14.448112487792969, "step": 14001 }, { "epoch": 2.18, "learning_rate": 3.877700088178848e-06, "logits/chosen": -2.6038970947265625, "logits/rejected": -2.0560126304626465, "logps/chosen": -361.8058166503906, "logps/rejected": -286.986083984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.1611008644104, "rewards/margins": 8.47038459777832, "rewards/rejected": -14.631484985351562, "step": 14002 }, { "epoch": 2.18, "learning_rate": 3.8769666476477e-06, "logits/chosen": -1.5156538486480713, "logits/rejected": -2.7347068786621094, "logps/chosen": -224.4980010986328, "logps/rejected": -526.8992309570312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3021774291992188, "rewards/margins": 10.101585388183594, "rewards/rejected": -13.403762817382812, "step": 14003 }, { "epoch": 2.18, "learning_rate": 3.876233207116552e-06, "logits/chosen": -2.7718985080718994, "logits/rejected": -2.7834274768829346, "logps/chosen": -290.04833984375, "logps/rejected": -343.95611572265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.825492858886719, "rewards/margins": 6.904783248901367, "rewards/rejected": -12.730276107788086, "step": 14004 }, { "epoch": 2.18, "learning_rate": 3.875499766585404e-06, "logits/chosen": -2.772951364517212, "logits/rejected": -1.4585037231445312, "logps/chosen": -376.853759765625, "logps/rejected": -405.36663818359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.504510879516602, "rewards/margins": 7.607257843017578, "rewards/rejected": -13.11176872253418, "step": 14005 }, { "epoch": 2.18, "learning_rate": 3.874766326054256e-06, "logits/chosen": -1.842633605003357, "logits/rejected": -1.7921046018600464, "logps/chosen": -331.62152099609375, "logps/rejected": -278.6872863769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.305301666259766, "rewards/margins": 10.321853637695312, "rewards/rejected": -15.627155303955078, "step": 14006 }, { "epoch": 2.18, "learning_rate": 3.874032885523108e-06, "logits/chosen": -2.6507997512817383, "logits/rejected": -2.527721643447876, "logps/chosen": -184.9481964111328, "logps/rejected": -439.25689697265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.061287879943848, "rewards/margins": 10.190018653869629, "rewards/rejected": -19.251306533813477, "step": 14007 }, { "epoch": 2.18, "learning_rate": 3.87329944499196e-06, "logits/chosen": -2.76448130607605, "logits/rejected": -2.7511281967163086, "logps/chosen": -173.89483642578125, "logps/rejected": -222.07595825195312, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -4.212127685546875, "rewards/margins": 7.699093818664551, "rewards/rejected": -11.911221504211426, "step": 14008 }, { "epoch": 2.18, "learning_rate": 3.872566004460812e-06, "logits/chosen": -2.040013074874878, "logits/rejected": -2.610924243927002, "logps/chosen": -105.87610626220703, "logps/rejected": -298.9236755371094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.0790019035339355, "rewards/margins": 9.930388450622559, "rewards/rejected": -15.009389877319336, "step": 14009 }, { "epoch": 2.18, "learning_rate": 3.871832563929664e-06, "logits/chosen": -1.8397170305252075, "logits/rejected": -2.655118227005005, "logps/chosen": -171.336669921875, "logps/rejected": -389.3489074707031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.5913777351379395, "rewards/margins": 11.141227722167969, "rewards/rejected": -16.73260498046875, "step": 14010 }, { "epoch": 2.18, "learning_rate": 3.871099123398517e-06, "logits/chosen": -3.014326810836792, "logits/rejected": -2.4919979572296143, "logps/chosen": -609.166748046875, "logps/rejected": -441.42474365234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -8.916169166564941, "rewards/margins": 11.096418380737305, "rewards/rejected": -20.012588500976562, "step": 14011 }, { "epoch": 2.18, "learning_rate": 3.8703656828673686e-06, "logits/chosen": -2.575003147125244, "logits/rejected": -2.3741703033447266, "logps/chosen": -334.6270751953125, "logps/rejected": -349.7021179199219, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.694436073303223, "rewards/margins": 7.291499137878418, "rewards/rejected": -13.98593521118164, "step": 14012 }, { "epoch": 2.18, "learning_rate": 3.8696322423362205e-06, "logits/chosen": -2.9588773250579834, "logits/rejected": -2.960869312286377, "logps/chosen": -300.662841796875, "logps/rejected": -343.2268371582031, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.068484306335449, "rewards/margins": 7.276664733886719, "rewards/rejected": -13.345149040222168, "step": 14013 }, { "epoch": 2.18, "learning_rate": 3.868898801805072e-06, "logits/chosen": -2.361936569213867, "logits/rejected": -1.8108291625976562, "logps/chosen": -189.09671020507812, "logps/rejected": -282.29632568359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.595019340515137, "rewards/margins": 11.26725959777832, "rewards/rejected": -15.862278938293457, "step": 14014 }, { "epoch": 2.18, "learning_rate": 3.868165361273925e-06, "logits/chosen": -2.4333691596984863, "logits/rejected": -2.832387924194336, "logps/chosen": -234.11843872070312, "logps/rejected": -358.3613586425781, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -7.765023231506348, "rewards/margins": 6.527370452880859, "rewards/rejected": -14.292393684387207, "step": 14015 }, { "epoch": 2.18, "learning_rate": 3.867431920742778e-06, "logits/chosen": -1.3945118188858032, "logits/rejected": -2.5407581329345703, "logps/chosen": -120.38618469238281, "logps/rejected": -334.90771484375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.081547737121582, "rewards/margins": 8.74018383026123, "rewards/rejected": -14.821731567382812, "step": 14016 }, { "epoch": 2.18, "learning_rate": 3.86669848021163e-06, "logits/chosen": -2.683398962020874, "logits/rejected": -2.606440782546997, "logps/chosen": -542.5950317382812, "logps/rejected": -545.9397583007812, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -8.584656715393066, "rewards/margins": 6.328986167907715, "rewards/rejected": -14.913642883300781, "step": 14017 }, { "epoch": 2.18, "learning_rate": 3.8659650396804815e-06, "logits/chosen": -1.6474348306655884, "logits/rejected": -2.2209908962249756, "logps/chosen": -184.5089874267578, "logps/rejected": -520.5654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.67408561706543, "rewards/margins": 9.881513595581055, "rewards/rejected": -17.555599212646484, "step": 14018 }, { "epoch": 2.18, "learning_rate": 3.865231599149333e-06, "logits/chosen": -1.742150902748108, "logits/rejected": -2.7852602005004883, "logps/chosen": -141.57766723632812, "logps/rejected": -644.9046630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.137155532836914, "rewards/margins": 10.776025772094727, "rewards/rejected": -17.91318130493164, "step": 14019 }, { "epoch": 2.18, "learning_rate": 3.864498158618186e-06, "logits/chosen": -2.633620023727417, "logits/rejected": -1.8514426946640015, "logps/chosen": -372.44854736328125, "logps/rejected": -392.70208740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.344770908355713, "rewards/margins": 11.709517478942871, "rewards/rejected": -16.054288864135742, "step": 14020 }, { "epoch": 2.18, "learning_rate": 3.863764718087038e-06, "logits/chosen": -1.9497171640396118, "logits/rejected": -3.1019904613494873, "logps/chosen": -139.67913818359375, "logps/rejected": -481.24359130859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.455650329589844, "rewards/margins": 8.577109336853027, "rewards/rejected": -14.032760620117188, "step": 14021 }, { "epoch": 2.18, "learning_rate": 3.86303127755589e-06, "logits/chosen": -2.716210126876831, "logits/rejected": -2.5633862018585205, "logps/chosen": -282.55517578125, "logps/rejected": -236.57017517089844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.668927192687988, "rewards/margins": 9.29520034790039, "rewards/rejected": -15.964128494262695, "step": 14022 }, { "epoch": 2.18, "learning_rate": 3.862297837024742e-06, "logits/chosen": -2.8686485290527344, "logits/rejected": -2.6517107486724854, "logps/chosen": -291.70465087890625, "logps/rejected": -265.5593566894531, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.2573442459106445, "rewards/margins": 8.019119262695312, "rewards/rejected": -15.27646255493164, "step": 14023 }, { "epoch": 2.18, "learning_rate": 3.861564396493594e-06, "logits/chosen": -2.6327927112579346, "logits/rejected": -2.9462482929229736, "logps/chosen": -306.2908935546875, "logps/rejected": -469.6888122558594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -9.749292373657227, "rewards/margins": 8.521705627441406, "rewards/rejected": -18.27099609375, "step": 14024 }, { "epoch": 2.18, "learning_rate": 3.860830955962446e-06, "logits/chosen": -1.9365856647491455, "logits/rejected": -2.870689868927002, "logps/chosen": -149.9142608642578, "logps/rejected": -392.33636474609375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -7.500097751617432, "rewards/margins": 8.58359146118164, "rewards/rejected": -16.083690643310547, "step": 14025 }, { "epoch": 2.18, "learning_rate": 3.860097515431298e-06, "logits/chosen": -1.740788459777832, "logits/rejected": -2.5908305644989014, "logps/chosen": -193.08224487304688, "logps/rejected": -451.83880615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.310835838317871, "rewards/margins": 9.768547058105469, "rewards/rejected": -16.079383850097656, "step": 14026 }, { "epoch": 2.18, "learning_rate": 3.85936407490015e-06, "logits/chosen": -3.008587598800659, "logits/rejected": -2.6053380966186523, "logps/chosen": -463.770751953125, "logps/rejected": -493.0076599121094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.9102540016174316, "rewards/margins": 7.155804634094238, "rewards/rejected": -11.066059112548828, "step": 14027 }, { "epoch": 2.18, "learning_rate": 3.858630634369002e-06, "logits/chosen": -2.941718101501465, "logits/rejected": -2.8396990299224854, "logps/chosen": -257.10986328125, "logps/rejected": -220.86410522460938, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -8.394394874572754, "rewards/margins": 5.376106262207031, "rewards/rejected": -13.770501136779785, "step": 14028 }, { "epoch": 2.18, "learning_rate": 3.857897193837855e-06, "logits/chosen": -2.540013313293457, "logits/rejected": -2.607180595397949, "logps/chosen": -822.5689697265625, "logps/rejected": -757.1473999023438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.886168479919434, "rewards/margins": 10.01785945892334, "rewards/rejected": -17.904027938842773, "step": 14029 }, { "epoch": 2.18, "learning_rate": 3.857163753306707e-06, "logits/chosen": -2.4742066860198975, "logits/rejected": -2.38673996925354, "logps/chosen": -300.5977783203125, "logps/rejected": -327.35888671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.404523849487305, "rewards/margins": 6.951848030090332, "rewards/rejected": -12.356371879577637, "step": 14030 }, { "epoch": 2.18, "learning_rate": 3.8564303127755585e-06, "logits/chosen": -2.545644760131836, "logits/rejected": -2.696787118911743, "logps/chosen": -404.11102294921875, "logps/rejected": -608.065673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8913168907165527, "rewards/margins": 11.722940444946289, "rewards/rejected": -15.6142578125, "step": 14031 }, { "epoch": 2.18, "learning_rate": 3.855696872244411e-06, "logits/chosen": -2.63092303276062, "logits/rejected": -2.802570104598999, "logps/chosen": -270.1752624511719, "logps/rejected": -411.629150390625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -11.266250610351562, "rewards/margins": 6.490386962890625, "rewards/rejected": -17.756637573242188, "step": 14032 }, { "epoch": 2.18, "learning_rate": 3.854963431713264e-06, "logits/chosen": -1.8889983892440796, "logits/rejected": -2.8169314861297607, "logps/chosen": -397.41815185546875, "logps/rejected": -555.2684326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.824751377105713, "rewards/margins": 12.092573165893555, "rewards/rejected": -19.91732406616211, "step": 14033 }, { "epoch": 2.18, "learning_rate": 3.854229991182116e-06, "logits/chosen": -1.7603849172592163, "logits/rejected": -2.681387424468994, "logps/chosen": -227.0370635986328, "logps/rejected": -387.60107421875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.846277236938477, "rewards/margins": 8.622467041015625, "rewards/rejected": -17.468746185302734, "step": 14034 }, { "epoch": 2.18, "learning_rate": 3.853496550650968e-06, "logits/chosen": -2.78180193901062, "logits/rejected": -2.2154712677001953, "logps/chosen": -175.36788940429688, "logps/rejected": -440.41217041015625, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -6.141970634460449, "rewards/margins": 8.227333068847656, "rewards/rejected": -14.369302749633789, "step": 14035 }, { "epoch": 2.18, "learning_rate": 3.85276311011982e-06, "logits/chosen": -2.945784091949463, "logits/rejected": -1.8634827136993408, "logps/chosen": -734.3585815429688, "logps/rejected": -500.0980529785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.490423679351807, "rewards/margins": 10.400971412658691, "rewards/rejected": -14.891395568847656, "step": 14036 }, { "epoch": 2.18, "learning_rate": 3.8520296695886715e-06, "logits/chosen": -1.835788607597351, "logits/rejected": -2.8538951873779297, "logps/chosen": -466.00128173828125, "logps/rejected": -788.2725830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.372007369995117, "rewards/margins": 15.17369270324707, "rewards/rejected": -19.545700073242188, "step": 14037 }, { "epoch": 2.18, "learning_rate": 3.851296229057524e-06, "logits/chosen": -1.5289604663848877, "logits/rejected": -2.6332240104675293, "logps/chosen": -149.70855712890625, "logps/rejected": -345.61151123046875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.023116111755371, "rewards/margins": 8.905858993530273, "rewards/rejected": -13.928975105285645, "step": 14038 }, { "epoch": 2.18, "learning_rate": 3.850562788526376e-06, "logits/chosen": -1.847392201423645, "logits/rejected": -2.8143157958984375, "logps/chosen": -281.7725830078125, "logps/rejected": -514.5990600585938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.990449905395508, "rewards/margins": 10.335488319396973, "rewards/rejected": -14.325939178466797, "step": 14039 }, { "epoch": 2.18, "learning_rate": 3.849829347995228e-06, "logits/chosen": -2.2826240062713623, "logits/rejected": -3.0209531784057617, "logps/chosen": -110.27462005615234, "logps/rejected": -343.85693359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.514729022979736, "rewards/margins": 9.252233505249023, "rewards/rejected": -15.766962051391602, "step": 14040 }, { "epoch": 2.18, "learning_rate": 3.84909590746408e-06, "logits/chosen": -2.242023468017578, "logits/rejected": -2.7431280612945557, "logps/chosen": -96.75958251953125, "logps/rejected": -278.59613037109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.700264930725098, "rewards/margins": 7.948246002197266, "rewards/rejected": -14.648510932922363, "step": 14041 }, { "epoch": 2.18, "learning_rate": 3.8483624669329325e-06, "logits/chosen": -2.4202988147735596, "logits/rejected": -2.635197639465332, "logps/chosen": -380.8375244140625, "logps/rejected": -503.7574157714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.565971374511719, "rewards/margins": 12.691545486450195, "rewards/rejected": -19.257518768310547, "step": 14042 }, { "epoch": 2.18, "learning_rate": 3.8476290264017844e-06, "logits/chosen": -2.7591452598571777, "logits/rejected": -2.357025146484375, "logps/chosen": -371.4783020019531, "logps/rejected": -328.38885498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.834260940551758, "rewards/margins": 11.949714660644531, "rewards/rejected": -17.78397560119629, "step": 14043 }, { "epoch": 2.18, "learning_rate": 3.846895585870636e-06, "logits/chosen": -2.2276604175567627, "logits/rejected": -3.0061984062194824, "logps/chosen": -231.391357421875, "logps/rejected": -288.744140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.74748420715332, "rewards/margins": 6.884146690368652, "rewards/rejected": -13.631629943847656, "step": 14044 }, { "epoch": 2.18, "learning_rate": 3.846162145339488e-06, "logits/chosen": -1.296449899673462, "logits/rejected": -2.534398078918457, "logps/chosen": -161.70492553710938, "logps/rejected": -513.6536865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.895726203918457, "rewards/margins": 11.832759857177734, "rewards/rejected": -18.728485107421875, "step": 14045 }, { "epoch": 2.18, "learning_rate": 3.84542870480834e-06, "logits/chosen": -3.050389051437378, "logits/rejected": -3.1292319297790527, "logps/chosen": -436.2369079589844, "logps/rejected": -462.8125305175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.107184410095215, "rewards/margins": 9.697726249694824, "rewards/rejected": -14.804910659790039, "step": 14046 }, { "epoch": 2.18, "learning_rate": 3.844695264277193e-06, "logits/chosen": -2.541335105895996, "logits/rejected": -2.9248242378234863, "logps/chosen": -167.84222412109375, "logps/rejected": -373.22344970703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.549084186553955, "rewards/margins": 9.241789817810059, "rewards/rejected": -13.790874481201172, "step": 14047 }, { "epoch": 2.18, "learning_rate": 3.843961823746045e-06, "logits/chosen": -2.967071533203125, "logits/rejected": -2.614488363265991, "logps/chosen": -283.0433654785156, "logps/rejected": -452.939208984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.589829921722412, "rewards/margins": 11.877307891845703, "rewards/rejected": -19.467138290405273, "step": 14048 }, { "epoch": 2.18, "learning_rate": 3.843228383214897e-06, "logits/chosen": -2.923250913619995, "logits/rejected": -2.8024394512176514, "logps/chosen": -286.3144836425781, "logps/rejected": -421.02105712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.212368965148926, "rewards/margins": 12.579395294189453, "rewards/rejected": -18.791763305664062, "step": 14049 }, { "epoch": 2.19, "learning_rate": 3.842494942683749e-06, "logits/chosen": -1.7133265733718872, "logits/rejected": -2.6514782905578613, "logps/chosen": -183.2873992919922, "logps/rejected": -292.7115173339844, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -7.600098133087158, "rewards/margins": 4.663325786590576, "rewards/rejected": -12.263423919677734, "step": 14050 }, { "epoch": 2.19, "learning_rate": 3.841761502152602e-06, "logits/chosen": -2.988523006439209, "logits/rejected": -2.1895599365234375, "logps/chosen": -814.1199951171875, "logps/rejected": -466.9189147949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.326653957366943, "rewards/margins": 10.758648872375488, "rewards/rejected": -15.085302352905273, "step": 14051 }, { "epoch": 2.19, "learning_rate": 3.841028061621454e-06, "logits/chosen": -2.620391845703125, "logits/rejected": -2.6533074378967285, "logps/chosen": -226.31539916992188, "logps/rejected": -331.8047790527344, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.4943413734436035, "rewards/margins": 8.979202270507812, "rewards/rejected": -14.473543167114258, "step": 14052 }, { "epoch": 2.19, "learning_rate": 3.840294621090306e-06, "logits/chosen": -1.8045755624771118, "logits/rejected": -2.7150795459747314, "logps/chosen": -191.38351440429688, "logps/rejected": -395.4886474609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.780553817749023, "rewards/margins": 8.0114107131958, "rewards/rejected": -16.79196548461914, "step": 14053 }, { "epoch": 2.19, "learning_rate": 3.839561180559158e-06, "logits/chosen": -0.7631903290748596, "logits/rejected": -1.8473721742630005, "logps/chosen": -221.26614379882812, "logps/rejected": -556.4993896484375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.699231147766113, "rewards/margins": 13.868054389953613, "rewards/rejected": -19.567285537719727, "step": 14054 }, { "epoch": 2.19, "learning_rate": 3.8388277400280095e-06, "logits/chosen": -2.833228588104248, "logits/rejected": -2.8596715927124023, "logps/chosen": -310.2438049316406, "logps/rejected": -356.45635986328125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.692237854003906, "rewards/margins": 9.706415176391602, "rewards/rejected": -18.398653030395508, "step": 14055 }, { "epoch": 2.19, "learning_rate": 3.838094299496862e-06, "logits/chosen": -1.49274742603302, "logits/rejected": -2.509087324142456, "logps/chosen": -166.21316528320312, "logps/rejected": -323.07281494140625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -10.872751235961914, "rewards/margins": 5.664464473724365, "rewards/rejected": -16.537216186523438, "step": 14056 }, { "epoch": 2.19, "learning_rate": 3.837360858965714e-06, "logits/chosen": -2.8873181343078613, "logits/rejected": -2.655097007751465, "logps/chosen": -303.41290283203125, "logps/rejected": -251.97634887695312, "loss": 0.1161, "rewards/accuracies": 1.0, "rewards/chosen": -6.283884048461914, "rewards/margins": 6.3725056648254395, "rewards/rejected": -12.656389236450195, "step": 14057 }, { "epoch": 2.19, "learning_rate": 3.836627418434566e-06, "logits/chosen": -0.8356992602348328, "logits/rejected": -2.65248966217041, "logps/chosen": -107.00517272949219, "logps/rejected": -516.28369140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.879089832305908, "rewards/margins": 12.360841751098633, "rewards/rejected": -19.239933013916016, "step": 14058 }, { "epoch": 2.19, "learning_rate": 3.835893977903418e-06, "logits/chosen": -2.785294771194458, "logits/rejected": -2.851388692855835, "logps/chosen": -173.3038330078125, "logps/rejected": -249.12879943847656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.3514556884765625, "rewards/margins": 9.743256568908691, "rewards/rejected": -14.094711303710938, "step": 14059 }, { "epoch": 2.19, "learning_rate": 3.835160537372271e-06, "logits/chosen": -2.644918918609619, "logits/rejected": -1.2159711122512817, "logps/chosen": -232.22528076171875, "logps/rejected": -141.34622192382812, "loss": 0.1524, "rewards/accuracies": 1.0, "rewards/chosen": -7.499513626098633, "rewards/margins": 3.6081302165985107, "rewards/rejected": -11.107643127441406, "step": 14060 }, { "epoch": 2.19, "learning_rate": 3.8344270968411225e-06, "logits/chosen": -2.1466064453125, "logits/rejected": -2.3493685722351074, "logps/chosen": -242.2139434814453, "logps/rejected": -287.8626708984375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -3.349609613418579, "rewards/margins": 7.127110481262207, "rewards/rejected": -10.476719856262207, "step": 14061 }, { "epoch": 2.19, "learning_rate": 3.833693656309974e-06, "logits/chosen": -1.7601791620254517, "logits/rejected": -2.3414626121520996, "logps/chosen": -180.26548767089844, "logps/rejected": -397.11187744140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.146663665771484, "rewards/margins": 10.457406997680664, "rewards/rejected": -16.60407257080078, "step": 14062 }, { "epoch": 2.19, "learning_rate": 3.832960215778826e-06, "logits/chosen": -0.6746280789375305, "logits/rejected": -2.503235340118408, "logps/chosen": -126.50326538085938, "logps/rejected": -423.93817138671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.235245227813721, "rewards/margins": 8.871477127075195, "rewards/rejected": -16.10672378540039, "step": 14063 }, { "epoch": 2.19, "learning_rate": 3.832226775247678e-06, "logits/chosen": -2.7041258811950684, "logits/rejected": -2.7672739028930664, "logps/chosen": -330.3316650390625, "logps/rejected": -348.15264892578125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -8.484267234802246, "rewards/margins": 6.296479225158691, "rewards/rejected": -14.780746459960938, "step": 14064 }, { "epoch": 2.19, "learning_rate": 3.831493334716531e-06, "logits/chosen": -2.119678258895874, "logits/rejected": -2.7563412189483643, "logps/chosen": -298.88323974609375, "logps/rejected": -557.8772583007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.709105014801025, "rewards/margins": 10.92073917388916, "rewards/rejected": -16.629844665527344, "step": 14065 }, { "epoch": 2.19, "learning_rate": 3.8307598941853836e-06, "logits/chosen": -2.5885486602783203, "logits/rejected": -2.0184922218322754, "logps/chosen": -160.09637451171875, "logps/rejected": -189.61004638671875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.683307647705078, "rewards/margins": 7.7260637283325195, "rewards/rejected": -13.409370422363281, "step": 14066 }, { "epoch": 2.19, "learning_rate": 3.8300264536542354e-06, "logits/chosen": -1.7769376039505005, "logits/rejected": -2.9150643348693848, "logps/chosen": -169.98876953125, "logps/rejected": -451.7541809082031, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -8.039108276367188, "rewards/margins": 7.51666259765625, "rewards/rejected": -15.555770874023438, "step": 14067 }, { "epoch": 2.19, "learning_rate": 3.829293013123087e-06, "logits/chosen": -1.719020128250122, "logits/rejected": -2.7211122512817383, "logps/chosen": -326.6494140625, "logps/rejected": -517.0518798828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.353250503540039, "rewards/margins": 7.959485054016113, "rewards/rejected": -15.312734603881836, "step": 14068 }, { "epoch": 2.19, "learning_rate": 3.82855957259194e-06, "logits/chosen": -3.089913845062256, "logits/rejected": -2.8130223751068115, "logps/chosen": -115.31363677978516, "logps/rejected": -209.32965087890625, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": -6.019689559936523, "rewards/margins": 8.616796493530273, "rewards/rejected": -14.636486053466797, "step": 14069 }, { "epoch": 2.19, "learning_rate": 3.827826132060792e-06, "logits/chosen": -2.579986810684204, "logits/rejected": -2.6944549083709717, "logps/chosen": -99.10113525390625, "logps/rejected": -247.6643829345703, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.024961471557617, "rewards/margins": 10.249794006347656, "rewards/rejected": -15.274755477905273, "step": 14070 }, { "epoch": 2.19, "learning_rate": 3.827092691529644e-06, "logits/chosen": -1.5870996713638306, "logits/rejected": -2.4607362747192383, "logps/chosen": -348.14141845703125, "logps/rejected": -689.0612182617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.357934951782227, "rewards/margins": 14.16045093536377, "rewards/rejected": -22.518386840820312, "step": 14071 }, { "epoch": 2.19, "learning_rate": 3.826359250998496e-06, "logits/chosen": -1.9303237199783325, "logits/rejected": -2.8455631732940674, "logps/chosen": -134.29666137695312, "logps/rejected": -411.5310363769531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.214724540710449, "rewards/margins": 10.456745147705078, "rewards/rejected": -16.671470642089844, "step": 14072 }, { "epoch": 2.19, "learning_rate": 3.825625810467348e-06, "logits/chosen": -2.990712881088257, "logits/rejected": -2.791558265686035, "logps/chosen": -420.61578369140625, "logps/rejected": -412.23260498046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.796604156494141, "rewards/margins": 8.76077651977539, "rewards/rejected": -15.557380676269531, "step": 14073 }, { "epoch": 2.19, "learning_rate": 3.8248923699362e-06, "logits/chosen": -3.134326934814453, "logits/rejected": -3.0624911785125732, "logps/chosen": -318.1412048339844, "logps/rejected": -421.09686279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.630733489990234, "rewards/margins": 15.395133972167969, "rewards/rejected": -24.025867462158203, "step": 14074 }, { "epoch": 2.19, "learning_rate": 3.824158929405052e-06, "logits/chosen": -2.4169464111328125, "logits/rejected": -2.6963961124420166, "logps/chosen": -72.1312484741211, "logps/rejected": -321.11956787109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.056089401245117, "rewards/margins": 9.714921951293945, "rewards/rejected": -14.771011352539062, "step": 14075 }, { "epoch": 2.19, "learning_rate": 3.823425488873904e-06, "logits/chosen": -1.7397898435592651, "logits/rejected": -2.942478656768799, "logps/chosen": -154.34033203125, "logps/rejected": -371.70831298828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.977744102478027, "rewards/margins": 8.80229377746582, "rewards/rejected": -13.780036926269531, "step": 14076 }, { "epoch": 2.19, "learning_rate": 3.822692048342756e-06, "logits/chosen": -1.1265350580215454, "logits/rejected": -2.7219996452331543, "logps/chosen": -130.4022216796875, "logps/rejected": -372.6273193359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.306687355041504, "rewards/margins": 11.072562217712402, "rewards/rejected": -18.379249572753906, "step": 14077 }, { "epoch": 2.19, "learning_rate": 3.821958607811609e-06, "logits/chosen": -2.738118886947632, "logits/rejected": -3.0689051151275635, "logps/chosen": -155.21029663085938, "logps/rejected": -261.0077819824219, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -7.126850128173828, "rewards/margins": 5.7544050216674805, "rewards/rejected": -12.881254196166992, "step": 14078 }, { "epoch": 2.19, "learning_rate": 3.8212251672804605e-06, "logits/chosen": -2.91398286819458, "logits/rejected": -2.367587089538574, "logps/chosen": -260.96588134765625, "logps/rejected": -204.00405883789062, "loss": 0.0824, "rewards/accuracies": 1.0, "rewards/chosen": -9.725422859191895, "rewards/margins": 3.6772632598876953, "rewards/rejected": -13.40268611907959, "step": 14079 }, { "epoch": 2.19, "learning_rate": 3.820491726749312e-06, "logits/chosen": -1.8464784622192383, "logits/rejected": -2.976854085922241, "logps/chosen": -108.04039001464844, "logps/rejected": -467.9906921386719, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.139003276824951, "rewards/margins": 10.820167541503906, "rewards/rejected": -15.959171295166016, "step": 14080 }, { "epoch": 2.19, "learning_rate": 3.819758286218164e-06, "logits/chosen": -2.6851675510406494, "logits/rejected": -2.786608934402466, "logps/chosen": -239.15878295898438, "logps/rejected": -363.260009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.607843399047852, "rewards/margins": 9.429394721984863, "rewards/rejected": -14.037238121032715, "step": 14081 }, { "epoch": 2.19, "learning_rate": 3.819024845687017e-06, "logits/chosen": -2.972125291824341, "logits/rejected": -3.205968141555786, "logps/chosen": -106.75416564941406, "logps/rejected": -341.3206787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8405113220214844, "rewards/margins": 9.903799057006836, "rewards/rejected": -13.74431037902832, "step": 14082 }, { "epoch": 2.19, "learning_rate": 3.81829140515587e-06, "logits/chosen": -2.7421605587005615, "logits/rejected": -0.8303778767585754, "logps/chosen": -348.31475830078125, "logps/rejected": -165.0240020751953, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -5.587574005126953, "rewards/margins": 6.980945587158203, "rewards/rejected": -12.568519592285156, "step": 14083 }, { "epoch": 2.19, "learning_rate": 3.817557964624722e-06, "logits/chosen": -2.2153191566467285, "logits/rejected": -3.1517131328582764, "logps/chosen": -106.03617858886719, "logps/rejected": -421.17864990234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.235523223876953, "rewards/margins": 8.508445739746094, "rewards/rejected": -11.743968963623047, "step": 14084 }, { "epoch": 2.19, "learning_rate": 3.8168245240935735e-06, "logits/chosen": -2.6920218467712402, "logits/rejected": -2.6806440353393555, "logps/chosen": -178.7214813232422, "logps/rejected": -413.758544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5558063983917236, "rewards/margins": 14.062829971313477, "rewards/rejected": -17.618637084960938, "step": 14085 }, { "epoch": 2.19, "learning_rate": 3.816091083562425e-06, "logits/chosen": -2.8809540271759033, "logits/rejected": -1.5915261507034302, "logps/chosen": -380.5163269042969, "logps/rejected": -266.9124450683594, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -2.856877326965332, "rewards/margins": 9.233365058898926, "rewards/rejected": -12.090242385864258, "step": 14086 }, { "epoch": 2.19, "learning_rate": 3.815357643031278e-06, "logits/chosen": -1.962877631187439, "logits/rejected": -2.8841516971588135, "logps/chosen": -182.0732421875, "logps/rejected": -440.7357482910156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4330339431762695, "rewards/margins": 12.007162094116211, "rewards/rejected": -15.44019603729248, "step": 14087 }, { "epoch": 2.19, "learning_rate": 3.81462420250013e-06, "logits/chosen": -1.7705297470092773, "logits/rejected": -2.4905648231506348, "logps/chosen": -132.43185424804688, "logps/rejected": -344.07147216796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.034473419189453, "rewards/margins": 10.703585624694824, "rewards/rejected": -16.738059997558594, "step": 14088 }, { "epoch": 2.19, "learning_rate": 3.813890761968982e-06, "logits/chosen": -1.9845906496047974, "logits/rejected": -2.682631015777588, "logps/chosen": -248.49728393554688, "logps/rejected": -421.84332275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.470706939697266, "rewards/margins": 9.172134399414062, "rewards/rejected": -14.642842292785645, "step": 14089 }, { "epoch": 2.19, "learning_rate": 3.8131573214378337e-06, "logits/chosen": -2.4289133548736572, "logits/rejected": -3.003679037094116, "logps/chosen": -69.07437133789062, "logps/rejected": -300.38250732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.343990325927734, "rewards/margins": 10.326866149902344, "rewards/rejected": -15.670857429504395, "step": 14090 }, { "epoch": 2.19, "learning_rate": 3.8124238809066864e-06, "logits/chosen": -1.9397767782211304, "logits/rejected": -2.6710762977600098, "logps/chosen": -322.7266845703125, "logps/rejected": -515.23046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.55388069152832, "rewards/margins": 8.433582305908203, "rewards/rejected": -15.987462997436523, "step": 14091 }, { "epoch": 2.19, "learning_rate": 3.8116904403755383e-06, "logits/chosen": -2.9417080879211426, "logits/rejected": -1.945347547531128, "logps/chosen": -275.1455993652344, "logps/rejected": -185.8912353515625, "loss": 1.0371, "rewards/accuracies": 0.5, "rewards/chosen": -6.189183235168457, "rewards/margins": 1.8071093559265137, "rewards/rejected": -7.996292591094971, "step": 14092 }, { "epoch": 2.19, "learning_rate": 3.81095699984439e-06, "logits/chosen": -3.0431439876556396, "logits/rejected": -2.108290910720825, "logps/chosen": -271.35675048828125, "logps/rejected": -207.3074188232422, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.899669170379639, "rewards/margins": 8.164048194885254, "rewards/rejected": -13.063716888427734, "step": 14093 }, { "epoch": 2.19, "learning_rate": 3.810223559313242e-06, "logits/chosen": -2.988044023513794, "logits/rejected": -2.2856686115264893, "logps/chosen": -316.42193603515625, "logps/rejected": -251.96116638183594, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -4.215367794036865, "rewards/margins": 7.301819801330566, "rewards/rejected": -11.517187118530273, "step": 14094 }, { "epoch": 2.19, "learning_rate": 3.8094901187820944e-06, "logits/chosen": -2.699158191680908, "logits/rejected": -2.5270824432373047, "logps/chosen": -217.57659912109375, "logps/rejected": -379.44287109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.97200345993042, "rewards/margins": 9.080493927001953, "rewards/rejected": -16.05249786376953, "step": 14095 }, { "epoch": 2.19, "learning_rate": 3.808756678250947e-06, "logits/chosen": -2.193851947784424, "logits/rejected": -2.8356287479400635, "logps/chosen": -157.3068084716797, "logps/rejected": -363.71099853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.221090316772461, "rewards/margins": 10.293439865112305, "rewards/rejected": -14.514530181884766, "step": 14096 }, { "epoch": 2.19, "learning_rate": 3.808023237719799e-06, "logits/chosen": -1.4377355575561523, "logits/rejected": -2.084054946899414, "logps/chosen": -151.56649780273438, "logps/rejected": -309.2459411621094, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -10.183618545532227, "rewards/margins": 7.748964309692383, "rewards/rejected": -17.93258285522461, "step": 14097 }, { "epoch": 2.19, "learning_rate": 3.807289797188651e-06, "logits/chosen": -1.2715471982955933, "logits/rejected": -2.4799282550811768, "logps/chosen": -265.3370361328125, "logps/rejected": -411.4537353515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.330442905426025, "rewards/margins": 9.291019439697266, "rewards/rejected": -14.62146282196045, "step": 14098 }, { "epoch": 2.19, "learning_rate": 3.8065563566575027e-06, "logits/chosen": -2.634443521499634, "logits/rejected": -2.9221138954162598, "logps/chosen": -209.53646850585938, "logps/rejected": -235.21397399902344, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": -6.497590065002441, "rewards/margins": 3.9376673698425293, "rewards/rejected": -10.435256958007812, "step": 14099 }, { "epoch": 2.19, "learning_rate": 3.8058229161263555e-06, "logits/chosen": -2.6132752895355225, "logits/rejected": -1.7868871688842773, "logps/chosen": -191.9713592529297, "logps/rejected": -244.7257843017578, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -7.234001159667969, "rewards/margins": 7.506390571594238, "rewards/rejected": -14.740391731262207, "step": 14100 }, { "epoch": 2.19, "learning_rate": 3.8050894755952073e-06, "logits/chosen": -2.606419086456299, "logits/rejected": -2.813544988632202, "logps/chosen": -403.81884765625, "logps/rejected": -592.5836181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.103175640106201, "rewards/margins": 16.4395751953125, "rewards/rejected": -20.54275131225586, "step": 14101 }, { "epoch": 2.19, "learning_rate": 3.8043560350640592e-06, "logits/chosen": -2.661816358566284, "logits/rejected": -2.7511024475097656, "logps/chosen": -233.1262664794922, "logps/rejected": -349.46661376953125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -8.118623733520508, "rewards/margins": 5.719638824462891, "rewards/rejected": -13.838262557983398, "step": 14102 }, { "epoch": 2.19, "learning_rate": 3.803622594532911e-06, "logits/chosen": -1.6834547519683838, "logits/rejected": -2.452014923095703, "logps/chosen": -294.33526611328125, "logps/rejected": -352.6226806640625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.11210298538208, "rewards/margins": 7.354525566101074, "rewards/rejected": -14.466629028320312, "step": 14103 }, { "epoch": 2.19, "learning_rate": 3.8028891540017634e-06, "logits/chosen": -1.5269831418991089, "logits/rejected": -2.6812551021575928, "logps/chosen": -280.8567199707031, "logps/rejected": -400.6228942871094, "loss": 0.0869, "rewards/accuracies": 1.0, "rewards/chosen": -7.551913261413574, "rewards/margins": 5.29896879196167, "rewards/rejected": -12.850882530212402, "step": 14104 }, { "epoch": 2.19, "learning_rate": 3.802155713470616e-06, "logits/chosen": -2.7122952938079834, "logits/rejected": -2.762559652328491, "logps/chosen": -219.0334014892578, "logps/rejected": -343.1739501953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6348094940185547, "rewards/margins": 9.27676010131836, "rewards/rejected": -12.911569595336914, "step": 14105 }, { "epoch": 2.19, "learning_rate": 3.801422272939468e-06, "logits/chosen": -2.6095359325408936, "logits/rejected": -2.8079354763031006, "logps/chosen": -112.57086181640625, "logps/rejected": -287.8116455078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.041323184967041, "rewards/margins": 7.663486480712891, "rewards/rejected": -13.704809188842773, "step": 14106 }, { "epoch": 2.19, "learning_rate": 3.80068883240832e-06, "logits/chosen": -2.711371660232544, "logits/rejected": -2.897385358810425, "logps/chosen": -623.8463745117188, "logps/rejected": -436.8126220703125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.264151096343994, "rewards/margins": 9.770938873291016, "rewards/rejected": -14.035089492797852, "step": 14107 }, { "epoch": 2.19, "learning_rate": 3.7999553918771718e-06, "logits/chosen": -1.741971731185913, "logits/rejected": -2.543536424636841, "logps/chosen": -152.09646606445312, "logps/rejected": -403.62164306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.444271087646484, "rewards/margins": 9.123757362365723, "rewards/rejected": -17.56802749633789, "step": 14108 }, { "epoch": 2.19, "learning_rate": 3.7992219513460245e-06, "logits/chosen": -2.51849102973938, "logits/rejected": -1.7325615882873535, "logps/chosen": -255.4871826171875, "logps/rejected": -261.34881591796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.546016693115234, "rewards/margins": 8.161556243896484, "rewards/rejected": -12.707572937011719, "step": 14109 }, { "epoch": 2.19, "learning_rate": 3.7984885108148764e-06, "logits/chosen": -2.963355302810669, "logits/rejected": -2.9869964122772217, "logps/chosen": -237.0537109375, "logps/rejected": -295.4515075683594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.378127098083496, "rewards/margins": 10.02031135559082, "rewards/rejected": -15.398438453674316, "step": 14110 }, { "epoch": 2.19, "learning_rate": 3.7977550702837282e-06, "logits/chosen": -1.9052261114120483, "logits/rejected": -2.545106887817383, "logps/chosen": -172.80792236328125, "logps/rejected": -452.2375793457031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.302188873291016, "rewards/margins": 12.606159210205078, "rewards/rejected": -21.908348083496094, "step": 14111 }, { "epoch": 2.19, "learning_rate": 3.7970216297525805e-06, "logits/chosen": -2.780500888824463, "logits/rejected": -2.4726717472076416, "logps/chosen": -281.42266845703125, "logps/rejected": -345.7225036621094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.415907859802246, "rewards/margins": 8.563722610473633, "rewards/rejected": -14.979631423950195, "step": 14112 }, { "epoch": 2.19, "learning_rate": 3.7962881892214324e-06, "logits/chosen": -2.1366517543792725, "logits/rejected": -2.8774502277374268, "logps/chosen": -150.90255737304688, "logps/rejected": -413.92364501953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.553903579711914, "rewards/margins": 6.816073417663574, "rewards/rejected": -13.369976997375488, "step": 14113 }, { "epoch": 2.2, "learning_rate": 3.795554748690285e-06, "logits/chosen": -2.2310965061187744, "logits/rejected": -2.6455163955688477, "logps/chosen": -180.5599365234375, "logps/rejected": -260.0060729980469, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -6.325946807861328, "rewards/margins": 6.3670830726623535, "rewards/rejected": -12.693029403686523, "step": 14114 }, { "epoch": 2.2, "learning_rate": 3.794821308159137e-06, "logits/chosen": -2.3374834060668945, "logits/rejected": -2.94478440284729, "logps/chosen": -444.0538330078125, "logps/rejected": -564.2720947265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.034032821655273, "rewards/margins": 9.488615036010742, "rewards/rejected": -15.522647857666016, "step": 14115 }, { "epoch": 2.2, "learning_rate": 3.794087867627989e-06, "logits/chosen": -2.396904468536377, "logits/rejected": -2.7733612060546875, "logps/chosen": -572.0390625, "logps/rejected": -649.5218505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.151896953582764, "rewards/margins": 9.5857515335083, "rewards/rejected": -16.737648010253906, "step": 14116 }, { "epoch": 2.2, "learning_rate": 3.7933544270968408e-06, "logits/chosen": -1.3535957336425781, "logits/rejected": -2.382624387741089, "logps/chosen": -176.92138671875, "logps/rejected": -369.192626953125, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -5.562230587005615, "rewards/margins": 7.920200347900391, "rewards/rejected": -13.482431411743164, "step": 14117 }, { "epoch": 2.2, "learning_rate": 3.7926209865656935e-06, "logits/chosen": -2.417829990386963, "logits/rejected": -2.6695144176483154, "logps/chosen": -161.46961975097656, "logps/rejected": -487.5785217285156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.244080066680908, "rewards/margins": 9.725312232971191, "rewards/rejected": -14.969392776489258, "step": 14118 }, { "epoch": 2.2, "learning_rate": 3.7918875460345454e-06, "logits/chosen": -2.389603614807129, "logits/rejected": -2.7874221801757812, "logps/chosen": -568.0064697265625, "logps/rejected": -679.2930908203125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.248272895812988, "rewards/margins": 10.769170761108398, "rewards/rejected": -18.017444610595703, "step": 14119 }, { "epoch": 2.2, "learning_rate": 3.7911541055033973e-06, "logits/chosen": -2.2375786304473877, "logits/rejected": -2.413978338241577, "logps/chosen": -407.23297119140625, "logps/rejected": -512.432861328125, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/chosen": -5.381190299987793, "rewards/margins": 6.641736030578613, "rewards/rejected": -12.022926330566406, "step": 14120 }, { "epoch": 2.2, "learning_rate": 3.7904206649722496e-06, "logits/chosen": -2.571200132369995, "logits/rejected": -2.9755096435546875, "logps/chosen": -232.26870727539062, "logps/rejected": -380.95330810546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.128411293029785, "rewards/margins": 9.253863334655762, "rewards/rejected": -16.382274627685547, "step": 14121 }, { "epoch": 2.2, "learning_rate": 3.7896872244411023e-06, "logits/chosen": -3.141000986099243, "logits/rejected": -2.327584981918335, "logps/chosen": -529.2622680664062, "logps/rejected": -310.68902587890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.455512285232544, "rewards/margins": 11.028759956359863, "rewards/rejected": -14.484272003173828, "step": 14122 }, { "epoch": 2.2, "learning_rate": 3.788953783909954e-06, "logits/chosen": -2.548018455505371, "logits/rejected": -2.8194332122802734, "logps/chosen": -620.9961547851562, "logps/rejected": -554.2850952148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.921894073486328, "rewards/margins": 10.942858695983887, "rewards/rejected": -15.864752769470215, "step": 14123 }, { "epoch": 2.2, "learning_rate": 3.788220343378806e-06, "logits/chosen": -2.0403997898101807, "logits/rejected": -2.8943939208984375, "logps/chosen": -149.31459045410156, "logps/rejected": -433.75592041015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.988803863525391, "rewards/margins": 9.931794166564941, "rewards/rejected": -16.92059898376465, "step": 14124 }, { "epoch": 2.2, "learning_rate": 3.787486902847658e-06, "logits/chosen": -2.9418392181396484, "logits/rejected": -2.680119276046753, "logps/chosen": -209.5586700439453, "logps/rejected": -252.62025451660156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.003604888916016, "rewards/margins": 9.562114715576172, "rewards/rejected": -14.565719604492188, "step": 14125 }, { "epoch": 2.2, "learning_rate": 3.78675346231651e-06, "logits/chosen": -2.244938850402832, "logits/rejected": -2.6974759101867676, "logps/chosen": -669.193359375, "logps/rejected": -716.1875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.061413764953613, "rewards/margins": 10.5179443359375, "rewards/rejected": -17.57935905456543, "step": 14126 }, { "epoch": 2.2, "learning_rate": 3.7860200217853625e-06, "logits/chosen": -2.5850706100463867, "logits/rejected": -2.8833470344543457, "logps/chosen": -114.67781829833984, "logps/rejected": -274.03460693359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.889342308044434, "rewards/margins": 8.065082550048828, "rewards/rejected": -13.954423904418945, "step": 14127 }, { "epoch": 2.2, "learning_rate": 3.7852865812542144e-06, "logits/chosen": -2.0059783458709717, "logits/rejected": -2.751824140548706, "logps/chosen": -90.47342681884766, "logps/rejected": -386.19622802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.446865081787109, "rewards/margins": 12.988944053649902, "rewards/rejected": -18.435810089111328, "step": 14128 }, { "epoch": 2.2, "learning_rate": 3.7845531407230667e-06, "logits/chosen": -2.469820261001587, "logits/rejected": -2.7863106727600098, "logps/chosen": -453.76300048828125, "logps/rejected": -415.8724365234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.198057174682617, "rewards/margins": 6.9389142990112305, "rewards/rejected": -13.136970520019531, "step": 14129 }, { "epoch": 2.2, "learning_rate": 3.7838197001919186e-06, "logits/chosen": -2.5223593711853027, "logits/rejected": -2.8746633529663086, "logps/chosen": -188.8736572265625, "logps/rejected": -405.2657775878906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.932157516479492, "rewards/margins": 10.8890962600708, "rewards/rejected": -18.82125473022461, "step": 14130 }, { "epoch": 2.2, "learning_rate": 3.7830862596607713e-06, "logits/chosen": -2.133450746536255, "logits/rejected": -2.765336751937866, "logps/chosen": -543.590087890625, "logps/rejected": -567.060546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.035299777984619, "rewards/margins": 10.204345703125, "rewards/rejected": -14.239645004272461, "step": 14131 }, { "epoch": 2.2, "learning_rate": 3.782352819129623e-06, "logits/chosen": -1.4492645263671875, "logits/rejected": -2.4338605403900146, "logps/chosen": -206.56570434570312, "logps/rejected": -452.05242919921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -10.978622436523438, "rewards/margins": 8.975418090820312, "rewards/rejected": -19.95404052734375, "step": 14132 }, { "epoch": 2.2, "learning_rate": 3.781619378598475e-06, "logits/chosen": -2.0862319469451904, "logits/rejected": -2.9466357231140137, "logps/chosen": -129.75985717773438, "logps/rejected": -316.5357666015625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -5.960570335388184, "rewards/margins": 6.081254959106445, "rewards/rejected": -12.041825294494629, "step": 14133 }, { "epoch": 2.2, "learning_rate": 3.780885938067327e-06, "logits/chosen": -2.6161763668060303, "logits/rejected": -2.415175437927246, "logps/chosen": -202.97189331054688, "logps/rejected": -275.33099365234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.0048255920410156, "rewards/margins": 8.218586921691895, "rewards/rejected": -11.22341251373291, "step": 14134 }, { "epoch": 2.2, "learning_rate": 3.780152497536179e-06, "logits/chosen": -2.7831363677978516, "logits/rejected": -2.796290159225464, "logps/chosen": -832.9859008789062, "logps/rejected": -501.20111083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.4934539794921875, "rewards/margins": 10.73831558227539, "rewards/rejected": -17.231769561767578, "step": 14135 }, { "epoch": 2.2, "learning_rate": 3.7794190570050316e-06, "logits/chosen": -2.268359661102295, "logits/rejected": -2.461733818054199, "logps/chosen": -630.7159423828125, "logps/rejected": -556.880126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.711012840270996, "rewards/margins": 14.425192832946777, "rewards/rejected": -21.136205673217773, "step": 14136 }, { "epoch": 2.2, "learning_rate": 3.7786856164738834e-06, "logits/chosen": -1.3123177289962769, "logits/rejected": -2.414048671722412, "logps/chosen": -99.75410461425781, "logps/rejected": -273.07696533203125, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -7.614182472229004, "rewards/margins": 5.720826625823975, "rewards/rejected": -13.335009574890137, "step": 14137 }, { "epoch": 2.2, "learning_rate": 3.7779521759427357e-06, "logits/chosen": -1.86734139919281, "logits/rejected": -2.8886046409606934, "logps/chosen": -193.90484619140625, "logps/rejected": -463.9130554199219, "loss": 0.0681, "rewards/accuracies": 1.0, "rewards/chosen": -9.426643371582031, "rewards/margins": 3.842191696166992, "rewards/rejected": -13.268835067749023, "step": 14138 }, { "epoch": 2.2, "learning_rate": 3.7772187354115876e-06, "logits/chosen": -2.3842897415161133, "logits/rejected": -2.6617236137390137, "logps/chosen": -194.7362060546875, "logps/rejected": -264.9436340332031, "loss": 0.0341, "rewards/accuracies": 1.0, "rewards/chosen": -5.956603050231934, "rewards/margins": 5.095672130584717, "rewards/rejected": -11.052274703979492, "step": 14139 }, { "epoch": 2.2, "learning_rate": 3.7764852948804403e-06, "logits/chosen": -1.6542720794677734, "logits/rejected": -2.3227295875549316, "logps/chosen": -190.76461791992188, "logps/rejected": -365.0189514160156, "loss": 0.2814, "rewards/accuracies": 1.0, "rewards/chosen": -8.778693199157715, "rewards/margins": 3.4240822792053223, "rewards/rejected": -12.202775955200195, "step": 14140 }, { "epoch": 2.2, "learning_rate": 3.7757518543492922e-06, "logits/chosen": -1.7824763059616089, "logits/rejected": -2.797826051712036, "logps/chosen": -141.62911987304688, "logps/rejected": -387.8195495605469, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -10.12773323059082, "rewards/margins": 5.852618217468262, "rewards/rejected": -15.980352401733398, "step": 14141 }, { "epoch": 2.2, "learning_rate": 3.775018413818144e-06, "logits/chosen": -2.5322022438049316, "logits/rejected": -2.102039098739624, "logps/chosen": -194.63772583007812, "logps/rejected": -287.4134521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.542403221130371, "rewards/margins": 12.927053451538086, "rewards/rejected": -18.469457626342773, "step": 14142 }, { "epoch": 2.2, "learning_rate": 3.774284973286996e-06, "logits/chosen": -2.668403387069702, "logits/rejected": -2.5849714279174805, "logps/chosen": -296.83038330078125, "logps/rejected": -430.0542907714844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.236387252807617, "rewards/margins": 10.086431503295898, "rewards/rejected": -18.322818756103516, "step": 14143 }, { "epoch": 2.2, "learning_rate": 3.773551532755848e-06, "logits/chosen": -2.3294403553009033, "logits/rejected": -2.6322710514068604, "logps/chosen": -314.52630615234375, "logps/rejected": -461.9112243652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6581339836120605, "rewards/margins": 15.195530891418457, "rewards/rejected": -18.85366439819336, "step": 14144 }, { "epoch": 2.2, "learning_rate": 3.7728180922247006e-06, "logits/chosen": -2.6939852237701416, "logits/rejected": -2.1442108154296875, "logps/chosen": -355.23187255859375, "logps/rejected": -277.8023681640625, "loss": 0.4435, "rewards/accuracies": 0.5, "rewards/chosen": -8.515396118164062, "rewards/margins": 3.8445663452148438, "rewards/rejected": -12.359962463378906, "step": 14145 }, { "epoch": 2.2, "learning_rate": 3.772084651693553e-06, "logits/chosen": -3.013277530670166, "logits/rejected": -3.038947105407715, "logps/chosen": -174.58013916015625, "logps/rejected": -314.8477783203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -8.5446195602417, "rewards/margins": 6.909887313842773, "rewards/rejected": -15.454505920410156, "step": 14146 }, { "epoch": 2.2, "learning_rate": 3.7713512111624048e-06, "logits/chosen": -2.843604326248169, "logits/rejected": -1.608616590499878, "logps/chosen": -362.27178955078125, "logps/rejected": -247.92581176757812, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.147855281829834, "rewards/margins": 8.690557479858398, "rewards/rejected": -14.83841323852539, "step": 14147 }, { "epoch": 2.2, "learning_rate": 3.7706177706312566e-06, "logits/chosen": -2.566518545150757, "logits/rejected": -2.8691048622131348, "logps/chosen": -300.57275390625, "logps/rejected": -373.93603515625, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -5.66863489151001, "rewards/margins": 7.783557891845703, "rewards/rejected": -13.452192306518555, "step": 14148 }, { "epoch": 2.2, "learning_rate": 3.7698843301001094e-06, "logits/chosen": -2.2520673274993896, "logits/rejected": -2.548696756362915, "logps/chosen": -230.0288543701172, "logps/rejected": -378.9294128417969, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -9.738884925842285, "rewards/margins": 9.827373504638672, "rewards/rejected": -19.56625747680664, "step": 14149 }, { "epoch": 2.2, "learning_rate": 3.7691508895689612e-06, "logits/chosen": -2.0648374557495117, "logits/rejected": -2.9820637702941895, "logps/chosen": -117.48768615722656, "logps/rejected": -300.9942321777344, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -7.098824977874756, "rewards/margins": 5.662580490112305, "rewards/rejected": -12.761405944824219, "step": 14150 }, { "epoch": 2.2, "learning_rate": 3.768417449037813e-06, "logits/chosen": -1.7368985414505005, "logits/rejected": -2.6155877113342285, "logps/chosen": -186.42791748046875, "logps/rejected": -468.39947509765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.695557594299316, "rewards/margins": 9.423728942871094, "rewards/rejected": -17.119285583496094, "step": 14151 }, { "epoch": 2.2, "learning_rate": 3.767684008506665e-06, "logits/chosen": -2.6672561168670654, "logits/rejected": -2.845073699951172, "logps/chosen": -551.1408081054688, "logps/rejected": -668.784912109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.8284912109375, "rewards/margins": 9.300918579101562, "rewards/rejected": -15.129409790039062, "step": 14152 }, { "epoch": 2.2, "learning_rate": 3.766950567975517e-06, "logits/chosen": -2.5130271911621094, "logits/rejected": -2.7884538173675537, "logps/chosen": -523.8455810546875, "logps/rejected": -619.4290161132812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -10.615531921386719, "rewards/margins": 10.920015335083008, "rewards/rejected": -21.535547256469727, "step": 14153 }, { "epoch": 2.2, "learning_rate": 3.7662171274443696e-06, "logits/chosen": -2.6437485218048096, "logits/rejected": -2.6895971298217773, "logps/chosen": -339.33111572265625, "logps/rejected": -386.8072814941406, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -7.770010471343994, "rewards/margins": 8.910459518432617, "rewards/rejected": -16.680469512939453, "step": 14154 }, { "epoch": 2.2, "learning_rate": 3.765483686913222e-06, "logits/chosen": -2.643314838409424, "logits/rejected": -1.228369116783142, "logps/chosen": -907.7001342773438, "logps/rejected": -411.5823974609375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -8.404218673706055, "rewards/margins": 6.994481563568115, "rewards/rejected": -15.398700714111328, "step": 14155 }, { "epoch": 2.2, "learning_rate": 3.7647502463820738e-06, "logits/chosen": -1.2162100076675415, "logits/rejected": -2.0705206394195557, "logps/chosen": -465.47564697265625, "logps/rejected": -392.19342041015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.975086212158203, "rewards/margins": 9.061604499816895, "rewards/rejected": -15.036690711975098, "step": 14156 }, { "epoch": 2.2, "learning_rate": 3.7640168058509257e-06, "logits/chosen": -2.445411205291748, "logits/rejected": -2.6282691955566406, "logps/chosen": -393.1710205078125, "logps/rejected": -432.3230285644531, "loss": 0.1181, "rewards/accuracies": 1.0, "rewards/chosen": -6.501682281494141, "rewards/margins": 7.543160438537598, "rewards/rejected": -14.044843673706055, "step": 14157 }, { "epoch": 2.2, "learning_rate": 3.7632833653197784e-06, "logits/chosen": -1.2238911390304565, "logits/rejected": -3.001826286315918, "logps/chosen": -162.5455780029297, "logps/rejected": -520.5103759765625, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -7.774794578552246, "rewards/margins": 6.056878566741943, "rewards/rejected": -13.831672668457031, "step": 14158 }, { "epoch": 2.2, "learning_rate": 3.7625499247886303e-06, "logits/chosen": -2.173434257507324, "logits/rejected": -2.7325778007507324, "logps/chosen": -124.36137390136719, "logps/rejected": -325.2862548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.022463321685791, "rewards/margins": 10.85638427734375, "rewards/rejected": -15.878847122192383, "step": 14159 }, { "epoch": 2.2, "learning_rate": 3.761816484257482e-06, "logits/chosen": -2.716595411300659, "logits/rejected": -2.2988080978393555, "logps/chosen": -784.5321655273438, "logps/rejected": -549.943359375, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -10.408117294311523, "rewards/margins": 5.068802356719971, "rewards/rejected": -15.476920127868652, "step": 14160 }, { "epoch": 2.2, "learning_rate": 3.761083043726334e-06, "logits/chosen": -2.940720796585083, "logits/rejected": -2.358670234680176, "logps/chosen": -153.53170776367188, "logps/rejected": -174.94839477539062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.574037551879883, "rewards/margins": 8.291648864746094, "rewards/rejected": -12.865686416625977, "step": 14161 }, { "epoch": 2.2, "learning_rate": 3.7603496031951863e-06, "logits/chosen": -2.8059232234954834, "logits/rejected": -2.6414575576782227, "logps/chosen": -193.4775390625, "logps/rejected": -363.26129150390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.9909348487854, "rewards/margins": 8.38719367980957, "rewards/rejected": -15.378128051757812, "step": 14162 }, { "epoch": 2.2, "learning_rate": 3.759616162664039e-06, "logits/chosen": -2.449845790863037, "logits/rejected": -2.326988697052002, "logps/chosen": -221.3710479736328, "logps/rejected": -286.95306396484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.924574851989746, "rewards/margins": 9.286558151245117, "rewards/rejected": -15.211132049560547, "step": 14163 }, { "epoch": 2.2, "learning_rate": 3.758882722132891e-06, "logits/chosen": -2.4058849811553955, "logits/rejected": -2.687187671661377, "logps/chosen": -136.77822875976562, "logps/rejected": -338.33416748046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.834620475769043, "rewards/margins": 9.588372230529785, "rewards/rejected": -16.422992706298828, "step": 14164 }, { "epoch": 2.2, "learning_rate": 3.758149281601743e-06, "logits/chosen": -1.936930537223816, "logits/rejected": -2.766674757003784, "logps/chosen": -205.05323791503906, "logps/rejected": -477.84466552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.738846778869629, "rewards/margins": 12.18209171295166, "rewards/rejected": -18.92093849182129, "step": 14165 }, { "epoch": 2.2, "learning_rate": 3.7574158410705947e-06, "logits/chosen": -1.9372104406356812, "logits/rejected": -2.6097159385681152, "logps/chosen": -297.70550537109375, "logps/rejected": -522.0186767578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.75723648071289, "rewards/margins": 10.119124412536621, "rewards/rejected": -18.876361846923828, "step": 14166 }, { "epoch": 2.2, "learning_rate": 3.7566824005394474e-06, "logits/chosen": -2.5407369136810303, "logits/rejected": -3.0324082374572754, "logps/chosen": -106.09494018554688, "logps/rejected": -333.9951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.028687000274658, "rewards/margins": 11.380596160888672, "rewards/rejected": -16.409282684326172, "step": 14167 }, { "epoch": 2.2, "learning_rate": 3.7559489600082993e-06, "logits/chosen": -2.609269857406616, "logits/rejected": -2.6040897369384766, "logps/chosen": -134.5286865234375, "logps/rejected": -324.3246154785156, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -7.43237829208374, "rewards/margins": 7.597925186157227, "rewards/rejected": -15.030303955078125, "step": 14168 }, { "epoch": 2.2, "learning_rate": 3.755215519477151e-06, "logits/chosen": -2.5679421424865723, "logits/rejected": -2.8871748447418213, "logps/chosen": -154.64883422851562, "logps/rejected": -394.43621826171875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.850322723388672, "rewards/margins": 6.810311317443848, "rewards/rejected": -14.66063404083252, "step": 14169 }, { "epoch": 2.2, "learning_rate": 3.754482078946003e-06, "logits/chosen": -1.6870671510696411, "logits/rejected": -2.7914962768554688, "logps/chosen": -179.5360107421875, "logps/rejected": -574.6802978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2921953201293945, "rewards/margins": 12.807891845703125, "rewards/rejected": -20.100086212158203, "step": 14170 }, { "epoch": 2.2, "learning_rate": 3.7537486384148558e-06, "logits/chosen": -1.9605185985565186, "logits/rejected": -2.552802085876465, "logps/chosen": -257.89190673828125, "logps/rejected": -407.7498779296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.779413938522339, "rewards/margins": 10.788628578186035, "rewards/rejected": -14.568042755126953, "step": 14171 }, { "epoch": 2.2, "learning_rate": 3.753015197883708e-06, "logits/chosen": -0.8568160533905029, "logits/rejected": -2.444692850112915, "logps/chosen": -62.12684631347656, "logps/rejected": -249.46832275390625, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -3.6912336349487305, "rewards/margins": 5.668745040893555, "rewards/rejected": -9.359978675842285, "step": 14172 }, { "epoch": 2.2, "learning_rate": 3.75228175735256e-06, "logits/chosen": -2.8316195011138916, "logits/rejected": -2.403749704360962, "logps/chosen": -722.8970947265625, "logps/rejected": -454.39788818359375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.205036163330078, "rewards/margins": 9.348579406738281, "rewards/rejected": -17.55361557006836, "step": 14173 }, { "epoch": 2.2, "learning_rate": 3.751548316821412e-06, "logits/chosen": -2.488945484161377, "logits/rejected": -2.9447896480560303, "logps/chosen": -173.36685180664062, "logps/rejected": -436.42498779296875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.852605819702148, "rewards/margins": 7.09348201751709, "rewards/rejected": -12.946087837219238, "step": 14174 }, { "epoch": 2.2, "learning_rate": 3.7508148762902637e-06, "logits/chosen": -1.5937492847442627, "logits/rejected": -2.645869731903076, "logps/chosen": -353.01837158203125, "logps/rejected": -685.4796142578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.995404243469238, "rewards/margins": 12.702814102172852, "rewards/rejected": -21.698217391967773, "step": 14175 }, { "epoch": 2.2, "learning_rate": 3.7500814357591164e-06, "logits/chosen": -2.155906915664673, "logits/rejected": -2.7801694869995117, "logps/chosen": -189.2926025390625, "logps/rejected": -415.34161376953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.254659652709961, "rewards/margins": 9.21868896484375, "rewards/rejected": -14.473348617553711, "step": 14176 }, { "epoch": 2.2, "learning_rate": 3.7493479952279683e-06, "logits/chosen": -2.6965014934539795, "logits/rejected": -2.510099411010742, "logps/chosen": -459.82989501953125, "logps/rejected": -604.3748779296875, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -6.440061569213867, "rewards/margins": 6.168398857116699, "rewards/rejected": -12.608460426330566, "step": 14177 }, { "epoch": 2.2, "learning_rate": 3.74861455469682e-06, "logits/chosen": -2.3477110862731934, "logits/rejected": -2.6367664337158203, "logps/chosen": -106.67662048339844, "logps/rejected": -318.2756652832031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.031213760375977, "rewards/margins": 10.458555221557617, "rewards/rejected": -18.489768981933594, "step": 14178 }, { "epoch": 2.21, "learning_rate": 3.7478811141656725e-06, "logits/chosen": -2.426924705505371, "logits/rejected": -2.6076126098632812, "logps/chosen": -43.956687927246094, "logps/rejected": -275.8654479980469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.837286949157715, "rewards/margins": 10.982222557067871, "rewards/rejected": -13.819509506225586, "step": 14179 }, { "epoch": 2.21, "learning_rate": 3.747147673634525e-06, "logits/chosen": -1.9961416721343994, "logits/rejected": -2.200782299041748, "logps/chosen": -590.0584106445312, "logps/rejected": -573.667724609375, "loss": 0.0423, "rewards/accuracies": 1.0, "rewards/chosen": -6.320528984069824, "rewards/margins": 12.866826057434082, "rewards/rejected": -19.187355041503906, "step": 14180 }, { "epoch": 2.21, "learning_rate": 3.746414233103377e-06, "logits/chosen": -2.5129833221435547, "logits/rejected": -2.696841239929199, "logps/chosen": -221.731689453125, "logps/rejected": -375.30755615234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.703105926513672, "rewards/margins": 9.787101745605469, "rewards/rejected": -17.49020767211914, "step": 14181 }, { "epoch": 2.21, "learning_rate": 3.745680792572229e-06, "logits/chosen": -1.212432622909546, "logits/rejected": -2.1188042163848877, "logps/chosen": -232.25161743164062, "logps/rejected": -547.890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.169623374938965, "rewards/margins": 15.020753860473633, "rewards/rejected": -22.190378189086914, "step": 14182 }, { "epoch": 2.21, "learning_rate": 3.744947352041081e-06, "logits/chosen": -2.485339641571045, "logits/rejected": -2.3184926509857178, "logps/chosen": -111.54148864746094, "logps/rejected": -254.86749267578125, "loss": 0.7268, "rewards/accuracies": 0.5, "rewards/chosen": -5.0389485359191895, "rewards/margins": 7.021874904632568, "rewards/rejected": -12.060823440551758, "step": 14183 }, { "epoch": 2.21, "learning_rate": 3.7442139115099327e-06, "logits/chosen": -2.4660263061523438, "logits/rejected": -2.295257091522217, "logps/chosen": -590.5253295898438, "logps/rejected": -557.8473510742188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.5408449172973633, "rewards/margins": 8.75704574584961, "rewards/rejected": -12.297889709472656, "step": 14184 }, { "epoch": 2.21, "learning_rate": 3.7434804709787854e-06, "logits/chosen": -1.3384716510772705, "logits/rejected": -2.532057046890259, "logps/chosen": -134.648681640625, "logps/rejected": -394.9941101074219, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -7.768498420715332, "rewards/margins": 7.520734786987305, "rewards/rejected": -15.289234161376953, "step": 14185 }, { "epoch": 2.21, "learning_rate": 3.7427470304476373e-06, "logits/chosen": -2.5852084159851074, "logits/rejected": -1.7582263946533203, "logps/chosen": -216.83665466308594, "logps/rejected": -261.60406494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1035780906677246, "rewards/margins": 11.004653930664062, "rewards/rejected": -14.108232498168945, "step": 14186 }, { "epoch": 2.21, "learning_rate": 3.742013589916489e-06, "logits/chosen": -2.7670814990997314, "logits/rejected": -2.0879602432250977, "logps/chosen": -408.08502197265625, "logps/rejected": -567.5347290039062, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.220794677734375, "rewards/margins": 7.535981178283691, "rewards/rejected": -14.756776809692383, "step": 14187 }, { "epoch": 2.21, "learning_rate": 3.7412801493853415e-06, "logits/chosen": -2.883620500564575, "logits/rejected": -2.825833559036255, "logps/chosen": -604.8768920898438, "logps/rejected": -625.000244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.756237983703613, "rewards/margins": 11.653036117553711, "rewards/rejected": -18.40927505493164, "step": 14188 }, { "epoch": 2.21, "learning_rate": 3.7405467088541942e-06, "logits/chosen": -2.1248068809509277, "logits/rejected": -2.7345879077911377, "logps/chosen": -251.91055297851562, "logps/rejected": -403.565185546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.580777645111084, "rewards/margins": 8.750113487243652, "rewards/rejected": -15.330891609191895, "step": 14189 }, { "epoch": 2.21, "learning_rate": 3.739813268323046e-06, "logits/chosen": -2.91084623336792, "logits/rejected": -2.5470850467681885, "logps/chosen": -295.8935241699219, "logps/rejected": -316.484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.708277702331543, "rewards/margins": 7.512410640716553, "rewards/rejected": -15.220687866210938, "step": 14190 }, { "epoch": 2.21, "learning_rate": 3.739079827791898e-06, "logits/chosen": -2.9145472049713135, "logits/rejected": -2.329791307449341, "logps/chosen": -308.3612365722656, "logps/rejected": -322.6410827636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.893355846405029, "rewards/margins": 11.281371116638184, "rewards/rejected": -18.174728393554688, "step": 14191 }, { "epoch": 2.21, "learning_rate": 3.73834638726075e-06, "logits/chosen": -2.5586628913879395, "logits/rejected": -2.4488613605499268, "logps/chosen": -317.3025207519531, "logps/rejected": -387.4285583496094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.407260894775391, "rewards/margins": 8.12934398651123, "rewards/rejected": -14.536605834960938, "step": 14192 }, { "epoch": 2.21, "learning_rate": 3.7376129467296017e-06, "logits/chosen": -2.2143168449401855, "logits/rejected": -2.117539167404175, "logps/chosen": -151.97755432128906, "logps/rejected": -344.54547119140625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.164525985717773, "rewards/margins": 12.80415153503418, "rewards/rejected": -16.968677520751953, "step": 14193 }, { "epoch": 2.21, "learning_rate": 3.7368795061984545e-06, "logits/chosen": -1.4195518493652344, "logits/rejected": -2.5142605304718018, "logps/chosen": -190.82440185546875, "logps/rejected": -507.6671142578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.8512282371521, "rewards/margins": 10.37295913696289, "rewards/rejected": -15.224187850952148, "step": 14194 }, { "epoch": 2.21, "learning_rate": 3.7361460656673063e-06, "logits/chosen": -1.8931938409805298, "logits/rejected": -2.473236322402954, "logps/chosen": -111.59400177001953, "logps/rejected": -316.1484680175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.373059272766113, "rewards/margins": 11.608473777770996, "rewards/rejected": -15.98153305053711, "step": 14195 }, { "epoch": 2.21, "learning_rate": 3.7354126251361586e-06, "logits/chosen": -1.1542543172836304, "logits/rejected": -2.7347733974456787, "logps/chosen": -176.0516357421875, "logps/rejected": -709.54638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.062272071838379, "rewards/margins": 11.276642799377441, "rewards/rejected": -19.33891487121582, "step": 14196 }, { "epoch": 2.21, "learning_rate": 3.7346791846050105e-06, "logits/chosen": -2.1262710094451904, "logits/rejected": -2.7563321590423584, "logps/chosen": -339.88397216796875, "logps/rejected": -464.4498291015625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -6.770044326782227, "rewards/margins": 8.223445892333984, "rewards/rejected": -14.993490219116211, "step": 14197 }, { "epoch": 2.21, "learning_rate": 3.7339457440738633e-06, "logits/chosen": -0.9566724300384521, "logits/rejected": -2.773301839828491, "logps/chosen": -126.18411254882812, "logps/rejected": -674.345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.651666641235352, "rewards/margins": 11.068120956420898, "rewards/rejected": -17.71978759765625, "step": 14198 }, { "epoch": 2.21, "learning_rate": 3.733212303542715e-06, "logits/chosen": -1.9434643983840942, "logits/rejected": -2.459138870239258, "logps/chosen": -285.29779052734375, "logps/rejected": -410.6792297363281, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.361625671386719, "rewards/margins": 6.659569263458252, "rewards/rejected": -15.021194458007812, "step": 14199 }, { "epoch": 2.21, "learning_rate": 3.732478863011567e-06, "logits/chosen": -2.290341854095459, "logits/rejected": -2.8204855918884277, "logps/chosen": -192.39772033691406, "logps/rejected": -521.00634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.564620018005371, "rewards/margins": 16.691083908081055, "rewards/rejected": -21.25570297241211, "step": 14200 }, { "epoch": 2.21, "learning_rate": 3.731745422480419e-06, "logits/chosen": -2.409867525100708, "logits/rejected": -2.63279390335083, "logps/chosen": -325.3480529785156, "logps/rejected": -440.7953796386719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.589949131011963, "rewards/margins": 10.225440979003906, "rewards/rejected": -12.815390586853027, "step": 14201 }, { "epoch": 2.21, "learning_rate": 3.7310119819492708e-06, "logits/chosen": -2.3011670112609863, "logits/rejected": -2.72916579246521, "logps/chosen": -181.16586303710938, "logps/rejected": -392.39752197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.278816223144531, "rewards/margins": 9.782306671142578, "rewards/rejected": -18.06112289428711, "step": 14202 }, { "epoch": 2.21, "learning_rate": 3.7302785414181235e-06, "logits/chosen": -1.327054738998413, "logits/rejected": -1.769996166229248, "logps/chosen": -238.6797332763672, "logps/rejected": -512.966064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.553020477294922, "rewards/margins": 14.225179672241211, "rewards/rejected": -19.778200149536133, "step": 14203 }, { "epoch": 2.21, "learning_rate": 3.7295451008869754e-06, "logits/chosen": -2.6955361366271973, "logits/rejected": -1.6783678531646729, "logps/chosen": -861.3433227539062, "logps/rejected": -480.0735778808594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.066554546356201, "rewards/margins": 9.354955673217773, "rewards/rejected": -16.421510696411133, "step": 14204 }, { "epoch": 2.21, "learning_rate": 3.7288116603558277e-06, "logits/chosen": -2.441563844680786, "logits/rejected": -1.7595345973968506, "logps/chosen": -330.4246520996094, "logps/rejected": -515.8936767578125, "loss": 0.2223, "rewards/accuracies": 1.0, "rewards/chosen": -11.940896987915039, "rewards/margins": 7.066412448883057, "rewards/rejected": -19.007308959960938, "step": 14205 }, { "epoch": 2.21, "learning_rate": 3.7280782198246795e-06, "logits/chosen": -0.5880481004714966, "logits/rejected": -1.9309331178665161, "logps/chosen": -109.89790344238281, "logps/rejected": -573.471923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.874412536621094, "rewards/margins": 16.86617660522461, "rewards/rejected": -22.740589141845703, "step": 14206 }, { "epoch": 2.21, "learning_rate": 3.7273447792935323e-06, "logits/chosen": -1.820447564125061, "logits/rejected": -2.0596907138824463, "logps/chosen": -334.83099365234375, "logps/rejected": -546.1766967773438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.312663078308105, "rewards/margins": 13.899572372436523, "rewards/rejected": -23.212234497070312, "step": 14207 }, { "epoch": 2.21, "learning_rate": 3.726611338762384e-06, "logits/chosen": -2.5640647411346436, "logits/rejected": -2.8708417415618896, "logps/chosen": -151.52774047851562, "logps/rejected": -338.6019287109375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -5.495438575744629, "rewards/margins": 7.403548240661621, "rewards/rejected": -12.89898681640625, "step": 14208 }, { "epoch": 2.21, "learning_rate": 3.725877898231236e-06, "logits/chosen": -1.116590142250061, "logits/rejected": -2.7518868446350098, "logps/chosen": -196.88560485839844, "logps/rejected": -605.3671264648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.048661231994629, "rewards/margins": 10.40828800201416, "rewards/rejected": -20.45694923400879, "step": 14209 }, { "epoch": 2.21, "learning_rate": 3.725144457700088e-06, "logits/chosen": -2.7426929473876953, "logits/rejected": -2.559058904647827, "logps/chosen": -632.96630859375, "logps/rejected": -680.1949462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.6903581619262695, "rewards/margins": 12.963607788085938, "rewards/rejected": -18.65396499633789, "step": 14210 }, { "epoch": 2.21, "learning_rate": 3.7244110171689398e-06, "logits/chosen": -1.7491270303726196, "logits/rejected": -2.7288787364959717, "logps/chosen": -214.6484375, "logps/rejected": -475.45849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.518126487731934, "rewards/margins": 10.295061111450195, "rewards/rejected": -16.813186645507812, "step": 14211 }, { "epoch": 2.21, "learning_rate": 3.7236775766377925e-06, "logits/chosen": -2.4544005393981934, "logits/rejected": -2.6083245277404785, "logps/chosen": -793.9598388671875, "logps/rejected": -899.7667236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.401529312133789, "rewards/margins": 14.535135269165039, "rewards/rejected": -26.936664581298828, "step": 14212 }, { "epoch": 2.21, "learning_rate": 3.722944136106645e-06, "logits/chosen": -2.615133285522461, "logits/rejected": -2.7900123596191406, "logps/chosen": -220.42874145507812, "logps/rejected": -281.86279296875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -6.525649070739746, "rewards/margins": 8.299263000488281, "rewards/rejected": -14.824912071228027, "step": 14213 }, { "epoch": 2.21, "learning_rate": 3.7222106955754967e-06, "logits/chosen": -2.375993013381958, "logits/rejected": -3.050891399383545, "logps/chosen": -124.50018310546875, "logps/rejected": -328.3804016113281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.926136493682861, "rewards/margins": 8.877862930297852, "rewards/rejected": -14.803999900817871, "step": 14214 }, { "epoch": 2.21, "learning_rate": 3.7214772550443486e-06, "logits/chosen": -2.452192783355713, "logits/rejected": -2.8049890995025635, "logps/chosen": -106.37715911865234, "logps/rejected": -370.1967468261719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.172398567199707, "rewards/margins": 12.68592643737793, "rewards/rejected": -17.858325958251953, "step": 14215 }, { "epoch": 2.21, "learning_rate": 3.7207438145132013e-06, "logits/chosen": -1.9009921550750732, "logits/rejected": -2.4830374717712402, "logps/chosen": -194.33853149414062, "logps/rejected": -390.400390625, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -7.322690963745117, "rewards/margins": 8.858499526977539, "rewards/rejected": -16.181190490722656, "step": 14216 }, { "epoch": 2.21, "learning_rate": 3.720010373982053e-06, "logits/chosen": -2.3162786960601807, "logits/rejected": -2.5920543670654297, "logps/chosen": -335.43011474609375, "logps/rejected": -557.4331665039062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.583576679229736, "rewards/margins": 11.898598670959473, "rewards/rejected": -17.482173919677734, "step": 14217 }, { "epoch": 2.21, "learning_rate": 3.719276933450905e-06, "logits/chosen": -1.205881953239441, "logits/rejected": -2.5439510345458984, "logps/chosen": -138.1776123046875, "logps/rejected": -381.40850830078125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -6.639830589294434, "rewards/margins": 5.958527565002441, "rewards/rejected": -12.598358154296875, "step": 14218 }, { "epoch": 2.21, "learning_rate": 3.718543492919757e-06, "logits/chosen": -2.6032283306121826, "logits/rejected": -2.5061423778533936, "logps/chosen": -228.20132446289062, "logps/rejected": -261.6416320800781, "loss": 0.4092, "rewards/accuracies": 0.5, "rewards/chosen": -7.609536170959473, "rewards/margins": 3.7915658950805664, "rewards/rejected": -11.401102066040039, "step": 14219 }, { "epoch": 2.21, "learning_rate": 3.7178100523886097e-06, "logits/chosen": -2.4676246643066406, "logits/rejected": -2.9556775093078613, "logps/chosen": -170.94300842285156, "logps/rejected": -348.380615234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.488326072692871, "rewards/margins": 9.286174774169922, "rewards/rejected": -16.77450180053711, "step": 14220 }, { "epoch": 2.21, "learning_rate": 3.7170766118574615e-06, "logits/chosen": -1.6848669052124023, "logits/rejected": -2.6783368587493896, "logps/chosen": -319.7423400878906, "logps/rejected": -426.5879211425781, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -7.544253349304199, "rewards/margins": 5.8806915283203125, "rewards/rejected": -13.424945831298828, "step": 14221 }, { "epoch": 2.21, "learning_rate": 3.716343171326314e-06, "logits/chosen": -2.071256637573242, "logits/rejected": -2.9007086753845215, "logps/chosen": -144.2545166015625, "logps/rejected": -344.4521484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.579744338989258, "rewards/margins": 8.324840545654297, "rewards/rejected": -16.904582977294922, "step": 14222 }, { "epoch": 2.21, "learning_rate": 3.7156097307951657e-06, "logits/chosen": -2.720858335494995, "logits/rejected": -2.3696842193603516, "logps/chosen": -513.4033813476562, "logps/rejected": -475.4267883300781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.444034576416016, "rewards/margins": 11.864116668701172, "rewards/rejected": -21.308151245117188, "step": 14223 }, { "epoch": 2.21, "learning_rate": 3.7148762902640176e-06, "logits/chosen": -2.554539203643799, "logits/rejected": -2.050076961517334, "logps/chosen": -593.0221557617188, "logps/rejected": -452.13580322265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.056062698364258, "rewards/margins": 10.281362533569336, "rewards/rejected": -16.337425231933594, "step": 14224 }, { "epoch": 2.21, "learning_rate": 3.7141428497328703e-06, "logits/chosen": -2.8235716819763184, "logits/rejected": -2.9044878482818604, "logps/chosen": -158.91104125976562, "logps/rejected": -417.1062316894531, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.46914529800415, "rewards/margins": 10.381189346313477, "rewards/rejected": -17.85033416748047, "step": 14225 }, { "epoch": 2.21, "learning_rate": 3.713409409201722e-06, "logits/chosen": -2.608813524246216, "logits/rejected": -2.8282599449157715, "logps/chosen": -74.84077453613281, "logps/rejected": -230.1752166748047, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -6.200002670288086, "rewards/margins": 7.484306335449219, "rewards/rejected": -13.684309005737305, "step": 14226 }, { "epoch": 2.21, "learning_rate": 3.712675968670574e-06, "logits/chosen": -1.3185113668441772, "logits/rejected": -2.7147510051727295, "logps/chosen": -140.24655151367188, "logps/rejected": -473.6396789550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.644870281219482, "rewards/margins": 10.430602073669434, "rewards/rejected": -15.075471878051758, "step": 14227 }, { "epoch": 2.21, "learning_rate": 3.711942528139426e-06, "logits/chosen": -2.9008352756500244, "logits/rejected": -2.826845645904541, "logps/chosen": -207.77911376953125, "logps/rejected": -199.03665161132812, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -7.130215644836426, "rewards/margins": 5.642230987548828, "rewards/rejected": -12.772446632385254, "step": 14228 }, { "epoch": 2.21, "learning_rate": 3.7112090876082787e-06, "logits/chosen": -2.4896726608276367, "logits/rejected": -2.9022090435028076, "logps/chosen": -293.9270324707031, "logps/rejected": -470.9952392578125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -5.192162036895752, "rewards/margins": 8.731603622436523, "rewards/rejected": -13.923765182495117, "step": 14229 }, { "epoch": 2.21, "learning_rate": 3.710475647077131e-06, "logits/chosen": -2.5149154663085938, "logits/rejected": -2.6224112510681152, "logps/chosen": -405.9034729003906, "logps/rejected": -501.18865966796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.26918888092041, "rewards/margins": 11.293783187866211, "rewards/rejected": -16.562973022460938, "step": 14230 }, { "epoch": 2.21, "learning_rate": 3.709742206545983e-06, "logits/chosen": -1.0621806383132935, "logits/rejected": -2.4026365280151367, "logps/chosen": -91.39021301269531, "logps/rejected": -415.53704833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.669805526733398, "rewards/margins": 10.66840934753418, "rewards/rejected": -15.338214874267578, "step": 14231 }, { "epoch": 2.21, "learning_rate": 3.7090087660148347e-06, "logits/chosen": -2.191572904586792, "logits/rejected": -2.4987287521362305, "logps/chosen": -98.87996673583984, "logps/rejected": -272.713623046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.997729301452637, "rewards/margins": 9.16112995147705, "rewards/rejected": -15.158859252929688, "step": 14232 }, { "epoch": 2.21, "learning_rate": 3.7082753254836866e-06, "logits/chosen": -2.7087275981903076, "logits/rejected": -1.2539809942245483, "logps/chosen": -469.74652099609375, "logps/rejected": -260.1497497558594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.0303053855895996, "rewards/margins": 13.479829788208008, "rewards/rejected": -16.510135650634766, "step": 14233 }, { "epoch": 2.21, "learning_rate": 3.7075418849525393e-06, "logits/chosen": -2.677746295928955, "logits/rejected": -2.458282232284546, "logps/chosen": -444.0211181640625, "logps/rejected": -321.8201904296875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -7.656948566436768, "rewards/margins": 4.93190336227417, "rewards/rejected": -12.588851928710938, "step": 14234 }, { "epoch": 2.21, "learning_rate": 3.7068084444213912e-06, "logits/chosen": -1.8444421291351318, "logits/rejected": -2.923218011856079, "logps/chosen": -221.24636840820312, "logps/rejected": -372.719482421875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -7.180991172790527, "rewards/margins": 7.1234025955200195, "rewards/rejected": -14.304393768310547, "step": 14235 }, { "epoch": 2.21, "learning_rate": 3.706075003890243e-06, "logits/chosen": -2.837221622467041, "logits/rejected": -1.9556900262832642, "logps/chosen": -232.3880615234375, "logps/rejected": -232.1392822265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.2253499031066895, "rewards/margins": 8.177392959594727, "rewards/rejected": -13.402742385864258, "step": 14236 }, { "epoch": 2.21, "learning_rate": 3.705341563359095e-06, "logits/chosen": -2.3753085136413574, "logits/rejected": -2.2487552165985107, "logps/chosen": -173.1261444091797, "logps/rejected": -373.3052673339844, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -7.833792686462402, "rewards/margins": 8.665647506713867, "rewards/rejected": -16.499441146850586, "step": 14237 }, { "epoch": 2.21, "learning_rate": 3.7046081228279477e-06, "logits/chosen": -2.5664360523223877, "logits/rejected": -2.277952194213867, "logps/chosen": -268.6060791015625, "logps/rejected": -476.293701171875, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -8.79523754119873, "rewards/margins": 7.3779377937316895, "rewards/rejected": -16.173175811767578, "step": 14238 }, { "epoch": 2.21, "learning_rate": 3.7038746822968e-06, "logits/chosen": -2.6714134216308594, "logits/rejected": -1.5512378215789795, "logps/chosen": -193.35964965820312, "logps/rejected": -261.741455078125, "loss": 0.3196, "rewards/accuracies": 1.0, "rewards/chosen": -6.746238708496094, "rewards/margins": 5.469932556152344, "rewards/rejected": -12.216171264648438, "step": 14239 }, { "epoch": 2.21, "learning_rate": 3.703141241765652e-06, "logits/chosen": -1.4773484468460083, "logits/rejected": -2.5119211673736572, "logps/chosen": -332.80792236328125, "logps/rejected": -372.0724792480469, "loss": 4.0881, "rewards/accuracies": 0.5, "rewards/chosen": -11.358882904052734, "rewards/margins": 2.161550521850586, "rewards/rejected": -13.52043342590332, "step": 14240 }, { "epoch": 2.21, "learning_rate": 3.7024078012345038e-06, "logits/chosen": -2.638559341430664, "logits/rejected": -2.846651792526245, "logps/chosen": -182.61380004882812, "logps/rejected": -183.30233764648438, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.627775192260742, "rewards/margins": 7.111573696136475, "rewards/rejected": -11.739349365234375, "step": 14241 }, { "epoch": 2.21, "learning_rate": 3.7016743607033556e-06, "logits/chosen": -2.0224392414093018, "logits/rejected": -2.514721632003784, "logps/chosen": -228.35940551757812, "logps/rejected": -464.0142822265625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -9.677515029907227, "rewards/margins": 6.990965843200684, "rewards/rejected": -16.668479919433594, "step": 14242 }, { "epoch": 2.22, "learning_rate": 3.7009409201722084e-06, "logits/chosen": -2.4137675762176514, "logits/rejected": -2.669116735458374, "logps/chosen": -254.39476013183594, "logps/rejected": -418.25830078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.416397094726562, "rewards/margins": 9.40111255645752, "rewards/rejected": -17.817508697509766, "step": 14243 }, { "epoch": 2.22, "learning_rate": 3.7002074796410602e-06, "logits/chosen": -2.955549478530884, "logits/rejected": -3.106980562210083, "logps/chosen": -157.5919952392578, "logps/rejected": -332.7367248535156, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -6.07680606842041, "rewards/margins": 9.588138580322266, "rewards/rejected": -15.664945602416992, "step": 14244 }, { "epoch": 2.22, "learning_rate": 3.699474039109912e-06, "logits/chosen": -2.391449213027954, "logits/rejected": -2.786055326461792, "logps/chosen": -227.78125, "logps/rejected": -469.5021667480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.612823486328125, "rewards/margins": 13.476297378540039, "rewards/rejected": -21.089120864868164, "step": 14245 }, { "epoch": 2.22, "learning_rate": 3.6987405985787644e-06, "logits/chosen": -2.850538969039917, "logits/rejected": -1.6545960903167725, "logps/chosen": -472.55914306640625, "logps/rejected": -388.46954345703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.5088605880737305, "rewards/margins": 10.486747741699219, "rewards/rejected": -15.995609283447266, "step": 14246 }, { "epoch": 2.22, "learning_rate": 3.698007158047617e-06, "logits/chosen": -0.831820547580719, "logits/rejected": -2.204486131668091, "logps/chosen": -240.39891052246094, "logps/rejected": -616.4306030273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.211355686187744, "rewards/margins": 16.751548767089844, "rewards/rejected": -21.962905883789062, "step": 14247 }, { "epoch": 2.22, "learning_rate": 3.697273717516469e-06, "logits/chosen": -2.5485024452209473, "logits/rejected": -2.0542049407958984, "logps/chosen": -647.40576171875, "logps/rejected": -528.288330078125, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -6.064982891082764, "rewards/margins": 10.077459335327148, "rewards/rejected": -16.14244270324707, "step": 14248 }, { "epoch": 2.22, "learning_rate": 3.696540276985321e-06, "logits/chosen": -1.415298581123352, "logits/rejected": -2.571094036102295, "logps/chosen": -160.22862243652344, "logps/rejected": -288.9393310546875, "loss": 0.2664, "rewards/accuracies": 1.0, "rewards/chosen": -8.24665355682373, "rewards/margins": 4.358623504638672, "rewards/rejected": -12.605277061462402, "step": 14249 }, { "epoch": 2.22, "learning_rate": 3.6958068364541728e-06, "logits/chosen": -2.443443536758423, "logits/rejected": -2.332210063934326, "logps/chosen": -303.47625732421875, "logps/rejected": -487.08087158203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.544179916381836, "rewards/margins": 10.675359725952148, "rewards/rejected": -19.219539642333984, "step": 14250 }, { "epoch": 2.22, "learning_rate": 3.6950733959230247e-06, "logits/chosen": -1.2520097494125366, "logits/rejected": -2.605752944946289, "logps/chosen": -217.32872009277344, "logps/rejected": -497.6287841796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.765331268310547, "rewards/margins": 9.960023880004883, "rewards/rejected": -15.72535514831543, "step": 14251 }, { "epoch": 2.22, "learning_rate": 3.6943399553918774e-06, "logits/chosen": -2.5750129222869873, "logits/rejected": -1.9818100929260254, "logps/chosen": -249.63534545898438, "logps/rejected": -507.73748779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.222431182861328, "rewards/margins": 15.221302032470703, "rewards/rejected": -24.44373321533203, "step": 14252 }, { "epoch": 2.22, "learning_rate": 3.6936065148607293e-06, "logits/chosen": -2.6558499336242676, "logits/rejected": -1.4530887603759766, "logps/chosen": -553.461181640625, "logps/rejected": -550.625732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.990509033203125, "rewards/margins": 11.771635055541992, "rewards/rejected": -17.76214599609375, "step": 14253 }, { "epoch": 2.22, "learning_rate": 3.692873074329581e-06, "logits/chosen": -1.2787880897521973, "logits/rejected": -2.7167365550994873, "logps/chosen": -133.22604370117188, "logps/rejected": -494.5296630859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.498614311218262, "rewards/margins": 12.139801979064941, "rewards/rejected": -17.638416290283203, "step": 14254 }, { "epoch": 2.22, "learning_rate": 3.6921396337984334e-06, "logits/chosen": -1.569166898727417, "logits/rejected": -2.563537359237671, "logps/chosen": -124.58545684814453, "logps/rejected": -402.7364807128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.096478462219238, "rewards/margins": 11.043285369873047, "rewards/rejected": -18.13976287841797, "step": 14255 }, { "epoch": 2.22, "learning_rate": 3.691406193267286e-06, "logits/chosen": -1.2763011455535889, "logits/rejected": -2.6659724712371826, "logps/chosen": -157.0572052001953, "logps/rejected": -365.6033935546875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -6.998590469360352, "rewards/margins": 8.270894050598145, "rewards/rejected": -15.269485473632812, "step": 14256 }, { "epoch": 2.22, "learning_rate": 3.690672752736138e-06, "logits/chosen": -2.302464485168457, "logits/rejected": -2.722832441329956, "logps/chosen": -403.39215087890625, "logps/rejected": -574.08349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.134262084960938, "rewards/margins": 9.415349006652832, "rewards/rejected": -20.549610137939453, "step": 14257 }, { "epoch": 2.22, "learning_rate": 3.68993931220499e-06, "logits/chosen": -2.2709381580352783, "logits/rejected": -2.358316659927368, "logps/chosen": -112.28218078613281, "logps/rejected": -337.9010009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.967947959899902, "rewards/margins": 13.598124504089355, "rewards/rejected": -19.566072463989258, "step": 14258 }, { "epoch": 2.22, "learning_rate": 3.689205871673842e-06, "logits/chosen": -1.5458654165267944, "logits/rejected": -2.5116169452667236, "logps/chosen": -211.40391540527344, "logps/rejected": -660.9838256835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.098199844360352, "rewards/margins": 17.584819793701172, "rewards/rejected": -25.683021545410156, "step": 14259 }, { "epoch": 2.22, "learning_rate": 3.6884724311426945e-06, "logits/chosen": -1.9216920137405396, "logits/rejected": -2.5559239387512207, "logps/chosen": -137.36074829101562, "logps/rejected": -376.2594299316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.79662799835205, "rewards/margins": 11.064342498779297, "rewards/rejected": -19.860971450805664, "step": 14260 }, { "epoch": 2.22, "learning_rate": 3.6877389906115464e-06, "logits/chosen": -1.631050944328308, "logits/rejected": -2.5996553897857666, "logps/chosen": -133.77774047851562, "logps/rejected": -460.7825927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.116460800170898, "rewards/margins": 15.419951438903809, "rewards/rejected": -22.53641128540039, "step": 14261 }, { "epoch": 2.22, "learning_rate": 3.6870055500803983e-06, "logits/chosen": -2.4586470127105713, "logits/rejected": -2.6175034046173096, "logps/chosen": -101.25302124023438, "logps/rejected": -164.3108673095703, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -7.29942512512207, "rewards/margins": 4.889000415802002, "rewards/rejected": -12.188425064086914, "step": 14262 }, { "epoch": 2.22, "learning_rate": 3.6862721095492506e-06, "logits/chosen": -2.710599899291992, "logits/rejected": -2.6340723037719727, "logps/chosen": -138.52420043945312, "logps/rejected": -219.64898681640625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.8024187088012695, "rewards/margins": 8.553804397583008, "rewards/rejected": -15.356222152709961, "step": 14263 }, { "epoch": 2.22, "learning_rate": 3.6855386690181025e-06, "logits/chosen": -2.952139139175415, "logits/rejected": -3.005025625228882, "logps/chosen": -323.76190185546875, "logps/rejected": -423.57135009765625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -6.025439262390137, "rewards/margins": 8.633174896240234, "rewards/rejected": -14.658614158630371, "step": 14264 }, { "epoch": 2.22, "learning_rate": 3.684805228486955e-06, "logits/chosen": -1.3196678161621094, "logits/rejected": -2.3542680740356445, "logps/chosen": -231.58663940429688, "logps/rejected": -546.036376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.024070739746094, "rewards/margins": 11.30113410949707, "rewards/rejected": -18.325204849243164, "step": 14265 }, { "epoch": 2.22, "learning_rate": 3.684071787955807e-06, "logits/chosen": -2.5070245265960693, "logits/rejected": -1.7686758041381836, "logps/chosen": -339.68328857421875, "logps/rejected": -318.1282653808594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.792551517486572, "rewards/margins": 8.887096405029297, "rewards/rejected": -15.679647445678711, "step": 14266 }, { "epoch": 2.22, "learning_rate": 3.683338347424659e-06, "logits/chosen": -2.6411592960357666, "logits/rejected": -2.264887809753418, "logps/chosen": -375.959228515625, "logps/rejected": -573.047119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.994948387145996, "rewards/margins": 16.188339233398438, "rewards/rejected": -25.183286666870117, "step": 14267 }, { "epoch": 2.22, "learning_rate": 3.682604906893511e-06, "logits/chosen": -2.6677234172821045, "logits/rejected": -2.6936113834381104, "logps/chosen": -123.18453979492188, "logps/rejected": -258.2834167480469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.644510269165039, "rewards/margins": 10.092581748962402, "rewards/rejected": -14.737092971801758, "step": 14268 }, { "epoch": 2.22, "learning_rate": 3.6818714663623636e-06, "logits/chosen": -2.3195574283599854, "logits/rejected": -1.5421192646026611, "logps/chosen": -582.9379272460938, "logps/rejected": -579.3389892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.549772262573242, "rewards/margins": 17.028472900390625, "rewards/rejected": -25.5782470703125, "step": 14269 }, { "epoch": 2.22, "learning_rate": 3.6811380258312154e-06, "logits/chosen": -2.8349225521087646, "logits/rejected": -2.540377140045166, "logps/chosen": -205.72592163085938, "logps/rejected": -278.85418701171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.817680358886719, "rewards/margins": 8.588939666748047, "rewards/rejected": -13.406620025634766, "step": 14270 }, { "epoch": 2.22, "learning_rate": 3.6804045853000673e-06, "logits/chosen": -2.005620002746582, "logits/rejected": -2.8574836254119873, "logps/chosen": -302.7691650390625, "logps/rejected": -419.47540283203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -7.3889055252075195, "rewards/margins": 8.914458274841309, "rewards/rejected": -16.303363800048828, "step": 14271 }, { "epoch": 2.22, "learning_rate": 3.6796711447689196e-06, "logits/chosen": -1.4402614831924438, "logits/rejected": -2.662637233734131, "logps/chosen": -227.20462036132812, "logps/rejected": -535.1743774414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.013823986053467, "rewards/margins": 13.084678649902344, "rewards/rejected": -18.09850311279297, "step": 14272 }, { "epoch": 2.22, "learning_rate": 3.6789377042377715e-06, "logits/chosen": -1.9483168125152588, "logits/rejected": -2.573788642883301, "logps/chosen": -157.6898193359375, "logps/rejected": -430.83441162109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.110672950744629, "rewards/margins": 8.467452049255371, "rewards/rejected": -15.578125, "step": 14273 }, { "epoch": 2.22, "learning_rate": 3.6782042637066242e-06, "logits/chosen": -1.8320059776306152, "logits/rejected": -2.5404930114746094, "logps/chosen": -555.5696411132812, "logps/rejected": -600.57080078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -11.373955726623535, "rewards/margins": 12.13864517211914, "rewards/rejected": -23.51259994506836, "step": 14274 }, { "epoch": 2.22, "learning_rate": 3.677470823175476e-06, "logits/chosen": -2.328076124191284, "logits/rejected": -2.8286919593811035, "logps/chosen": -94.96771240234375, "logps/rejected": -294.41510009765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.7974395751953125, "rewards/margins": 8.094335556030273, "rewards/rejected": -15.891775131225586, "step": 14275 }, { "epoch": 2.22, "learning_rate": 3.676737382644328e-06, "logits/chosen": -1.3152211904525757, "logits/rejected": -2.6647517681121826, "logps/chosen": -141.629638671875, "logps/rejected": -511.2447204589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.890721321105957, "rewards/margins": 11.138967514038086, "rewards/rejected": -19.02968978881836, "step": 14276 }, { "epoch": 2.22, "learning_rate": 3.67600394211318e-06, "logits/chosen": -2.4234261512756348, "logits/rejected": -2.6291608810424805, "logps/chosen": -156.69529724121094, "logps/rejected": -358.47967529296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.5905938148498535, "rewards/margins": 8.762591361999512, "rewards/rejected": -14.353185653686523, "step": 14277 }, { "epoch": 2.22, "learning_rate": 3.6752705015820326e-06, "logits/chosen": -1.6436741352081299, "logits/rejected": -2.7844178676605225, "logps/chosen": -208.99819946289062, "logps/rejected": -321.4443359375, "loss": 0.2692, "rewards/accuracies": 1.0, "rewards/chosen": -6.737557411193848, "rewards/margins": 6.59449577331543, "rewards/rejected": -13.332053184509277, "step": 14278 }, { "epoch": 2.22, "learning_rate": 3.6745370610508845e-06, "logits/chosen": -2.334010362625122, "logits/rejected": -2.365478038787842, "logps/chosen": -149.35348510742188, "logps/rejected": -236.81768798828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.073071479797363, "rewards/margins": 8.275760650634766, "rewards/rejected": -13.348831176757812, "step": 14279 }, { "epoch": 2.22, "learning_rate": 3.6738036205197368e-06, "logits/chosen": -2.2975847721099854, "logits/rejected": -2.774414539337158, "logps/chosen": -451.7274169921875, "logps/rejected": -499.7148132324219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.628119468688965, "rewards/margins": 12.007477760314941, "rewards/rejected": -19.635597229003906, "step": 14280 }, { "epoch": 2.22, "learning_rate": 3.6730701799885886e-06, "logits/chosen": -1.5294787883758545, "logits/rejected": -1.5987926721572876, "logps/chosen": -202.79547119140625, "logps/rejected": -394.838134765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.254683017730713, "rewards/margins": 7.762468338012695, "rewards/rejected": -15.01715087890625, "step": 14281 }, { "epoch": 2.22, "learning_rate": 3.6723367394574405e-06, "logits/chosen": -2.641671657562256, "logits/rejected": -2.054966449737549, "logps/chosen": -461.18829345703125, "logps/rejected": -403.69854736328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.505714416503906, "rewards/margins": 11.219656944274902, "rewards/rejected": -18.725372314453125, "step": 14282 }, { "epoch": 2.22, "learning_rate": 3.6716032989262932e-06, "logits/chosen": -2.593790054321289, "logits/rejected": -1.972377061843872, "logps/chosen": -670.467529296875, "logps/rejected": -463.2155456542969, "loss": 0.3909, "rewards/accuracies": 0.5, "rewards/chosen": -14.253997802734375, "rewards/margins": 4.040924549102783, "rewards/rejected": -18.294921875, "step": 14283 }, { "epoch": 2.22, "learning_rate": 3.670869858395145e-06, "logits/chosen": -2.717144727706909, "logits/rejected": -1.7782268524169922, "logps/chosen": -683.1165161132812, "logps/rejected": -637.6395263671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -10.042013168334961, "rewards/margins": 8.81959342956543, "rewards/rejected": -18.86160659790039, "step": 14284 }, { "epoch": 2.22, "learning_rate": 3.670136417863997e-06, "logits/chosen": -2.5714519023895264, "logits/rejected": -2.631728410720825, "logps/chosen": -644.15185546875, "logps/rejected": -677.6324462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.664144515991211, "rewards/margins": 10.604009628295898, "rewards/rejected": -21.26815414428711, "step": 14285 }, { "epoch": 2.22, "learning_rate": 3.669402977332849e-06, "logits/chosen": -1.9820303916931152, "logits/rejected": -2.851353406906128, "logps/chosen": -198.63540649414062, "logps/rejected": -543.812744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.286127090454102, "rewards/margins": 14.830825805664062, "rewards/rejected": -20.116952896118164, "step": 14286 }, { "epoch": 2.22, "learning_rate": 3.6686695368017016e-06, "logits/chosen": -2.2608582973480225, "logits/rejected": -2.3721187114715576, "logps/chosen": -338.4386901855469, "logps/rejected": -374.3929443359375, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/chosen": -9.504704475402832, "rewards/margins": 5.791672706604004, "rewards/rejected": -15.296377182006836, "step": 14287 }, { "epoch": 2.22, "learning_rate": 3.6679360962705535e-06, "logits/chosen": -2.636404275894165, "logits/rejected": -2.744905471801758, "logps/chosen": -380.60223388671875, "logps/rejected": -537.7946166992188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.66313648223877, "rewards/margins": 11.66239070892334, "rewards/rejected": -20.32552719116211, "step": 14288 }, { "epoch": 2.22, "learning_rate": 3.6672026557394058e-06, "logits/chosen": -0.9508735537528992, "logits/rejected": -2.6703155040740967, "logps/chosen": -172.38070678710938, "logps/rejected": -596.450927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7558088302612305, "rewards/margins": 15.395451545715332, "rewards/rejected": -20.151260375976562, "step": 14289 }, { "epoch": 2.22, "learning_rate": 3.6664692152082577e-06, "logits/chosen": -1.4856621026992798, "logits/rejected": -2.9038407802581787, "logps/chosen": -102.85235595703125, "logps/rejected": -408.4970397949219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.609792709350586, "rewards/margins": 9.319286346435547, "rewards/rejected": -14.929079055786133, "step": 14290 }, { "epoch": 2.22, "learning_rate": 3.6657357746771095e-06, "logits/chosen": -2.4779775142669678, "logits/rejected": -2.6349503993988037, "logps/chosen": -238.0415802001953, "logps/rejected": -249.681884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.200543403625488, "rewards/margins": 10.996891021728516, "rewards/rejected": -15.19743537902832, "step": 14291 }, { "epoch": 2.22, "learning_rate": 3.6650023341459623e-06, "logits/chosen": -2.718444347381592, "logits/rejected": -2.8443105220794678, "logps/chosen": -143.0338592529297, "logps/rejected": -331.10443115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.456478118896484, "rewards/margins": 11.161157608032227, "rewards/rejected": -17.61763572692871, "step": 14292 }, { "epoch": 2.22, "learning_rate": 3.664268893614814e-06, "logits/chosen": -2.6717560291290283, "logits/rejected": -3.0682294368743896, "logps/chosen": -183.75198364257812, "logps/rejected": -338.806396484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.2458176612854, "rewards/margins": 7.19162654876709, "rewards/rejected": -12.437443733215332, "step": 14293 }, { "epoch": 2.22, "learning_rate": 3.663535453083666e-06, "logits/chosen": -2.721966505050659, "logits/rejected": -2.285080671310425, "logps/chosen": -473.46343994140625, "logps/rejected": -360.67315673828125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -10.467711448669434, "rewards/margins": 6.387170791625977, "rewards/rejected": -16.854881286621094, "step": 14294 }, { "epoch": 2.22, "learning_rate": 3.662802012552518e-06, "logits/chosen": -2.588865280151367, "logits/rejected": -1.8184905052185059, "logps/chosen": -749.3502197265625, "logps/rejected": -451.6455078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.051490306854248, "rewards/margins": 9.131058692932129, "rewards/rejected": -12.182548522949219, "step": 14295 }, { "epoch": 2.22, "learning_rate": 3.6620685720213706e-06, "logits/chosen": -2.1545445919036865, "logits/rejected": -2.5728728771209717, "logps/chosen": -185.20799255371094, "logps/rejected": -370.4500732421875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -6.346078872680664, "rewards/margins": 9.465572357177734, "rewards/rejected": -15.811651229858398, "step": 14296 }, { "epoch": 2.22, "learning_rate": 3.6613351314902225e-06, "logits/chosen": -1.7838466167449951, "logits/rejected": -2.7180335521698, "logps/chosen": -98.50581359863281, "logps/rejected": -276.12481689453125, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -6.242156028747559, "rewards/margins": 6.398568153381348, "rewards/rejected": -12.640724182128906, "step": 14297 }, { "epoch": 2.22, "learning_rate": 3.660601690959075e-06, "logits/chosen": -2.880098342895508, "logits/rejected": -1.6050995588302612, "logps/chosen": -244.8359375, "logps/rejected": -147.95260620117188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.237022399902344, "rewards/margins": 7.409847736358643, "rewards/rejected": -12.646869659423828, "step": 14298 }, { "epoch": 2.22, "learning_rate": 3.6598682504279267e-06, "logits/chosen": -2.8398642539978027, "logits/rejected": -2.604907512664795, "logps/chosen": -259.40838623046875, "logps/rejected": -340.6236572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.953849792480469, "rewards/margins": 11.105239868164062, "rewards/rejected": -17.05908966064453, "step": 14299 }, { "epoch": 2.22, "learning_rate": 3.6591348098967786e-06, "logits/chosen": -2.233722686767578, "logits/rejected": -2.7538723945617676, "logps/chosen": -106.14079284667969, "logps/rejected": -306.6336669921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.1262969970703125, "rewards/margins": 8.794394493103027, "rewards/rejected": -11.920692443847656, "step": 14300 }, { "epoch": 2.22, "learning_rate": 3.6584013693656313e-06, "logits/chosen": -2.502958059310913, "logits/rejected": -2.3726282119750977, "logps/chosen": -255.7960662841797, "logps/rejected": -363.8164367675781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.3934550285339355, "rewards/margins": 10.734752655029297, "rewards/rejected": -17.12820816040039, "step": 14301 }, { "epoch": 2.22, "learning_rate": 3.657667928834483e-06, "logits/chosen": -1.6139070987701416, "logits/rejected": -2.545328378677368, "logps/chosen": -106.18302917480469, "logps/rejected": -334.37481689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.403213977813721, "rewards/margins": 8.964473724365234, "rewards/rejected": -13.367687225341797, "step": 14302 }, { "epoch": 2.22, "learning_rate": 3.656934488303335e-06, "logits/chosen": -2.9101767539978027, "logits/rejected": -2.4718337059020996, "logps/chosen": -737.2650146484375, "logps/rejected": -554.7390747070312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.051724433898926, "rewards/margins": 7.670288562774658, "rewards/rejected": -13.722012519836426, "step": 14303 }, { "epoch": 2.22, "learning_rate": 3.656201047772187e-06, "logits/chosen": -1.641687273979187, "logits/rejected": -2.774513006210327, "logps/chosen": -60.287330627441406, "logps/rejected": -566.96240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.198889255523682, "rewards/margins": 14.061017990112305, "rewards/rejected": -18.259906768798828, "step": 14304 }, { "epoch": 2.22, "learning_rate": 3.6554676072410396e-06, "logits/chosen": -1.8029752969741821, "logits/rejected": -2.8955142498016357, "logps/chosen": -309.5257568359375, "logps/rejected": -693.333740234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.932035446166992, "rewards/margins": 16.03572654724121, "rewards/rejected": -20.967761993408203, "step": 14305 }, { "epoch": 2.22, "learning_rate": 3.654734166709892e-06, "logits/chosen": -2.8675873279571533, "logits/rejected": -2.8768255710601807, "logps/chosen": -412.1515197753906, "logps/rejected": -400.30731201171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.18238639831543, "rewards/margins": 8.595099449157715, "rewards/rejected": -17.777484893798828, "step": 14306 }, { "epoch": 2.23, "learning_rate": 3.654000726178744e-06, "logits/chosen": -1.1287075281143188, "logits/rejected": -2.284331798553467, "logps/chosen": -353.2657775878906, "logps/rejected": -563.6678466796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.308769226074219, "rewards/margins": 10.225029945373535, "rewards/rejected": -18.533798217773438, "step": 14307 }, { "epoch": 2.23, "learning_rate": 3.6532672856475957e-06, "logits/chosen": -2.5255942344665527, "logits/rejected": -2.876547336578369, "logps/chosen": -523.3444213867188, "logps/rejected": -672.7905883789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.838754653930664, "rewards/margins": 11.971254348754883, "rewards/rejected": -16.810009002685547, "step": 14308 }, { "epoch": 2.23, "learning_rate": 3.6525338451164484e-06, "logits/chosen": -1.6565231084823608, "logits/rejected": -2.4343767166137695, "logps/chosen": -373.8912048339844, "logps/rejected": -443.8186340332031, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -8.037678718566895, "rewards/margins": 8.166860580444336, "rewards/rejected": -16.204538345336914, "step": 14309 }, { "epoch": 2.23, "learning_rate": 3.6518004045853003e-06, "logits/chosen": -2.607743978500366, "logits/rejected": -1.796176791191101, "logps/chosen": -339.68292236328125, "logps/rejected": -326.95220947265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -10.188650131225586, "rewards/margins": 7.572044849395752, "rewards/rejected": -17.76069450378418, "step": 14310 }, { "epoch": 2.23, "learning_rate": 3.651066964054152e-06, "logits/chosen": -1.5847247838974, "logits/rejected": -1.2378822565078735, "logps/chosen": -630.97265625, "logps/rejected": -465.9006042480469, "loss": 0.0875, "rewards/accuracies": 1.0, "rewards/chosen": -10.932778358459473, "rewards/margins": 11.049846649169922, "rewards/rejected": -21.982624053955078, "step": 14311 }, { "epoch": 2.23, "learning_rate": 3.650333523523004e-06, "logits/chosen": -2.129939079284668, "logits/rejected": -2.4594228267669678, "logps/chosen": -310.2886962890625, "logps/rejected": -336.3177185058594, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.607247829437256, "rewards/margins": 9.07017707824707, "rewards/rejected": -14.677425384521484, "step": 14312 }, { "epoch": 2.23, "learning_rate": 3.649600082991856e-06, "logits/chosen": -2.8019986152648926, "logits/rejected": -1.9499189853668213, "logps/chosen": -246.78977966308594, "logps/rejected": -164.16314697265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.9102553129196167, "rewards/margins": 10.188109397888184, "rewards/rejected": -12.09836483001709, "step": 14313 }, { "epoch": 2.23, "learning_rate": 3.6488666424607087e-06, "logits/chosen": -2.332350730895996, "logits/rejected": -2.9787909984588623, "logps/chosen": -172.35684204101562, "logps/rejected": -513.86669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7793121337890625, "rewards/margins": 13.639192581176758, "rewards/rejected": -19.41850471496582, "step": 14314 }, { "epoch": 2.23, "learning_rate": 3.648133201929561e-06, "logits/chosen": -1.5005630254745483, "logits/rejected": -2.8771612644195557, "logps/chosen": -100.20179748535156, "logps/rejected": -492.2475280761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9777116775512695, "rewards/margins": 14.682900428771973, "rewards/rejected": -19.660612106323242, "step": 14315 }, { "epoch": 2.23, "learning_rate": 3.647399761398413e-06, "logits/chosen": -2.2126963138580322, "logits/rejected": -2.7270615100860596, "logps/chosen": -292.3711242675781, "logps/rejected": -490.3877258300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.433862686157227, "rewards/margins": 11.639142036437988, "rewards/rejected": -18.07300567626953, "step": 14316 }, { "epoch": 2.23, "learning_rate": 3.6466663208672647e-06, "logits/chosen": -2.819474697113037, "logits/rejected": -2.043386697769165, "logps/chosen": -688.4237060546875, "logps/rejected": -539.40966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.237571716308594, "rewards/margins": 11.870196342468262, "rewards/rejected": -22.107769012451172, "step": 14317 }, { "epoch": 2.23, "learning_rate": 3.6459328803361174e-06, "logits/chosen": -2.733458995819092, "logits/rejected": -1.9078739881515503, "logps/chosen": -1215.5269775390625, "logps/rejected": -812.5693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.876776695251465, "rewards/margins": 12.90920352935791, "rewards/rejected": -20.785980224609375, "step": 14318 }, { "epoch": 2.23, "learning_rate": 3.6451994398049693e-06, "logits/chosen": -1.9656412601470947, "logits/rejected": -2.520552635192871, "logps/chosen": -70.71660614013672, "logps/rejected": -311.96929931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.841508150100708, "rewards/margins": 15.400257110595703, "rewards/rejected": -19.24176597595215, "step": 14319 }, { "epoch": 2.23, "learning_rate": 3.644465999273821e-06, "logits/chosen": -2.5593738555908203, "logits/rejected": -2.35113525390625, "logps/chosen": -196.12374877929688, "logps/rejected": -322.6384582519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.654296875, "rewards/margins": 12.065170288085938, "rewards/rejected": -16.719467163085938, "step": 14320 }, { "epoch": 2.23, "learning_rate": 3.643732558742673e-06, "logits/chosen": -2.2871360778808594, "logits/rejected": -2.9146792888641357, "logps/chosen": -186.80587768554688, "logps/rejected": -402.7521057128906, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.95346212387085, "rewards/margins": 8.88157844543457, "rewards/rejected": -14.835041046142578, "step": 14321 }, { "epoch": 2.23, "learning_rate": 3.6429991182115254e-06, "logits/chosen": -2.344951629638672, "logits/rejected": -2.6430258750915527, "logps/chosen": -84.72892761230469, "logps/rejected": -248.69528198242188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.178932189941406, "rewards/margins": 8.952296257019043, "rewards/rejected": -15.13122844696045, "step": 14322 }, { "epoch": 2.23, "learning_rate": 3.642265677680378e-06, "logits/chosen": -2.27656888961792, "logits/rejected": -2.4026548862457275, "logps/chosen": -294.7449035644531, "logps/rejected": -391.206298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.423161506652832, "rewards/margins": 11.776758193969727, "rewards/rejected": -20.199920654296875, "step": 14323 }, { "epoch": 2.23, "learning_rate": 3.64153223714923e-06, "logits/chosen": -2.120649576187134, "logits/rejected": -2.6834685802459717, "logps/chosen": -129.2293701171875, "logps/rejected": -336.1226806640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.769074440002441, "rewards/margins": 11.61989974975586, "rewards/rejected": -17.388973236083984, "step": 14324 }, { "epoch": 2.23, "learning_rate": 3.640798796618082e-06, "logits/chosen": -2.624674081802368, "logits/rejected": -2.058290958404541, "logps/chosen": -569.5537109375, "logps/rejected": -518.3648681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.179590225219727, "rewards/margins": 11.69917106628418, "rewards/rejected": -19.878761291503906, "step": 14325 }, { "epoch": 2.23, "learning_rate": 3.6400653560869337e-06, "logits/chosen": -2.170055389404297, "logits/rejected": -2.7852942943573, "logps/chosen": -258.8658142089844, "logps/rejected": -627.990478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2096359729766846, "rewards/margins": 13.903776168823242, "rewards/rejected": -17.113412857055664, "step": 14326 }, { "epoch": 2.23, "learning_rate": 3.6393319155557865e-06, "logits/chosen": -2.4423668384552, "logits/rejected": -2.9426233768463135, "logps/chosen": -62.52317810058594, "logps/rejected": -321.1707458496094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.62115478515625, "rewards/margins": 11.490757942199707, "rewards/rejected": -16.11191177368164, "step": 14327 }, { "epoch": 2.23, "learning_rate": 3.6385984750246383e-06, "logits/chosen": -2.491873264312744, "logits/rejected": -2.9841015338897705, "logps/chosen": -447.0179443359375, "logps/rejected": -590.7610473632812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.8977532386779785, "rewards/margins": 11.18814468383789, "rewards/rejected": -16.085899353027344, "step": 14328 }, { "epoch": 2.23, "learning_rate": 3.6378650344934902e-06, "logits/chosen": -2.218618869781494, "logits/rejected": -1.9834312200546265, "logps/chosen": -132.28562927246094, "logps/rejected": -256.3087463378906, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.0874924659729, "rewards/margins": 8.20086669921875, "rewards/rejected": -13.288359642028809, "step": 14329 }, { "epoch": 2.23, "learning_rate": 3.637131593962342e-06, "logits/chosen": -1.6434532403945923, "logits/rejected": -2.4557743072509766, "logps/chosen": -170.78939819335938, "logps/rejected": -399.82928466796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.161139488220215, "rewards/margins": 9.078217506408691, "rewards/rejected": -15.239356994628906, "step": 14330 }, { "epoch": 2.23, "learning_rate": 3.6363981534311944e-06, "logits/chosen": -2.1741180419921875, "logits/rejected": -2.5468082427978516, "logps/chosen": -188.05685424804688, "logps/rejected": -331.955322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.156179428100586, "rewards/margins": 9.427364349365234, "rewards/rejected": -18.58354377746582, "step": 14331 }, { "epoch": 2.23, "learning_rate": 3.635664712900047e-06, "logits/chosen": -2.659391403198242, "logits/rejected": -3.015113592147827, "logps/chosen": -537.850341796875, "logps/rejected": -601.3582153320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.280066013336182, "rewards/margins": 11.62485122680664, "rewards/rejected": -18.904916763305664, "step": 14332 }, { "epoch": 2.23, "learning_rate": 3.634931272368899e-06, "logits/chosen": -1.3273316621780396, "logits/rejected": -2.592317819595337, "logps/chosen": -120.27481842041016, "logps/rejected": -321.31109619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.178744316101074, "rewards/margins": 9.193387985229492, "rewards/rejected": -15.372133255004883, "step": 14333 }, { "epoch": 2.23, "learning_rate": 3.634197831837751e-06, "logits/chosen": -2.6161553859710693, "logits/rejected": -2.8035378456115723, "logps/chosen": -263.2167053222656, "logps/rejected": -342.99261474609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.3987298011779785, "rewards/margins": 7.696002006530762, "rewards/rejected": -14.094732284545898, "step": 14334 }, { "epoch": 2.23, "learning_rate": 3.6334643913066028e-06, "logits/chosen": -2.639317512512207, "logits/rejected": -1.9659276008605957, "logps/chosen": -278.2276611328125, "logps/rejected": -365.8080139160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.944478988647461, "rewards/margins": 12.0264310836792, "rewards/rejected": -18.970909118652344, "step": 14335 }, { "epoch": 2.23, "learning_rate": 3.6327309507754555e-06, "logits/chosen": -2.5365617275238037, "logits/rejected": -2.358914375305176, "logps/chosen": -411.3560485839844, "logps/rejected": -626.9317626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.400713920593262, "rewards/margins": 11.362395286560059, "rewards/rejected": -18.76310920715332, "step": 14336 }, { "epoch": 2.23, "learning_rate": 3.6319975102443074e-06, "logits/chosen": -2.5875461101531982, "logits/rejected": -2.613624334335327, "logps/chosen": -202.84567260742188, "logps/rejected": -387.6615905761719, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -6.24262809753418, "rewards/margins": 7.691645622253418, "rewards/rejected": -13.934274673461914, "step": 14337 }, { "epoch": 2.23, "learning_rate": 3.6312640697131592e-06, "logits/chosen": -2.570377826690674, "logits/rejected": -2.0164809226989746, "logps/chosen": -608.6224975585938, "logps/rejected": -586.1168823242188, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -9.36924934387207, "rewards/margins": 6.658662796020508, "rewards/rejected": -16.027912139892578, "step": 14338 }, { "epoch": 2.23, "learning_rate": 3.6305306291820115e-06, "logits/chosen": -2.0793001651763916, "logits/rejected": -2.6457037925720215, "logps/chosen": -123.64535522460938, "logps/rejected": -185.4937744140625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -2.6532092094421387, "rewards/margins": 6.824307441711426, "rewards/rejected": -9.477516174316406, "step": 14339 }, { "epoch": 2.23, "learning_rate": 3.6297971886508634e-06, "logits/chosen": -2.427060842514038, "logits/rejected": -1.7089848518371582, "logps/chosen": -220.4246826171875, "logps/rejected": -273.8643798828125, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -9.499006271362305, "rewards/margins": 6.284573554992676, "rewards/rejected": -15.783580780029297, "step": 14340 }, { "epoch": 2.23, "learning_rate": 3.629063748119716e-06, "logits/chosen": -2.020312547683716, "logits/rejected": -2.8003947734832764, "logps/chosen": -211.98841857910156, "logps/rejected": -363.50628662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8164567947387695, "rewards/margins": 10.458992004394531, "rewards/rejected": -18.275447845458984, "step": 14341 }, { "epoch": 2.23, "learning_rate": 3.628330307588568e-06, "logits/chosen": -2.857229709625244, "logits/rejected": -2.601836919784546, "logps/chosen": -422.93609619140625, "logps/rejected": -183.69557189941406, "loss": 2.3705, "rewards/accuracies": 0.5, "rewards/chosen": -6.22550106048584, "rewards/margins": 2.9555959701538086, "rewards/rejected": -9.181097030639648, "step": 14342 }, { "epoch": 2.23, "learning_rate": 3.62759686705742e-06, "logits/chosen": -2.5245018005371094, "logits/rejected": -2.6432459354400635, "logps/chosen": -331.47662353515625, "logps/rejected": -478.38037109375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.633024215698242, "rewards/margins": 9.15294361114502, "rewards/rejected": -14.785966873168945, "step": 14343 }, { "epoch": 2.23, "learning_rate": 3.6268634265262718e-06, "logits/chosen": -2.7645492553710938, "logits/rejected": -2.9275119304656982, "logps/chosen": -214.48123168945312, "logps/rejected": -350.6419372558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.345556259155273, "rewards/margins": 12.532722473144531, "rewards/rejected": -17.878278732299805, "step": 14344 }, { "epoch": 2.23, "learning_rate": 3.6261299859951245e-06, "logits/chosen": -0.8368343114852905, "logits/rejected": -2.6769073009490967, "logps/chosen": -105.96122741699219, "logps/rejected": -381.3072204589844, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.713337898254395, "rewards/margins": 9.419288635253906, "rewards/rejected": -18.132625579833984, "step": 14345 }, { "epoch": 2.23, "learning_rate": 3.6253965454639764e-06, "logits/chosen": -1.8354641199111938, "logits/rejected": -2.564500570297241, "logps/chosen": -392.37567138671875, "logps/rejected": -650.123779296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.623085975646973, "rewards/margins": 10.07078742980957, "rewards/rejected": -18.69387435913086, "step": 14346 }, { "epoch": 2.23, "learning_rate": 3.6246631049328283e-06, "logits/chosen": -2.5052330493927, "logits/rejected": -2.826732635498047, "logps/chosen": -374.39361572265625, "logps/rejected": -528.453857421875, "loss": 0.0323, "rewards/accuracies": 1.0, "rewards/chosen": -8.484091758728027, "rewards/margins": 5.612068176269531, "rewards/rejected": -14.096158981323242, "step": 14347 }, { "epoch": 2.23, "learning_rate": 3.6239296644016806e-06, "logits/chosen": -2.4906537532806396, "logits/rejected": -2.800358295440674, "logps/chosen": -188.94497680664062, "logps/rejected": -445.689208984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.593691349029541, "rewards/margins": 9.804048538208008, "rewards/rejected": -15.397740364074707, "step": 14348 }, { "epoch": 2.23, "learning_rate": 3.6231962238705324e-06, "logits/chosen": -2.9816367626190186, "logits/rejected": -3.052706480026245, "logps/chosen": -514.3468627929688, "logps/rejected": -465.7470703125, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -5.425410747528076, "rewards/margins": 6.543722152709961, "rewards/rejected": -11.969132423400879, "step": 14349 }, { "epoch": 2.23, "learning_rate": 3.622462783339385e-06, "logits/chosen": -2.682886838912964, "logits/rejected": -0.6260976195335388, "logps/chosen": -512.1643676757812, "logps/rejected": -136.5651397705078, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.9239197373390198, "rewards/margins": 8.057049751281738, "rewards/rejected": -8.980969429016113, "step": 14350 }, { "epoch": 2.23, "learning_rate": 3.621729342808237e-06, "logits/chosen": -2.0567901134490967, "logits/rejected": -2.218418598175049, "logps/chosen": -120.74820709228516, "logps/rejected": -328.1263427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.4092230796813965, "rewards/margins": 11.531537055969238, "rewards/rejected": -17.940759658813477, "step": 14351 }, { "epoch": 2.23, "learning_rate": 3.620995902277089e-06, "logits/chosen": -2.6982579231262207, "logits/rejected": -2.9976742267608643, "logps/chosen": -340.0931701660156, "logps/rejected": -453.1986083984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.516075134277344, "rewards/margins": 10.042895317077637, "rewards/rejected": -16.558971405029297, "step": 14352 }, { "epoch": 2.23, "learning_rate": 3.620262461745941e-06, "logits/chosen": -2.4886231422424316, "logits/rejected": -2.8412346839904785, "logps/chosen": -210.981689453125, "logps/rejected": -546.5836181640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.081718444824219, "rewards/margins": 10.335659980773926, "rewards/rejected": -16.417377471923828, "step": 14353 }, { "epoch": 2.23, "learning_rate": 3.6195290212147935e-06, "logits/chosen": -1.9960116147994995, "logits/rejected": -2.8090274333953857, "logps/chosen": -260.9295654296875, "logps/rejected": -521.6015625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.224514961242676, "rewards/margins": 10.44943904876709, "rewards/rejected": -17.673954010009766, "step": 14354 }, { "epoch": 2.23, "learning_rate": 3.6187955806836454e-06, "logits/chosen": -1.441743016242981, "logits/rejected": -2.634005546569824, "logps/chosen": -108.1728744506836, "logps/rejected": -348.1200256347656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.668467044830322, "rewards/margins": 9.507043838500977, "rewards/rejected": -14.17551040649414, "step": 14355 }, { "epoch": 2.23, "learning_rate": 3.6180621401524977e-06, "logits/chosen": -2.4952495098114014, "logits/rejected": -2.1979262828826904, "logps/chosen": -527.6006469726562, "logps/rejected": -479.9656066894531, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.391979217529297, "rewards/margins": 7.210012435913086, "rewards/rejected": -15.601991653442383, "step": 14356 }, { "epoch": 2.23, "learning_rate": 3.6173286996213496e-06, "logits/chosen": -2.1994576454162598, "logits/rejected": -2.943713665008545, "logps/chosen": -205.81675720214844, "logps/rejected": -419.7431335449219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.49797248840332, "rewards/margins": 7.980922698974609, "rewards/rejected": -16.47889518737793, "step": 14357 }, { "epoch": 2.23, "learning_rate": 3.6165952590902023e-06, "logits/chosen": -1.2797214984893799, "logits/rejected": -2.4140970706939697, "logps/chosen": -231.22511291503906, "logps/rejected": -658.2667846679688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.101123809814453, "rewards/margins": 16.999553680419922, "rewards/rejected": -25.100677490234375, "step": 14358 }, { "epoch": 2.23, "learning_rate": 3.615861818559054e-06, "logits/chosen": -2.5033986568450928, "logits/rejected": -3.2016959190368652, "logps/chosen": -67.11146545410156, "logps/rejected": -342.17987060546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.266848087310791, "rewards/margins": 11.49465560913086, "rewards/rejected": -16.761505126953125, "step": 14359 }, { "epoch": 2.23, "learning_rate": 3.615128378027906e-06, "logits/chosen": -1.8724644184112549, "logits/rejected": -2.352600336074829, "logps/chosen": -314.54437255859375, "logps/rejected": -489.24127197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.353494644165039, "rewards/margins": 9.934530258178711, "rewards/rejected": -15.28802490234375, "step": 14360 }, { "epoch": 2.23, "learning_rate": 3.614394937496758e-06, "logits/chosen": -2.4878220558166504, "logits/rejected": -1.2234818935394287, "logps/chosen": -204.61282348632812, "logps/rejected": -186.35450744628906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.72637414932251, "rewards/margins": 8.368538856506348, "rewards/rejected": -13.094913482666016, "step": 14361 }, { "epoch": 2.23, "learning_rate": 3.61366149696561e-06, "logits/chosen": -1.9908409118652344, "logits/rejected": -2.762911796569824, "logps/chosen": -80.19962310791016, "logps/rejected": -301.7572937011719, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -5.199291706085205, "rewards/margins": 11.25414752960205, "rewards/rejected": -16.453439712524414, "step": 14362 }, { "epoch": 2.23, "learning_rate": 3.6129280564344626e-06, "logits/chosen": -2.402188539505005, "logits/rejected": -2.9258453845977783, "logps/chosen": -92.2545394897461, "logps/rejected": -249.53028869628906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.229280471801758, "rewards/margins": 9.19046688079834, "rewards/rejected": -15.419746398925781, "step": 14363 }, { "epoch": 2.23, "learning_rate": 3.6121946159033144e-06, "logits/chosen": -1.81087064743042, "logits/rejected": -3.1416940689086914, "logps/chosen": -156.8325958251953, "logps/rejected": -660.728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.914348125457764, "rewards/margins": 12.19391918182373, "rewards/rejected": -17.10826873779297, "step": 14364 }, { "epoch": 2.23, "learning_rate": 3.6114611753721667e-06, "logits/chosen": -2.686657428741455, "logits/rejected": -3.0307528972625732, "logps/chosen": -137.50735473632812, "logps/rejected": -324.7181396484375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.047445774078369, "rewards/margins": 9.531368255615234, "rewards/rejected": -13.578813552856445, "step": 14365 }, { "epoch": 2.23, "learning_rate": 3.6107277348410186e-06, "logits/chosen": -2.533738613128662, "logits/rejected": -2.073009729385376, "logps/chosen": -280.73150634765625, "logps/rejected": -301.22998046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.28310775756836, "rewards/margins": 8.913816452026367, "rewards/rejected": -17.196924209594727, "step": 14366 }, { "epoch": 2.23, "learning_rate": 3.6099942943098713e-06, "logits/chosen": -2.2185444831848145, "logits/rejected": -2.562190294265747, "logps/chosen": -285.49383544921875, "logps/rejected": -353.6918029785156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.598479747772217, "rewards/margins": 9.923942565917969, "rewards/rejected": -14.522421836853027, "step": 14367 }, { "epoch": 2.23, "learning_rate": 3.6092608537787232e-06, "logits/chosen": -2.4922237396240234, "logits/rejected": -2.7575347423553467, "logps/chosen": -68.49646759033203, "logps/rejected": -238.01971435546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.418992280960083, "rewards/margins": 9.824934959411621, "rewards/rejected": -12.243927001953125, "step": 14368 }, { "epoch": 2.23, "learning_rate": 3.608527413247575e-06, "logits/chosen": -2.6127798557281494, "logits/rejected": -3.1512720584869385, "logps/chosen": -279.21820068359375, "logps/rejected": -709.2713623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8543195724487305, "rewards/margins": 17.82778549194336, "rewards/rejected": -25.682106018066406, "step": 14369 }, { "epoch": 2.23, "learning_rate": 3.607793972716427e-06, "logits/chosen": -2.3503265380859375, "logits/rejected": -1.8802798986434937, "logps/chosen": -307.0328369140625, "logps/rejected": -342.79742431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.779941558837891, "rewards/margins": 11.86663818359375, "rewards/rejected": -19.64657974243164, "step": 14370 }, { "epoch": 2.23, "learning_rate": 3.607060532185279e-06, "logits/chosen": -2.660280704498291, "logits/rejected": -2.134174108505249, "logps/chosen": -333.998291015625, "logps/rejected": -366.52587890625, "loss": 0.1122, "rewards/accuracies": 1.0, "rewards/chosen": -9.008313179016113, "rewards/margins": 5.45228385925293, "rewards/rejected": -14.460597038269043, "step": 14371 }, { "epoch": 2.24, "learning_rate": 3.6063270916541316e-06, "logits/chosen": -2.2384872436523438, "logits/rejected": -2.947343587875366, "logps/chosen": -553.723388671875, "logps/rejected": -1258.2052001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.896453857421875, "rewards/margins": 11.379293441772461, "rewards/rejected": -17.275747299194336, "step": 14372 }, { "epoch": 2.24, "learning_rate": 3.605593651122984e-06, "logits/chosen": -1.5913323163986206, "logits/rejected": -2.7495150566101074, "logps/chosen": -151.28701782226562, "logps/rejected": -547.3422241210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4269938468933105, "rewards/margins": 14.662759780883789, "rewards/rejected": -19.089754104614258, "step": 14373 }, { "epoch": 2.24, "learning_rate": 3.6048602105918358e-06, "logits/chosen": -2.725302219390869, "logits/rejected": -1.9538040161132812, "logps/chosen": -945.0955200195312, "logps/rejected": -658.499267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.861035346984863, "rewards/margins": 10.924054145812988, "rewards/rejected": -17.78508949279785, "step": 14374 }, { "epoch": 2.24, "learning_rate": 3.6041267700606876e-06, "logits/chosen": -1.684945821762085, "logits/rejected": -2.3935816287994385, "logps/chosen": -188.4646759033203, "logps/rejected": -372.59344482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.774134635925293, "rewards/margins": 10.234447479248047, "rewards/rejected": -17.008583068847656, "step": 14375 }, { "epoch": 2.24, "learning_rate": 3.6033933295295404e-06, "logits/chosen": -2.3442978858947754, "logits/rejected": -1.997452974319458, "logps/chosen": -580.3063354492188, "logps/rejected": -707.7857666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.627521514892578, "rewards/margins": 14.1104736328125, "rewards/rejected": -22.737995147705078, "step": 14376 }, { "epoch": 2.24, "learning_rate": 3.6026598889983922e-06, "logits/chosen": -1.3632992506027222, "logits/rejected": -2.735105514526367, "logps/chosen": -404.01226806640625, "logps/rejected": -805.5037841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.531569480895996, "rewards/margins": 14.485404014587402, "rewards/rejected": -20.0169734954834, "step": 14377 }, { "epoch": 2.24, "learning_rate": 3.601926448467244e-06, "logits/chosen": -2.236473798751831, "logits/rejected": -2.7621145248413086, "logps/chosen": -230.4410400390625, "logps/rejected": -374.3800048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.553362846374512, "rewards/margins": 9.015458106994629, "rewards/rejected": -17.56882095336914, "step": 14378 }, { "epoch": 2.24, "learning_rate": 3.601193007936096e-06, "logits/chosen": -2.4306018352508545, "logits/rejected": -2.6896047592163086, "logps/chosen": -147.57489013671875, "logps/rejected": -250.2509765625, "loss": 0.0649, "rewards/accuracies": 1.0, "rewards/chosen": -6.34657621383667, "rewards/margins": 6.757550239562988, "rewards/rejected": -13.1041259765625, "step": 14379 }, { "epoch": 2.24, "learning_rate": 3.600459567404948e-06, "logits/chosen": -2.828709125518799, "logits/rejected": -2.2634315490722656, "logps/chosen": -434.63800048828125, "logps/rejected": -296.10565185546875, "loss": 2.5202, "rewards/accuracies": 0.5, "rewards/chosen": -10.827260971069336, "rewards/margins": 2.5776681900024414, "rewards/rejected": -13.404929161071777, "step": 14380 }, { "epoch": 2.24, "learning_rate": 3.5997261268738006e-06, "logits/chosen": -2.6584253311157227, "logits/rejected": -2.702329397201538, "logps/chosen": -245.15805053710938, "logps/rejected": -209.79852294921875, "loss": 3.2831, "rewards/accuracies": 0.5, "rewards/chosen": -10.237125396728516, "rewards/margins": 0.19692730903625488, "rewards/rejected": -10.434052467346191, "step": 14381 }, { "epoch": 2.24, "learning_rate": 3.598992686342653e-06, "logits/chosen": -2.292771100997925, "logits/rejected": -2.4895763397216797, "logps/chosen": -128.9215850830078, "logps/rejected": -391.7222900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.157967567443848, "rewards/margins": 12.781919479370117, "rewards/rejected": -17.93988800048828, "step": 14382 }, { "epoch": 2.24, "learning_rate": 3.5982592458115048e-06, "logits/chosen": -2.761101007461548, "logits/rejected": -2.2098143100738525, "logps/chosen": -117.46308898925781, "logps/rejected": -162.48167419433594, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -5.804601669311523, "rewards/margins": 4.600468635559082, "rewards/rejected": -10.405070304870605, "step": 14383 }, { "epoch": 2.24, "learning_rate": 3.5975258052803567e-06, "logits/chosen": -2.648225784301758, "logits/rejected": -2.9396913051605225, "logps/chosen": -271.0115966796875, "logps/rejected": -433.8313293457031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.546440124511719, "rewards/margins": 9.54947280883789, "rewards/rejected": -16.09591293334961, "step": 14384 }, { "epoch": 2.24, "learning_rate": 3.5967923647492094e-06, "logits/chosen": -2.7005650997161865, "logits/rejected": -2.6266732215881348, "logps/chosen": -687.959228515625, "logps/rejected": -577.5526733398438, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -7.028392314910889, "rewards/margins": 11.50122356414795, "rewards/rejected": -18.52961540222168, "step": 14385 }, { "epoch": 2.24, "learning_rate": 3.5960589242180613e-06, "logits/chosen": -2.462071418762207, "logits/rejected": -1.880135178565979, "logps/chosen": -286.34429931640625, "logps/rejected": -269.890380859375, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": -5.503517150878906, "rewards/margins": 6.559381484985352, "rewards/rejected": -12.062898635864258, "step": 14386 }, { "epoch": 2.24, "learning_rate": 3.595325483686913e-06, "logits/chosen": -2.3161730766296387, "logits/rejected": -2.2762959003448486, "logps/chosen": -205.38961791992188, "logps/rejected": -371.7179260253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.257752895355225, "rewards/margins": 14.29356575012207, "rewards/rejected": -18.551319122314453, "step": 14387 }, { "epoch": 2.24, "learning_rate": 3.594592043155765e-06, "logits/chosen": -2.5614161491394043, "logits/rejected": -2.33339262008667, "logps/chosen": -182.1926727294922, "logps/rejected": -371.03936767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.341778755187988, "rewards/margins": 9.839731216430664, "rewards/rejected": -15.181509017944336, "step": 14388 }, { "epoch": 2.24, "learning_rate": 3.5938586026246173e-06, "logits/chosen": -2.4802725315093994, "logits/rejected": -1.4062329530715942, "logps/chosen": -136.01226806640625, "logps/rejected": -132.50885009765625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -5.27475118637085, "rewards/margins": 5.157451629638672, "rewards/rejected": -10.43220329284668, "step": 14389 }, { "epoch": 2.24, "learning_rate": 3.59312516209347e-06, "logits/chosen": -3.0348219871520996, "logits/rejected": -2.9363701343536377, "logps/chosen": -290.79400634765625, "logps/rejected": -265.8930358886719, "loss": 0.4803, "rewards/accuracies": 0.5, "rewards/chosen": -7.102080345153809, "rewards/margins": 5.011777877807617, "rewards/rejected": -12.113858222961426, "step": 14390 }, { "epoch": 2.24, "learning_rate": 3.592391721562322e-06, "logits/chosen": -2.239515542984009, "logits/rejected": -2.601773500442505, "logps/chosen": -111.50617980957031, "logps/rejected": -224.5916748046875, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -8.768632888793945, "rewards/margins": 6.530683517456055, "rewards/rejected": -15.29931640625, "step": 14391 }, { "epoch": 2.24, "learning_rate": 3.591658281031174e-06, "logits/chosen": -2.9842169284820557, "logits/rejected": -2.4585635662078857, "logps/chosen": -510.780029296875, "logps/rejected": -461.2149353027344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.680422306060791, "rewards/margins": 12.229918479919434, "rewards/rejected": -17.910341262817383, "step": 14392 }, { "epoch": 2.24, "learning_rate": 3.5909248405000257e-06, "logits/chosen": -2.2147252559661865, "logits/rejected": -2.561753511428833, "logps/chosen": -171.3394012451172, "logps/rejected": -245.1427459716797, "loss": 1.3501, "rewards/accuracies": 0.5, "rewards/chosen": -10.764554023742676, "rewards/margins": 4.0362324714660645, "rewards/rejected": -14.800786972045898, "step": 14393 }, { "epoch": 2.24, "learning_rate": 3.5901913999688784e-06, "logits/chosen": -2.344780683517456, "logits/rejected": -2.684990406036377, "logps/chosen": -404.7832946777344, "logps/rejected": -581.0604248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.359333515167236, "rewards/margins": 11.083602905273438, "rewards/rejected": -18.442935943603516, "step": 14394 }, { "epoch": 2.24, "learning_rate": 3.5894579594377303e-06, "logits/chosen": -2.5418460369110107, "logits/rejected": -2.7488198280334473, "logps/chosen": -225.29373168945312, "logps/rejected": -385.90716552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6935853958129883, "rewards/margins": 9.918668746948242, "rewards/rejected": -13.61225414276123, "step": 14395 }, { "epoch": 2.24, "learning_rate": 3.588724518906582e-06, "logits/chosen": -1.1530532836914062, "logits/rejected": -1.974776268005371, "logps/chosen": -203.54002380371094, "logps/rejected": -672.1903076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.573469161987305, "rewards/margins": 17.718162536621094, "rewards/rejected": -24.29163360595703, "step": 14396 }, { "epoch": 2.24, "learning_rate": 3.587991078375434e-06, "logits/chosen": -2.6848366260528564, "logits/rejected": -2.700965404510498, "logps/chosen": -340.58795166015625, "logps/rejected": -308.9517517089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.844875335693359, "rewards/margins": 10.551197052001953, "rewards/rejected": -15.396072387695312, "step": 14397 }, { "epoch": 2.24, "learning_rate": 3.5872576378442863e-06, "logits/chosen": -2.38407826423645, "logits/rejected": -2.419044017791748, "logps/chosen": -374.8984375, "logps/rejected": -433.02191162109375, "loss": 0.1924, "rewards/accuracies": 1.0, "rewards/chosen": -7.042993068695068, "rewards/margins": 4.895288944244385, "rewards/rejected": -11.938282012939453, "step": 14398 }, { "epoch": 2.24, "learning_rate": 3.586524197313139e-06, "logits/chosen": -0.4427320957183838, "logits/rejected": -0.9811611175537109, "logps/chosen": -133.731201171875, "logps/rejected": -443.4796142578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.382887840270996, "rewards/margins": 8.190727233886719, "rewards/rejected": -15.573616027832031, "step": 14399 }, { "epoch": 2.24, "learning_rate": 3.585790756781991e-06, "logits/chosen": -2.7697880268096924, "logits/rejected": -2.3279056549072266, "logps/chosen": -380.8416748046875, "logps/rejected": -488.5058288574219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.1436662673950195, "rewards/margins": 8.84813117980957, "rewards/rejected": -14.991796493530273, "step": 14400 }, { "epoch": 2.24, "learning_rate": 3.585057316250843e-06, "logits/chosen": -1.6518559455871582, "logits/rejected": -2.0912656784057617, "logps/chosen": -142.5167999267578, "logps/rejected": -247.56057739257812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.8637285232543945, "rewards/margins": 9.896150588989258, "rewards/rejected": -17.75988006591797, "step": 14401 }, { "epoch": 2.24, "learning_rate": 3.5843238757196947e-06, "logits/chosen": -2.5386478900909424, "logits/rejected": -2.003791570663452, "logps/chosen": -343.2541198730469, "logps/rejected": -332.4734191894531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.494291305541992, "rewards/margins": 9.493833541870117, "rewards/rejected": -15.98812484741211, "step": 14402 }, { "epoch": 2.24, "learning_rate": 3.5835904351885474e-06, "logits/chosen": -2.7385964393615723, "logits/rejected": -2.9886903762817383, "logps/chosen": -126.56806945800781, "logps/rejected": -211.67361450195312, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -7.039669036865234, "rewards/margins": 6.03997802734375, "rewards/rejected": -13.079647064208984, "step": 14403 }, { "epoch": 2.24, "learning_rate": 3.5828569946573993e-06, "logits/chosen": -1.6163383722305298, "logits/rejected": -2.343801736831665, "logps/chosen": -170.03707885742188, "logps/rejected": -383.776123046875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -8.767725944519043, "rewards/margins": 6.857721328735352, "rewards/rejected": -15.625447273254395, "step": 14404 }, { "epoch": 2.24, "learning_rate": 3.582123554126251e-06, "logits/chosen": -3.004169225692749, "logits/rejected": -3.0775694847106934, "logps/chosen": -141.43325805664062, "logps/rejected": -274.54351806640625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.800936222076416, "rewards/margins": 7.920363426208496, "rewards/rejected": -11.72130012512207, "step": 14405 }, { "epoch": 2.24, "learning_rate": 3.5813901135951035e-06, "logits/chosen": -2.849416732788086, "logits/rejected": -2.6619999408721924, "logps/chosen": -536.361572265625, "logps/rejected": -484.29644775390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.283743858337402, "rewards/margins": 11.738767623901367, "rewards/rejected": -17.022510528564453, "step": 14406 }, { "epoch": 2.24, "learning_rate": 3.5806566730639562e-06, "logits/chosen": -2.1483864784240723, "logits/rejected": -2.9013516902923584, "logps/chosen": -195.2626190185547, "logps/rejected": -431.21258544921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.386479377746582, "rewards/margins": 7.363851070404053, "rewards/rejected": -13.750329971313477, "step": 14407 }, { "epoch": 2.24, "learning_rate": 3.579923232532808e-06, "logits/chosen": -1.9784456491470337, "logits/rejected": -2.6626973152160645, "logps/chosen": -393.4199523925781, "logps/rejected": -552.145751953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.361382484436035, "rewards/margins": 8.538572311401367, "rewards/rejected": -14.899954795837402, "step": 14408 }, { "epoch": 2.24, "learning_rate": 3.57918979200166e-06, "logits/chosen": -2.3887991905212402, "logits/rejected": -2.8826539516448975, "logps/chosen": -229.06365966796875, "logps/rejected": -386.84613037109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.773928642272949, "rewards/margins": 8.436975479125977, "rewards/rejected": -14.210905075073242, "step": 14409 }, { "epoch": 2.24, "learning_rate": 3.578456351470512e-06, "logits/chosen": -2.4773542881011963, "logits/rejected": -1.487992286682129, "logps/chosen": -169.64093017578125, "logps/rejected": -177.30572509765625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.731879234313965, "rewards/margins": 7.174476623535156, "rewards/rejected": -12.906355857849121, "step": 14410 }, { "epoch": 2.24, "learning_rate": 3.5777229109393637e-06, "logits/chosen": -2.465569019317627, "logits/rejected": -2.7610206604003906, "logps/chosen": -149.10946655273438, "logps/rejected": -228.60198974609375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.8556642532348633, "rewards/margins": 6.094615936279297, "rewards/rejected": -9.95028018951416, "step": 14411 }, { "epoch": 2.24, "learning_rate": 3.5769894704082165e-06, "logits/chosen": -1.7792469263076782, "logits/rejected": -2.749533176422119, "logps/chosen": -219.05833435058594, "logps/rejected": -396.0664978027344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.609551429748535, "rewards/margins": 9.846732139587402, "rewards/rejected": -16.456283569335938, "step": 14412 }, { "epoch": 2.24, "learning_rate": 3.5762560298770683e-06, "logits/chosen": -2.74294114112854, "logits/rejected": -2.5134387016296387, "logps/chosen": -250.21826171875, "logps/rejected": -402.52178955078125, "loss": 0.1367, "rewards/accuracies": 1.0, "rewards/chosen": -9.363205909729004, "rewards/margins": 3.7197256088256836, "rewards/rejected": -13.082931518554688, "step": 14413 }, { "epoch": 2.24, "learning_rate": 3.57552258934592e-06, "logits/chosen": -1.1671113967895508, "logits/rejected": -2.5271615982055664, "logps/chosen": -210.74266052246094, "logps/rejected": -586.908203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.483240127563477, "rewards/margins": 12.22610855102539, "rewards/rejected": -18.709348678588867, "step": 14414 }, { "epoch": 2.24, "learning_rate": 3.5747891488147725e-06, "logits/chosen": -1.2915618419647217, "logits/rejected": -1.842673659324646, "logps/chosen": -120.32444763183594, "logps/rejected": -292.3419189453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.280108451843262, "rewards/margins": 10.021982192993164, "rewards/rejected": -16.302091598510742, "step": 14415 }, { "epoch": 2.24, "learning_rate": 3.5740557082836252e-06, "logits/chosen": -2.513847827911377, "logits/rejected": -2.16064715385437, "logps/chosen": -380.55218505859375, "logps/rejected": -540.1478271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.542422294616699, "rewards/margins": 11.481707572937012, "rewards/rejected": -19.02412986755371, "step": 14416 }, { "epoch": 2.24, "learning_rate": 3.573322267752477e-06, "logits/chosen": -1.7813541889190674, "logits/rejected": -2.44234037399292, "logps/chosen": -180.58038330078125, "logps/rejected": -348.47637939453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.922424793243408, "rewards/margins": 8.988944053649902, "rewards/rejected": -16.91136932373047, "step": 14417 }, { "epoch": 2.24, "learning_rate": 3.572588827221329e-06, "logits/chosen": -2.4217236042022705, "logits/rejected": -2.9570682048797607, "logps/chosen": -249.25152587890625, "logps/rejected": -516.7748413085938, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.219093322753906, "rewards/margins": 8.92983627319336, "rewards/rejected": -15.148929595947266, "step": 14418 }, { "epoch": 2.24, "learning_rate": 3.571855386690181e-06, "logits/chosen": -1.7475237846374512, "logits/rejected": -2.7977371215820312, "logps/chosen": -144.7997283935547, "logps/rejected": -431.09552001953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.0517096519470215, "rewards/margins": 9.279531478881836, "rewards/rejected": -14.3312406539917, "step": 14419 }, { "epoch": 2.24, "learning_rate": 3.5711219461590327e-06, "logits/chosen": -2.012805700302124, "logits/rejected": -2.821901321411133, "logps/chosen": -111.4781723022461, "logps/rejected": -633.6723022460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.043071746826172, "rewards/margins": 13.979002952575684, "rewards/rejected": -19.022075653076172, "step": 14420 }, { "epoch": 2.24, "learning_rate": 3.5703885056278855e-06, "logits/chosen": -2.6735801696777344, "logits/rejected": -2.8823933601379395, "logps/chosen": -102.5372314453125, "logps/rejected": -302.36041259765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.104046821594238, "rewards/margins": 8.508790969848633, "rewards/rejected": -16.612838745117188, "step": 14421 }, { "epoch": 2.24, "learning_rate": 3.5696550650967374e-06, "logits/chosen": -2.4239373207092285, "logits/rejected": -2.0629589557647705, "logps/chosen": -154.1468048095703, "logps/rejected": -187.99517822265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.3621931076049805, "rewards/margins": 7.668707847595215, "rewards/rejected": -12.030900955200195, "step": 14422 }, { "epoch": 2.24, "learning_rate": 3.5689216245655897e-06, "logits/chosen": -2.7602128982543945, "logits/rejected": -2.226991891860962, "logps/chosen": -272.5377502441406, "logps/rejected": -221.572021484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.8211402893066406, "rewards/margins": 7.997124195098877, "rewards/rejected": -11.81826400756836, "step": 14423 }, { "epoch": 2.24, "learning_rate": 3.5681881840344415e-06, "logits/chosen": -2.5956575870513916, "logits/rejected": -2.722097396850586, "logps/chosen": -288.3695983886719, "logps/rejected": -340.7923889160156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.519927978515625, "rewards/margins": 8.130220413208008, "rewards/rejected": -16.650148391723633, "step": 14424 }, { "epoch": 2.24, "learning_rate": 3.5674547435032943e-06, "logits/chosen": -2.609394073486328, "logits/rejected": -2.6739747524261475, "logps/chosen": -512.6443481445312, "logps/rejected": -424.8160705566406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -8.788551330566406, "rewards/margins": 8.900932312011719, "rewards/rejected": -17.689483642578125, "step": 14425 }, { "epoch": 2.24, "learning_rate": 3.566721302972146e-06, "logits/chosen": -1.2842241525650024, "logits/rejected": -2.85684871673584, "logps/chosen": -185.16343688964844, "logps/rejected": -592.8353881835938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.217412948608398, "rewards/margins": 9.963998794555664, "rewards/rejected": -17.181411743164062, "step": 14426 }, { "epoch": 2.24, "learning_rate": 3.565987862440998e-06, "logits/chosen": -2.3460681438446045, "logits/rejected": -2.7467172145843506, "logps/chosen": -134.96490478515625, "logps/rejected": -516.9691772460938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.50488805770874, "rewards/margins": 9.537942886352539, "rewards/rejected": -16.042831420898438, "step": 14427 }, { "epoch": 2.24, "learning_rate": 3.56525442190985e-06, "logits/chosen": -3.109126329421997, "logits/rejected": -2.2922799587249756, "logps/chosen": -298.24761962890625, "logps/rejected": -214.73355102539062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0745744705200195, "rewards/margins": 10.544140815734863, "rewards/rejected": -14.618715286254883, "step": 14428 }, { "epoch": 2.24, "learning_rate": 3.5645209813787018e-06, "logits/chosen": -1.5002660751342773, "logits/rejected": -2.55920147895813, "logps/chosen": -194.075439453125, "logps/rejected": -482.9918212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.371818542480469, "rewards/margins": 10.78061580657959, "rewards/rejected": -15.152434349060059, "step": 14429 }, { "epoch": 2.24, "learning_rate": 3.5637875408475545e-06, "logits/chosen": -1.4229623079299927, "logits/rejected": -2.6381969451904297, "logps/chosen": -197.02005004882812, "logps/rejected": -476.64166259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.595911741256714, "rewards/margins": 12.317488670349121, "rewards/rejected": -15.913400650024414, "step": 14430 }, { "epoch": 2.24, "learning_rate": 3.5630541003164064e-06, "logits/chosen": -2.1686336994171143, "logits/rejected": -2.2959938049316406, "logps/chosen": -212.22311401367188, "logps/rejected": -379.5126037597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0042481422424316, "rewards/margins": 10.713926315307617, "rewards/rejected": -13.718174934387207, "step": 14431 }, { "epoch": 2.24, "learning_rate": 3.5623206597852587e-06, "logits/chosen": -2.584404945373535, "logits/rejected": -2.3042826652526855, "logps/chosen": -370.7626037597656, "logps/rejected": -482.65277099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.535765647888184, "rewards/margins": 10.856857299804688, "rewards/rejected": -19.392623901367188, "step": 14432 }, { "epoch": 2.24, "learning_rate": 3.5615872192541106e-06, "logits/chosen": -1.456244707107544, "logits/rejected": -2.5792014598846436, "logps/chosen": -151.7238006591797, "logps/rejected": -306.04852294921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.230347633361816, "rewards/margins": 8.844751358032227, "rewards/rejected": -14.07509994506836, "step": 14433 }, { "epoch": 2.24, "learning_rate": 3.5608537787229633e-06, "logits/chosen": -2.4614741802215576, "logits/rejected": -2.3399646282196045, "logps/chosen": -224.91455078125, "logps/rejected": -406.35821533203125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -6.966416358947754, "rewards/margins": 7.724050998687744, "rewards/rejected": -14.690467834472656, "step": 14434 }, { "epoch": 2.24, "learning_rate": 3.560120338191815e-06, "logits/chosen": -2.7390315532684326, "logits/rejected": -1.6164231300354004, "logps/chosen": -159.93394470214844, "logps/rejected": -271.1159362792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.313566207885742, "rewards/margins": 13.262371063232422, "rewards/rejected": -16.575937271118164, "step": 14435 }, { "epoch": 2.25, "learning_rate": 3.559386897660667e-06, "logits/chosen": -2.032660484313965, "logits/rejected": -2.682488203048706, "logps/chosen": -109.10343933105469, "logps/rejected": -376.01153564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.324413776397705, "rewards/margins": 13.071938514709473, "rewards/rejected": -18.396352767944336, "step": 14436 }, { "epoch": 2.25, "learning_rate": 3.558653457129519e-06, "logits/chosen": -2.9629387855529785, "logits/rejected": -2.6251723766326904, "logps/chosen": -242.37657165527344, "logps/rejected": -315.4981384277344, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -5.401435852050781, "rewards/margins": 7.839693069458008, "rewards/rejected": -13.241128921508789, "step": 14437 }, { "epoch": 2.25, "learning_rate": 3.557920016598371e-06, "logits/chosen": -2.1732890605926514, "logits/rejected": -2.681205987930298, "logps/chosen": -114.52445220947266, "logps/rejected": -317.4051513671875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.83311128616333, "rewards/margins": 10.213889122009277, "rewards/rejected": -18.047000885009766, "step": 14438 }, { "epoch": 2.25, "learning_rate": 3.5571865760672235e-06, "logits/chosen": -1.6772856712341309, "logits/rejected": -2.610724687576294, "logps/chosen": -190.19100952148438, "logps/rejected": -497.5428771972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.742088794708252, "rewards/margins": 14.57537841796875, "rewards/rejected": -22.317466735839844, "step": 14439 }, { "epoch": 2.25, "learning_rate": 3.556453135536076e-06, "logits/chosen": -2.4186155796051025, "logits/rejected": -2.8067831993103027, "logps/chosen": -135.99423217773438, "logps/rejected": -418.49755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7923455238342285, "rewards/margins": 10.909478187561035, "rewards/rejected": -15.701824188232422, "step": 14440 }, { "epoch": 2.25, "learning_rate": 3.5557196950049277e-06, "logits/chosen": -2.771939516067505, "logits/rejected": -2.891834020614624, "logps/chosen": -193.65951538085938, "logps/rejected": -397.7884216308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.314591407775879, "rewards/margins": 10.033565521240234, "rewards/rejected": -14.348156929016113, "step": 14441 }, { "epoch": 2.25, "learning_rate": 3.5549862544737796e-06, "logits/chosen": -2.7717814445495605, "logits/rejected": -2.0983710289001465, "logps/chosen": -293.1556396484375, "logps/rejected": -331.0196533203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -9.395637512207031, "rewards/margins": 8.74791145324707, "rewards/rejected": -18.1435489654541, "step": 14442 }, { "epoch": 2.25, "learning_rate": 3.5542528139426323e-06, "logits/chosen": -2.833188772201538, "logits/rejected": -2.472522497177124, "logps/chosen": -295.2816162109375, "logps/rejected": -348.604736328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.043045997619629, "rewards/margins": 8.857239723205566, "rewards/rejected": -16.900285720825195, "step": 14443 }, { "epoch": 2.25, "learning_rate": 3.553519373411484e-06, "logits/chosen": -2.5491366386413574, "logits/rejected": -2.889402389526367, "logps/chosen": -594.1785888671875, "logps/rejected": -652.32080078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.585931301116943, "rewards/margins": 7.821994781494141, "rewards/rejected": -15.407926559448242, "step": 14444 }, { "epoch": 2.25, "learning_rate": 3.552785932880336e-06, "logits/chosen": -2.291940450668335, "logits/rejected": -2.292240858078003, "logps/chosen": -297.4085693359375, "logps/rejected": -308.15118408203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.5811357498168945, "rewards/margins": 8.730812072753906, "rewards/rejected": -15.311948776245117, "step": 14445 }, { "epoch": 2.25, "learning_rate": 3.552052492349188e-06, "logits/chosen": -2.2115845680236816, "logits/rejected": -2.984966516494751, "logps/chosen": -187.88629150390625, "logps/rejected": -252.62091064453125, "loss": 0.723, "rewards/accuracies": 0.5, "rewards/chosen": -6.9659013748168945, "rewards/margins": 2.7384250164031982, "rewards/rejected": -9.704326629638672, "step": 14446 }, { "epoch": 2.25, "learning_rate": 3.5513190518180407e-06, "logits/chosen": -2.470853805541992, "logits/rejected": -3.016284942626953, "logps/chosen": -124.04435729980469, "logps/rejected": -348.43267822265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.223738670349121, "rewards/margins": 9.0763578414917, "rewards/rejected": -14.30009651184082, "step": 14447 }, { "epoch": 2.25, "learning_rate": 3.5505856112868925e-06, "logits/chosen": -2.7421746253967285, "logits/rejected": -2.6792120933532715, "logps/chosen": -261.0796813964844, "logps/rejected": -254.88352966308594, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": -7.521588325500488, "rewards/margins": 6.79313325881958, "rewards/rejected": -14.314722061157227, "step": 14448 }, { "epoch": 2.25, "learning_rate": 3.549852170755745e-06, "logits/chosen": -1.765994668006897, "logits/rejected": -2.6161234378814697, "logps/chosen": -192.89938354492188, "logps/rejected": -492.1892395019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.732361316680908, "rewards/margins": 11.343461036682129, "rewards/rejected": -17.075820922851562, "step": 14449 }, { "epoch": 2.25, "learning_rate": 3.5491187302245967e-06, "logits/chosen": -1.879214882850647, "logits/rejected": -2.9211928844451904, "logps/chosen": -154.42381286621094, "logps/rejected": -405.2861633300781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.183818817138672, "rewards/margins": 8.528595924377441, "rewards/rejected": -14.712414741516113, "step": 14450 }, { "epoch": 2.25, "learning_rate": 3.5483852896934486e-06, "logits/chosen": -2.792602062225342, "logits/rejected": -2.6757090091705322, "logps/chosen": -292.0896301269531, "logps/rejected": -247.6515350341797, "loss": 1.7113, "rewards/accuracies": 0.5, "rewards/chosen": -6.989776611328125, "rewards/margins": 4.120908737182617, "rewards/rejected": -11.110685348510742, "step": 14451 }, { "epoch": 2.25, "learning_rate": 3.5476518491623013e-06, "logits/chosen": -1.67060124874115, "logits/rejected": -2.9001879692077637, "logps/chosen": -310.2394104003906, "logps/rejected": -671.6163940429688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.072831153869629, "rewards/margins": 10.355965614318848, "rewards/rejected": -19.428796768188477, "step": 14452 }, { "epoch": 2.25, "learning_rate": 3.546918408631153e-06, "logits/chosen": -2.528677225112915, "logits/rejected": -1.956992268562317, "logps/chosen": -220.5826416015625, "logps/rejected": -300.435791015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.232579708099365, "rewards/margins": 8.464085578918457, "rewards/rejected": -13.696664810180664, "step": 14453 }, { "epoch": 2.25, "learning_rate": 3.546184968100005e-06, "logits/chosen": -2.409644603729248, "logits/rejected": -2.7563507556915283, "logps/chosen": -539.0033569335938, "logps/rejected": -620.93115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.707009315490723, "rewards/margins": 11.338855743408203, "rewards/rejected": -17.045866012573242, "step": 14454 }, { "epoch": 2.25, "learning_rate": 3.545451527568857e-06, "logits/chosen": -2.778569459915161, "logits/rejected": -2.4615581035614014, "logps/chosen": -970.9630737304688, "logps/rejected": -721.2855224609375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.373529434204102, "rewards/margins": 10.194589614868164, "rewards/rejected": -16.568119049072266, "step": 14455 }, { "epoch": 2.25, "learning_rate": 3.5447180870377097e-06, "logits/chosen": -2.3695874214172363, "logits/rejected": -2.6987407207489014, "logps/chosen": -958.255615234375, "logps/rejected": -808.6097412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.922696113586426, "rewards/margins": 12.583152770996094, "rewards/rejected": -22.505847930908203, "step": 14456 }, { "epoch": 2.25, "learning_rate": 3.543984646506562e-06, "logits/chosen": -2.4885706901550293, "logits/rejected": -2.7917709350585938, "logps/chosen": -574.0590209960938, "logps/rejected": -740.1546020507812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.152206420898438, "rewards/margins": 13.406005859375, "rewards/rejected": -21.558212280273438, "step": 14457 }, { "epoch": 2.25, "learning_rate": 3.543251205975414e-06, "logits/chosen": -2.0941479206085205, "logits/rejected": -2.6963984966278076, "logps/chosen": -598.6617431640625, "logps/rejected": -680.2744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.884639739990234, "rewards/margins": 14.000818252563477, "rewards/rejected": -26.88545799255371, "step": 14458 }, { "epoch": 2.25, "learning_rate": 3.5425177654442657e-06, "logits/chosen": -1.0957976579666138, "logits/rejected": -2.6774747371673584, "logps/chosen": -138.60739135742188, "logps/rejected": -412.5626220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5281982421875, "rewards/margins": 12.273945808410645, "rewards/rejected": -18.802143096923828, "step": 14459 }, { "epoch": 2.25, "learning_rate": 3.5417843249131176e-06, "logits/chosen": -2.7626051902770996, "logits/rejected": -2.6554925441741943, "logps/chosen": -146.9830322265625, "logps/rejected": -334.67486572265625, "loss": 0.0529, "rewards/accuracies": 1.0, "rewards/chosen": -5.201269149780273, "rewards/margins": 8.03820514678955, "rewards/rejected": -13.239473342895508, "step": 14460 }, { "epoch": 2.25, "learning_rate": 3.5410508843819703e-06, "logits/chosen": -2.706178903579712, "logits/rejected": -1.7408239841461182, "logps/chosen": -384.3511047363281, "logps/rejected": -279.8319396972656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.164108276367188, "rewards/margins": 8.25909423828125, "rewards/rejected": -16.423202514648438, "step": 14461 }, { "epoch": 2.25, "learning_rate": 3.5403174438508222e-06, "logits/chosen": -2.9204344749450684, "logits/rejected": -2.9689953327178955, "logps/chosen": -151.23822021484375, "logps/rejected": -255.65289306640625, "loss": 0.0733, "rewards/accuracies": 1.0, "rewards/chosen": -7.278118133544922, "rewards/margins": 8.05199909210205, "rewards/rejected": -15.330117225646973, "step": 14462 }, { "epoch": 2.25, "learning_rate": 3.539584003319674e-06, "logits/chosen": -2.708142042160034, "logits/rejected": -1.5438189506530762, "logps/chosen": -426.30877685546875, "logps/rejected": -227.98614501953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.737812042236328, "rewards/margins": 9.280067443847656, "rewards/rejected": -15.017879486083984, "step": 14463 }, { "epoch": 2.25, "learning_rate": 3.538850562788526e-06, "logits/chosen": -0.9462053179740906, "logits/rejected": -2.2440435886383057, "logps/chosen": -99.78546142578125, "logps/rejected": -293.3457336425781, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.808210849761963, "rewards/margins": 8.111050605773926, "rewards/rejected": -14.919261932373047, "step": 14464 }, { "epoch": 2.25, "learning_rate": 3.5381171222573787e-06, "logits/chosen": -2.265395164489746, "logits/rejected": -2.6385793685913086, "logps/chosen": -141.6361083984375, "logps/rejected": -283.62060546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -9.595279693603516, "rewards/margins": 6.718816757202148, "rewards/rejected": -16.314096450805664, "step": 14465 }, { "epoch": 2.25, "learning_rate": 3.537383681726231e-06, "logits/chosen": -2.7657101154327393, "logits/rejected": -2.3737738132476807, "logps/chosen": -614.5349731445312, "logps/rejected": -608.8591918945312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.465868949890137, "rewards/margins": 11.707870483398438, "rewards/rejected": -16.17374038696289, "step": 14466 }, { "epoch": 2.25, "learning_rate": 3.536650241195083e-06, "logits/chosen": -2.874189853668213, "logits/rejected": -2.819192409515381, "logps/chosen": -135.75318908691406, "logps/rejected": -179.2061767578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.59210205078125, "rewards/margins": 8.602960586547852, "rewards/rejected": -14.195062637329102, "step": 14467 }, { "epoch": 2.25, "learning_rate": 3.5359168006639348e-06, "logits/chosen": -1.8473553657531738, "logits/rejected": -2.711251974105835, "logps/chosen": -386.2310485839844, "logps/rejected": -781.20947265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.566350936889648, "rewards/margins": 10.533767700195312, "rewards/rejected": -19.100120544433594, "step": 14468 }, { "epoch": 2.25, "learning_rate": 3.5351833601327866e-06, "logits/chosen": -1.8337286710739136, "logits/rejected": -2.533372163772583, "logps/chosen": -277.1629943847656, "logps/rejected": -492.5947265625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -5.255558967590332, "rewards/margins": 9.967967987060547, "rewards/rejected": -15.223527908325195, "step": 14469 }, { "epoch": 2.25, "learning_rate": 3.534449919601639e-06, "logits/chosen": -2.759415626525879, "logits/rejected": -2.4372141361236572, "logps/chosen": -206.09310913085938, "logps/rejected": -287.50811767578125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -8.232954978942871, "rewards/margins": 7.310516357421875, "rewards/rejected": -15.543471336364746, "step": 14470 }, { "epoch": 2.25, "learning_rate": 3.5337164790704912e-06, "logits/chosen": -1.3691824674606323, "logits/rejected": -2.7244367599487305, "logps/chosen": -312.40618896484375, "logps/rejected": -361.21099853515625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -6.27601957321167, "rewards/margins": 12.490653991699219, "rewards/rejected": -18.766674041748047, "step": 14471 }, { "epoch": 2.25, "learning_rate": 3.532983038539343e-06, "logits/chosen": -2.550922393798828, "logits/rejected": -2.0755276679992676, "logps/chosen": -277.32952880859375, "logps/rejected": -308.8100280761719, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -5.243997573852539, "rewards/margins": 6.726334095001221, "rewards/rejected": -11.970331192016602, "step": 14472 }, { "epoch": 2.25, "learning_rate": 3.5322495980081954e-06, "logits/chosen": -2.638533115386963, "logits/rejected": -3.0246262550354004, "logps/chosen": -355.0587158203125, "logps/rejected": -465.25628662109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.646169662475586, "rewards/margins": 8.67384147644043, "rewards/rejected": -15.320011138916016, "step": 14473 }, { "epoch": 2.25, "learning_rate": 3.5315161574770477e-06, "logits/chosen": -1.67983877658844, "logits/rejected": -2.251304864883423, "logps/chosen": -130.890625, "logps/rejected": -212.1924591064453, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -6.687851905822754, "rewards/margins": 6.738065242767334, "rewards/rejected": -13.42591667175293, "step": 14474 }, { "epoch": 2.25, "learning_rate": 3.5307827169459e-06, "logits/chosen": -1.5808568000793457, "logits/rejected": -2.489518880844116, "logps/chosen": -142.49447631835938, "logps/rejected": -360.3524169921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.159935474395752, "rewards/margins": 12.030265808105469, "rewards/rejected": -16.190200805664062, "step": 14475 }, { "epoch": 2.25, "learning_rate": 3.530049276414752e-06, "logits/chosen": -2.6006712913513184, "logits/rejected": -2.614835739135742, "logps/chosen": -301.91143798828125, "logps/rejected": -288.2779846191406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.785805702209473, "rewards/margins": 9.06956672668457, "rewards/rejected": -14.855371475219727, "step": 14476 }, { "epoch": 2.25, "learning_rate": 3.5293158358836038e-06, "logits/chosen": -2.229776620864868, "logits/rejected": -2.8749423027038574, "logps/chosen": -137.05853271484375, "logps/rejected": -416.08978271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.207929611206055, "rewards/margins": 14.544734954833984, "rewards/rejected": -19.75266456604004, "step": 14477 }, { "epoch": 2.25, "learning_rate": 3.528582395352456e-06, "logits/chosen": -1.8989816904067993, "logits/rejected": -2.890932321548462, "logps/chosen": -232.72352600097656, "logps/rejected": -432.7768859863281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.521693706512451, "rewards/margins": 11.5159912109375, "rewards/rejected": -18.03768539428711, "step": 14478 }, { "epoch": 2.25, "learning_rate": 3.527848954821308e-06, "logits/chosen": -1.528437852859497, "logits/rejected": -2.5251569747924805, "logps/chosen": -231.4375762939453, "logps/rejected": -317.8941650390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.936784744262695, "rewards/margins": 9.700596809387207, "rewards/rejected": -14.637381553649902, "step": 14479 }, { "epoch": 2.25, "learning_rate": 3.5271155142901603e-06, "logits/chosen": -2.4565813541412354, "logits/rejected": -2.9227640628814697, "logps/chosen": -120.70011138916016, "logps/rejected": -368.9846496582031, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -7.730672836303711, "rewards/margins": 5.707902908325195, "rewards/rejected": -13.438575744628906, "step": 14480 }, { "epoch": 2.25, "learning_rate": 3.526382073759012e-06, "logits/chosen": -1.0844284296035767, "logits/rejected": -1.4717800617218018, "logps/chosen": -222.90798950195312, "logps/rejected": -526.3279418945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.612096786499023, "rewards/margins": 11.473257064819336, "rewards/rejected": -19.08535385131836, "step": 14481 }, { "epoch": 2.25, "learning_rate": 3.5256486332278644e-06, "logits/chosen": -2.721954345703125, "logits/rejected": -2.1574199199676514, "logps/chosen": -219.63912963867188, "logps/rejected": -317.06201171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.50609827041626, "rewards/margins": 11.209192276000977, "rewards/rejected": -15.715290069580078, "step": 14482 }, { "epoch": 2.25, "learning_rate": 3.5249151926967168e-06, "logits/chosen": -1.651961088180542, "logits/rejected": -2.5525410175323486, "logps/chosen": -167.1444549560547, "logps/rejected": -362.1641845703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.118459701538086, "rewards/margins": 10.168981552124023, "rewards/rejected": -18.28744125366211, "step": 14483 }, { "epoch": 2.25, "learning_rate": 3.524181752165569e-06, "logits/chosen": -2.5078701972961426, "logits/rejected": -3.1089179515838623, "logps/chosen": -56.703460693359375, "logps/rejected": -314.82666015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.9514427185058594, "rewards/margins": 7.320669174194336, "rewards/rejected": -11.272111892700195, "step": 14484 }, { "epoch": 2.25, "learning_rate": 3.523448311634421e-06, "logits/chosen": -2.6106109619140625, "logits/rejected": -2.0678446292877197, "logps/chosen": -310.19580078125, "logps/rejected": -294.8681640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.1407952308654785, "rewards/margins": 6.6746907234191895, "rewards/rejected": -12.815485954284668, "step": 14485 }, { "epoch": 2.25, "learning_rate": 3.5227148711032732e-06, "logits/chosen": -2.4134836196899414, "logits/rejected": -2.4476230144500732, "logps/chosen": -129.56326293945312, "logps/rejected": -265.5636291503906, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.9046406745910645, "rewards/margins": 8.88010025024414, "rewards/rejected": -16.784740447998047, "step": 14486 }, { "epoch": 2.25, "learning_rate": 3.521981430572125e-06, "logits/chosen": -2.5713207721710205, "logits/rejected": -2.8756930828094482, "logps/chosen": -113.05018615722656, "logps/rejected": -304.7982177734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9302561283111572, "rewards/margins": 9.780094146728516, "rewards/rejected": -13.710350036621094, "step": 14487 }, { "epoch": 2.25, "learning_rate": 3.521247990040977e-06, "logits/chosen": -2.8185863494873047, "logits/rejected": -2.837925672531128, "logps/chosen": -724.4430541992188, "logps/rejected": -657.5196533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.746054649353027, "rewards/margins": 11.902666091918945, "rewards/rejected": -19.64872169494629, "step": 14488 }, { "epoch": 2.25, "learning_rate": 3.5205145495098293e-06, "logits/chosen": -2.0367698669433594, "logits/rejected": -2.7319607734680176, "logps/chosen": -377.95208740234375, "logps/rejected": -509.5931091308594, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -7.570076942443848, "rewards/margins": 8.378872871398926, "rewards/rejected": -15.948949813842773, "step": 14489 }, { "epoch": 2.25, "learning_rate": 3.519781108978681e-06, "logits/chosen": -2.2531909942626953, "logits/rejected": -2.878636360168457, "logps/chosen": -372.134521484375, "logps/rejected": -657.159912109375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.801249980926514, "rewards/margins": 10.540364265441895, "rewards/rejected": -17.34161376953125, "step": 14490 }, { "epoch": 2.25, "learning_rate": 3.519047668447534e-06, "logits/chosen": -2.4784111976623535, "logits/rejected": -2.405881404876709, "logps/chosen": -98.40584564208984, "logps/rejected": -217.80731201171875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.922022819519043, "rewards/margins": 9.289236068725586, "rewards/rejected": -14.211259841918945, "step": 14491 }, { "epoch": 2.25, "learning_rate": 3.5183142279163858e-06, "logits/chosen": -1.9977424144744873, "logits/rejected": -2.5726122856140137, "logps/chosen": -311.88568115234375, "logps/rejected": -621.803955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.24941349029541, "rewards/margins": 12.755074501037598, "rewards/rejected": -16.004487991333008, "step": 14492 }, { "epoch": 2.25, "learning_rate": 3.517580787385238e-06, "logits/chosen": -2.380352020263672, "logits/rejected": -2.6933562755584717, "logps/chosen": -327.3760070800781, "logps/rejected": -388.3335266113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.357404708862305, "rewards/margins": 10.981725692749023, "rewards/rejected": -20.339130401611328, "step": 14493 }, { "epoch": 2.25, "learning_rate": 3.51684734685409e-06, "logits/chosen": -1.9440642595291138, "logits/rejected": -2.5678486824035645, "logps/chosen": -108.03298950195312, "logps/rejected": -370.0220642089844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.254997730255127, "rewards/margins": 14.467845916748047, "rewards/rejected": -18.722843170166016, "step": 14494 }, { "epoch": 2.25, "learning_rate": 3.5161139063229423e-06, "logits/chosen": -2.4854984283447266, "logits/rejected": -2.64654803276062, "logps/chosen": -174.58404541015625, "logps/rejected": -517.520263671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.552582740783691, "rewards/margins": 11.815740585327148, "rewards/rejected": -20.368322372436523, "step": 14495 }, { "epoch": 2.25, "learning_rate": 3.515380465791794e-06, "logits/chosen": -2.3187966346740723, "logits/rejected": -2.7510032653808594, "logps/chosen": -266.36114501953125, "logps/rejected": -383.01483154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.878255844116211, "rewards/margins": 11.884857177734375, "rewards/rejected": -18.763111114501953, "step": 14496 }, { "epoch": 2.25, "learning_rate": 3.514647025260646e-06, "logits/chosen": -2.7371814250946045, "logits/rejected": -2.3510046005249023, "logps/chosen": -704.04052734375, "logps/rejected": -514.5255126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.486000061035156, "rewards/margins": 10.022974014282227, "rewards/rejected": -18.508975982666016, "step": 14497 }, { "epoch": 2.25, "learning_rate": 3.5139135847294983e-06, "logits/chosen": -2.510798692703247, "logits/rejected": -2.748138427734375, "logps/chosen": -134.04833984375, "logps/rejected": -181.64964294433594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.826414108276367, "rewards/margins": 7.103974342346191, "rewards/rejected": -11.930388450622559, "step": 14498 }, { "epoch": 2.25, "learning_rate": 3.5131801441983506e-06, "logits/chosen": -2.4446587562561035, "logits/rejected": -2.3183600902557373, "logps/chosen": -92.2446517944336, "logps/rejected": -200.26730346679688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.930227279663086, "rewards/margins": 8.126274108886719, "rewards/rejected": -14.056501388549805, "step": 14499 }, { "epoch": 2.26, "learning_rate": 3.512446703667203e-06, "logits/chosen": -2.670471429824829, "logits/rejected": -2.723245143890381, "logps/chosen": -602.767822265625, "logps/rejected": -715.2275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.467690467834473, "rewards/margins": 14.673900604248047, "rewards/rejected": -23.141592025756836, "step": 14500 }, { "epoch": 2.26, "learning_rate": 3.511713263136055e-06, "logits/chosen": -2.5308597087860107, "logits/rejected": -2.784839153289795, "logps/chosen": -386.5679016113281, "logps/rejected": -494.12548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.24822473526001, "rewards/margins": 9.342630386352539, "rewards/rejected": -15.590855598449707, "step": 14501 }, { "epoch": 2.26, "learning_rate": 3.510979822604907e-06, "logits/chosen": -2.7195470333099365, "logits/rejected": -2.255505323410034, "logps/chosen": -523.60205078125, "logps/rejected": -553.8992309570312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.800760269165039, "rewards/margins": 12.854232788085938, "rewards/rejected": -21.654993057250977, "step": 14502 }, { "epoch": 2.26, "learning_rate": 3.510246382073759e-06, "logits/chosen": -1.968607783317566, "logits/rejected": -2.790043592453003, "logps/chosen": -166.78244018554688, "logps/rejected": -370.6534118652344, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.698365211486816, "rewards/margins": 7.252246856689453, "rewards/rejected": -14.95061206817627, "step": 14503 }, { "epoch": 2.26, "learning_rate": 3.5095129415426113e-06, "logits/chosen": -2.3984344005584717, "logits/rejected": -2.9521968364715576, "logps/chosen": -162.49169921875, "logps/rejected": -687.9890747070312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.100582122802734, "rewards/margins": 8.375078201293945, "rewards/rejected": -17.475662231445312, "step": 14504 }, { "epoch": 2.26, "learning_rate": 3.508779501011463e-06, "logits/chosen": -2.494211196899414, "logits/rejected": -2.233001232147217, "logps/chosen": -321.8121337890625, "logps/rejected": -463.8731994628906, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -8.391692161560059, "rewards/margins": 5.673526287078857, "rewards/rejected": -14.065217971801758, "step": 14505 }, { "epoch": 2.26, "learning_rate": 3.5080460604803155e-06, "logits/chosen": -2.5959975719451904, "logits/rejected": -2.6252408027648926, "logps/chosen": -167.60231018066406, "logps/rejected": -208.41970825195312, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.031039237976074, "rewards/margins": 7.342902183532715, "rewards/rejected": -14.373941421508789, "step": 14506 }, { "epoch": 2.26, "learning_rate": 3.5073126199491673e-06, "logits/chosen": -2.106398105621338, "logits/rejected": -2.7209761142730713, "logps/chosen": -225.1993408203125, "logps/rejected": -293.5196838378906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.5715436935424805, "rewards/margins": 11.570091247558594, "rewards/rejected": -17.14163589477539, "step": 14507 }, { "epoch": 2.26, "learning_rate": 3.5065791794180196e-06, "logits/chosen": -0.8965454697608948, "logits/rejected": -1.8176279067993164, "logps/chosen": -284.5183410644531, "logps/rejected": -665.9513549804688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.622676849365234, "rewards/margins": 14.503580093383789, "rewards/rejected": -21.126256942749023, "step": 14508 }, { "epoch": 2.26, "learning_rate": 3.505845738886872e-06, "logits/chosen": -2.670079231262207, "logits/rejected": -2.9898736476898193, "logps/chosen": -164.87344360351562, "logps/rejected": -323.4132080078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.068461894989014, "rewards/margins": 8.26839542388916, "rewards/rejected": -13.336857795715332, "step": 14509 }, { "epoch": 2.26, "learning_rate": 3.505112298355724e-06, "logits/chosen": -2.5875346660614014, "logits/rejected": -1.8912526369094849, "logps/chosen": -250.599365234375, "logps/rejected": -283.9365539550781, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.8318352699279785, "rewards/margins": 10.78284740447998, "rewards/rejected": -16.614683151245117, "step": 14510 }, { "epoch": 2.26, "learning_rate": 3.504378857824576e-06, "logits/chosen": -2.387221574783325, "logits/rejected": -2.7802908420562744, "logps/chosen": -178.17112731933594, "logps/rejected": -405.8550109863281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.789590835571289, "rewards/margins": 8.522540092468262, "rewards/rejected": -13.31213092803955, "step": 14511 }, { "epoch": 2.26, "learning_rate": 3.503645417293428e-06, "logits/chosen": -1.5331329107284546, "logits/rejected": -2.5549495220184326, "logps/chosen": -224.06944274902344, "logps/rejected": -792.3992309570312, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -13.734437942504883, "rewards/margins": 9.240730285644531, "rewards/rejected": -22.975168228149414, "step": 14512 }, { "epoch": 2.26, "learning_rate": 3.5029119767622803e-06, "logits/chosen": -2.515324354171753, "logits/rejected": -1.8420295715332031, "logps/chosen": -486.34039306640625, "logps/rejected": -356.4875793457031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.594229221343994, "rewards/margins": 8.40069580078125, "rewards/rejected": -12.994925498962402, "step": 14513 }, { "epoch": 2.26, "learning_rate": 3.502178536231132e-06, "logits/chosen": -2.6029114723205566, "logits/rejected": -1.6594418287277222, "logps/chosen": -342.6768798828125, "logps/rejected": -321.1673278808594, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.029726505279541, "rewards/margins": 6.664065361022949, "rewards/rejected": -11.693792343139648, "step": 14514 }, { "epoch": 2.26, "learning_rate": 3.5014450956999845e-06, "logits/chosen": -1.9032288789749146, "logits/rejected": -3.0254929065704346, "logps/chosen": -173.09715270996094, "logps/rejected": -393.4169006347656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.077704429626465, "rewards/margins": 9.969497680664062, "rewards/rejected": -14.047201156616211, "step": 14515 }, { "epoch": 2.26, "learning_rate": 3.5007116551688368e-06, "logits/chosen": -2.3055763244628906, "logits/rejected": -2.834294557571411, "logps/chosen": -139.78561401367188, "logps/rejected": -347.7536926269531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.671303749084473, "rewards/margins": 10.267491340637207, "rewards/rejected": -15.93879508972168, "step": 14516 }, { "epoch": 2.26, "learning_rate": 3.4999782146376887e-06, "logits/chosen": -1.0347503423690796, "logits/rejected": -2.2985246181488037, "logps/chosen": -114.01262664794922, "logps/rejected": -388.9443359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.967230319976807, "rewards/margins": 12.50456714630127, "rewards/rejected": -18.471797943115234, "step": 14517 }, { "epoch": 2.26, "learning_rate": 3.499244774106541e-06, "logits/chosen": -2.407841920852661, "logits/rejected": -2.327704906463623, "logps/chosen": -586.1522216796875, "logps/rejected": -671.13232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.313378810882568, "rewards/margins": 11.893739700317383, "rewards/rejected": -17.20711898803711, "step": 14518 }, { "epoch": 2.26, "learning_rate": 3.498511333575393e-06, "logits/chosen": -2.6587769985198975, "logits/rejected": -1.8349090814590454, "logps/chosen": -200.36233520507812, "logps/rejected": -190.73355102539062, "loss": 0.138, "rewards/accuracies": 1.0, "rewards/chosen": -5.4036359786987305, "rewards/margins": 5.946202278137207, "rewards/rejected": -11.349838256835938, "step": 14519 }, { "epoch": 2.26, "learning_rate": 3.497777893044245e-06, "logits/chosen": -2.2616913318634033, "logits/rejected": -2.675902843475342, "logps/chosen": -443.5585021972656, "logps/rejected": -588.8460693359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.492727279663086, "rewards/margins": 10.85574722290039, "rewards/rejected": -18.348474502563477, "step": 14520 }, { "epoch": 2.26, "learning_rate": 3.497044452513097e-06, "logits/chosen": -2.439955711364746, "logits/rejected": -2.6497745513916016, "logps/chosen": -199.31951904296875, "logps/rejected": -377.1951599121094, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.811124324798584, "rewards/margins": 7.104837894439697, "rewards/rejected": -12.915962219238281, "step": 14521 }, { "epoch": 2.26, "learning_rate": 3.4963110119819493e-06, "logits/chosen": -2.2499990463256836, "logits/rejected": -2.90053653717041, "logps/chosen": -165.69944763183594, "logps/rejected": -543.0123291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.334596633911133, "rewards/margins": 11.147001266479492, "rewards/rejected": -20.481597900390625, "step": 14522 }, { "epoch": 2.26, "learning_rate": 3.495577571450801e-06, "logits/chosen": -2.221484899520874, "logits/rejected": -2.7847378253936768, "logps/chosen": -507.23248291015625, "logps/rejected": -578.91259765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.36676025390625, "rewards/margins": 9.88593864440918, "rewards/rejected": -18.25269889831543, "step": 14523 }, { "epoch": 2.26, "learning_rate": 3.4948441309196535e-06, "logits/chosen": -2.2297451496124268, "logits/rejected": -2.6996910572052, "logps/chosen": -98.58392333984375, "logps/rejected": -331.76947021484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.409846305847168, "rewards/margins": 9.344159126281738, "rewards/rejected": -14.754005432128906, "step": 14524 }, { "epoch": 2.26, "learning_rate": 3.494110690388506e-06, "logits/chosen": -3.1430747509002686, "logits/rejected": -2.1470394134521484, "logps/chosen": -415.9209289550781, "logps/rejected": -134.86309814453125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -3.3974685668945312, "rewards/margins": 5.500185966491699, "rewards/rejected": -8.89765453338623, "step": 14525 }, { "epoch": 2.26, "learning_rate": 3.4933772498573577e-06, "logits/chosen": -2.34930157661438, "logits/rejected": -2.813748836517334, "logps/chosen": -332.89202880859375, "logps/rejected": -517.8388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.3932663202285767, "rewards/margins": 16.195466995239258, "rewards/rejected": -17.588733673095703, "step": 14526 }, { "epoch": 2.26, "learning_rate": 3.49264380932621e-06, "logits/chosen": -2.316524028778076, "logits/rejected": -2.0350699424743652, "logps/chosen": -145.9519500732422, "logps/rejected": -210.503173828125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -6.171344757080078, "rewards/margins": 5.286852836608887, "rewards/rejected": -11.458196640014648, "step": 14527 }, { "epoch": 2.26, "learning_rate": 3.491910368795062e-06, "logits/chosen": -2.98466157913208, "logits/rejected": -2.6229944229125977, "logps/chosen": -665.7330932617188, "logps/rejected": -514.41796875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.283991813659668, "rewards/margins": 13.479472160339355, "rewards/rejected": -19.763465881347656, "step": 14528 }, { "epoch": 2.26, "learning_rate": 3.491176928263914e-06, "logits/chosen": -2.4776456356048584, "logits/rejected": -2.986297130584717, "logps/chosen": -112.3124008178711, "logps/rejected": -414.37255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.116599082946777, "rewards/margins": 11.738924980163574, "rewards/rejected": -16.85552406311035, "step": 14529 }, { "epoch": 2.26, "learning_rate": 3.490443487732766e-06, "logits/chosen": -2.703381299972534, "logits/rejected": -2.9997079372406006, "logps/chosen": -120.18405151367188, "logps/rejected": -300.516357421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.0906267166137695, "rewards/margins": 9.984281539916992, "rewards/rejected": -15.074907302856445, "step": 14530 }, { "epoch": 2.26, "learning_rate": 3.4897100472016183e-06, "logits/chosen": -2.7989273071289062, "logits/rejected": -2.8285739421844482, "logps/chosen": -100.84895324707031, "logps/rejected": -191.11251831054688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.4100494384765625, "rewards/margins": 8.938480377197266, "rewards/rejected": -13.348529815673828, "step": 14531 }, { "epoch": 2.26, "learning_rate": 3.4889766066704702e-06, "logits/chosen": -2.4169723987579346, "logits/rejected": -2.4117798805236816, "logps/chosen": -503.92913818359375, "logps/rejected": -604.3729248046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.031328201293945, "rewards/margins": 8.897298812866211, "rewards/rejected": -17.928627014160156, "step": 14532 }, { "epoch": 2.26, "learning_rate": 3.488243166139323e-06, "logits/chosen": -1.2036699056625366, "logits/rejected": -2.599304676055908, "logps/chosen": -428.8119812011719, "logps/rejected": -523.3154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.658731937408447, "rewards/margins": 12.397483825683594, "rewards/rejected": -20.056217193603516, "step": 14533 }, { "epoch": 2.26, "learning_rate": 3.487509725608175e-06, "logits/chosen": -1.7947282791137695, "logits/rejected": -2.662874698638916, "logps/chosen": -207.80001831054688, "logps/rejected": -405.7020263671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.405050277709961, "rewards/margins": 10.716227531433105, "rewards/rejected": -16.12127685546875, "step": 14534 }, { "epoch": 2.26, "learning_rate": 3.486776285077027e-06, "logits/chosen": -1.8683539628982544, "logits/rejected": -2.5007548332214355, "logps/chosen": -127.55911254882812, "logps/rejected": -282.0958251953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.1451873779296875, "rewards/margins": 8.123597145080566, "rewards/rejected": -14.268783569335938, "step": 14535 }, { "epoch": 2.26, "learning_rate": 3.486042844545879e-06, "logits/chosen": -2.2006990909576416, "logits/rejected": -2.8375158309936523, "logps/chosen": -176.0111083984375, "logps/rejected": -373.761474609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.194985389709473, "rewards/margins": 11.732152938842773, "rewards/rejected": -15.927138328552246, "step": 14536 }, { "epoch": 2.26, "learning_rate": 3.485309404014731e-06, "logits/chosen": -2.762930154800415, "logits/rejected": -1.8781604766845703, "logps/chosen": -304.7232360839844, "logps/rejected": -161.4775390625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.601109504699707, "rewards/margins": 6.528602123260498, "rewards/rejected": -10.129711151123047, "step": 14537 }, { "epoch": 2.26, "learning_rate": 3.484575963483583e-06, "logits/chosen": -2.533188819885254, "logits/rejected": -2.653404712677002, "logps/chosen": -81.69020080566406, "logps/rejected": -254.3211669921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.078891754150391, "rewards/margins": 7.722962379455566, "rewards/rejected": -13.80185317993164, "step": 14538 }, { "epoch": 2.26, "learning_rate": 3.483842522952435e-06, "logits/chosen": -1.9823620319366455, "logits/rejected": -2.777613639831543, "logps/chosen": -226.1696014404297, "logps/rejected": -471.10894775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.838809967041016, "rewards/margins": 12.49457836151123, "rewards/rejected": -17.333389282226562, "step": 14539 }, { "epoch": 2.26, "learning_rate": 3.4831090824212874e-06, "logits/chosen": -2.7462899684906006, "logits/rejected": -2.2083702087402344, "logps/chosen": -277.08197021484375, "logps/rejected": -301.78607177734375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -7.046601295471191, "rewards/margins": 7.980273246765137, "rewards/rejected": -15.026874542236328, "step": 14540 }, { "epoch": 2.26, "learning_rate": 3.4823756418901397e-06, "logits/chosen": -2.7732772827148438, "logits/rejected": -2.369795322418213, "logps/chosen": -324.533935546875, "logps/rejected": -222.4456024169922, "loss": 0.088, "rewards/accuracies": 1.0, "rewards/chosen": -7.433255195617676, "rewards/margins": 7.300724983215332, "rewards/rejected": -14.733980178833008, "step": 14541 }, { "epoch": 2.26, "learning_rate": 3.481642201358992e-06, "logits/chosen": -1.693239688873291, "logits/rejected": -2.2431628704071045, "logps/chosen": -119.46753692626953, "logps/rejected": -306.2151184082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.076318264007568, "rewards/margins": 10.267574310302734, "rewards/rejected": -16.343891143798828, "step": 14542 }, { "epoch": 2.26, "learning_rate": 3.480908760827844e-06, "logits/chosen": -2.5983738899230957, "logits/rejected": -2.459092855453491, "logps/chosen": -399.1033935546875, "logps/rejected": -392.3493347167969, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.877717971801758, "rewards/margins": 11.317670822143555, "rewards/rejected": -18.195388793945312, "step": 14543 }, { "epoch": 2.26, "learning_rate": 3.480175320296696e-06, "logits/chosen": -2.5383381843566895, "logits/rejected": -2.828622341156006, "logps/chosen": -329.14031982421875, "logps/rejected": -417.8183898925781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.165653228759766, "rewards/margins": 12.030317306518555, "rewards/rejected": -19.19597053527832, "step": 14544 }, { "epoch": 2.26, "learning_rate": 3.479441879765548e-06, "logits/chosen": -2.1542580127716064, "logits/rejected": -1.6790348291397095, "logps/chosen": -1845.8857421875, "logps/rejected": -577.14208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1470136642456055, "rewards/margins": 12.011913299560547, "rewards/rejected": -19.15892791748047, "step": 14545 }, { "epoch": 2.26, "learning_rate": 3.4787084392344e-06, "logits/chosen": -2.680506467819214, "logits/rejected": -2.888084888458252, "logps/chosen": -334.2437438964844, "logps/rejected": -295.8060302734375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -6.228003025054932, "rewards/margins": 7.105008125305176, "rewards/rejected": -13.333011627197266, "step": 14546 }, { "epoch": 2.26, "learning_rate": 3.477974998703252e-06, "logits/chosen": -1.7445167303085327, "logits/rejected": -2.702723503112793, "logps/chosen": -272.96746826171875, "logps/rejected": -567.012939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.600663185119629, "rewards/margins": 11.929664611816406, "rewards/rejected": -19.53032875061035, "step": 14547 }, { "epoch": 2.26, "learning_rate": 3.477241558172104e-06, "logits/chosen": -2.2761335372924805, "logits/rejected": -2.722172975540161, "logps/chosen": -464.864501953125, "logps/rejected": -553.9324340820312, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.2641282081604, "rewards/margins": 8.159582138061523, "rewards/rejected": -12.423710823059082, "step": 14548 }, { "epoch": 2.26, "learning_rate": 3.4765081176409564e-06, "logits/chosen": -1.2590051889419556, "logits/rejected": -2.339944839477539, "logps/chosen": -159.9434814453125, "logps/rejected": -465.98077392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.462215423583984, "rewards/margins": 13.130038261413574, "rewards/rejected": -20.592254638671875, "step": 14549 }, { "epoch": 2.26, "learning_rate": 3.4757746771098087e-06, "logits/chosen": -1.8011376857757568, "logits/rejected": -2.6112382411956787, "logps/chosen": -146.68385314941406, "logps/rejected": -320.8335876464844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.404394626617432, "rewards/margins": 10.755278587341309, "rewards/rejected": -18.1596736907959, "step": 14550 }, { "epoch": 2.26, "learning_rate": 3.475041236578661e-06, "logits/chosen": -2.5854694843292236, "logits/rejected": -2.000437021255493, "logps/chosen": -320.26812744140625, "logps/rejected": -253.0831298828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.1889848709106445, "rewards/margins": 7.7105183601379395, "rewards/rejected": -12.899502754211426, "step": 14551 }, { "epoch": 2.26, "learning_rate": 3.474307796047513e-06, "logits/chosen": -1.8503468036651611, "logits/rejected": -2.883371114730835, "logps/chosen": -208.87353515625, "logps/rejected": -385.2706298828125, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -7.867594242095947, "rewards/margins": 9.53985595703125, "rewards/rejected": -17.40744972229004, "step": 14552 }, { "epoch": 2.26, "learning_rate": 3.473574355516365e-06, "logits/chosen": -2.5525577068328857, "logits/rejected": -2.169527292251587, "logps/chosen": -304.3811340332031, "logps/rejected": -400.8245849609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.137127876281738, "rewards/margins": 9.723384857177734, "rewards/rejected": -16.860511779785156, "step": 14553 }, { "epoch": 2.26, "learning_rate": 3.472840914985217e-06, "logits/chosen": -2.437155246734619, "logits/rejected": -3.124629497528076, "logps/chosen": -109.41175842285156, "logps/rejected": -318.2453308105469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.281292915344238, "rewards/margins": 8.510770797729492, "rewards/rejected": -14.792064666748047, "step": 14554 }, { "epoch": 2.26, "learning_rate": 3.4721074744540694e-06, "logits/chosen": -2.454307794570923, "logits/rejected": -2.061089515686035, "logps/chosen": -586.7628173828125, "logps/rejected": -473.2283935546875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -11.911531448364258, "rewards/margins": 8.845809936523438, "rewards/rejected": -20.757341384887695, "step": 14555 }, { "epoch": 2.26, "learning_rate": 3.4713740339229212e-06, "logits/chosen": -2.8442888259887695, "logits/rejected": -2.7850258350372314, "logps/chosen": -284.50341796875, "logps/rejected": -347.0301513671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.6439452171325684, "rewards/margins": 8.89665699005127, "rewards/rejected": -11.540602684020996, "step": 14556 }, { "epoch": 2.26, "learning_rate": 3.470640593391773e-06, "logits/chosen": -2.0855445861816406, "logits/rejected": -0.7703302502632141, "logps/chosen": -383.3593444824219, "logps/rejected": -290.092041015625, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -8.685930252075195, "rewards/margins": 5.126755237579346, "rewards/rejected": -13.812685012817383, "step": 14557 }, { "epoch": 2.26, "learning_rate": 3.469907152860626e-06, "logits/chosen": -2.4570322036743164, "logits/rejected": -2.2812795639038086, "logps/chosen": -207.88723754882812, "logps/rejected": -316.92840576171875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.773879528045654, "rewards/margins": 8.654095649719238, "rewards/rejected": -13.427974700927734, "step": 14558 }, { "epoch": 2.26, "learning_rate": 3.4691737123294777e-06, "logits/chosen": -2.8454763889312744, "logits/rejected": -1.7007371187210083, "logps/chosen": -380.2945556640625, "logps/rejected": -252.9022216796875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.392139434814453, "rewards/margins": 6.190695762634277, "rewards/rejected": -11.58283519744873, "step": 14559 }, { "epoch": 2.26, "learning_rate": 3.46844027179833e-06, "logits/chosen": -1.8600285053253174, "logits/rejected": -2.627971649169922, "logps/chosen": -270.37066650390625, "logps/rejected": -452.28802490234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.7084150314331055, "rewards/margins": 9.896562576293945, "rewards/rejected": -15.604978561401367, "step": 14560 }, { "epoch": 2.26, "learning_rate": 3.467706831267182e-06, "logits/chosen": -1.3678869009017944, "logits/rejected": -1.9800567626953125, "logps/chosen": -170.083740234375, "logps/rejected": -297.10797119140625, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/chosen": -5.929224014282227, "rewards/margins": 6.270127296447754, "rewards/rejected": -12.19935131072998, "step": 14561 }, { "epoch": 2.26, "learning_rate": 3.466973390736034e-06, "logits/chosen": -2.796426773071289, "logits/rejected": -2.4644525051116943, "logps/chosen": -593.4312133789062, "logps/rejected": -625.77392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0833330154418945, "rewards/margins": 13.367502212524414, "rewards/rejected": -17.450836181640625, "step": 14562 }, { "epoch": 2.26, "learning_rate": 3.466239950204886e-06, "logits/chosen": -2.5931150913238525, "logits/rejected": -2.1973538398742676, "logps/chosen": -171.31912231445312, "logps/rejected": -267.8626708984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.62652587890625, "rewards/margins": 12.498461723327637, "rewards/rejected": -13.124987602233887, "step": 14563 }, { "epoch": 2.27, "learning_rate": 3.4655065096737384e-06, "logits/chosen": -2.7572691440582275, "logits/rejected": -2.3170132637023926, "logps/chosen": -263.8114929199219, "logps/rejected": -293.864501953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.09820032119751, "rewards/margins": 8.203636169433594, "rewards/rejected": -13.301836013793945, "step": 14564 }, { "epoch": 2.27, "learning_rate": 3.4647730691425903e-06, "logits/chosen": -2.2624568939208984, "logits/rejected": -2.724949598312378, "logps/chosen": -112.70941925048828, "logps/rejected": -234.54843139648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.513742446899414, "rewards/margins": 11.392126083374023, "rewards/rejected": -16.905868530273438, "step": 14565 }, { "epoch": 2.27, "learning_rate": 3.4640396286114426e-06, "logits/chosen": -2.795607805252075, "logits/rejected": -2.3469223976135254, "logps/chosen": -500.30340576171875, "logps/rejected": -578.1639404296875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -4.464735984802246, "rewards/margins": 12.072901725769043, "rewards/rejected": -16.53763771057129, "step": 14566 }, { "epoch": 2.27, "learning_rate": 3.463306188080295e-06, "logits/chosen": -2.4060347080230713, "logits/rejected": -2.9196484088897705, "logps/chosen": -280.2571105957031, "logps/rejected": -532.477294921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.618968963623047, "rewards/margins": 9.044528007507324, "rewards/rejected": -15.663496971130371, "step": 14567 }, { "epoch": 2.27, "learning_rate": 3.4625727475491467e-06, "logits/chosen": -2.591139316558838, "logits/rejected": -1.8197191953659058, "logps/chosen": -147.32684326171875, "logps/rejected": -256.3017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0685877799987793, "rewards/margins": 12.465982437133789, "rewards/rejected": -15.534570693969727, "step": 14568 }, { "epoch": 2.27, "learning_rate": 3.461839307017999e-06, "logits/chosen": -2.502361536026001, "logits/rejected": -2.7678542137145996, "logps/chosen": -184.51681518554688, "logps/rejected": -267.0845642089844, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.374774932861328, "rewards/margins": 6.614229202270508, "rewards/rejected": -12.989004135131836, "step": 14569 }, { "epoch": 2.27, "learning_rate": 3.461105866486851e-06, "logits/chosen": -1.9255597591400146, "logits/rejected": -2.5886170864105225, "logps/chosen": -323.3543701171875, "logps/rejected": -545.4008178710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.037290573120117, "rewards/margins": 12.736923217773438, "rewards/rejected": -18.774213790893555, "step": 14570 }, { "epoch": 2.27, "learning_rate": 3.4603724259557032e-06, "logits/chosen": -2.6422183513641357, "logits/rejected": -2.221085786819458, "logps/chosen": -946.4248046875, "logps/rejected": -562.04443359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.274928092956543, "rewards/margins": 8.954450607299805, "rewards/rejected": -15.229379653930664, "step": 14571 }, { "epoch": 2.27, "learning_rate": 3.459638985424555e-06, "logits/chosen": -2.6704790592193604, "logits/rejected": -2.6773858070373535, "logps/chosen": -424.8510437011719, "logps/rejected": -304.68048095703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.9927263259887695, "rewards/margins": 9.877872467041016, "rewards/rejected": -14.870597839355469, "step": 14572 }, { "epoch": 2.27, "learning_rate": 3.4589055448934074e-06, "logits/chosen": -1.7764768600463867, "logits/rejected": -2.677400827407837, "logps/chosen": -79.427978515625, "logps/rejected": -262.5769348144531, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.6857457160949707, "rewards/margins": 9.316699028015137, "rewards/rejected": -12.002445220947266, "step": 14573 }, { "epoch": 2.27, "learning_rate": 3.4581721043622593e-06, "logits/chosen": -0.7676519751548767, "logits/rejected": -2.5151023864746094, "logps/chosen": -201.6396484375, "logps/rejected": -495.2505187988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.154605388641357, "rewards/margins": 10.17085075378418, "rewards/rejected": -15.325456619262695, "step": 14574 }, { "epoch": 2.27, "learning_rate": 3.457438663831112e-06, "logits/chosen": -2.5285720825195312, "logits/rejected": -3.0875680446624756, "logps/chosen": -147.65142822265625, "logps/rejected": -254.48806762695312, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -7.349018096923828, "rewards/margins": 7.614184856414795, "rewards/rejected": -14.963203430175781, "step": 14575 }, { "epoch": 2.27, "learning_rate": 3.456705223299964e-06, "logits/chosen": -2.662963390350342, "logits/rejected": -2.0979418754577637, "logps/chosen": -438.56842041015625, "logps/rejected": -436.7770080566406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.223426818847656, "rewards/margins": 8.521663665771484, "rewards/rejected": -16.74509048461914, "step": 14576 }, { "epoch": 2.27, "learning_rate": 3.4559717827688158e-06, "logits/chosen": -3.244992733001709, "logits/rejected": -2.877002716064453, "logps/chosen": -216.35853576660156, "logps/rejected": -187.3992919921875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -3.9938299655914307, "rewards/margins": 6.486583232879639, "rewards/rejected": -10.480413436889648, "step": 14577 }, { "epoch": 2.27, "learning_rate": 3.455238342237668e-06, "logits/chosen": -2.5910756587982178, "logits/rejected": -2.6529080867767334, "logps/chosen": -102.93238830566406, "logps/rejected": -213.65301513671875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.644964218139648, "rewards/margins": 7.2980451583862305, "rewards/rejected": -13.943009376525879, "step": 14578 }, { "epoch": 2.27, "learning_rate": 3.45450490170652e-06, "logits/chosen": -2.3943300247192383, "logits/rejected": -2.637474298477173, "logps/chosen": -363.5997009277344, "logps/rejected": -475.2124938964844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.945898532867432, "rewards/margins": 7.028023719787598, "rewards/rejected": -14.973922729492188, "step": 14579 }, { "epoch": 2.27, "learning_rate": 3.4537714611753722e-06, "logits/chosen": -0.7711058855056763, "logits/rejected": -2.6585590839385986, "logps/chosen": -113.054931640625, "logps/rejected": -601.109619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.961371421813965, "rewards/margins": 13.809514045715332, "rewards/rejected": -17.770885467529297, "step": 14580 }, { "epoch": 2.27, "learning_rate": 3.453038020644224e-06, "logits/chosen": -2.520726442337036, "logits/rejected": -2.7073583602905273, "logps/chosen": -111.59918212890625, "logps/rejected": -343.7286376953125, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -5.363492012023926, "rewards/margins": 10.087952613830566, "rewards/rejected": -15.451444625854492, "step": 14581 }, { "epoch": 2.27, "learning_rate": 3.4523045801130764e-06, "logits/chosen": -2.650660753250122, "logits/rejected": -2.4437222480773926, "logps/chosen": -242.30410766601562, "logps/rejected": -282.167236328125, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": -6.11729621887207, "rewards/margins": 6.986491680145264, "rewards/rejected": -13.103788375854492, "step": 14582 }, { "epoch": 2.27, "learning_rate": 3.4515711395819287e-06, "logits/chosen": -2.8123929500579834, "logits/rejected": -1.8060696125030518, "logps/chosen": -519.0399169921875, "logps/rejected": -280.55389404296875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -2.9004807472229004, "rewards/margins": 6.7972412109375, "rewards/rejected": -9.697722434997559, "step": 14583 }, { "epoch": 2.27, "learning_rate": 3.450837699050781e-06, "logits/chosen": -0.9435958862304688, "logits/rejected": -2.873350143432617, "logps/chosen": -150.543212890625, "logps/rejected": -868.5225830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.209352493286133, "rewards/margins": 14.083452224731445, "rewards/rejected": -20.292804718017578, "step": 14584 }, { "epoch": 2.27, "learning_rate": 3.450104258519633e-06, "logits/chosen": -2.718488931655884, "logits/rejected": -2.9449667930603027, "logps/chosen": -640.617431640625, "logps/rejected": -534.1083374023438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.878512382507324, "rewards/margins": 10.698602676391602, "rewards/rejected": -14.577116012573242, "step": 14585 }, { "epoch": 2.27, "learning_rate": 3.4493708179884848e-06, "logits/chosen": -2.886784791946411, "logits/rejected": -2.2758100032806396, "logps/chosen": -546.4920654296875, "logps/rejected": -1101.15576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.6015243530273438, "rewards/margins": 15.333330154418945, "rewards/rejected": -17.93485450744629, "step": 14586 }, { "epoch": 2.27, "learning_rate": 3.448637377457337e-06, "logits/chosen": -2.586702823638916, "logits/rejected": -2.6912593841552734, "logps/chosen": -218.94351196289062, "logps/rejected": -406.94512939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.3482513427734375, "rewards/margins": 9.481290817260742, "rewards/rejected": -13.82954216003418, "step": 14587 }, { "epoch": 2.27, "learning_rate": 3.447903936926189e-06, "logits/chosen": -2.741209030151367, "logits/rejected": -1.7659879922866821, "logps/chosen": -247.58229064941406, "logps/rejected": -270.22821044921875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -6.293724060058594, "rewards/margins": 6.8164896965026855, "rewards/rejected": -13.110213279724121, "step": 14588 }, { "epoch": 2.27, "learning_rate": 3.4471704963950413e-06, "logits/chosen": -2.029334783554077, "logits/rejected": -2.2196204662323, "logps/chosen": -146.57969665527344, "logps/rejected": -316.70751953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.471292972564697, "rewards/margins": 9.263507843017578, "rewards/rejected": -16.734800338745117, "step": 14589 }, { "epoch": 2.27, "learning_rate": 3.446437055863893e-06, "logits/chosen": -1.1979864835739136, "logits/rejected": -2.8532285690307617, "logps/chosen": -164.0570068359375, "logps/rejected": -677.8359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -10.19845199584961, "rewards/margins": 9.826117515563965, "rewards/rejected": -20.02457046508789, "step": 14590 }, { "epoch": 2.27, "learning_rate": 3.4457036153327454e-06, "logits/chosen": -1.7606045007705688, "logits/rejected": -2.5443859100341797, "logps/chosen": -216.65701293945312, "logps/rejected": -379.3236083984375, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -6.1614837646484375, "rewards/margins": 7.783112525939941, "rewards/rejected": -13.944596290588379, "step": 14591 }, { "epoch": 2.27, "learning_rate": 3.4449701748015977e-06, "logits/chosen": -1.5079418420791626, "logits/rejected": -2.2907590866088867, "logps/chosen": -234.16998291015625, "logps/rejected": -434.1524353027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.993329048156738, "rewards/margins": 12.273221969604492, "rewards/rejected": -21.266551971435547, "step": 14592 }, { "epoch": 2.27, "learning_rate": 3.44423673427045e-06, "logits/chosen": -2.365981101989746, "logits/rejected": -2.5553388595581055, "logps/chosen": -91.52813720703125, "logps/rejected": -186.93653869628906, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -2.035735845565796, "rewards/margins": 8.515609741210938, "rewards/rejected": -10.551345825195312, "step": 14593 }, { "epoch": 2.27, "learning_rate": 3.443503293739302e-06, "logits/chosen": -1.959657073020935, "logits/rejected": -2.6328675746917725, "logps/chosen": -288.68505859375, "logps/rejected": -429.6708679199219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.080032348632812, "rewards/margins": 9.021432876586914, "rewards/rejected": -17.101465225219727, "step": 14594 }, { "epoch": 2.27, "learning_rate": 3.442769853208154e-06, "logits/chosen": -2.984126567840576, "logits/rejected": -1.7493940591812134, "logps/chosen": -229.52320861816406, "logps/rejected": -223.709716796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.2735726833343506, "rewards/margins": 7.946889877319336, "rewards/rejected": -10.220462799072266, "step": 14595 }, { "epoch": 2.27, "learning_rate": 3.442036412677006e-06, "logits/chosen": -2.7755300998687744, "logits/rejected": -2.2408523559570312, "logps/chosen": -427.79339599609375, "logps/rejected": -483.70611572265625, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -7.338520050048828, "rewards/margins": 7.040688514709473, "rewards/rejected": -14.3792085647583, "step": 14596 }, { "epoch": 2.27, "learning_rate": 3.441302972145858e-06, "logits/chosen": -1.9008296728134155, "logits/rejected": -2.404348611831665, "logps/chosen": -188.87432861328125, "logps/rejected": -508.77862548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.264020919799805, "rewards/margins": 13.304665565490723, "rewards/rejected": -20.568687438964844, "step": 14597 }, { "epoch": 2.27, "learning_rate": 3.4405695316147103e-06, "logits/chosen": -2.6463980674743652, "logits/rejected": -2.6537368297576904, "logps/chosen": -396.01263427734375, "logps/rejected": -459.1904296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.356349945068359, "rewards/margins": 10.613296508789062, "rewards/rejected": -15.969646453857422, "step": 14598 }, { "epoch": 2.27, "learning_rate": 3.439836091083562e-06, "logits/chosen": -2.8407020568847656, "logits/rejected": -2.7609267234802246, "logps/chosen": -561.2208251953125, "logps/rejected": -596.3389892578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.693686008453369, "rewards/margins": 12.719554901123047, "rewards/rejected": -20.413240432739258, "step": 14599 }, { "epoch": 2.27, "learning_rate": 3.439102650552415e-06, "logits/chosen": -2.105069398880005, "logits/rejected": -2.619670867919922, "logps/chosen": -128.35536193847656, "logps/rejected": -343.7928466796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.278998374938965, "rewards/margins": 11.497746467590332, "rewards/rejected": -15.776744842529297, "step": 14600 }, { "epoch": 2.27, "learning_rate": 3.4383692100212668e-06, "logits/chosen": -2.9110827445983887, "logits/rejected": -1.806041955947876, "logps/chosen": -237.43069458007812, "logps/rejected": -167.8875732421875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.044769287109375, "rewards/margins": 6.385421276092529, "rewards/rejected": -10.430191040039062, "step": 14601 }, { "epoch": 2.27, "learning_rate": 3.437635769490119e-06, "logits/chosen": -0.6596423983573914, "logits/rejected": -2.0249826908111572, "logps/chosen": -129.21615600585938, "logps/rejected": -373.75543212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4973039627075195, "rewards/margins": 10.644718170166016, "rewards/rejected": -15.142023086547852, "step": 14602 }, { "epoch": 2.27, "learning_rate": 3.436902328958971e-06, "logits/chosen": -2.4589879512786865, "logits/rejected": -2.9915294647216797, "logps/chosen": -89.18464660644531, "logps/rejected": -349.8455810546875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.9686384201049805, "rewards/margins": 7.339216232299805, "rewards/rejected": -13.307855606079102, "step": 14603 }, { "epoch": 2.27, "learning_rate": 3.4361688884278232e-06, "logits/chosen": -1.5313507318496704, "logits/rejected": -2.529402732849121, "logps/chosen": -294.19976806640625, "logps/rejected": -617.5439453125, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -11.40471076965332, "rewards/margins": 7.298095703125, "rewards/rejected": -18.70280647277832, "step": 14604 }, { "epoch": 2.27, "learning_rate": 3.435435447896675e-06, "logits/chosen": -2.2116458415985107, "logits/rejected": -2.639497756958008, "logps/chosen": -430.7861328125, "logps/rejected": -459.85540771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6802000999450684, "rewards/margins": 11.742645263671875, "rewards/rejected": -15.422845840454102, "step": 14605 }, { "epoch": 2.27, "learning_rate": 3.434702007365527e-06, "logits/chosen": -2.450115203857422, "logits/rejected": -2.8594329357147217, "logps/chosen": -246.981201171875, "logps/rejected": -441.2089538574219, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -8.750054359436035, "rewards/margins": 6.5937395095825195, "rewards/rejected": -15.343793869018555, "step": 14606 }, { "epoch": 2.27, "learning_rate": 3.4339685668343793e-06, "logits/chosen": -2.7446391582489014, "logits/rejected": -1.819932222366333, "logps/chosen": -322.5533447265625, "logps/rejected": -227.8075408935547, "loss": 0.0508, "rewards/accuracies": 1.0, "rewards/chosen": -4.24627161026001, "rewards/margins": 5.220975399017334, "rewards/rejected": -9.467247009277344, "step": 14607 }, { "epoch": 2.27, "learning_rate": 3.4332351263032316e-06, "logits/chosen": -2.8453540802001953, "logits/rejected": -2.8020074367523193, "logps/chosen": -234.23419189453125, "logps/rejected": -316.6112060546875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.4985857009887695, "rewards/margins": 7.561018943786621, "rewards/rejected": -14.05960464477539, "step": 14608 }, { "epoch": 2.27, "learning_rate": 3.432501685772084e-06, "logits/chosen": -2.2712326049804688, "logits/rejected": -2.717407464981079, "logps/chosen": -257.58026123046875, "logps/rejected": -420.9576416015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.795440673828125, "rewards/margins": 7.992328643798828, "rewards/rejected": -17.787769317626953, "step": 14609 }, { "epoch": 2.27, "learning_rate": 3.4317682452409358e-06, "logits/chosen": -2.1223275661468506, "logits/rejected": -2.749119281768799, "logps/chosen": -172.28738403320312, "logps/rejected": -345.5467834472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.1455607414245605, "rewards/margins": 10.864398956298828, "rewards/rejected": -15.009960174560547, "step": 14610 }, { "epoch": 2.27, "learning_rate": 3.431034804709788e-06, "logits/chosen": -2.518522262573242, "logits/rejected": -2.705458164215088, "logps/chosen": -359.8203430175781, "logps/rejected": -395.043212890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.038417100906372, "rewards/margins": 8.285781860351562, "rewards/rejected": -11.324199676513672, "step": 14611 }, { "epoch": 2.27, "learning_rate": 3.43030136417864e-06, "logits/chosen": -2.4506685733795166, "logits/rejected": -3.100865602493286, "logps/chosen": -125.38687133789062, "logps/rejected": -314.2301940917969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.828125, "rewards/margins": 8.782220840454102, "rewards/rejected": -14.610345840454102, "step": 14612 }, { "epoch": 2.27, "learning_rate": 3.4295679236474923e-06, "logits/chosen": -2.1145832538604736, "logits/rejected": -2.523606538772583, "logps/chosen": -212.76705932617188, "logps/rejected": -314.5446472167969, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.078881025314331, "rewards/margins": 6.867668151855469, "rewards/rejected": -9.946549415588379, "step": 14613 }, { "epoch": 2.27, "learning_rate": 3.428834483116344e-06, "logits/chosen": -2.0499603748321533, "logits/rejected": -2.5295145511627197, "logps/chosen": -176.29666137695312, "logps/rejected": -433.08233642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.761303901672363, "rewards/margins": 11.205288887023926, "rewards/rejected": -17.96659278869629, "step": 14614 }, { "epoch": 2.27, "learning_rate": 3.428101042585196e-06, "logits/chosen": -1.3157265186309814, "logits/rejected": -2.553717851638794, "logps/chosen": -177.86807250976562, "logps/rejected": -430.83319091796875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.2263360023498535, "rewards/margins": 10.190958023071289, "rewards/rejected": -14.417293548583984, "step": 14615 }, { "epoch": 2.27, "learning_rate": 3.4273676020540483e-06, "logits/chosen": -2.6260437965393066, "logits/rejected": -2.6136486530303955, "logps/chosen": -321.01458740234375, "logps/rejected": -422.84063720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3906891345977783, "rewards/margins": 12.378107070922852, "rewards/rejected": -14.768796920776367, "step": 14616 }, { "epoch": 2.27, "learning_rate": 3.4266341615229006e-06, "logits/chosen": -2.8627076148986816, "logits/rejected": -2.906353235244751, "logps/chosen": -143.9052276611328, "logps/rejected": -252.08079528808594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.136120796203613, "rewards/margins": 8.699823379516602, "rewards/rejected": -14.835944175720215, "step": 14617 }, { "epoch": 2.27, "learning_rate": 3.425900720991753e-06, "logits/chosen": -2.585604429244995, "logits/rejected": -2.704136610031128, "logps/chosen": -422.58148193359375, "logps/rejected": -401.1529846191406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.7966766357421875, "rewards/margins": 7.373809814453125, "rewards/rejected": -11.170486450195312, "step": 14618 }, { "epoch": 2.27, "learning_rate": 3.425167280460605e-06, "logits/chosen": -2.645908832550049, "logits/rejected": -2.890204429626465, "logps/chosen": -68.40291595458984, "logps/rejected": -279.7469482421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.230833530426025, "rewards/margins": 10.605853080749512, "rewards/rejected": -15.836687088012695, "step": 14619 }, { "epoch": 2.27, "learning_rate": 3.424433839929457e-06, "logits/chosen": -1.4677178859710693, "logits/rejected": -3.0135412216186523, "logps/chosen": -267.693115234375, "logps/rejected": -617.6624755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.871432304382324, "rewards/margins": 18.223052978515625, "rewards/rejected": -23.094486236572266, "step": 14620 }, { "epoch": 2.27, "learning_rate": 3.423700399398309e-06, "logits/chosen": -2.6870946884155273, "logits/rejected": -2.853973150253296, "logps/chosen": -258.80914306640625, "logps/rejected": -310.89617919921875, "loss": 0.0835, "rewards/accuracies": 1.0, "rewards/chosen": -10.672357559204102, "rewards/margins": 6.518077850341797, "rewards/rejected": -17.1904354095459, "step": 14621 }, { "epoch": 2.27, "learning_rate": 3.4229669588671613e-06, "logits/chosen": -2.5372111797332764, "logits/rejected": -2.4480197429656982, "logps/chosen": -129.5615692138672, "logps/rejected": -241.469482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9067606925964355, "rewards/margins": 10.34920883178711, "rewards/rejected": -14.255969047546387, "step": 14622 }, { "epoch": 2.27, "learning_rate": 3.422233518336013e-06, "logits/chosen": -2.6819827556610107, "logits/rejected": -2.8377506732940674, "logps/chosen": -233.69171142578125, "logps/rejected": -372.7092590332031, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.672481060028076, "rewards/margins": 8.823019027709961, "rewards/rejected": -12.495500564575195, "step": 14623 }, { "epoch": 2.27, "learning_rate": 3.4215000778048655e-06, "logits/chosen": -2.6229076385498047, "logits/rejected": -2.7168800830841064, "logps/chosen": -130.9236297607422, "logps/rejected": -263.4063415527344, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -9.445825576782227, "rewards/margins": 7.136612892150879, "rewards/rejected": -16.58243751525879, "step": 14624 }, { "epoch": 2.27, "learning_rate": 3.4207666372737178e-06, "logits/chosen": -1.448900818824768, "logits/rejected": -2.539062738418579, "logps/chosen": -126.25926971435547, "logps/rejected": -446.5599060058594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.898436546325684, "rewards/margins": 9.261886596679688, "rewards/rejected": -14.160324096679688, "step": 14625 }, { "epoch": 2.27, "learning_rate": 3.4200331967425697e-06, "logits/chosen": -2.0006916522979736, "logits/rejected": -2.571911573410034, "logps/chosen": -81.29450988769531, "logps/rejected": -281.7672119140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.860310077667236, "rewards/margins": 13.518365859985352, "rewards/rejected": -18.37867546081543, "step": 14626 }, { "epoch": 2.27, "learning_rate": 3.419299756211422e-06, "logits/chosen": -1.6174685955047607, "logits/rejected": -2.6154067516326904, "logps/chosen": -98.19818115234375, "logps/rejected": -195.1144561767578, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -5.117115497589111, "rewards/margins": 5.138352394104004, "rewards/rejected": -10.255468368530273, "step": 14627 }, { "epoch": 2.27, "learning_rate": 3.418566315680274e-06, "logits/chosen": -2.3974945545196533, "logits/rejected": -1.8985776901245117, "logps/chosen": -334.1809387207031, "logps/rejected": -373.37896728515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.620907783508301, "rewards/margins": 8.82675838470459, "rewards/rejected": -14.44766616821289, "step": 14628 }, { "epoch": 2.28, "learning_rate": 3.417832875149126e-06, "logits/chosen": -3.0151546001434326, "logits/rejected": -1.8085737228393555, "logps/chosen": -883.1221923828125, "logps/rejected": -474.9812927246094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.187946796417236, "rewards/margins": 11.368642807006836, "rewards/rejected": -15.556589126586914, "step": 14629 }, { "epoch": 2.28, "learning_rate": 3.417099434617978e-06, "logits/chosen": -2.1211659908294678, "logits/rejected": -2.6677968502044678, "logps/chosen": -165.87628173828125, "logps/rejected": -411.740966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8686604499816895, "rewards/margins": 12.263717651367188, "rewards/rejected": -19.13237762451172, "step": 14630 }, { "epoch": 2.28, "learning_rate": 3.4163659940868303e-06, "logits/chosen": -2.3838000297546387, "logits/rejected": -1.775294542312622, "logps/chosen": -403.2539978027344, "logps/rejected": -386.868408203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.94189453125, "rewards/margins": 10.380476951599121, "rewards/rejected": -13.322371482849121, "step": 14631 }, { "epoch": 2.28, "learning_rate": 3.415632553555682e-06, "logits/chosen": -2.501316547393799, "logits/rejected": -2.2836103439331055, "logps/chosen": -799.7911376953125, "logps/rejected": -777.3997802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.463710784912109, "rewards/margins": 12.87320327758789, "rewards/rejected": -19.3369140625, "step": 14632 }, { "epoch": 2.28, "learning_rate": 3.4148991130245345e-06, "logits/chosen": -2.6278483867645264, "logits/rejected": -2.4416041374206543, "logps/chosen": -278.4529113769531, "logps/rejected": -333.0141296386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.2314858436584473, "rewards/margins": 10.718351364135742, "rewards/rejected": -12.949836730957031, "step": 14633 }, { "epoch": 2.28, "learning_rate": 3.414165672493387e-06, "logits/chosen": -2.709888219833374, "logits/rejected": -1.3997812271118164, "logps/chosen": -575.767578125, "logps/rejected": -290.4591064453125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.648301124572754, "rewards/margins": 6.8766984939575195, "rewards/rejected": -13.524999618530273, "step": 14634 }, { "epoch": 2.28, "learning_rate": 3.4134322319622387e-06, "logits/chosen": -2.1272659301757812, "logits/rejected": -2.707496404647827, "logps/chosen": -138.0488739013672, "logps/rejected": -394.2935485839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.858939170837402, "rewards/margins": 12.848335266113281, "rewards/rejected": -17.707275390625, "step": 14635 }, { "epoch": 2.28, "learning_rate": 3.412698791431091e-06, "logits/chosen": -2.0357978343963623, "logits/rejected": -2.676104784011841, "logps/chosen": -241.13525390625, "logps/rejected": -469.30859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.418956756591797, "rewards/margins": 10.095884323120117, "rewards/rejected": -13.514841079711914, "step": 14636 }, { "epoch": 2.28, "learning_rate": 3.411965350899943e-06, "logits/chosen": -2.5523862838745117, "logits/rejected": -1.6897073984146118, "logps/chosen": -716.0792236328125, "logps/rejected": -513.6270751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5884978771209717, "rewards/margins": 15.170177459716797, "rewards/rejected": -18.75867462158203, "step": 14637 }, { "epoch": 2.28, "learning_rate": 3.411231910368795e-06, "logits/chosen": -2.3957290649414062, "logits/rejected": -2.858464002609253, "logps/chosen": -257.43060302734375, "logps/rejected": -270.12939453125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -8.483529090881348, "rewards/margins": 5.510474681854248, "rewards/rejected": -13.994003295898438, "step": 14638 }, { "epoch": 2.28, "learning_rate": 3.410498469837647e-06, "logits/chosen": -2.129953622817993, "logits/rejected": -2.5251007080078125, "logps/chosen": -161.1888885498047, "logps/rejected": -388.88250732421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.395781517028809, "rewards/margins": 10.962021827697754, "rewards/rejected": -15.357803344726562, "step": 14639 }, { "epoch": 2.28, "learning_rate": 3.4097650293064993e-06, "logits/chosen": -2.2860465049743652, "logits/rejected": -2.719609022140503, "logps/chosen": -92.1856689453125, "logps/rejected": -212.466064453125, "loss": 0.7365, "rewards/accuracies": 0.5, "rewards/chosen": -7.319025039672852, "rewards/margins": 4.20644998550415, "rewards/rejected": -11.525474548339844, "step": 14640 }, { "epoch": 2.28, "learning_rate": 3.4090315887753512e-06, "logits/chosen": -2.8791515827178955, "logits/rejected": -2.5600996017456055, "logps/chosen": -578.379150390625, "logps/rejected": -615.5538330078125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.515878677368164, "rewards/margins": 11.633790969848633, "rewards/rejected": -19.149669647216797, "step": 14641 }, { "epoch": 2.28, "learning_rate": 3.408298148244204e-06, "logits/chosen": -2.148491144180298, "logits/rejected": -2.3228063583374023, "logps/chosen": -520.1357421875, "logps/rejected": -435.95361328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.273353099822998, "rewards/margins": 8.497353553771973, "rewards/rejected": -13.770707130432129, "step": 14642 }, { "epoch": 2.28, "learning_rate": 3.407564707713056e-06, "logits/chosen": -2.477917432785034, "logits/rejected": -2.7550172805786133, "logps/chosen": -269.6200256347656, "logps/rejected": -421.8798828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.268665790557861, "rewards/margins": 9.717723846435547, "rewards/rejected": -14.98638916015625, "step": 14643 }, { "epoch": 2.28, "learning_rate": 3.4068312671819077e-06, "logits/chosen": -2.328770637512207, "logits/rejected": -2.7348334789276123, "logps/chosen": -102.4569091796875, "logps/rejected": -216.73757934570312, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -8.045124053955078, "rewards/margins": 6.845365524291992, "rewards/rejected": -14.89048957824707, "step": 14644 }, { "epoch": 2.28, "learning_rate": 3.40609782665076e-06, "logits/chosen": -2.470822334289551, "logits/rejected": -2.64315128326416, "logps/chosen": -335.2194519042969, "logps/rejected": -546.7014770507812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.0401229858398438, "rewards/margins": 9.624187469482422, "rewards/rejected": -12.664310455322266, "step": 14645 }, { "epoch": 2.28, "learning_rate": 3.405364386119612e-06, "logits/chosen": -2.800333023071289, "logits/rejected": -2.8702118396759033, "logps/chosen": -632.2711181640625, "logps/rejected": -555.5133666992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9880170822143555, "rewards/margins": 10.156055450439453, "rewards/rejected": -15.144072532653809, "step": 14646 }, { "epoch": 2.28, "learning_rate": 3.404630945588464e-06, "logits/chosen": -1.817785620689392, "logits/rejected": -2.602712631225586, "logps/chosen": -109.06007385253906, "logps/rejected": -267.87322998046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.723233461380005, "rewards/margins": 7.232027530670166, "rewards/rejected": -9.95526123046875, "step": 14647 }, { "epoch": 2.28, "learning_rate": 3.403897505057316e-06, "logits/chosen": -2.4828639030456543, "logits/rejected": -2.8338940143585205, "logps/chosen": -389.23956298828125, "logps/rejected": -456.3727722167969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.58730411529541, "rewards/margins": 8.752994537353516, "rewards/rejected": -17.340299606323242, "step": 14648 }, { "epoch": 2.28, "learning_rate": 3.4031640645261684e-06, "logits/chosen": -2.0741426944732666, "logits/rejected": -2.844696044921875, "logps/chosen": -117.87339782714844, "logps/rejected": -359.4408874511719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.014866352081299, "rewards/margins": 8.734222412109375, "rewards/rejected": -14.749089241027832, "step": 14649 }, { "epoch": 2.28, "learning_rate": 3.4024306239950207e-06, "logits/chosen": -1.7946337461471558, "logits/rejected": -2.4934113025665283, "logps/chosen": -170.79251098632812, "logps/rejected": -468.21112060546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.4616193771362305, "rewards/margins": 11.909538269042969, "rewards/rejected": -17.371158599853516, "step": 14650 }, { "epoch": 2.28, "learning_rate": 3.401697183463873e-06, "logits/chosen": -1.9669575691223145, "logits/rejected": -2.806546688079834, "logps/chosen": -159.98330688476562, "logps/rejected": -509.9664306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.349856853485107, "rewards/margins": 11.629951477050781, "rewards/rejected": -18.979808807373047, "step": 14651 }, { "epoch": 2.28, "learning_rate": 3.400963742932725e-06, "logits/chosen": -3.0090091228485107, "logits/rejected": -3.270841121673584, "logps/chosen": -155.44851684570312, "logps/rejected": -324.20001220703125, "loss": 0.4805, "rewards/accuracies": 0.5, "rewards/chosen": -7.33529806137085, "rewards/margins": 4.417529582977295, "rewards/rejected": -11.752827644348145, "step": 14652 }, { "epoch": 2.28, "learning_rate": 3.400230302401577e-06, "logits/chosen": -2.6446712017059326, "logits/rejected": -1.6674304008483887, "logps/chosen": -168.97764587402344, "logps/rejected": -160.346923828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.728351593017578, "rewards/margins": 6.059375762939453, "rewards/rejected": -11.787727355957031, "step": 14653 }, { "epoch": 2.28, "learning_rate": 3.399496861870429e-06, "logits/chosen": -2.7480390071868896, "logits/rejected": -2.4791464805603027, "logps/chosen": -669.4205322265625, "logps/rejected": -614.4275512695312, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.5821540355682373, "rewards/margins": 12.180826187133789, "rewards/rejected": -15.762980461120605, "step": 14654 }, { "epoch": 2.28, "learning_rate": 3.398763421339281e-06, "logits/chosen": -1.7485522031784058, "logits/rejected": -2.78214693069458, "logps/chosen": -221.44139099121094, "logps/rejected": -505.1829833984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.430202484130859, "rewards/margins": 7.966447830200195, "rewards/rejected": -14.396650314331055, "step": 14655 }, { "epoch": 2.28, "learning_rate": 3.398029980808133e-06, "logits/chosen": -0.7525539398193359, "logits/rejected": -2.203277587890625, "logps/chosen": -111.5892333984375, "logps/rejected": -414.5973205566406, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.575878143310547, "rewards/margins": 9.381815910339355, "rewards/rejected": -14.957694053649902, "step": 14656 }, { "epoch": 2.28, "learning_rate": 3.397296540276985e-06, "logits/chosen": -1.9620808362960815, "logits/rejected": -2.680595874786377, "logps/chosen": -106.848388671875, "logps/rejected": -539.443359375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -6.261202335357666, "rewards/margins": 6.864558219909668, "rewards/rejected": -13.125761032104492, "step": 14657 }, { "epoch": 2.28, "learning_rate": 3.3965630997458374e-06, "logits/chosen": -2.4002678394317627, "logits/rejected": -3.1785202026367188, "logps/chosen": -105.34158325195312, "logps/rejected": -290.931640625, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -4.812234878540039, "rewards/margins": 4.875418663024902, "rewards/rejected": -9.687653541564941, "step": 14658 }, { "epoch": 2.28, "learning_rate": 3.3958296592146897e-06, "logits/chosen": -2.672922134399414, "logits/rejected": -2.8246874809265137, "logps/chosen": -119.30419921875, "logps/rejected": -152.47463989257812, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -7.0058512687683105, "rewards/margins": 4.474330902099609, "rewards/rejected": -11.480182647705078, "step": 14659 }, { "epoch": 2.28, "learning_rate": 3.395096218683542e-06, "logits/chosen": -2.8543708324432373, "logits/rejected": -2.2322616577148438, "logps/chosen": -496.65911865234375, "logps/rejected": -497.9925842285156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.749825477600098, "rewards/margins": 8.967123031616211, "rewards/rejected": -14.716947555541992, "step": 14660 }, { "epoch": 2.28, "learning_rate": 3.394362778152394e-06, "logits/chosen": -2.8147165775299072, "logits/rejected": -2.478599786758423, "logps/chosen": -238.13681030273438, "logps/rejected": -332.86798095703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.161388874053955, "rewards/margins": 9.08644962310791, "rewards/rejected": -14.247838973999023, "step": 14661 }, { "epoch": 2.28, "learning_rate": 3.393629337621246e-06, "logits/chosen": -2.336146354675293, "logits/rejected": -2.6822612285614014, "logps/chosen": -146.20626831054688, "logps/rejected": -327.495361328125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.146289825439453, "rewards/margins": 5.735423564910889, "rewards/rejected": -11.8817138671875, "step": 14662 }, { "epoch": 2.28, "learning_rate": 3.392895897090098e-06, "logits/chosen": -2.6184887886047363, "logits/rejected": -2.6652088165283203, "logps/chosen": -191.790283203125, "logps/rejected": -330.67694091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.415114879608154, "rewards/margins": 10.805551528930664, "rewards/rejected": -16.220664978027344, "step": 14663 }, { "epoch": 2.28, "learning_rate": 3.39216245655895e-06, "logits/chosen": -1.5888900756835938, "logits/rejected": -2.6603825092315674, "logps/chosen": -171.55459594726562, "logps/rejected": -387.48577880859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.111202239990234, "rewards/margins": 9.859781265258789, "rewards/rejected": -14.970983505249023, "step": 14664 }, { "epoch": 2.28, "learning_rate": 3.3914290160278022e-06, "logits/chosen": -2.3754897117614746, "logits/rejected": -2.588669776916504, "logps/chosen": -118.31686401367188, "logps/rejected": -170.859130859375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.101304531097412, "rewards/margins": 6.69274377822876, "rewards/rejected": -10.794048309326172, "step": 14665 }, { "epoch": 2.28, "learning_rate": 3.390695575496654e-06, "logits/chosen": -2.0985443592071533, "logits/rejected": -2.746493101119995, "logps/chosen": -304.39056396484375, "logps/rejected": -485.9071044921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.835668563842773, "rewards/margins": 10.645538330078125, "rewards/rejected": -20.48120880126953, "step": 14666 }, { "epoch": 2.28, "learning_rate": 3.3899621349655064e-06, "logits/chosen": -2.866527557373047, "logits/rejected": -2.616579294204712, "logps/chosen": -432.5043029785156, "logps/rejected": -251.89816284179688, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -5.861323833465576, "rewards/margins": 3.840815544128418, "rewards/rejected": -9.702138900756836, "step": 14667 }, { "epoch": 2.28, "learning_rate": 3.3892286944343587e-06, "logits/chosen": -2.697632074356079, "logits/rejected": -2.6545679569244385, "logps/chosen": -239.5319366455078, "logps/rejected": -474.7009582519531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.573741912841797, "rewards/margins": 13.93515396118164, "rewards/rejected": -18.508895874023438, "step": 14668 }, { "epoch": 2.28, "learning_rate": 3.388495253903211e-06, "logits/chosen": -2.3138163089752197, "logits/rejected": -2.925133466720581, "logps/chosen": -128.80990600585938, "logps/rejected": -404.2012939453125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.286787509918213, "rewards/margins": 8.762985229492188, "rewards/rejected": -13.049772262573242, "step": 14669 }, { "epoch": 2.28, "learning_rate": 3.387761813372063e-06, "logits/chosen": -1.9322988986968994, "logits/rejected": -2.5516953468322754, "logps/chosen": -187.55206298828125, "logps/rejected": -227.36297607421875, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -5.504274368286133, "rewards/margins": 5.158747673034668, "rewards/rejected": -10.6630220413208, "step": 14670 }, { "epoch": 2.28, "learning_rate": 3.387028372840915e-06, "logits/chosen": -1.642930269241333, "logits/rejected": -2.39032244682312, "logps/chosen": -209.13424682617188, "logps/rejected": -312.19720458984375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.752326965332031, "rewards/margins": 10.52560806274414, "rewards/rejected": -16.277935028076172, "step": 14671 }, { "epoch": 2.28, "learning_rate": 3.386294932309767e-06, "logits/chosen": -1.6306639909744263, "logits/rejected": -2.7155256271362305, "logps/chosen": -171.39599609375, "logps/rejected": -553.2992553710938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.8639326095581055, "rewards/margins": 13.489459037780762, "rewards/rejected": -18.353391647338867, "step": 14672 }, { "epoch": 2.28, "learning_rate": 3.3855614917786194e-06, "logits/chosen": -2.7938408851623535, "logits/rejected": -2.989055633544922, "logps/chosen": -82.47624206542969, "logps/rejected": -258.3025817871094, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.028057098388672, "rewards/margins": 8.319311141967773, "rewards/rejected": -12.347368240356445, "step": 14673 }, { "epoch": 2.28, "learning_rate": 3.3848280512474712e-06, "logits/chosen": -2.6078286170959473, "logits/rejected": -1.5716382265090942, "logps/chosen": -223.73062133789062, "logps/rejected": -221.62042236328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.166643142700195, "rewards/margins": 8.248023986816406, "rewards/rejected": -12.414667129516602, "step": 14674 }, { "epoch": 2.28, "learning_rate": 3.384094610716323e-06, "logits/chosen": -2.6151394844055176, "logits/rejected": -2.5611793994903564, "logps/chosen": -484.9905700683594, "logps/rejected": -533.466796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.448111534118652, "rewards/margins": 13.099817276000977, "rewards/rejected": -19.547927856445312, "step": 14675 }, { "epoch": 2.28, "learning_rate": 3.383361170185176e-06, "logits/chosen": -2.565250873565674, "logits/rejected": -1.7187752723693848, "logps/chosen": -599.956787109375, "logps/rejected": -483.02557373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.707976341247559, "rewards/margins": 12.374958992004395, "rewards/rejected": -17.082935333251953, "step": 14676 }, { "epoch": 2.28, "learning_rate": 3.3826277296540277e-06, "logits/chosen": -2.400786876678467, "logits/rejected": -2.126958131790161, "logps/chosen": -625.7230224609375, "logps/rejected": -558.80224609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.914202690124512, "rewards/margins": 10.114790916442871, "rewards/rejected": -18.028993606567383, "step": 14677 }, { "epoch": 2.28, "learning_rate": 3.38189428912288e-06, "logits/chosen": -2.914804458618164, "logits/rejected": -2.8902463912963867, "logps/chosen": -132.89781188964844, "logps/rejected": -152.63241577148438, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.906879901885986, "rewards/margins": 7.498326301574707, "rewards/rejected": -12.405206680297852, "step": 14678 }, { "epoch": 2.28, "learning_rate": 3.381160848591732e-06, "logits/chosen": -2.6898369789123535, "logits/rejected": -2.4873790740966797, "logps/chosen": -187.8218536376953, "logps/rejected": -331.0620422363281, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -3.5498998165130615, "rewards/margins": 9.740626335144043, "rewards/rejected": -13.290525436401367, "step": 14679 }, { "epoch": 2.28, "learning_rate": 3.380427408060584e-06, "logits/chosen": -2.7318036556243896, "logits/rejected": -1.5160677433013916, "logps/chosen": -389.9717102050781, "logps/rejected": -248.45376586914062, "loss": 0.6449, "rewards/accuracies": 0.5, "rewards/chosen": -7.247330665588379, "rewards/margins": 5.897279739379883, "rewards/rejected": -13.144610404968262, "step": 14680 }, { "epoch": 2.28, "learning_rate": 3.379693967529436e-06, "logits/chosen": -2.8370115756988525, "logits/rejected": -2.602494955062866, "logps/chosen": -146.8775634765625, "logps/rejected": -207.9173583984375, "loss": 0.2902, "rewards/accuracies": 1.0, "rewards/chosen": -5.988862991333008, "rewards/margins": 8.352787971496582, "rewards/rejected": -14.341650009155273, "step": 14681 }, { "epoch": 2.28, "learning_rate": 3.3789605269982884e-06, "logits/chosen": -2.3134236335754395, "logits/rejected": -2.773397445678711, "logps/chosen": -149.8328399658203, "logps/rejected": -228.07948303222656, "loss": 0.341, "rewards/accuracies": 1.0, "rewards/chosen": -7.249744892120361, "rewards/margins": 2.389965295791626, "rewards/rejected": -9.639710426330566, "step": 14682 }, { "epoch": 2.28, "learning_rate": 3.3782270864671403e-06, "logits/chosen": -1.3645857572555542, "logits/rejected": -2.450888156890869, "logps/chosen": -72.45478820800781, "logps/rejected": -291.9637451171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0241994857788086, "rewards/margins": 9.801841735839844, "rewards/rejected": -12.826041221618652, "step": 14683 }, { "epoch": 2.28, "learning_rate": 3.3774936459359926e-06, "logits/chosen": -2.684932231903076, "logits/rejected": -3.156832695007324, "logps/chosen": -112.75331115722656, "logps/rejected": -351.204345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.277946472167969, "rewards/margins": 11.087288856506348, "rewards/rejected": -17.365236282348633, "step": 14684 }, { "epoch": 2.28, "learning_rate": 3.376760205404845e-06, "logits/chosen": -1.974280834197998, "logits/rejected": -2.0014045238494873, "logps/chosen": -249.31521606445312, "logps/rejected": -536.8037719726562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.64961051940918, "rewards/margins": 7.949538707733154, "rewards/rejected": -16.59914779663086, "step": 14685 }, { "epoch": 2.28, "learning_rate": 3.3760267648736967e-06, "logits/chosen": -2.4813385009765625, "logits/rejected": -2.716329336166382, "logps/chosen": -117.35293579101562, "logps/rejected": -297.18084716796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.759044647216797, "rewards/margins": 7.367008686065674, "rewards/rejected": -11.126052856445312, "step": 14686 }, { "epoch": 2.28, "learning_rate": 3.375293324342549e-06, "logits/chosen": -1.6895283460617065, "logits/rejected": -2.8396048545837402, "logps/chosen": -121.31214904785156, "logps/rejected": -356.62152099609375, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -5.710079193115234, "rewards/margins": 8.544503211975098, "rewards/rejected": -14.254583358764648, "step": 14687 }, { "epoch": 2.28, "learning_rate": 3.374559883811401e-06, "logits/chosen": -1.4881198406219482, "logits/rejected": -2.362471580505371, "logps/chosen": -92.79209899902344, "logps/rejected": -423.73602294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3807997703552246, "rewards/margins": 14.056412696838379, "rewards/rejected": -17.437211990356445, "step": 14688 }, { "epoch": 2.28, "learning_rate": 3.3738264432802532e-06, "logits/chosen": -2.789659023284912, "logits/rejected": -2.780125141143799, "logps/chosen": -142.5869903564453, "logps/rejected": -160.39036560058594, "loss": 0.1689, "rewards/accuracies": 1.0, "rewards/chosen": -7.197230339050293, "rewards/margins": 4.475555419921875, "rewards/rejected": -11.672786712646484, "step": 14689 }, { "epoch": 2.28, "learning_rate": 3.373093002749105e-06, "logits/chosen": -2.8347253799438477, "logits/rejected": -2.334557056427002, "logps/chosen": -253.2822265625, "logps/rejected": -259.39068603515625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.007083892822266, "rewards/margins": 6.429207801818848, "rewards/rejected": -11.436291694641113, "step": 14690 }, { "epoch": 2.28, "learning_rate": 3.3723595622179574e-06, "logits/chosen": -2.0659899711608887, "logits/rejected": -3.2656538486480713, "logps/chosen": -58.95718002319336, "logps/rejected": -332.0799865722656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.347747802734375, "rewards/margins": 9.532930374145508, "rewards/rejected": -13.880678176879883, "step": 14691 }, { "epoch": 2.28, "learning_rate": 3.3716261216868093e-06, "logits/chosen": -1.202044129371643, "logits/rejected": -2.6170647144317627, "logps/chosen": -84.14404296875, "logps/rejected": -320.2744445800781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.803384780883789, "rewards/margins": 9.689390182495117, "rewards/rejected": -14.492774963378906, "step": 14692 }, { "epoch": 2.29, "learning_rate": 3.370892681155662e-06, "logits/chosen": -2.4422807693481445, "logits/rejected": -2.645014524459839, "logps/chosen": -220.29452514648438, "logps/rejected": -344.2176208496094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.540115356445312, "rewards/margins": 8.779184341430664, "rewards/rejected": -17.319299697875977, "step": 14693 }, { "epoch": 2.29, "learning_rate": 3.370159240624514e-06, "logits/chosen": -2.573514699935913, "logits/rejected": -2.5453662872314453, "logps/chosen": -114.35768127441406, "logps/rejected": -330.3720703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.746308326721191, "rewards/margins": 10.658857345581055, "rewards/rejected": -16.405166625976562, "step": 14694 }, { "epoch": 2.29, "learning_rate": 3.3694258000933658e-06, "logits/chosen": -2.8782060146331787, "logits/rejected": -3.337219476699829, "logps/chosen": -319.1008605957031, "logps/rejected": -335.65692138671875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -6.749956130981445, "rewards/margins": 5.534161567687988, "rewards/rejected": -12.284116744995117, "step": 14695 }, { "epoch": 2.29, "learning_rate": 3.368692359562218e-06, "logits/chosen": -2.312744140625, "logits/rejected": -2.6801135540008545, "logps/chosen": -186.40809631347656, "logps/rejected": -259.59344482421875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.475919723510742, "rewards/margins": 6.641613960266113, "rewards/rejected": -13.117533683776855, "step": 14696 }, { "epoch": 2.29, "learning_rate": 3.36795891903107e-06, "logits/chosen": -2.7143890857696533, "logits/rejected": -2.2812910079956055, "logps/chosen": -211.22897338867188, "logps/rejected": -384.68096923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.910388946533203, "rewards/margins": 10.373631477355957, "rewards/rejected": -16.284019470214844, "step": 14697 }, { "epoch": 2.29, "learning_rate": 3.3672254784999223e-06, "logits/chosen": -2.774247646331787, "logits/rejected": -2.818535327911377, "logps/chosen": -103.90784454345703, "logps/rejected": -312.20782470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.600580215454102, "rewards/margins": 11.082740783691406, "rewards/rejected": -16.683320999145508, "step": 14698 }, { "epoch": 2.29, "learning_rate": 3.366492037968774e-06, "logits/chosen": -2.8785974979400635, "logits/rejected": -2.4752678871154785, "logps/chosen": -137.23675537109375, "logps/rejected": -172.8611602783203, "loss": 0.4025, "rewards/accuracies": 0.5, "rewards/chosen": -5.549065589904785, "rewards/margins": 4.040454864501953, "rewards/rejected": -9.589521408081055, "step": 14699 }, { "epoch": 2.29, "learning_rate": 3.3657585974376264e-06, "logits/chosen": -2.362677574157715, "logits/rejected": -2.1150870323181152, "logps/chosen": -408.7816467285156, "logps/rejected": -366.578369140625, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -6.382078170776367, "rewards/margins": 7.0045647621154785, "rewards/rejected": -13.386642456054688, "step": 14700 }, { "epoch": 2.29, "learning_rate": 3.3650251569064787e-06, "logits/chosen": -2.611433982849121, "logits/rejected": -2.6234679222106934, "logps/chosen": -189.8705596923828, "logps/rejected": -301.4369812011719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.335630416870117, "rewards/margins": 9.880233764648438, "rewards/rejected": -16.215864181518555, "step": 14701 }, { "epoch": 2.29, "learning_rate": 3.364291716375331e-06, "logits/chosen": -2.1749136447906494, "logits/rejected": -3.022488832473755, "logps/chosen": -140.3572998046875, "logps/rejected": -455.8931884765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.924074172973633, "rewards/margins": 7.80503511428833, "rewards/rejected": -12.729108810424805, "step": 14702 }, { "epoch": 2.29, "learning_rate": 3.363558275844183e-06, "logits/chosen": -2.243373155593872, "logits/rejected": -2.538316011428833, "logps/chosen": -176.6623992919922, "logps/rejected": -420.6959228515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -9.0750150680542, "rewards/margins": 6.6238203048706055, "rewards/rejected": -15.698835372924805, "step": 14703 }, { "epoch": 2.29, "learning_rate": 3.362824835313035e-06, "logits/chosen": -1.415565013885498, "logits/rejected": -2.6734066009521484, "logps/chosen": -128.6172332763672, "logps/rejected": -547.8245849609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.464151859283447, "rewards/margins": 11.315975189208984, "rewards/rejected": -16.780128479003906, "step": 14704 }, { "epoch": 2.29, "learning_rate": 3.362091394781887e-06, "logits/chosen": -1.7985912561416626, "logits/rejected": -2.396192789077759, "logps/chosen": -254.708740234375, "logps/rejected": -295.4517822265625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -7.439521789550781, "rewards/margins": 7.394543647766113, "rewards/rejected": -14.834065437316895, "step": 14705 }, { "epoch": 2.29, "learning_rate": 3.361357954250739e-06, "logits/chosen": -1.7479392290115356, "logits/rejected": -2.741811990737915, "logps/chosen": -99.61808776855469, "logps/rejected": -243.73135375976562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.016569137573242, "rewards/margins": 7.293187141418457, "rewards/rejected": -15.309755325317383, "step": 14706 }, { "epoch": 2.29, "learning_rate": 3.3606245137195913e-06, "logits/chosen": -1.987286925315857, "logits/rejected": -2.589245557785034, "logps/chosen": -111.678955078125, "logps/rejected": -210.49766540527344, "loss": 0.246, "rewards/accuracies": 1.0, "rewards/chosen": -8.212048530578613, "rewards/margins": 4.679895401000977, "rewards/rejected": -12.891944885253906, "step": 14707 }, { "epoch": 2.29, "learning_rate": 3.359891073188443e-06, "logits/chosen": -1.2785024642944336, "logits/rejected": -2.632974863052368, "logps/chosen": -112.78543090820312, "logps/rejected": -328.58062744140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.256845474243164, "rewards/margins": 7.601507186889648, "rewards/rejected": -14.858352661132812, "step": 14708 }, { "epoch": 2.29, "learning_rate": 3.3591576326572955e-06, "logits/chosen": -2.7833592891693115, "logits/rejected": -2.5535888671875, "logps/chosen": -191.4876708984375, "logps/rejected": -329.8312683105469, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3692718744277954, "rewards/margins": 9.131814956665039, "rewards/rejected": -10.501086235046387, "step": 14709 }, { "epoch": 2.29, "learning_rate": 3.3584241921261478e-06, "logits/chosen": -2.8382091522216797, "logits/rejected": -2.1833925247192383, "logps/chosen": -498.7169189453125, "logps/rejected": -373.0601501464844, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -4.869292259216309, "rewards/margins": 11.45350456237793, "rewards/rejected": -16.322795867919922, "step": 14710 }, { "epoch": 2.29, "learning_rate": 3.357690751595e-06, "logits/chosen": -2.3834915161132812, "logits/rejected": -2.6935198307037354, "logps/chosen": -130.59359741210938, "logps/rejected": -378.3391418457031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.905364036560059, "rewards/margins": 10.64602279663086, "rewards/rejected": -15.551387786865234, "step": 14711 }, { "epoch": 2.29, "learning_rate": 3.356957311063852e-06, "logits/chosen": -2.455855131149292, "logits/rejected": -1.6795014142990112, "logps/chosen": -176.94496154785156, "logps/rejected": -380.1683349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.789093971252441, "rewards/margins": 11.812023162841797, "rewards/rejected": -19.601118087768555, "step": 14712 }, { "epoch": 2.29, "learning_rate": 3.356223870532704e-06, "logits/chosen": -2.6947150230407715, "logits/rejected": -1.8346524238586426, "logps/chosen": -747.4808959960938, "logps/rejected": -464.4544677734375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -4.637494087219238, "rewards/margins": 9.40530776977539, "rewards/rejected": -14.042802810668945, "step": 14713 }, { "epoch": 2.29, "learning_rate": 3.355490430001556e-06, "logits/chosen": -2.3920161724090576, "logits/rejected": -2.1567461490631104, "logps/chosen": -491.48651123046875, "logps/rejected": -525.515380859375, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -11.321937561035156, "rewards/margins": 7.691733360290527, "rewards/rejected": -19.013671875, "step": 14714 }, { "epoch": 2.29, "learning_rate": 3.354756989470408e-06, "logits/chosen": -2.6083154678344727, "logits/rejected": -1.5104116201400757, "logps/chosen": -323.12725830078125, "logps/rejected": -306.8221435546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.668378829956055, "rewards/margins": 8.218326568603516, "rewards/rejected": -15.88670539855957, "step": 14715 }, { "epoch": 2.29, "learning_rate": 3.3540235489392603e-06, "logits/chosen": -1.7174957990646362, "logits/rejected": -1.959979772567749, "logps/chosen": -233.63900756835938, "logps/rejected": -405.876708984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9692583084106445, "rewards/margins": 13.790092468261719, "rewards/rejected": -17.75935173034668, "step": 14716 }, { "epoch": 2.29, "learning_rate": 3.353290108408112e-06, "logits/chosen": -2.7157297134399414, "logits/rejected": -2.6968820095062256, "logps/chosen": -150.7467041015625, "logps/rejected": -207.27532958984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7094826698303223, "rewards/margins": 9.83961296081543, "rewards/rejected": -13.549095153808594, "step": 14717 }, { "epoch": 2.29, "learning_rate": 3.352556667876965e-06, "logits/chosen": -1.4000871181488037, "logits/rejected": -2.7125582695007324, "logps/chosen": -155.75787353515625, "logps/rejected": -411.14794921875, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -8.201192855834961, "rewards/margins": 6.83847713470459, "rewards/rejected": -15.039669036865234, "step": 14718 }, { "epoch": 2.29, "learning_rate": 3.3518232273458168e-06, "logits/chosen": -2.7209534645080566, "logits/rejected": -2.850092649459839, "logps/chosen": -167.9012451171875, "logps/rejected": -230.31094360351562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.746093511581421, "rewards/margins": 9.651741981506348, "rewards/rejected": -13.397834777832031, "step": 14719 }, { "epoch": 2.29, "learning_rate": 3.351089786814669e-06, "logits/chosen": -2.4851362705230713, "logits/rejected": -2.7759931087493896, "logps/chosen": -302.1669006347656, "logps/rejected": -486.7239990234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.407790660858154, "rewards/margins": 10.590800285339355, "rewards/rejected": -16.99859046936035, "step": 14720 }, { "epoch": 2.29, "learning_rate": 3.350356346283521e-06, "logits/chosen": -2.3172197341918945, "logits/rejected": -2.7557342052459717, "logps/chosen": -122.93388366699219, "logps/rejected": -272.4960021972656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.723443984985352, "rewards/margins": 8.321272850036621, "rewards/rejected": -14.044717788696289, "step": 14721 }, { "epoch": 2.29, "learning_rate": 3.3496229057523733e-06, "logits/chosen": -2.6866307258605957, "logits/rejected": -2.8635690212249756, "logps/chosen": -508.7666015625, "logps/rejected": -630.0352172851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.279546737670898, "rewards/margins": 11.97375774383545, "rewards/rejected": -18.25330352783203, "step": 14722 }, { "epoch": 2.29, "learning_rate": 3.348889465221225e-06, "logits/chosen": -2.8415045738220215, "logits/rejected": -3.249525308609009, "logps/chosen": -203.78607177734375, "logps/rejected": -414.728759765625, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -5.7447919845581055, "rewards/margins": 7.649927139282227, "rewards/rejected": -13.394720077514648, "step": 14723 }, { "epoch": 2.29, "learning_rate": 3.348156024690077e-06, "logits/chosen": -2.2670223712921143, "logits/rejected": -2.5395760536193848, "logps/chosen": -166.23489379882812, "logps/rejected": -239.5218505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.072074890136719, "rewards/margins": 10.199638366699219, "rewards/rejected": -14.271713256835938, "step": 14724 }, { "epoch": 2.29, "learning_rate": 3.3474225841589293e-06, "logits/chosen": -1.906179666519165, "logits/rejected": -2.821577548980713, "logps/chosen": -261.0263366699219, "logps/rejected": -377.5797119140625, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/chosen": -8.249954223632812, "rewards/margins": 6.68658447265625, "rewards/rejected": -14.936538696289062, "step": 14725 }, { "epoch": 2.29, "learning_rate": 3.3466891436277816e-06, "logits/chosen": -2.582366704940796, "logits/rejected": -2.6623647212982178, "logps/chosen": -423.0201721191406, "logps/rejected": -459.7900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.482419013977051, "rewards/margins": 11.224078178405762, "rewards/rejected": -17.706497192382812, "step": 14726 }, { "epoch": 2.29, "learning_rate": 3.345955703096634e-06, "logits/chosen": -2.1801209449768066, "logits/rejected": -2.826732635498047, "logps/chosen": -316.57122802734375, "logps/rejected": -463.9050598144531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.183072090148926, "rewards/margins": 10.426610946655273, "rewards/rejected": -14.609683990478516, "step": 14727 }, { "epoch": 2.29, "learning_rate": 3.345222262565486e-06, "logits/chosen": -2.128648519515991, "logits/rejected": -2.7633306980133057, "logps/chosen": -129.58871459960938, "logps/rejected": -339.7885437011719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.972135543823242, "rewards/margins": 8.928932189941406, "rewards/rejected": -14.901067733764648, "step": 14728 }, { "epoch": 2.29, "learning_rate": 3.344488822034338e-06, "logits/chosen": -1.1055587530136108, "logits/rejected": -2.579986333847046, "logps/chosen": -261.9312438964844, "logps/rejected": -542.533203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.1800918579101562, "rewards/margins": 10.042112350463867, "rewards/rejected": -13.222204208374023, "step": 14729 }, { "epoch": 2.29, "learning_rate": 3.34375538150319e-06, "logits/chosen": -2.0903384685516357, "logits/rejected": -2.4332735538482666, "logps/chosen": -233.10787963867188, "logps/rejected": -386.94097900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.1129069328308105, "rewards/margins": 9.118681907653809, "rewards/rejected": -15.231589317321777, "step": 14730 }, { "epoch": 2.29, "learning_rate": 3.3430219409720423e-06, "logits/chosen": -2.91943359375, "logits/rejected": -2.4408743381500244, "logps/chosen": -375.0731201171875, "logps/rejected": -305.578369140625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -6.7725958824157715, "rewards/margins": 7.019349098205566, "rewards/rejected": -13.79194450378418, "step": 14731 }, { "epoch": 2.29, "learning_rate": 3.342288500440894e-06, "logits/chosen": -1.5746911764144897, "logits/rejected": -2.6326186656951904, "logps/chosen": -152.08624267578125, "logps/rejected": -451.6781005859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.4382805824279785, "rewards/margins": 11.174652099609375, "rewards/rejected": -18.612932205200195, "step": 14732 }, { "epoch": 2.29, "learning_rate": 3.341555059909746e-06, "logits/chosen": -1.9984134435653687, "logits/rejected": -2.624920129776001, "logps/chosen": -272.98675537109375, "logps/rejected": -529.99072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.03294563293457, "rewards/margins": 15.824609756469727, "rewards/rejected": -22.857555389404297, "step": 14733 }, { "epoch": 2.29, "learning_rate": 3.3408216193785983e-06, "logits/chosen": -2.2640678882598877, "logits/rejected": -2.857910633087158, "logps/chosen": -374.7336730957031, "logps/rejected": -824.39111328125, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -10.518959045410156, "rewards/margins": 6.439010143280029, "rewards/rejected": -16.957969665527344, "step": 14734 }, { "epoch": 2.29, "learning_rate": 3.3400881788474506e-06, "logits/chosen": -2.5393917560577393, "logits/rejected": -1.9238758087158203, "logps/chosen": -223.84945678710938, "logps/rejected": -269.0867614746094, "loss": 0.1026, "rewards/accuracies": 1.0, "rewards/chosen": -4.83638858795166, "rewards/margins": 6.554654121398926, "rewards/rejected": -11.391042709350586, "step": 14735 }, { "epoch": 2.29, "learning_rate": 3.339354738316303e-06, "logits/chosen": -2.913395404815674, "logits/rejected": -2.8958966732025146, "logps/chosen": -223.3470458984375, "logps/rejected": -383.5562438964844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.763797283172607, "rewards/margins": 16.48935317993164, "rewards/rejected": -23.253150939941406, "step": 14736 }, { "epoch": 2.29, "learning_rate": 3.338621297785155e-06, "logits/chosen": -2.771407127380371, "logits/rejected": -2.7087454795837402, "logps/chosen": -449.50091552734375, "logps/rejected": -434.9218444824219, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -9.255094528198242, "rewards/margins": 6.1241774559021, "rewards/rejected": -15.3792724609375, "step": 14737 }, { "epoch": 2.29, "learning_rate": 3.337887857254007e-06, "logits/chosen": -2.803168773651123, "logits/rejected": -0.9462147355079651, "logps/chosen": -240.9801788330078, "logps/rejected": -129.79788208007812, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -1.106275200843811, "rewards/margins": 7.874684810638428, "rewards/rejected": -8.98095989227295, "step": 14738 }, { "epoch": 2.29, "learning_rate": 3.337154416722859e-06, "logits/chosen": -2.9940483570098877, "logits/rejected": -2.355710506439209, "logps/chosen": -250.96873474121094, "logps/rejected": -286.6543884277344, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -5.995581150054932, "rewards/margins": 7.351375579833984, "rewards/rejected": -13.346956253051758, "step": 14739 }, { "epoch": 2.29, "learning_rate": 3.3364209761917113e-06, "logits/chosen": -2.8167881965637207, "logits/rejected": -2.6104085445404053, "logps/chosen": -610.47021484375, "logps/rejected": -644.16943359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.998797655105591, "rewards/margins": 8.536184310913086, "rewards/rejected": -12.534982681274414, "step": 14740 }, { "epoch": 2.29, "learning_rate": 3.335687535660563e-06, "logits/chosen": -1.842306137084961, "logits/rejected": -0.8739676475524902, "logps/chosen": -497.5339050292969, "logps/rejected": -393.76788330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.215386390686035, "rewards/margins": 12.946732521057129, "rewards/rejected": -19.162118911743164, "step": 14741 }, { "epoch": 2.29, "learning_rate": 3.3349540951294155e-06, "logits/chosen": -2.710386037826538, "logits/rejected": -3.015406370162964, "logps/chosen": -513.8616943359375, "logps/rejected": -527.7281494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.881842851638794, "rewards/margins": 10.990262985229492, "rewards/rejected": -12.872105598449707, "step": 14742 }, { "epoch": 2.29, "learning_rate": 3.3342206545982678e-06, "logits/chosen": -2.2698166370391846, "logits/rejected": -2.485607147216797, "logps/chosen": -271.76812744140625, "logps/rejected": -321.2196960449219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.245089530944824, "rewards/margins": 15.368097305297852, "rewards/rejected": -21.613187789916992, "step": 14743 }, { "epoch": 2.29, "learning_rate": 3.3334872140671197e-06, "logits/chosen": -2.972839593887329, "logits/rejected": -1.6113271713256836, "logps/chosen": -292.62347412109375, "logps/rejected": -202.4182891845703, "loss": 0.1478, "rewards/accuracies": 1.0, "rewards/chosen": -8.430336952209473, "rewards/margins": 4.6197991371154785, "rewards/rejected": -13.05013656616211, "step": 14744 }, { "epoch": 2.29, "learning_rate": 3.332753773535972e-06, "logits/chosen": -2.5829017162323, "logits/rejected": -2.656201124191284, "logps/chosen": -96.40556335449219, "logps/rejected": -294.3522644042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.228472709655762, "rewards/margins": 11.882928848266602, "rewards/rejected": -19.11140251159668, "step": 14745 }, { "epoch": 2.29, "learning_rate": 3.332020333004824e-06, "logits/chosen": -3.0397777557373047, "logits/rejected": -2.5198769569396973, "logps/chosen": -209.8085479736328, "logps/rejected": -208.2612762451172, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3683512210845947, "rewards/margins": 10.121637344360352, "rewards/rejected": -13.489989280700684, "step": 14746 }, { "epoch": 2.29, "learning_rate": 3.331286892473676e-06, "logits/chosen": -2.548363208770752, "logits/rejected": -2.934065818786621, "logps/chosen": -234.91290283203125, "logps/rejected": -556.1922607421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.207486152648926, "rewards/margins": 8.223047256469727, "rewards/rejected": -12.430533409118652, "step": 14747 }, { "epoch": 2.29, "learning_rate": 3.330553451942528e-06, "logits/chosen": -1.5261119604110718, "logits/rejected": -2.4713356494903564, "logps/chosen": -219.799072265625, "logps/rejected": -484.6599426269531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.924856185913086, "rewards/margins": 11.01736068725586, "rewards/rejected": -16.942216873168945, "step": 14748 }, { "epoch": 2.29, "learning_rate": 3.3298200114113803e-06, "logits/chosen": -2.388679265975952, "logits/rejected": -1.9790452718734741, "logps/chosen": -218.35186767578125, "logps/rejected": -387.78314208984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.628741502761841, "rewards/margins": 11.666854858398438, "rewards/rejected": -15.295595169067383, "step": 14749 }, { "epoch": 2.29, "learning_rate": 3.329086570880232e-06, "logits/chosen": -2.576577663421631, "logits/rejected": -2.181570291519165, "logps/chosen": -478.9618835449219, "logps/rejected": -514.8330078125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -6.214463710784912, "rewards/margins": 10.026233673095703, "rewards/rejected": -16.24069595336914, "step": 14750 }, { "epoch": 2.29, "learning_rate": 3.3283531303490845e-06, "logits/chosen": -2.8274450302124023, "logits/rejected": -2.747741937637329, "logps/chosen": -120.72344207763672, "logps/rejected": -235.95135498046875, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": -4.10817813873291, "rewards/margins": 8.583806991577148, "rewards/rejected": -12.691985130310059, "step": 14751 }, { "epoch": 2.29, "learning_rate": 3.327619689817937e-06, "logits/chosen": -2.6355748176574707, "logits/rejected": -2.6014516353607178, "logps/chosen": -183.32293701171875, "logps/rejected": -228.20501708984375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.43368673324585, "rewards/margins": 6.7818145751953125, "rewards/rejected": -12.21550178527832, "step": 14752 }, { "epoch": 2.29, "learning_rate": 3.3268862492867887e-06, "logits/chosen": -2.8360490798950195, "logits/rejected": -2.9229636192321777, "logps/chosen": -247.7227783203125, "logps/rejected": -411.684326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.216706275939941, "rewards/margins": 8.887929916381836, "rewards/rejected": -13.104635238647461, "step": 14753 }, { "epoch": 2.29, "learning_rate": 3.326152808755641e-06, "logits/chosen": -1.6562734842300415, "logits/rejected": -2.4277007579803467, "logps/chosen": -256.1700134277344, "logps/rejected": -654.5505981445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.081408500671387, "rewards/margins": 17.225847244262695, "rewards/rejected": -24.307254791259766, "step": 14754 }, { "epoch": 2.29, "learning_rate": 3.325419368224493e-06, "logits/chosen": -2.530574083328247, "logits/rejected": -1.9868178367614746, "logps/chosen": -244.76593017578125, "logps/rejected": -290.10028076171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.267338752746582, "rewards/margins": 7.974483489990234, "rewards/rejected": -12.241822242736816, "step": 14755 }, { "epoch": 2.29, "learning_rate": 3.324685927693345e-06, "logits/chosen": -2.3939507007598877, "logits/rejected": -2.961817979812622, "logps/chosen": -501.820068359375, "logps/rejected": -815.9632568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.301034927368164, "rewards/margins": 11.112394332885742, "rewards/rejected": -19.413429260253906, "step": 14756 }, { "epoch": 2.3, "learning_rate": 3.323952487162197e-06, "logits/chosen": -2.4052255153656006, "logits/rejected": -1.7758182287216187, "logps/chosen": -446.92156982421875, "logps/rejected": -403.9013977050781, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -9.125312805175781, "rewards/margins": 9.879730224609375, "rewards/rejected": -19.005043029785156, "step": 14757 }, { "epoch": 2.3, "learning_rate": 3.3232190466310493e-06, "logits/chosen": -2.1856939792633057, "logits/rejected": -2.831922769546509, "logps/chosen": -329.71307373046875, "logps/rejected": -550.0940551757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.643128395080566, "rewards/margins": 14.100262641906738, "rewards/rejected": -19.743391036987305, "step": 14758 }, { "epoch": 2.3, "learning_rate": 3.3224856060999012e-06, "logits/chosen": -2.865778923034668, "logits/rejected": -2.059885025024414, "logps/chosen": -419.8714599609375, "logps/rejected": -342.9035949707031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.181546211242676, "rewards/margins": 7.432938575744629, "rewards/rejected": -11.614484786987305, "step": 14759 }, { "epoch": 2.3, "learning_rate": 3.321752165568754e-06, "logits/chosen": -1.2681163549423218, "logits/rejected": -2.458677291870117, "logps/chosen": -207.49264526367188, "logps/rejected": -397.2684631347656, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.826415061950684, "rewards/margins": 6.386130332946777, "rewards/rejected": -13.212545394897461, "step": 14760 }, { "epoch": 2.3, "learning_rate": 3.321018725037606e-06, "logits/chosen": -2.4459688663482666, "logits/rejected": -2.9869492053985596, "logps/chosen": -147.40170288085938, "logps/rejected": -447.1175537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.145508766174316, "rewards/margins": 15.094552993774414, "rewards/rejected": -20.240062713623047, "step": 14761 }, { "epoch": 2.3, "learning_rate": 3.320285284506458e-06, "logits/chosen": -2.7175941467285156, "logits/rejected": -2.2768986225128174, "logps/chosen": -212.04798889160156, "logps/rejected": -321.55938720703125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -10.113749504089355, "rewards/margins": 9.554096221923828, "rewards/rejected": -19.6678466796875, "step": 14762 }, { "epoch": 2.3, "learning_rate": 3.31955184397531e-06, "logits/chosen": -1.779729962348938, "logits/rejected": -2.5546188354492188, "logps/chosen": -87.83988952636719, "logps/rejected": -283.39263916015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.7367234230041504, "rewards/margins": 8.471273422241211, "rewards/rejected": -12.20799732208252, "step": 14763 }, { "epoch": 2.3, "learning_rate": 3.318818403444162e-06, "logits/chosen": -1.8144519329071045, "logits/rejected": -2.840712070465088, "logps/chosen": -116.65264892578125, "logps/rejected": -345.52777099609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.351171493530273, "rewards/margins": 10.046381950378418, "rewards/rejected": -16.397554397583008, "step": 14764 }, { "epoch": 2.3, "learning_rate": 3.318084962913014e-06, "logits/chosen": -2.607085943222046, "logits/rejected": -2.608057975769043, "logps/chosen": -151.74282836914062, "logps/rejected": -294.9850769042969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.134729862213135, "rewards/margins": 9.884346961975098, "rewards/rejected": -16.01907730102539, "step": 14765 }, { "epoch": 2.3, "learning_rate": 3.317351522381866e-06, "logits/chosen": -1.281640648841858, "logits/rejected": -2.676271915435791, "logps/chosen": -68.5336685180664, "logps/rejected": -400.2813720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.226226806640625, "rewards/margins": 16.995513916015625, "rewards/rejected": -19.22174072265625, "step": 14766 }, { "epoch": 2.3, "learning_rate": 3.3166180818507184e-06, "logits/chosen": -2.9257194995880127, "logits/rejected": -2.674438953399658, "logps/chosen": -367.069580078125, "logps/rejected": -408.957763671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.009574890136719, "rewards/margins": 9.221834182739258, "rewards/rejected": -13.231409072875977, "step": 14767 }, { "epoch": 2.3, "learning_rate": 3.3158846413195707e-06, "logits/chosen": -2.653308629989624, "logits/rejected": -2.8622469902038574, "logps/chosen": -341.970458984375, "logps/rejected": -521.384033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.092798709869385, "rewards/margins": 11.775789260864258, "rewards/rejected": -16.868587493896484, "step": 14768 }, { "epoch": 2.3, "learning_rate": 3.315151200788423e-06, "logits/chosen": -3.0062077045440674, "logits/rejected": -2.4684557914733887, "logps/chosen": -178.71450805664062, "logps/rejected": -142.1131134033203, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -2.0657830238342285, "rewards/margins": 8.553176879882812, "rewards/rejected": -10.618959426879883, "step": 14769 }, { "epoch": 2.3, "learning_rate": 3.314417760257275e-06, "logits/chosen": -2.7288577556610107, "logits/rejected": -2.249606132507324, "logps/chosen": -178.57867431640625, "logps/rejected": -371.8663635253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3614673614501953, "rewards/margins": 14.136775970458984, "rewards/rejected": -17.49824333190918, "step": 14770 }, { "epoch": 2.3, "learning_rate": 3.313684319726127e-06, "logits/chosen": -2.5780227184295654, "logits/rejected": -2.1294445991516113, "logps/chosen": -211.08799743652344, "logps/rejected": -277.0143737792969, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.739274978637695, "rewards/margins": 9.729771614074707, "rewards/rejected": -15.469045639038086, "step": 14771 }, { "epoch": 2.3, "learning_rate": 3.312950879194979e-06, "logits/chosen": -2.562664031982422, "logits/rejected": -2.6887285709381104, "logps/chosen": -268.61212158203125, "logps/rejected": -469.8456726074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.971071720123291, "rewards/margins": 11.802637100219727, "rewards/rejected": -15.77370834350586, "step": 14772 }, { "epoch": 2.3, "learning_rate": 3.312217438663831e-06, "logits/chosen": -2.2074708938598633, "logits/rejected": -2.509685754776001, "logps/chosen": -122.33683013916016, "logps/rejected": -254.96240234375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -6.159977912902832, "rewards/margins": 8.582906723022461, "rewards/rejected": -14.74288558959961, "step": 14773 }, { "epoch": 2.3, "learning_rate": 3.3114839981326832e-06, "logits/chosen": -2.2871253490448, "logits/rejected": -2.8114089965820312, "logps/chosen": -399.84619140625, "logps/rejected": -558.6776123046875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -9.16065788269043, "rewards/margins": 6.026130199432373, "rewards/rejected": -15.186787605285645, "step": 14774 }, { "epoch": 2.3, "learning_rate": 3.310750557601535e-06, "logits/chosen": -2.667462110519409, "logits/rejected": -2.594799757003784, "logps/chosen": -467.40264892578125, "logps/rejected": -302.4128723144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.434905529022217, "rewards/margins": 12.433658599853516, "rewards/rejected": -16.86856460571289, "step": 14775 }, { "epoch": 2.3, "learning_rate": 3.3100171170703874e-06, "logits/chosen": -1.3324114084243774, "logits/rejected": -2.200026273727417, "logps/chosen": -214.04319763183594, "logps/rejected": -508.7288818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.23100471496582, "rewards/margins": 14.068071365356445, "rewards/rejected": -22.299076080322266, "step": 14776 }, { "epoch": 2.3, "learning_rate": 3.3092836765392397e-06, "logits/chosen": -2.714024066925049, "logits/rejected": -2.757693290710449, "logps/chosen": -138.664794921875, "logps/rejected": -186.41004943847656, "loss": 1.8406, "rewards/accuracies": 0.5, "rewards/chosen": -9.716850280761719, "rewards/margins": 0.3499767780303955, "rewards/rejected": -10.066826820373535, "step": 14777 }, { "epoch": 2.3, "learning_rate": 3.308550236008092e-06, "logits/chosen": -2.1993653774261475, "logits/rejected": -2.6683785915374756, "logps/chosen": -296.3382263183594, "logps/rejected": -331.6089172363281, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": -10.322040557861328, "rewards/margins": 3.6472134590148926, "rewards/rejected": -13.969253540039062, "step": 14778 }, { "epoch": 2.3, "learning_rate": 3.307816795476944e-06, "logits/chosen": -1.695482611656189, "logits/rejected": -2.560598850250244, "logps/chosen": -287.4963073730469, "logps/rejected": -393.8313293457031, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.438718795776367, "rewards/margins": 6.715934753417969, "rewards/rejected": -15.154653549194336, "step": 14779 }, { "epoch": 2.3, "learning_rate": 3.307083354945796e-06, "logits/chosen": -1.7997193336486816, "logits/rejected": -2.9906973838806152, "logps/chosen": -137.03741455078125, "logps/rejected": -484.48870849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.346851348876953, "rewards/margins": 11.499689102172852, "rewards/rejected": -16.846540451049805, "step": 14780 }, { "epoch": 2.3, "learning_rate": 3.306349914414648e-06, "logits/chosen": -2.734431266784668, "logits/rejected": -2.6620094776153564, "logps/chosen": -228.6163787841797, "logps/rejected": -254.56137084960938, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.4594932794570923, "rewards/margins": 8.7901611328125, "rewards/rejected": -10.249653816223145, "step": 14781 }, { "epoch": 2.3, "learning_rate": 3.3056164738835e-06, "logits/chosen": -1.7848657369613647, "logits/rejected": -2.4397172927856445, "logps/chosen": -124.87738800048828, "logps/rejected": -248.09906005859375, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": -7.020077705383301, "rewards/margins": 6.337530136108398, "rewards/rejected": -13.3576078414917, "step": 14782 }, { "epoch": 2.3, "learning_rate": 3.3048830333523522e-06, "logits/chosen": -2.8252134323120117, "logits/rejected": -1.7961902618408203, "logps/chosen": -329.14276123046875, "logps/rejected": -221.9439239501953, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.422095775604248, "rewards/margins": 6.8161773681640625, "rewards/rejected": -11.238273620605469, "step": 14783 }, { "epoch": 2.3, "learning_rate": 3.304149592821204e-06, "logits/chosen": -2.1331961154937744, "logits/rejected": -2.696878433227539, "logps/chosen": -265.1303405761719, "logps/rejected": -394.3329772949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.608123779296875, "rewards/margins": 11.325263977050781, "rewards/rejected": -16.933387756347656, "step": 14784 }, { "epoch": 2.3, "learning_rate": 3.303416152290057e-06, "logits/chosen": -2.2197723388671875, "logits/rejected": -2.811277151107788, "logps/chosen": -108.60125732421875, "logps/rejected": -363.69757080078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.4204864501953125, "rewards/margins": 14.128793716430664, "rewards/rejected": -19.549280166625977, "step": 14785 }, { "epoch": 2.3, "learning_rate": 3.3026827117589087e-06, "logits/chosen": -2.341904640197754, "logits/rejected": -2.80934476852417, "logps/chosen": -698.815185546875, "logps/rejected": -626.1113891601562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.601385116577148, "rewards/margins": 11.049148559570312, "rewards/rejected": -16.65053367614746, "step": 14786 }, { "epoch": 2.3, "learning_rate": 3.301949271227761e-06, "logits/chosen": -2.576603651046753, "logits/rejected": -1.131284475326538, "logps/chosen": -277.2802429199219, "logps/rejected": -197.39492797851562, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.548688888549805, "rewards/margins": 7.832646369934082, "rewards/rejected": -12.381336212158203, "step": 14787 }, { "epoch": 2.3, "learning_rate": 3.301215830696613e-06, "logits/chosen": -2.0561869144439697, "logits/rejected": -2.72648549079895, "logps/chosen": -163.7213592529297, "logps/rejected": -478.8040771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.229523181915283, "rewards/margins": 14.394851684570312, "rewards/rejected": -21.624374389648438, "step": 14788 }, { "epoch": 2.3, "learning_rate": 3.300482390165465e-06, "logits/chosen": -2.0903003215789795, "logits/rejected": -2.8615500926971436, "logps/chosen": -125.62263488769531, "logps/rejected": -412.8909606933594, "loss": 2.2199, "rewards/accuracies": 0.5, "rewards/chosen": -7.049833297729492, "rewards/margins": 5.044615745544434, "rewards/rejected": -12.094449043273926, "step": 14789 }, { "epoch": 2.3, "learning_rate": 3.299748949634317e-06, "logits/chosen": -2.5126583576202393, "logits/rejected": -2.791158437728882, "logps/chosen": -97.69784545898438, "logps/rejected": -316.30694580078125, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/chosen": -7.8018646240234375, "rewards/margins": 10.41441822052002, "rewards/rejected": -18.21628189086914, "step": 14790 }, { "epoch": 2.3, "learning_rate": 3.2990155091031694e-06, "logits/chosen": -2.766611337661743, "logits/rejected": -2.001863718032837, "logps/chosen": -262.8607177734375, "logps/rejected": -276.348388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.432018756866455, "rewards/margins": 11.713626861572266, "rewards/rejected": -15.145646095275879, "step": 14791 }, { "epoch": 2.3, "learning_rate": 3.2982820685720213e-06, "logits/chosen": -2.361377239227295, "logits/rejected": -2.6782567501068115, "logps/chosen": -87.14167022705078, "logps/rejected": -291.39959716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2287087440490723, "rewards/margins": 10.294918060302734, "rewards/rejected": -13.523626327514648, "step": 14792 }, { "epoch": 2.3, "learning_rate": 3.2975486280408736e-06, "logits/chosen": -1.8878856897354126, "logits/rejected": -2.5519771575927734, "logps/chosen": -185.8691864013672, "logps/rejected": -415.424560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.025775909423828, "rewards/margins": 10.260454177856445, "rewards/rejected": -17.286230087280273, "step": 14793 }, { "epoch": 2.3, "learning_rate": 3.296815187509726e-06, "logits/chosen": -2.5989017486572266, "logits/rejected": -2.5927510261535645, "logps/chosen": -129.19500732421875, "logps/rejected": -227.2831573486328, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.829225063323975, "rewards/margins": 7.845649719238281, "rewards/rejected": -14.674875259399414, "step": 14794 }, { "epoch": 2.3, "learning_rate": 3.2960817469785777e-06, "logits/chosen": -2.5100741386413574, "logits/rejected": -2.7399325370788574, "logps/chosen": -101.95916748046875, "logps/rejected": -229.54275512695312, "loss": 0.0248, "rewards/accuracies": 1.0, "rewards/chosen": -8.696752548217773, "rewards/margins": 5.952737331390381, "rewards/rejected": -14.649490356445312, "step": 14795 }, { "epoch": 2.3, "learning_rate": 3.29534830644743e-06, "logits/chosen": -2.553147077560425, "logits/rejected": -2.595637559890747, "logps/chosen": -215.01693725585938, "logps/rejected": -453.9892272949219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.214867115020752, "rewards/margins": 14.822586059570312, "rewards/rejected": -22.037452697753906, "step": 14796 }, { "epoch": 2.3, "learning_rate": 3.294614865916282e-06, "logits/chosen": -2.559048652648926, "logits/rejected": -2.0915422439575195, "logps/chosen": -251.54586791992188, "logps/rejected": -179.73748779296875, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": -5.191099166870117, "rewards/margins": 4.222107887268066, "rewards/rejected": -9.413206100463867, "step": 14797 }, { "epoch": 2.3, "learning_rate": 3.2938814253851342e-06, "logits/chosen": -2.697937488555908, "logits/rejected": -2.177276372909546, "logps/chosen": -882.8125, "logps/rejected": -659.095458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.455324172973633, "rewards/margins": 11.725366592407227, "rewards/rejected": -18.18069076538086, "step": 14798 }, { "epoch": 2.3, "learning_rate": 3.293147984853986e-06, "logits/chosen": -2.6213715076446533, "logits/rejected": -1.7132703065872192, "logps/chosen": -457.0260009765625, "logps/rejected": -302.4283142089844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.862967014312744, "rewards/margins": 7.024273872375488, "rewards/rejected": -12.88724136352539, "step": 14799 }, { "epoch": 2.3, "learning_rate": 3.2924145443228384e-06, "logits/chosen": -0.9276179671287537, "logits/rejected": -1.766008973121643, "logps/chosen": -229.7000274658203, "logps/rejected": -488.3498840332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3356614112854, "rewards/margins": 19.07933235168457, "rewards/rejected": -24.414993286132812, "step": 14800 }, { "epoch": 2.3, "learning_rate": 3.2916811037916903e-06, "logits/chosen": -2.1786203384399414, "logits/rejected": -2.1742098331451416, "logps/chosen": -300.59857177734375, "logps/rejected": -439.588623046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.925739288330078, "rewards/margins": 11.596832275390625, "rewards/rejected": -23.522571563720703, "step": 14801 }, { "epoch": 2.3, "learning_rate": 3.2909476632605426e-06, "logits/chosen": -2.2366151809692383, "logits/rejected": -2.858603000640869, "logps/chosen": -80.09095001220703, "logps/rejected": -354.6929016113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.056186199188232, "rewards/margins": 13.77336311340332, "rewards/rejected": -18.82954978942871, "step": 14802 }, { "epoch": 2.3, "learning_rate": 3.290214222729395e-06, "logits/chosen": -2.5057363510131836, "logits/rejected": -2.058213949203491, "logps/chosen": -268.7937927246094, "logps/rejected": -275.1324157714844, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -3.5105223655700684, "rewards/margins": 10.13861083984375, "rewards/rejected": -13.649133682250977, "step": 14803 }, { "epoch": 2.3, "learning_rate": 3.2894807821982468e-06, "logits/chosen": -2.406128406524658, "logits/rejected": -2.7173099517822266, "logps/chosen": -205.29518127441406, "logps/rejected": -261.31103515625, "loss": 0.2493, "rewards/accuracies": 1.0, "rewards/chosen": -5.509702682495117, "rewards/margins": 6.741955757141113, "rewards/rejected": -12.251657485961914, "step": 14804 }, { "epoch": 2.3, "learning_rate": 3.288747341667099e-06, "logits/chosen": -2.0610103607177734, "logits/rejected": -2.7182440757751465, "logps/chosen": -278.2801818847656, "logps/rejected": -549.0875244140625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.944479465484619, "rewards/margins": 9.785514831542969, "rewards/rejected": -16.72999382019043, "step": 14805 }, { "epoch": 2.3, "learning_rate": 3.288013901135951e-06, "logits/chosen": -2.415027379989624, "logits/rejected": -1.6494028568267822, "logps/chosen": -479.1798095703125, "logps/rejected": -415.963623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.538700103759766, "rewards/margins": 10.054403305053711, "rewards/rejected": -21.593103408813477, "step": 14806 }, { "epoch": 2.3, "learning_rate": 3.2872804606048032e-06, "logits/chosen": -2.345827102661133, "logits/rejected": -2.6009228229522705, "logps/chosen": -595.9830322265625, "logps/rejected": -588.0057373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.140963554382324, "rewards/margins": 11.590771675109863, "rewards/rejected": -23.731735229492188, "step": 14807 }, { "epoch": 2.3, "learning_rate": 3.286547020073655e-06, "logits/chosen": -2.111140251159668, "logits/rejected": -2.711759567260742, "logps/chosen": -157.47177124023438, "logps/rejected": -335.4515380859375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.080768585205078, "rewards/margins": 8.154130935668945, "rewards/rejected": -13.234899520874023, "step": 14808 }, { "epoch": 2.3, "learning_rate": 3.2858135795425074e-06, "logits/chosen": -2.8519372940063477, "logits/rejected": -3.0138869285583496, "logps/chosen": -592.4133911132812, "logps/rejected": -647.2208862304688, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.444857597351074, "rewards/margins": 11.031838417053223, "rewards/rejected": -16.476696014404297, "step": 14809 }, { "epoch": 2.3, "learning_rate": 3.2850801390113597e-06, "logits/chosen": -1.962891697883606, "logits/rejected": -2.984931707382202, "logps/chosen": -69.49522399902344, "logps/rejected": -360.20281982421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.938549757003784, "rewards/margins": 10.953987121582031, "rewards/rejected": -13.892537117004395, "step": 14810 }, { "epoch": 2.3, "learning_rate": 3.284346698480212e-06, "logits/chosen": -1.7396266460418701, "logits/rejected": -2.855196714401245, "logps/chosen": -229.8122100830078, "logps/rejected": -378.0857238769531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.021236419677734, "rewards/margins": 7.849692344665527, "rewards/rejected": -13.870928764343262, "step": 14811 }, { "epoch": 2.3, "learning_rate": 3.283613257949064e-06, "logits/chosen": -2.4504103660583496, "logits/rejected": -2.5802552700042725, "logps/chosen": -252.4732666015625, "logps/rejected": -454.2876281738281, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -9.040868759155273, "rewards/margins": 9.692487716674805, "rewards/rejected": -18.733356475830078, "step": 14812 }, { "epoch": 2.3, "learning_rate": 3.2828798174179158e-06, "logits/chosen": -2.509631872177124, "logits/rejected": -2.6691908836364746, "logps/chosen": -340.64984130859375, "logps/rejected": -407.3784484863281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.895914077758789, "rewards/margins": 9.564468383789062, "rewards/rejected": -15.460381507873535, "step": 14813 }, { "epoch": 2.3, "learning_rate": 3.282146376886768e-06, "logits/chosen": -2.2452499866485596, "logits/rejected": -2.661853313446045, "logps/chosen": -134.05137634277344, "logps/rejected": -333.1538391113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.358592510223389, "rewards/margins": 12.706731796264648, "rewards/rejected": -17.065324783325195, "step": 14814 }, { "epoch": 2.3, "learning_rate": 3.28141293635562e-06, "logits/chosen": -2.527944564819336, "logits/rejected": -2.8869566917419434, "logps/chosen": -138.990478515625, "logps/rejected": -276.74505615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.540139198303223, "rewards/margins": 11.20407485961914, "rewards/rejected": -15.744213104248047, "step": 14815 }, { "epoch": 2.3, "learning_rate": 3.2806794958244723e-06, "logits/chosen": -2.4945850372314453, "logits/rejected": -2.607247829437256, "logps/chosen": -141.90731811523438, "logps/rejected": -222.74378967285156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.079737186431885, "rewards/margins": 8.884244918823242, "rewards/rejected": -14.963981628417969, "step": 14816 }, { "epoch": 2.3, "learning_rate": 3.279946055293324e-06, "logits/chosen": -2.233851909637451, "logits/rejected": -2.6818013191223145, "logps/chosen": -133.7917022705078, "logps/rejected": -432.7723388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.03510046005249, "rewards/margins": 9.645198822021484, "rewards/rejected": -16.6802978515625, "step": 14817 }, { "epoch": 2.3, "learning_rate": 3.2792126147621764e-06, "logits/chosen": -2.573272943496704, "logits/rejected": -2.6068336963653564, "logps/chosen": -297.82568359375, "logps/rejected": -436.156005859375, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -8.592235565185547, "rewards/margins": 9.240800857543945, "rewards/rejected": -17.833036422729492, "step": 14818 }, { "epoch": 2.3, "learning_rate": 3.2784791742310287e-06, "logits/chosen": -3.265094757080078, "logits/rejected": -3.2881712913513184, "logps/chosen": -135.23504638671875, "logps/rejected": -203.59542846679688, "loss": 0.7579, "rewards/accuracies": 0.5, "rewards/chosen": -7.754408359527588, "rewards/margins": 5.313353538513184, "rewards/rejected": -13.06776237487793, "step": 14819 }, { "epoch": 2.3, "learning_rate": 3.277745733699881e-06, "logits/chosen": -1.8327200412750244, "logits/rejected": -2.9277420043945312, "logps/chosen": -108.56134796142578, "logps/rejected": -518.9708862304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.80179500579834, "rewards/margins": 12.572415351867676, "rewards/rejected": -21.374210357666016, "step": 14820 }, { "epoch": 2.3, "learning_rate": 3.277012293168733e-06, "logits/chosen": -2.7387490272521973, "logits/rejected": -2.217879295349121, "logps/chosen": -218.68777465820312, "logps/rejected": -295.4528503417969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.322558879852295, "rewards/margins": 10.434633255004883, "rewards/rejected": -15.75719165802002, "step": 14821 }, { "epoch": 2.31, "learning_rate": 3.276278852637585e-06, "logits/chosen": -2.4201557636260986, "logits/rejected": -2.3737666606903076, "logps/chosen": -282.0174560546875, "logps/rejected": -285.2137451171875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -5.872347354888916, "rewards/margins": 6.671441078186035, "rewards/rejected": -12.54378890991211, "step": 14822 }, { "epoch": 2.31, "learning_rate": 3.275545412106437e-06, "logits/chosen": -2.0683393478393555, "logits/rejected": -3.1272196769714355, "logps/chosen": -706.1912841796875, "logps/rejected": -778.0966186523438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.248809814453125, "rewards/margins": 10.426239013671875, "rewards/rejected": -18.675048828125, "step": 14823 }, { "epoch": 2.31, "learning_rate": 3.274811971575289e-06, "logits/chosen": -1.6921106576919556, "logits/rejected": -2.406531810760498, "logps/chosen": -171.41517639160156, "logps/rejected": -520.5430297851562, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -6.71478271484375, "rewards/margins": 11.136709213256836, "rewards/rejected": -17.851491928100586, "step": 14824 }, { "epoch": 2.31, "learning_rate": 3.2740785310441413e-06, "logits/chosen": -2.5726959705352783, "logits/rejected": -1.9358359575271606, "logps/chosen": -716.328125, "logps/rejected": -460.7716369628906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.944653034210205, "rewards/margins": 9.176875114440918, "rewards/rejected": -17.12152862548828, "step": 14825 }, { "epoch": 2.31, "learning_rate": 3.273345090512993e-06, "logits/chosen": -2.5906291007995605, "logits/rejected": -3.079301357269287, "logps/chosen": -177.1853485107422, "logps/rejected": -448.8495788574219, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -8.19611644744873, "rewards/margins": 6.315868377685547, "rewards/rejected": -14.511985778808594, "step": 14826 }, { "epoch": 2.31, "learning_rate": 3.272611649981846e-06, "logits/chosen": -2.643463611602783, "logits/rejected": -2.7361106872558594, "logps/chosen": -127.56654357910156, "logps/rejected": -278.8879699707031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.119144439697266, "rewards/margins": 9.972244262695312, "rewards/rejected": -16.091388702392578, "step": 14827 }, { "epoch": 2.31, "learning_rate": 3.2718782094506978e-06, "logits/chosen": -2.8710696697235107, "logits/rejected": -3.0482895374298096, "logps/chosen": -127.92884063720703, "logps/rejected": -266.33770751953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.23018741607666, "rewards/margins": 8.471731185913086, "rewards/rejected": -13.701919555664062, "step": 14828 }, { "epoch": 2.31, "learning_rate": 3.27114476891955e-06, "logits/chosen": -2.398763656616211, "logits/rejected": -2.666717290878296, "logps/chosen": -273.90655517578125, "logps/rejected": -305.96380615234375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.893261909484863, "rewards/margins": 8.360166549682617, "rewards/rejected": -13.25342845916748, "step": 14829 }, { "epoch": 2.31, "learning_rate": 3.270411328388402e-06, "logits/chosen": -1.297924518585205, "logits/rejected": -2.73433518409729, "logps/chosen": -116.3531494140625, "logps/rejected": -333.182373046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.298344612121582, "rewards/margins": 9.022832870483398, "rewards/rejected": -13.321178436279297, "step": 14830 }, { "epoch": 2.31, "learning_rate": 3.269677887857254e-06, "logits/chosen": -2.4493043422698975, "logits/rejected": -2.9075186252593994, "logps/chosen": -166.74789428710938, "logps/rejected": -388.77093505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.958474159240723, "rewards/margins": 9.190105438232422, "rewards/rejected": -15.148578643798828, "step": 14831 }, { "epoch": 2.31, "learning_rate": 3.268944447326106e-06, "logits/chosen": -2.3963234424591064, "logits/rejected": -2.1292026042938232, "logps/chosen": -449.5412902832031, "logps/rejected": -553.5759887695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.199500560760498, "rewards/margins": 10.73496150970459, "rewards/rejected": -14.93446159362793, "step": 14832 }, { "epoch": 2.31, "learning_rate": 3.268211006794958e-06, "logits/chosen": -2.7188704013824463, "logits/rejected": -2.931614875793457, "logps/chosen": -770.7186279296875, "logps/rejected": -809.5914916992188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -10.40289306640625, "rewards/margins": 8.92711067199707, "rewards/rejected": -19.33000373840332, "step": 14833 }, { "epoch": 2.31, "learning_rate": 3.2674775662638103e-06, "logits/chosen": -2.139031171798706, "logits/rejected": -2.5678577423095703, "logps/chosen": -419.1266174316406, "logps/rejected": -667.735107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.522137641906738, "rewards/margins": 9.7733793258667, "rewards/rejected": -21.295516967773438, "step": 14834 }, { "epoch": 2.31, "learning_rate": 3.2667441257326626e-06, "logits/chosen": -1.9611271619796753, "logits/rejected": -2.3269617557525635, "logps/chosen": -233.50436401367188, "logps/rejected": -405.69866943359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.939735412597656, "rewards/margins": 8.846941947937012, "rewards/rejected": -15.786677360534668, "step": 14835 }, { "epoch": 2.31, "learning_rate": 3.266010685201515e-06, "logits/chosen": -1.8493596315383911, "logits/rejected": -2.599749803543091, "logps/chosen": -82.73799133300781, "logps/rejected": -323.74896240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.03695011138916, "rewards/margins": 12.952164649963379, "rewards/rejected": -18.98911476135254, "step": 14836 }, { "epoch": 2.31, "learning_rate": 3.265277244670367e-06, "logits/chosen": -2.0323119163513184, "logits/rejected": -2.7087888717651367, "logps/chosen": -279.01678466796875, "logps/rejected": -538.7822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.185216903686523, "rewards/margins": 17.56295394897461, "rewards/rejected": -21.748170852661133, "step": 14837 }, { "epoch": 2.31, "learning_rate": 3.264543804139219e-06, "logits/chosen": -2.5251822471618652, "logits/rejected": -2.8099937438964844, "logps/chosen": -160.51438903808594, "logps/rejected": -314.14678955078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.075270652770996, "rewards/margins": 10.772622108459473, "rewards/rejected": -16.84789276123047, "step": 14838 }, { "epoch": 2.31, "learning_rate": 3.263810363608071e-06, "logits/chosen": -2.3680105209350586, "logits/rejected": -2.6599233150482178, "logps/chosen": -130.50650024414062, "logps/rejected": -233.0304718017578, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -4.547688007354736, "rewards/margins": 6.860251426696777, "rewards/rejected": -11.407939910888672, "step": 14839 }, { "epoch": 2.31, "learning_rate": 3.2630769230769233e-06, "logits/chosen": -2.027663469314575, "logits/rejected": -2.5807785987854004, "logps/chosen": -138.65158081054688, "logps/rejected": -297.38653564453125, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -5.8448805809021, "rewards/margins": 9.582780838012695, "rewards/rejected": -15.427661895751953, "step": 14840 }, { "epoch": 2.31, "learning_rate": 3.262343482545775e-06, "logits/chosen": -2.674832344055176, "logits/rejected": -2.6862266063690186, "logps/chosen": -445.8902587890625, "logps/rejected": -533.1510009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.970184326171875, "rewards/margins": 11.532136917114258, "rewards/rejected": -20.502321243286133, "step": 14841 }, { "epoch": 2.31, "learning_rate": 3.261610042014627e-06, "logits/chosen": -2.409730911254883, "logits/rejected": -2.8925063610076904, "logps/chosen": -249.3378448486328, "logps/rejected": -522.2830810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.300699234008789, "rewards/margins": 13.801387786865234, "rewards/rejected": -23.10208511352539, "step": 14842 }, { "epoch": 2.31, "learning_rate": 3.2608766014834793e-06, "logits/chosen": -2.876044273376465, "logits/rejected": -1.9479563236236572, "logps/chosen": -614.6045532226562, "logps/rejected": -579.7023315429688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.587033271789551, "rewards/margins": 9.838483810424805, "rewards/rejected": -14.425516128540039, "step": 14843 }, { "epoch": 2.31, "learning_rate": 3.2601431609523316e-06, "logits/chosen": -2.512925148010254, "logits/rejected": -2.7922005653381348, "logps/chosen": -52.51072692871094, "logps/rejected": -309.75726318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1030473709106445, "rewards/margins": 13.397212982177734, "rewards/rejected": -16.500261306762695, "step": 14844 }, { "epoch": 2.31, "learning_rate": 3.259409720421184e-06, "logits/chosen": -2.506481170654297, "logits/rejected": -2.6929805278778076, "logps/chosen": -361.53326416015625, "logps/rejected": -682.556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.53564167022705, "rewards/margins": 14.558212280273438, "rewards/rejected": -24.093852996826172, "step": 14845 }, { "epoch": 2.31, "learning_rate": 3.258676279890036e-06, "logits/chosen": -2.5545639991760254, "logits/rejected": -2.3419840335845947, "logps/chosen": -289.21026611328125, "logps/rejected": -279.3092041015625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -5.301565647125244, "rewards/margins": 6.616519927978516, "rewards/rejected": -11.918085098266602, "step": 14846 }, { "epoch": 2.31, "learning_rate": 3.257942839358888e-06, "logits/chosen": -1.149150013923645, "logits/rejected": -1.2883611917495728, "logps/chosen": -492.5184020996094, "logps/rejected": -650.187255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.598494052886963, "rewards/margins": 12.13176155090332, "rewards/rejected": -15.730255126953125, "step": 14847 }, { "epoch": 2.31, "learning_rate": 3.25720939882774e-06, "logits/chosen": -2.6320464611053467, "logits/rejected": -2.9581334590911865, "logps/chosen": -80.41133117675781, "logps/rejected": -296.8011474609375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -6.388155937194824, "rewards/margins": 11.847861289978027, "rewards/rejected": -18.23601722717285, "step": 14848 }, { "epoch": 2.31, "learning_rate": 3.2564759582965923e-06, "logits/chosen": -1.8264491558074951, "logits/rejected": -2.553086996078491, "logps/chosen": -451.7056579589844, "logps/rejected": -548.4991455078125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.460262298583984, "rewards/margins": 11.022781372070312, "rewards/rejected": -19.483043670654297, "step": 14849 }, { "epoch": 2.31, "learning_rate": 3.255742517765444e-06, "logits/chosen": -2.054248571395874, "logits/rejected": -2.753131866455078, "logps/chosen": -229.7637176513672, "logps/rejected": -454.89910888671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.639533996582031, "rewards/margins": 10.741363525390625, "rewards/rejected": -19.380897521972656, "step": 14850 }, { "epoch": 2.31, "learning_rate": 3.255009077234296e-06, "logits/chosen": -2.6194863319396973, "logits/rejected": -2.687562942504883, "logps/chosen": -145.9077911376953, "logps/rejected": -296.9140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.432005882263184, "rewards/margins": 8.244804382324219, "rewards/rejected": -13.676809310913086, "step": 14851 }, { "epoch": 2.31, "learning_rate": 3.2542756367031484e-06, "logits/chosen": -2.6685445308685303, "logits/rejected": -2.8203494548797607, "logps/chosen": -130.32769775390625, "logps/rejected": -447.8539733886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.460351943969727, "rewards/margins": 17.060379028320312, "rewards/rejected": -22.52073097229004, "step": 14852 }, { "epoch": 2.31, "learning_rate": 3.2535421961720007e-06, "logits/chosen": -2.9501237869262695, "logits/rejected": -3.1552765369415283, "logps/chosen": -110.76679992675781, "logps/rejected": -362.035400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0544891357421875, "rewards/margins": 11.610884666442871, "rewards/rejected": -15.665374755859375, "step": 14853 }, { "epoch": 2.31, "learning_rate": 3.252808755640853e-06, "logits/chosen": -2.027940034866333, "logits/rejected": -1.3327387571334839, "logps/chosen": -206.7672119140625, "logps/rejected": -208.2808380126953, "loss": 0.1985, "rewards/accuracies": 1.0, "rewards/chosen": -4.795031547546387, "rewards/margins": 8.2488374710083, "rewards/rejected": -13.043869018554688, "step": 14854 }, { "epoch": 2.31, "learning_rate": 3.252075315109705e-06, "logits/chosen": -2.58954119682312, "logits/rejected": -1.7127267122268677, "logps/chosen": -385.26165771484375, "logps/rejected": -386.38360595703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.702628135681152, "rewards/margins": 9.663578033447266, "rewards/rejected": -16.366207122802734, "step": 14855 }, { "epoch": 2.31, "learning_rate": 3.251341874578557e-06, "logits/chosen": -2.7546699047088623, "logits/rejected": -2.5469400882720947, "logps/chosen": -135.91188049316406, "logps/rejected": -152.998291015625, "loss": 2.5032, "rewards/accuracies": 0.5, "rewards/chosen": -5.318476676940918, "rewards/margins": 3.0172791481018066, "rewards/rejected": -8.335756301879883, "step": 14856 }, { "epoch": 2.31, "learning_rate": 3.250608434047409e-06, "logits/chosen": -1.9570437669754028, "logits/rejected": -2.795865774154663, "logps/chosen": -136.078857421875, "logps/rejected": -465.59686279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.078775405883789, "rewards/margins": 12.981760025024414, "rewards/rejected": -19.060535430908203, "step": 14857 }, { "epoch": 2.31, "learning_rate": 3.2498749935162613e-06, "logits/chosen": -1.8641637563705444, "logits/rejected": -2.539585828781128, "logps/chosen": -253.7936248779297, "logps/rejected": -358.33807373046875, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -8.602666854858398, "rewards/margins": 9.473749160766602, "rewards/rejected": -18.076416015625, "step": 14858 }, { "epoch": 2.31, "learning_rate": 3.249141552985113e-06, "logits/chosen": -2.904737710952759, "logits/rejected": -2.383524179458618, "logps/chosen": -512.7626953125, "logps/rejected": -418.5796813964844, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/chosen": -7.9397125244140625, "rewards/margins": 9.620946884155273, "rewards/rejected": -17.560659408569336, "step": 14859 }, { "epoch": 2.31, "learning_rate": 3.2484081124539655e-06, "logits/chosen": -2.298266649246216, "logits/rejected": -2.599133014678955, "logps/chosen": -226.59463500976562, "logps/rejected": -439.8188781738281, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -9.865311622619629, "rewards/margins": 7.822492599487305, "rewards/rejected": -17.68780517578125, "step": 14860 }, { "epoch": 2.31, "learning_rate": 3.247674671922818e-06, "logits/chosen": -2.8036317825317383, "logits/rejected": -2.8349173069000244, "logps/chosen": -617.1124267578125, "logps/rejected": -858.2638549804688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.568492889404297, "rewards/margins": 10.465085983276367, "rewards/rejected": -20.033578872680664, "step": 14861 }, { "epoch": 2.31, "learning_rate": 3.2469412313916697e-06, "logits/chosen": -2.797097682952881, "logits/rejected": -2.845470428466797, "logps/chosen": -165.30166625976562, "logps/rejected": -293.38134765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.100828170776367, "rewards/margins": 9.962215423583984, "rewards/rejected": -14.063043594360352, "step": 14862 }, { "epoch": 2.31, "learning_rate": 3.246207790860522e-06, "logits/chosen": -2.798656463623047, "logits/rejected": -2.9679067134857178, "logps/chosen": -191.23712158203125, "logps/rejected": -246.35867309570312, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": -6.715391159057617, "rewards/margins": 5.78416109085083, "rewards/rejected": -12.499551773071289, "step": 14863 }, { "epoch": 2.31, "learning_rate": 3.245474350329374e-06, "logits/chosen": -2.5648205280303955, "logits/rejected": -1.8013458251953125, "logps/chosen": -342.4003601074219, "logps/rejected": -247.94419860839844, "loss": 0.1287, "rewards/accuracies": 1.0, "rewards/chosen": -7.998764991760254, "rewards/margins": 3.367917537689209, "rewards/rejected": -11.366682052612305, "step": 14864 }, { "epoch": 2.31, "learning_rate": 3.244740909798226e-06, "logits/chosen": -2.6153078079223633, "logits/rejected": -2.0995302200317383, "logps/chosen": -481.4224853515625, "logps/rejected": -432.7493896484375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.96577787399292, "rewards/margins": 6.513657093048096, "rewards/rejected": -11.479434967041016, "step": 14865 }, { "epoch": 2.31, "learning_rate": 3.244007469267078e-06, "logits/chosen": -2.6466801166534424, "logits/rejected": -1.429855465888977, "logps/chosen": -934.7844848632812, "logps/rejected": -510.97503662109375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -6.982274055480957, "rewards/margins": 7.688620567321777, "rewards/rejected": -14.670894622802734, "step": 14866 }, { "epoch": 2.31, "learning_rate": 3.2432740287359303e-06, "logits/chosen": -2.486203670501709, "logits/rejected": -2.100510597229004, "logps/chosen": -565.2406005859375, "logps/rejected": -547.1016235351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.134921073913574, "rewards/margins": 13.247739791870117, "rewards/rejected": -21.382661819458008, "step": 14867 }, { "epoch": 2.31, "learning_rate": 3.2425405882047822e-06, "logits/chosen": -1.9675095081329346, "logits/rejected": -2.722151756286621, "logps/chosen": -230.52793884277344, "logps/rejected": -376.923583984375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.863924503326416, "rewards/margins": 9.795652389526367, "rewards/rejected": -17.659576416015625, "step": 14868 }, { "epoch": 2.31, "learning_rate": 3.2418071476736345e-06, "logits/chosen": -2.218930244445801, "logits/rejected": -2.8593192100524902, "logps/chosen": -75.21690368652344, "logps/rejected": -264.74798583984375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.412524700164795, "rewards/margins": 8.117474555969238, "rewards/rejected": -12.529999732971191, "step": 14869 }, { "epoch": 2.31, "learning_rate": 3.241073707142487e-06, "logits/chosen": -2.6807734966278076, "logits/rejected": -2.8653321266174316, "logps/chosen": -105.47193145751953, "logps/rejected": -280.3270263671875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.446004390716553, "rewards/margins": 7.305747032165527, "rewards/rejected": -12.751750946044922, "step": 14870 }, { "epoch": 2.31, "learning_rate": 3.2403402666113387e-06, "logits/chosen": -1.2679213285446167, "logits/rejected": -2.6163713932037354, "logps/chosen": -112.6603775024414, "logps/rejected": -355.78033447265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.96080207824707, "rewards/margins": 7.499413967132568, "rewards/rejected": -15.460216522216797, "step": 14871 }, { "epoch": 2.31, "learning_rate": 3.239606826080191e-06, "logits/chosen": -1.3781020641326904, "logits/rejected": -2.6667912006378174, "logps/chosen": -201.5810546875, "logps/rejected": -464.3824157714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.750411033630371, "rewards/margins": 13.069358825683594, "rewards/rejected": -18.81977081298828, "step": 14872 }, { "epoch": 2.31, "learning_rate": 3.238873385549043e-06, "logits/chosen": -2.5368218421936035, "logits/rejected": -1.7535295486450195, "logps/chosen": -238.0093994140625, "logps/rejected": -216.94859313964844, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -3.1305899620056152, "rewards/margins": 5.957609176635742, "rewards/rejected": -9.088199615478516, "step": 14873 }, { "epoch": 2.31, "learning_rate": 3.238139945017895e-06, "logits/chosen": -1.2346643209457397, "logits/rejected": -2.5820977687835693, "logps/chosen": -112.83861541748047, "logps/rejected": -354.1097412109375, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -5.305572509765625, "rewards/margins": 9.295978546142578, "rewards/rejected": -14.601551055908203, "step": 14874 }, { "epoch": 2.31, "learning_rate": 3.237406504486747e-06, "logits/chosen": -2.3992018699645996, "logits/rejected": -2.7505483627319336, "logps/chosen": -162.7593994140625, "logps/rejected": -292.7281799316406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.69658088684082, "rewards/margins": 9.207633972167969, "rewards/rejected": -13.904214859008789, "step": 14875 }, { "epoch": 2.31, "learning_rate": 3.2366730639555994e-06, "logits/chosen": -2.6383509635925293, "logits/rejected": -2.5309815406799316, "logps/chosen": -273.38720703125, "logps/rejected": -262.0956726074219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.840301513671875, "rewards/margins": 8.480188369750977, "rewards/rejected": -13.320489883422852, "step": 14876 }, { "epoch": 2.31, "learning_rate": 3.2359396234244512e-06, "logits/chosen": -1.4191094636917114, "logits/rejected": -2.104196548461914, "logps/chosen": -242.7984619140625, "logps/rejected": -432.18060302734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.685460090637207, "rewards/margins": 11.585174560546875, "rewards/rejected": -17.270633697509766, "step": 14877 }, { "epoch": 2.31, "learning_rate": 3.235206182893304e-06, "logits/chosen": -2.759530782699585, "logits/rejected": -2.1109859943389893, "logps/chosen": -320.4001159667969, "logps/rejected": -301.3997497558594, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -4.980859279632568, "rewards/margins": 7.551020622253418, "rewards/rejected": -12.531879425048828, "step": 14878 }, { "epoch": 2.31, "learning_rate": 3.234472742362156e-06, "logits/chosen": -2.4817049503326416, "logits/rejected": -1.7085320949554443, "logps/chosen": -398.2752685546875, "logps/rejected": -360.2916564941406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.5717644691467285, "rewards/margins": 8.832832336425781, "rewards/rejected": -13.404596328735352, "step": 14879 }, { "epoch": 2.31, "learning_rate": 3.233739301831008e-06, "logits/chosen": -1.1545614004135132, "logits/rejected": -2.350113868713379, "logps/chosen": -271.4776306152344, "logps/rejected": -547.8377685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.401081562042236, "rewards/margins": 13.09177017211914, "rewards/rejected": -20.49285125732422, "step": 14880 }, { "epoch": 2.31, "learning_rate": 3.23300586129986e-06, "logits/chosen": -2.821784257888794, "logits/rejected": -2.5849456787109375, "logps/chosen": -161.09716796875, "logps/rejected": -187.31788635253906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.307039260864258, "rewards/margins": 9.950483322143555, "rewards/rejected": -14.257522583007812, "step": 14881 }, { "epoch": 2.31, "learning_rate": 3.232272420768712e-06, "logits/chosen": -2.5197808742523193, "logits/rejected": -2.7814221382141113, "logps/chosen": -111.3823013305664, "logps/rejected": -309.56243896484375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.676876068115234, "rewards/margins": 10.998947143554688, "rewards/rejected": -16.675823211669922, "step": 14882 }, { "epoch": 2.31, "learning_rate": 3.231538980237564e-06, "logits/chosen": -1.0058242082595825, "logits/rejected": -2.6908962726593018, "logps/chosen": -189.09475708007812, "logps/rejected": -692.6390991210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.251733779907227, "rewards/margins": 12.099878311157227, "rewards/rejected": -18.351612091064453, "step": 14883 }, { "epoch": 2.31, "learning_rate": 3.230805539706416e-06, "logits/chosen": -2.5806190967559814, "logits/rejected": -2.0245249271392822, "logps/chosen": -193.76373291015625, "logps/rejected": -311.9946594238281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.861419200897217, "rewards/margins": 9.119969367980957, "rewards/rejected": -15.981388092041016, "step": 14884 }, { "epoch": 2.31, "learning_rate": 3.2300720991752684e-06, "logits/chosen": -2.5751445293426514, "logits/rejected": -1.875304102897644, "logps/chosen": -232.66238403320312, "logps/rejected": -294.7294616699219, "loss": 0.3689, "rewards/accuracies": 0.5, "rewards/chosen": -5.62665319442749, "rewards/margins": 6.150650501251221, "rewards/rejected": -11.777303695678711, "step": 14885 }, { "epoch": 2.32, "learning_rate": 3.2293386586441207e-06, "logits/chosen": -2.8601136207580566, "logits/rejected": -2.8302812576293945, "logps/chosen": -197.540283203125, "logps/rejected": -230.30157470703125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -6.765957832336426, "rewards/margins": 5.67044734954834, "rewards/rejected": -12.436405181884766, "step": 14886 }, { "epoch": 2.32, "learning_rate": 3.228605218112973e-06, "logits/chosen": -1.7830710411071777, "logits/rejected": -3.290334463119507, "logps/chosen": -207.91799926757812, "logps/rejected": -488.6961975097656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.8592658042907715, "rewards/margins": 9.257630348205566, "rewards/rejected": -17.11689567565918, "step": 14887 }, { "epoch": 2.32, "learning_rate": 3.227871777581825e-06, "logits/chosen": -1.6308759450912476, "logits/rejected": -2.6885879039764404, "logps/chosen": -209.6156005859375, "logps/rejected": -406.50750732421875, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -9.262972831726074, "rewards/margins": 7.1059489250183105, "rewards/rejected": -16.368921279907227, "step": 14888 }, { "epoch": 2.32, "learning_rate": 3.227138337050677e-06, "logits/chosen": -2.8818581104278564, "logits/rejected": -2.8617193698883057, "logps/chosen": -190.48013305664062, "logps/rejected": -257.79498291015625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -7.953108787536621, "rewards/margins": 6.422418594360352, "rewards/rejected": -14.375526428222656, "step": 14889 }, { "epoch": 2.32, "learning_rate": 3.226404896519529e-06, "logits/chosen": -1.180757999420166, "logits/rejected": -2.651942729949951, "logps/chosen": -113.08670043945312, "logps/rejected": -376.9208068847656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.559383392333984, "rewards/margins": 9.052163124084473, "rewards/rejected": -17.61154556274414, "step": 14890 }, { "epoch": 2.32, "learning_rate": 3.225671455988381e-06, "logits/chosen": -2.7508013248443604, "logits/rejected": -2.5773305892944336, "logps/chosen": -208.96981811523438, "logps/rejected": -217.600341796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.3484649658203125, "rewards/margins": 7.109307289123535, "rewards/rejected": -11.457772254943848, "step": 14891 }, { "epoch": 2.32, "learning_rate": 3.2249380154572332e-06, "logits/chosen": -2.8397159576416016, "logits/rejected": -2.874154806137085, "logps/chosen": -327.3607177734375, "logps/rejected": -536.9276733398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.631214141845703, "rewards/margins": 13.585548400878906, "rewards/rejected": -19.21676254272461, "step": 14892 }, { "epoch": 2.32, "learning_rate": 3.224204574926085e-06, "logits/chosen": -2.8812918663024902, "logits/rejected": -2.63529634475708, "logps/chosen": -287.95660400390625, "logps/rejected": -387.0111083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.565550327301025, "rewards/margins": 14.986488342285156, "rewards/rejected": -20.552040100097656, "step": 14893 }, { "epoch": 2.32, "learning_rate": 3.2234711343949374e-06, "logits/chosen": -2.589927911758423, "logits/rejected": -2.4464008808135986, "logps/chosen": -136.46731567382812, "logps/rejected": -223.5502166748047, "loss": 0.5479, "rewards/accuracies": 0.5, "rewards/chosen": -7.739106178283691, "rewards/margins": 4.377862930297852, "rewards/rejected": -12.116969108581543, "step": 14894 }, { "epoch": 2.32, "learning_rate": 3.2227376938637897e-06, "logits/chosen": -2.728052854537964, "logits/rejected": -1.7173993587493896, "logps/chosen": -557.6036376953125, "logps/rejected": -335.4488525390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2743287086486816, "rewards/margins": 8.73970890045166, "rewards/rejected": -12.0140380859375, "step": 14895 }, { "epoch": 2.32, "learning_rate": 3.222004253332642e-06, "logits/chosen": -2.8446950912475586, "logits/rejected": -2.7840633392333984, "logps/chosen": -434.20758056640625, "logps/rejected": -590.6202392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.358565330505371, "rewards/margins": 14.005426406860352, "rewards/rejected": -18.363990783691406, "step": 14896 }, { "epoch": 2.32, "learning_rate": 3.221270812801494e-06, "logits/chosen": -2.449275255203247, "logits/rejected": -2.968248128890991, "logps/chosen": -494.1798095703125, "logps/rejected": -646.3692626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.77085018157959, "rewards/margins": 12.6400785446167, "rewards/rejected": -21.41092872619629, "step": 14897 }, { "epoch": 2.32, "learning_rate": 3.220537372270346e-06, "logits/chosen": -1.8636418581008911, "logits/rejected": -2.5057337284088135, "logps/chosen": -241.516357421875, "logps/rejected": -448.69415283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.227243423461914, "rewards/margins": 9.102678298950195, "rewards/rejected": -15.32992172241211, "step": 14898 }, { "epoch": 2.32, "learning_rate": 3.219803931739198e-06, "logits/chosen": -2.1103999614715576, "logits/rejected": -2.796677589416504, "logps/chosen": -251.6697540283203, "logps/rejected": -369.72381591796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.776100158691406, "rewards/margins": 8.249069213867188, "rewards/rejected": -13.025169372558594, "step": 14899 }, { "epoch": 2.32, "learning_rate": 3.21907049120805e-06, "logits/chosen": -2.570239782333374, "logits/rejected": -2.848360061645508, "logps/chosen": -72.6917724609375, "logps/rejected": -231.29429626464844, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.74409294128418, "rewards/margins": 6.650271415710449, "rewards/rejected": -12.394364356994629, "step": 14900 }, { "epoch": 2.32, "learning_rate": 3.2183370506769022e-06, "logits/chosen": -3.017838716506958, "logits/rejected": -3.039633274078369, "logps/chosen": -301.875, "logps/rejected": -456.9537048339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.737102508544922, "rewards/margins": 12.197946548461914, "rewards/rejected": -17.93505096435547, "step": 14901 }, { "epoch": 2.32, "learning_rate": 3.217603610145754e-06, "logits/chosen": -2.8392817974090576, "logits/rejected": -3.3500945568084717, "logps/chosen": -102.21984100341797, "logps/rejected": -309.9007568359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.790509223937988, "rewards/margins": 9.492706298828125, "rewards/rejected": -18.283214569091797, "step": 14902 }, { "epoch": 2.32, "learning_rate": 3.216870169614607e-06, "logits/chosen": -2.504617214202881, "logits/rejected": -2.89465594291687, "logps/chosen": -239.50439453125, "logps/rejected": -425.00933837890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.926496982574463, "rewards/margins": 9.948406219482422, "rewards/rejected": -13.874903678894043, "step": 14903 }, { "epoch": 2.32, "learning_rate": 3.2161367290834587e-06, "logits/chosen": -2.730971097946167, "logits/rejected": -2.512238025665283, "logps/chosen": -96.66376495361328, "logps/rejected": -303.8693542480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.30426025390625, "rewards/margins": 14.522743225097656, "rewards/rejected": -18.827003479003906, "step": 14904 }, { "epoch": 2.32, "learning_rate": 3.215403288552311e-06, "logits/chosen": -1.7663763761520386, "logits/rejected": -2.518781900405884, "logps/chosen": -308.43634033203125, "logps/rejected": -435.7506408691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.358556747436523, "rewards/margins": 11.137052536010742, "rewards/rejected": -16.495609283447266, "step": 14905 }, { "epoch": 2.32, "learning_rate": 3.214669848021163e-06, "logits/chosen": -1.5556738376617432, "logits/rejected": -2.4865643978118896, "logps/chosen": -207.87838745117188, "logps/rejected": -520.3779907226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.775864124298096, "rewards/margins": 12.784414291381836, "rewards/rejected": -17.560279846191406, "step": 14906 }, { "epoch": 2.32, "learning_rate": 3.2139364074900152e-06, "logits/chosen": -2.79717755317688, "logits/rejected": -2.799818515777588, "logps/chosen": -252.58474731445312, "logps/rejected": -322.6889343261719, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.1093854904174805, "rewards/margins": 7.187644958496094, "rewards/rejected": -13.297030448913574, "step": 14907 }, { "epoch": 2.32, "learning_rate": 3.213202966958867e-06, "logits/chosen": -1.9945292472839355, "logits/rejected": -2.752014636993408, "logps/chosen": -146.84129333496094, "logps/rejected": -404.5219421386719, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/chosen": -5.364123344421387, "rewards/margins": 7.517184734344482, "rewards/rejected": -12.881307601928711, "step": 14908 }, { "epoch": 2.32, "learning_rate": 3.2124695264277194e-06, "logits/chosen": -2.5071730613708496, "logits/rejected": -2.0247159004211426, "logps/chosen": -313.6447448730469, "logps/rejected": -477.85577392578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.691476821899414, "rewards/margins": 10.894113540649414, "rewards/rejected": -18.585590362548828, "step": 14909 }, { "epoch": 2.32, "learning_rate": 3.2117360858965713e-06, "logits/chosen": -2.7369778156280518, "logits/rejected": -3.0818586349487305, "logps/chosen": -781.2642822265625, "logps/rejected": -837.057861328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.587140083312988, "rewards/margins": 9.284335136413574, "rewards/rejected": -15.871475219726562, "step": 14910 }, { "epoch": 2.32, "learning_rate": 3.2110026453654236e-06, "logits/chosen": -2.184030294418335, "logits/rejected": -2.029574394226074, "logps/chosen": -441.5050048828125, "logps/rejected": -565.7733764648438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.156466007232666, "rewards/margins": 12.438820838928223, "rewards/rejected": -17.595287322998047, "step": 14911 }, { "epoch": 2.32, "learning_rate": 3.210269204834276e-06, "logits/chosen": -1.6482874155044556, "logits/rejected": -2.4217689037323, "logps/chosen": -163.84823608398438, "logps/rejected": -384.3502197265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.382511615753174, "rewards/margins": 8.233619689941406, "rewards/rejected": -14.616130828857422, "step": 14912 }, { "epoch": 2.32, "learning_rate": 3.2095357643031278e-06, "logits/chosen": -1.3249541521072388, "logits/rejected": -2.6984009742736816, "logps/chosen": -189.7174072265625, "logps/rejected": -594.1923828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.164219856262207, "rewards/margins": 11.430780410766602, "rewards/rejected": -17.595001220703125, "step": 14913 }, { "epoch": 2.32, "learning_rate": 3.20880232377198e-06, "logits/chosen": -2.7493784427642822, "logits/rejected": -2.6328299045562744, "logps/chosen": -130.4152374267578, "logps/rejected": -167.46885681152344, "loss": 0.144, "rewards/accuracies": 1.0, "rewards/chosen": -9.207134246826172, "rewards/margins": 4.129532814025879, "rewards/rejected": -13.33666706085205, "step": 14914 }, { "epoch": 2.32, "learning_rate": 3.208068883240832e-06, "logits/chosen": -2.4354593753814697, "logits/rejected": -2.746436357498169, "logps/chosen": -71.47640991210938, "logps/rejected": -281.8603210449219, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.3449437618255615, "rewards/margins": 9.21983814239502, "rewards/rejected": -12.56478214263916, "step": 14915 }, { "epoch": 2.32, "learning_rate": 3.2073354427096842e-06, "logits/chosen": -2.4243438243865967, "logits/rejected": -2.1903023719787598, "logps/chosen": -245.77308654785156, "logps/rejected": -416.99072265625, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -7.2358903884887695, "rewards/margins": 6.605072975158691, "rewards/rejected": -13.840963363647461, "step": 14916 }, { "epoch": 2.32, "learning_rate": 3.206602002178536e-06, "logits/chosen": -2.3439455032348633, "logits/rejected": -2.4108784198760986, "logps/chosen": -128.78097534179688, "logps/rejected": -270.51171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.410475254058838, "rewards/margins": 10.577010154724121, "rewards/rejected": -13.987485885620117, "step": 14917 }, { "epoch": 2.32, "learning_rate": 3.2058685616473884e-06, "logits/chosen": -1.8857169151306152, "logits/rejected": -2.5455167293548584, "logps/chosen": -246.47024536132812, "logps/rejected": -346.42681884765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.7065963745117188, "rewards/margins": 8.366209030151367, "rewards/rejected": -12.072805404663086, "step": 14918 }, { "epoch": 2.32, "learning_rate": 3.2051351211162403e-06, "logits/chosen": -2.665609836578369, "logits/rejected": -2.8195040225982666, "logps/chosen": -583.82861328125, "logps/rejected": -499.8377685546875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.8427653312683105, "rewards/margins": 9.850204467773438, "rewards/rejected": -15.69296932220459, "step": 14919 }, { "epoch": 2.32, "learning_rate": 3.2044016805850926e-06, "logits/chosen": -1.6819190979003906, "logits/rejected": -2.179391384124756, "logps/chosen": -157.52098083496094, "logps/rejected": -528.827392578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.045598030090332, "rewards/margins": 13.378413200378418, "rewards/rejected": -19.42401123046875, "step": 14920 }, { "epoch": 2.32, "learning_rate": 3.203668240053945e-06, "logits/chosen": -2.3105287551879883, "logits/rejected": -3.10720157623291, "logps/chosen": -155.69552612304688, "logps/rejected": -392.35906982421875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.290199279785156, "rewards/margins": 7.849474906921387, "rewards/rejected": -13.139674186706543, "step": 14921 }, { "epoch": 2.32, "learning_rate": 3.2029347995227968e-06, "logits/chosen": -1.6609866619110107, "logits/rejected": -2.7750751972198486, "logps/chosen": -117.48164367675781, "logps/rejected": -334.2115783691406, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -6.255034446716309, "rewards/margins": 5.765653610229492, "rewards/rejected": -12.0206880569458, "step": 14922 }, { "epoch": 2.32, "learning_rate": 3.202201358991649e-06, "logits/chosen": -2.662753105163574, "logits/rejected": -2.783811569213867, "logps/chosen": -285.8483581542969, "logps/rejected": -560.6500854492188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.298553466796875, "rewards/margins": 14.020384788513184, "rewards/rejected": -20.318939208984375, "step": 14923 }, { "epoch": 2.32, "learning_rate": 3.201467918460501e-06, "logits/chosen": -2.7882769107818604, "logits/rejected": -2.6762540340423584, "logps/chosen": -536.548583984375, "logps/rejected": -828.3806762695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.22447681427002, "rewards/margins": 14.154012680053711, "rewards/rejected": -23.378490447998047, "step": 14924 }, { "epoch": 2.32, "learning_rate": 3.2007344779293533e-06, "logits/chosen": -2.06839656829834, "logits/rejected": -2.4691145420074463, "logps/chosen": -301.67791748046875, "logps/rejected": -319.2354736328125, "loss": 0.1508, "rewards/accuracies": 1.0, "rewards/chosen": -6.24087381362915, "rewards/margins": 5.508744239807129, "rewards/rejected": -11.749618530273438, "step": 14925 }, { "epoch": 2.32, "learning_rate": 3.200001037398205e-06, "logits/chosen": -2.2250144481658936, "logits/rejected": -2.5619466304779053, "logps/chosen": -405.3672790527344, "logps/rejected": -476.4720458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.732322692871094, "rewards/margins": 10.085860252380371, "rewards/rejected": -18.81818389892578, "step": 14926 }, { "epoch": 2.32, "learning_rate": 3.1992675968670574e-06, "logits/chosen": -2.6999435424804688, "logits/rejected": -2.6365580558776855, "logps/chosen": -517.856201171875, "logps/rejected": -578.6710205078125, "loss": 0.1652, "rewards/accuracies": 1.0, "rewards/chosen": -5.4667253494262695, "rewards/margins": 7.509560585021973, "rewards/rejected": -12.976285934448242, "step": 14927 }, { "epoch": 2.32, "learning_rate": 3.1985341563359097e-06, "logits/chosen": -1.7719690799713135, "logits/rejected": -2.5782487392425537, "logps/chosen": -128.88531494140625, "logps/rejected": -296.069580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.363215446472168, "rewards/margins": 9.954689025878906, "rewards/rejected": -15.317904472351074, "step": 14928 }, { "epoch": 2.32, "learning_rate": 3.197800715804762e-06, "logits/chosen": -2.924866199493408, "logits/rejected": -1.8035088777542114, "logps/chosen": -972.238525390625, "logps/rejected": -424.0903625488281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.39544677734375, "rewards/margins": 10.649340629577637, "rewards/rejected": -16.044788360595703, "step": 14929 }, { "epoch": 2.32, "learning_rate": 3.197067275273614e-06, "logits/chosen": -2.385345697402954, "logits/rejected": -2.6497249603271484, "logps/chosen": -431.78778076171875, "logps/rejected": -496.970703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.498202800750732, "rewards/margins": 11.871844291687012, "rewards/rejected": -18.37004852294922, "step": 14930 }, { "epoch": 2.32, "learning_rate": 3.196333834742466e-06, "logits/chosen": -2.5513088703155518, "logits/rejected": -2.922968626022339, "logps/chosen": -420.5155944824219, "logps/rejected": -439.2657470703125, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": -8.660923957824707, "rewards/margins": 3.065437078475952, "rewards/rejected": -11.726361274719238, "step": 14931 }, { "epoch": 2.32, "learning_rate": 3.195600394211318e-06, "logits/chosen": -2.560800790786743, "logits/rejected": -2.4392077922821045, "logps/chosen": -456.8721923828125, "logps/rejected": -392.4305419921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.165111541748047, "rewards/margins": 8.881791114807129, "rewards/rejected": -17.04690170288086, "step": 14932 }, { "epoch": 2.32, "learning_rate": 3.19486695368017e-06, "logits/chosen": -1.123143196105957, "logits/rejected": -2.9363691806793213, "logps/chosen": -193.24057006835938, "logps/rejected": -684.202392578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.6467790603637695, "rewards/margins": 7.781110763549805, "rewards/rejected": -14.42789077758789, "step": 14933 }, { "epoch": 2.32, "learning_rate": 3.1941335131490223e-06, "logits/chosen": -2.684117555618286, "logits/rejected": -2.5356342792510986, "logps/chosen": -567.2496337890625, "logps/rejected": -517.8302612304688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.490750312805176, "rewards/margins": 9.3842134475708, "rewards/rejected": -19.874963760375977, "step": 14934 }, { "epoch": 2.32, "learning_rate": 3.193400072617874e-06, "logits/chosen": -2.132631778717041, "logits/rejected": -2.7642054557800293, "logps/chosen": -342.6185607910156, "logps/rejected": -662.2804565429688, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.810957431793213, "rewards/margins": 9.814030647277832, "rewards/rejected": -14.624988555908203, "step": 14935 }, { "epoch": 2.32, "learning_rate": 3.1926666320867265e-06, "logits/chosen": -2.484766721725464, "logits/rejected": -2.3293302059173584, "logps/chosen": -209.29327392578125, "logps/rejected": -418.046142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.216838836669922, "rewards/margins": 11.417153358459473, "rewards/rejected": -17.633991241455078, "step": 14936 }, { "epoch": 2.32, "learning_rate": 3.1919331915555788e-06, "logits/chosen": -2.472134590148926, "logits/rejected": -1.7436350584030151, "logps/chosen": -493.04583740234375, "logps/rejected": -473.94219970703125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -7.702108383178711, "rewards/margins": 9.505685806274414, "rewards/rejected": -17.207794189453125, "step": 14937 }, { "epoch": 2.32, "learning_rate": 3.191199751024431e-06, "logits/chosen": -1.463952660560608, "logits/rejected": -2.5309062004089355, "logps/chosen": -196.4540252685547, "logps/rejected": -437.559814453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.933206081390381, "rewards/margins": 9.186525344848633, "rewards/rejected": -15.119731903076172, "step": 14938 }, { "epoch": 2.32, "learning_rate": 3.190466310493283e-06, "logits/chosen": -2.4846603870391846, "logits/rejected": -1.6399035453796387, "logps/chosen": -236.9619140625, "logps/rejected": -248.8378143310547, "loss": 0.7052, "rewards/accuracies": 0.5, "rewards/chosen": -6.405091285705566, "rewards/margins": 7.433952808380127, "rewards/rejected": -13.839044570922852, "step": 14939 }, { "epoch": 2.32, "learning_rate": 3.189732869962135e-06, "logits/chosen": -2.5911765098571777, "logits/rejected": -2.409973621368408, "logps/chosen": -655.241455078125, "logps/rejected": -541.5272827148438, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -7.140406608581543, "rewards/margins": 4.933455467224121, "rewards/rejected": -12.073862075805664, "step": 14940 }, { "epoch": 2.32, "learning_rate": 3.188999429430987e-06, "logits/chosen": -2.4696760177612305, "logits/rejected": -2.7056689262390137, "logps/chosen": -238.3458251953125, "logps/rejected": -397.4989929199219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.967576026916504, "rewards/margins": 8.798118591308594, "rewards/rejected": -16.76569366455078, "step": 14941 }, { "epoch": 2.32, "learning_rate": 3.188265988899839e-06, "logits/chosen": -2.5479323863983154, "logits/rejected": -2.558007001876831, "logps/chosen": -353.9569091796875, "logps/rejected": -470.5377197265625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -8.36936092376709, "rewards/margins": 12.35854721069336, "rewards/rejected": -20.727909088134766, "step": 14942 }, { "epoch": 2.32, "learning_rate": 3.1875325483686913e-06, "logits/chosen": -2.228282928466797, "logits/rejected": -2.894191026687622, "logps/chosen": -303.5162658691406, "logps/rejected": -360.1805114746094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -2.8211426734924316, "rewards/margins": 8.081613540649414, "rewards/rejected": -10.902756690979004, "step": 14943 }, { "epoch": 2.32, "learning_rate": 3.186799107837543e-06, "logits/chosen": -2.30926251411438, "logits/rejected": -2.7888081073760986, "logps/chosen": -274.29534912109375, "logps/rejected": -394.4524841308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.699714660644531, "rewards/margins": 11.760204315185547, "rewards/rejected": -20.459918975830078, "step": 14944 }, { "epoch": 2.32, "learning_rate": 3.186065667306396e-06, "logits/chosen": -1.445015788078308, "logits/rejected": -2.5152978897094727, "logps/chosen": -200.248046875, "logps/rejected": -731.9288330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5544843673706055, "rewards/margins": 17.811321258544922, "rewards/rejected": -24.365806579589844, "step": 14945 }, { "epoch": 2.32, "learning_rate": 3.1853322267752478e-06, "logits/chosen": -2.675339460372925, "logits/rejected": -2.8262381553649902, "logps/chosen": -362.6267395019531, "logps/rejected": -470.5027770996094, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.597893714904785, "rewards/margins": 10.92696762084961, "rewards/rejected": -19.52486228942871, "step": 14946 }, { "epoch": 2.32, "learning_rate": 3.1845987862441e-06, "logits/chosen": -2.2327628135681152, "logits/rejected": -2.605851411819458, "logps/chosen": -176.1945343017578, "logps/rejected": -384.65362548828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.665390968322754, "rewards/margins": 11.704797744750977, "rewards/rejected": -17.370187759399414, "step": 14947 }, { "epoch": 2.32, "learning_rate": 3.183865345712952e-06, "logits/chosen": -1.1455061435699463, "logits/rejected": -2.797834634780884, "logps/chosen": -279.59100341796875, "logps/rejected": -623.5112915039062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.504340648651123, "rewards/margins": 8.65247917175293, "rewards/rejected": -15.156820297241211, "step": 14948 }, { "epoch": 2.32, "learning_rate": 3.1831319051818043e-06, "logits/chosen": -1.8360563516616821, "logits/rejected": -2.489131212234497, "logps/chosen": -136.03445434570312, "logps/rejected": -353.56396484375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -6.249569892883301, "rewards/margins": 9.611091613769531, "rewards/rejected": -15.860661506652832, "step": 14949 }, { "epoch": 2.33, "learning_rate": 3.182398464650656e-06, "logits/chosen": -0.7256712913513184, "logits/rejected": -2.6606085300445557, "logps/chosen": -89.39051055908203, "logps/rejected": -745.861572265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.8832249641418457, "rewards/margins": 15.609910011291504, "rewards/rejected": -19.493135452270508, "step": 14950 }, { "epoch": 2.33, "learning_rate": 3.181665024119508e-06, "logits/chosen": -2.298499822616577, "logits/rejected": -2.547778367996216, "logps/chosen": -161.88162231445312, "logps/rejected": -470.0081787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.870478630065918, "rewards/margins": 16.823680877685547, "rewards/rejected": -21.69416046142578, "step": 14951 }, { "epoch": 2.33, "learning_rate": 3.1809315835883603e-06, "logits/chosen": -2.5068767070770264, "logits/rejected": -2.684096574783325, "logps/chosen": -599.518310546875, "logps/rejected": -714.425048828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.042259216308594, "rewards/margins": 9.228723526000977, "rewards/rejected": -18.27098274230957, "step": 14952 }, { "epoch": 2.33, "learning_rate": 3.1801981430572126e-06, "logits/chosen": -2.9445724487304688, "logits/rejected": -3.029712200164795, "logps/chosen": -200.7794189453125, "logps/rejected": -470.0478210449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.021366596221924, "rewards/margins": 12.473424911499023, "rewards/rejected": -16.49479103088379, "step": 14953 }, { "epoch": 2.33, "learning_rate": 3.179464702526065e-06, "logits/chosen": -1.5363614559173584, "logits/rejected": -2.820963144302368, "logps/chosen": -223.69442749023438, "logps/rejected": -517.48046875, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -7.251989841461182, "rewards/margins": 5.0594482421875, "rewards/rejected": -12.311437606811523, "step": 14954 }, { "epoch": 2.33, "learning_rate": 3.178731261994917e-06, "logits/chosen": -2.7585606575012207, "logits/rejected": -2.459953784942627, "logps/chosen": -163.100341796875, "logps/rejected": -263.18060302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9127402305603027, "rewards/margins": 12.401298522949219, "rewards/rejected": -15.31403923034668, "step": 14955 }, { "epoch": 2.33, "learning_rate": 3.177997821463769e-06, "logits/chosen": -1.8893558979034424, "logits/rejected": -2.1487035751342773, "logps/chosen": -454.2626953125, "logps/rejected": -552.6844482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.253778457641602, "rewards/margins": 11.69530963897705, "rewards/rejected": -17.94908905029297, "step": 14956 }, { "epoch": 2.33, "learning_rate": 3.177264380932621e-06, "logits/chosen": -2.3594300746917725, "logits/rejected": -2.7790749073028564, "logps/chosen": -140.77047729492188, "logps/rejected": -325.2543029785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.105151176452637, "rewards/margins": 13.870696067810059, "rewards/rejected": -22.975847244262695, "step": 14957 }, { "epoch": 2.33, "learning_rate": 3.1765309404014733e-06, "logits/chosen": -2.26145339012146, "logits/rejected": -2.653183937072754, "logps/chosen": -165.07785034179688, "logps/rejected": -223.99746704101562, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -4.9229230880737305, "rewards/margins": 7.010424613952637, "rewards/rejected": -11.933347702026367, "step": 14958 }, { "epoch": 2.33, "learning_rate": 3.175797499870325e-06, "logits/chosen": -1.9862360954284668, "logits/rejected": -2.890838861465454, "logps/chosen": -231.3368682861328, "logps/rejected": -403.03497314453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.963987350463867, "rewards/margins": 7.351449966430664, "rewards/rejected": -14.315437316894531, "step": 14959 }, { "epoch": 2.33, "learning_rate": 3.175064059339177e-06, "logits/chosen": -1.6146944761276245, "logits/rejected": -2.713549852371216, "logps/chosen": -99.82270812988281, "logps/rejected": -544.2353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.028218746185303, "rewards/margins": 17.637941360473633, "rewards/rejected": -23.666160583496094, "step": 14960 }, { "epoch": 2.33, "learning_rate": 3.1743306188080293e-06, "logits/chosen": -1.13871169090271, "logits/rejected": -2.7155983448028564, "logps/chosen": -260.7752685546875, "logps/rejected": -348.89642333984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4195282459259033, "rewards/margins": 9.640365600585938, "rewards/rejected": -13.059894561767578, "step": 14961 }, { "epoch": 2.33, "learning_rate": 3.1735971782768816e-06, "logits/chosen": -2.6633102893829346, "logits/rejected": -2.7836947441101074, "logps/chosen": -129.81625366210938, "logps/rejected": -404.9588623046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.760383605957031, "rewards/margins": 9.186382293701172, "rewards/rejected": -13.946765899658203, "step": 14962 }, { "epoch": 2.33, "learning_rate": 3.172863737745734e-06, "logits/chosen": -3.003952741622925, "logits/rejected": -2.9986138343811035, "logps/chosen": -416.3392028808594, "logps/rejected": -373.05218505859375, "loss": 0.0934, "rewards/accuracies": 1.0, "rewards/chosen": -10.730426788330078, "rewards/margins": 4.560128211975098, "rewards/rejected": -15.29055404663086, "step": 14963 }, { "epoch": 2.33, "learning_rate": 3.172130297214586e-06, "logits/chosen": -2.2567028999328613, "logits/rejected": -2.605546474456787, "logps/chosen": -128.3777313232422, "logps/rejected": -363.0379943847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.881109714508057, "rewards/margins": 10.429670333862305, "rewards/rejected": -15.31078052520752, "step": 14964 }, { "epoch": 2.33, "learning_rate": 3.171396856683438e-06, "logits/chosen": -1.8434669971466064, "logits/rejected": -2.5213515758514404, "logps/chosen": -214.0294952392578, "logps/rejected": -329.2412109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.307618618011475, "rewards/margins": 7.338759899139404, "rewards/rejected": -12.646378517150879, "step": 14965 }, { "epoch": 2.33, "learning_rate": 3.17066341615229e-06, "logits/chosen": -2.2272658348083496, "logits/rejected": -2.310574531555176, "logps/chosen": -932.8846435546875, "logps/rejected": -928.4215087890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.571362495422363, "rewards/margins": 9.69550895690918, "rewards/rejected": -16.266870498657227, "step": 14966 }, { "epoch": 2.33, "learning_rate": 3.1699299756211423e-06, "logits/chosen": -2.556427001953125, "logits/rejected": -2.864346742630005, "logps/chosen": -358.5866394042969, "logps/rejected": -444.6625671386719, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.388970851898193, "rewards/margins": 6.510674953460693, "rewards/rejected": -12.899645805358887, "step": 14967 }, { "epoch": 2.33, "learning_rate": 3.169196535089994e-06, "logits/chosen": -1.939285397529602, "logits/rejected": -2.4316327571868896, "logps/chosen": -138.82492065429688, "logps/rejected": -330.5286865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.521091938018799, "rewards/margins": 12.843193054199219, "rewards/rejected": -17.36428451538086, "step": 14968 }, { "epoch": 2.33, "learning_rate": 3.168463094558846e-06, "logits/chosen": -2.544578790664673, "logits/rejected": -2.277151107788086, "logps/chosen": -191.31390380859375, "logps/rejected": -407.8454895019531, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.997580051422119, "rewards/margins": 12.194979667663574, "rewards/rejected": -19.19255828857422, "step": 14969 }, { "epoch": 2.33, "learning_rate": 3.167729654027699e-06, "logits/chosen": -2.469287157058716, "logits/rejected": -2.731234550476074, "logps/chosen": -137.7975616455078, "logps/rejected": -198.9452667236328, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -6.257109642028809, "rewards/margins": 6.50834321975708, "rewards/rejected": -12.765453338623047, "step": 14970 }, { "epoch": 2.33, "learning_rate": 3.1669962134965507e-06, "logits/chosen": -1.9274011850357056, "logits/rejected": -2.5382237434387207, "logps/chosen": -381.864501953125, "logps/rejected": -421.1785888671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.611395359039307, "rewards/margins": 8.68414306640625, "rewards/rejected": -13.295537948608398, "step": 14971 }, { "epoch": 2.33, "learning_rate": 3.166262772965403e-06, "logits/chosen": -1.3526904582977295, "logits/rejected": -2.4731550216674805, "logps/chosen": -130.61355590820312, "logps/rejected": -319.54229736328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.7425408363342285, "rewards/margins": 8.546913146972656, "rewards/rejected": -15.289454460144043, "step": 14972 }, { "epoch": 2.33, "learning_rate": 3.165529332434255e-06, "logits/chosen": -3.1741647720336914, "logits/rejected": -2.9174916744232178, "logps/chosen": -436.9635314941406, "logps/rejected": -405.33197021484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.073725700378418, "rewards/margins": 7.714709281921387, "rewards/rejected": -14.788434982299805, "step": 14973 }, { "epoch": 2.33, "learning_rate": 3.164795891903107e-06, "logits/chosen": -2.4323887825012207, "logits/rejected": -3.1917409896850586, "logps/chosen": -122.31686401367188, "logps/rejected": -348.7109069824219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.611825466156006, "rewards/margins": 9.289660453796387, "rewards/rejected": -12.90148639678955, "step": 14974 }, { "epoch": 2.33, "learning_rate": 3.164062451371959e-06, "logits/chosen": -2.615922212600708, "logits/rejected": -2.4930930137634277, "logps/chosen": -606.2777099609375, "logps/rejected": -479.1814270019531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.929556369781494, "rewards/margins": 10.646621704101562, "rewards/rejected": -14.576179504394531, "step": 14975 }, { "epoch": 2.33, "learning_rate": 3.1633290108408113e-06, "logits/chosen": -0.5662233233451843, "logits/rejected": -2.7724826335906982, "logps/chosen": -155.97470092773438, "logps/rejected": -643.4111328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.448452472686768, "rewards/margins": 7.692795276641846, "rewards/rejected": -15.141247749328613, "step": 14976 }, { "epoch": 2.33, "learning_rate": 3.162595570309663e-06, "logits/chosen": -2.6131060123443604, "logits/rejected": -2.6251907348632812, "logps/chosen": -292.7105712890625, "logps/rejected": -540.228759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.972737789154053, "rewards/margins": 15.078968048095703, "rewards/rejected": -22.05170440673828, "step": 14977 }, { "epoch": 2.33, "learning_rate": 3.1618621297785155e-06, "logits/chosen": -2.9017670154571533, "logits/rejected": -3.2568914890289307, "logps/chosen": -145.85366821289062, "logps/rejected": -307.181396484375, "loss": 0.5093, "rewards/accuracies": 0.5, "rewards/chosen": -4.819510459899902, "rewards/margins": 3.78351092338562, "rewards/rejected": -8.603021621704102, "step": 14978 }, { "epoch": 2.33, "learning_rate": 3.161128689247368e-06, "logits/chosen": -2.2214250564575195, "logits/rejected": -2.786592483520508, "logps/chosen": -126.56781005859375, "logps/rejected": -240.81585693359375, "loss": 0.0481, "rewards/accuracies": 1.0, "rewards/chosen": -8.285673141479492, "rewards/margins": 5.527319431304932, "rewards/rejected": -13.812992095947266, "step": 14979 }, { "epoch": 2.33, "learning_rate": 3.1603952487162197e-06, "logits/chosen": -1.993791103363037, "logits/rejected": -2.7579565048217773, "logps/chosen": -122.98211669921875, "logps/rejected": -261.3599853515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.458854675292969, "rewards/margins": 7.64015007019043, "rewards/rejected": -12.099004745483398, "step": 14980 }, { "epoch": 2.33, "learning_rate": 3.159661808185072e-06, "logits/chosen": -2.146754503250122, "logits/rejected": -2.373913049697876, "logps/chosen": -200.62973022460938, "logps/rejected": -479.0565490722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.558549880981445, "rewards/margins": 14.043292999267578, "rewards/rejected": -23.601844787597656, "step": 14981 }, { "epoch": 2.33, "learning_rate": 3.158928367653924e-06, "logits/chosen": -2.7157230377197266, "logits/rejected": -2.8065919876098633, "logps/chosen": -546.8780517578125, "logps/rejected": -659.4767456054688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.472005844116211, "rewards/margins": 9.81976318359375, "rewards/rejected": -19.29176902770996, "step": 14982 }, { "epoch": 2.33, "learning_rate": 3.158194927122776e-06, "logits/chosen": -0.9653066396713257, "logits/rejected": -2.684286594390869, "logps/chosen": -202.19573974609375, "logps/rejected": -388.9100036621094, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.90718412399292, "rewards/margins": 6.788446426391602, "rewards/rejected": -11.69563102722168, "step": 14983 }, { "epoch": 2.33, "learning_rate": 3.157461486591628e-06, "logits/chosen": -2.5769166946411133, "logits/rejected": -3.003831624984741, "logps/chosen": -148.34133911132812, "logps/rejected": -357.5176086425781, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.825675964355469, "rewards/margins": 9.052496910095215, "rewards/rejected": -13.878172874450684, "step": 14984 }, { "epoch": 2.33, "learning_rate": 3.1567280460604804e-06, "logits/chosen": -2.404839038848877, "logits/rejected": -2.6076927185058594, "logps/chosen": -631.7274169921875, "logps/rejected": -575.2108154296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.524271011352539, "rewards/margins": 7.960906505584717, "rewards/rejected": -13.485177993774414, "step": 14985 }, { "epoch": 2.33, "learning_rate": 3.1559946055293322e-06, "logits/chosen": -2.7813258171081543, "logits/rejected": -2.699693441390991, "logps/chosen": -258.6044921875, "logps/rejected": -302.97613525390625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -4.398255348205566, "rewards/margins": 7.297098636627197, "rewards/rejected": -11.695354461669922, "step": 14986 }, { "epoch": 2.33, "learning_rate": 3.155261164998185e-06, "logits/chosen": -2.1756603717803955, "logits/rejected": -2.8914356231689453, "logps/chosen": -424.3429870605469, "logps/rejected": -579.083251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.752239227294922, "rewards/margins": 11.21346378326416, "rewards/rejected": -17.965702056884766, "step": 14987 }, { "epoch": 2.33, "learning_rate": 3.154527724467037e-06, "logits/chosen": -2.9245407581329346, "logits/rejected": -2.3818118572235107, "logps/chosen": -731.4603271484375, "logps/rejected": -252.3463592529297, "loss": 0.5365, "rewards/accuracies": 0.5, "rewards/chosen": -2.9232094287872314, "rewards/margins": 7.033237457275391, "rewards/rejected": -9.956446647644043, "step": 14988 }, { "epoch": 2.33, "learning_rate": 3.1537942839358887e-06, "logits/chosen": -2.740560293197632, "logits/rejected": -2.8266632556915283, "logps/chosen": -413.94775390625, "logps/rejected": -463.60186767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.451507568359375, "rewards/margins": 10.804866790771484, "rewards/rejected": -16.25637435913086, "step": 14989 }, { "epoch": 2.33, "learning_rate": 3.153060843404741e-06, "logits/chosen": -2.0165281295776367, "logits/rejected": -2.4794747829437256, "logps/chosen": -555.8216552734375, "logps/rejected": -718.10205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.43527889251709, "rewards/margins": 15.42715072631836, "rewards/rejected": -20.862430572509766, "step": 14990 }, { "epoch": 2.33, "learning_rate": 3.152327402873593e-06, "logits/chosen": -2.580097198486328, "logits/rejected": -2.519444465637207, "logps/chosen": -547.751953125, "logps/rejected": -543.2353515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.427901268005371, "rewards/margins": 11.209946632385254, "rewards/rejected": -15.637847900390625, "step": 14991 }, { "epoch": 2.33, "learning_rate": 3.151593962342445e-06, "logits/chosen": -1.7081547975540161, "logits/rejected": -2.683964252471924, "logps/chosen": -169.60348510742188, "logps/rejected": -468.8252258300781, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.737773418426514, "rewards/margins": 6.600439071655273, "rewards/rejected": -13.338212966918945, "step": 14992 }, { "epoch": 2.33, "learning_rate": 3.150860521811297e-06, "logits/chosen": -2.6656458377838135, "logits/rejected": -2.6426446437835693, "logps/chosen": -359.148681640625, "logps/rejected": -248.70079040527344, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.3142595291137695, "rewards/margins": 5.995721817016602, "rewards/rejected": -11.309980392456055, "step": 14993 }, { "epoch": 2.33, "learning_rate": 3.1501270812801494e-06, "logits/chosen": -1.6066631078720093, "logits/rejected": -2.739563226699829, "logps/chosen": -278.9635009765625, "logps/rejected": -437.6048889160156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.678197860717773, "rewards/margins": 9.105916976928711, "rewards/rejected": -16.784114837646484, "step": 14994 }, { "epoch": 2.33, "learning_rate": 3.1493936407490017e-06, "logits/chosen": -2.7496564388275146, "logits/rejected": -1.9160497188568115, "logps/chosen": -1068.2119140625, "logps/rejected": -673.9173583984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.726696968078613, "rewards/margins": 10.712225914001465, "rewards/rejected": -20.438922882080078, "step": 14995 }, { "epoch": 2.33, "learning_rate": 3.148660200217854e-06, "logits/chosen": -1.7966784238815308, "logits/rejected": -2.541665554046631, "logps/chosen": -314.71307373046875, "logps/rejected": -488.1607360839844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.7613325119018555, "rewards/margins": 11.823942184448242, "rewards/rejected": -19.58527374267578, "step": 14996 }, { "epoch": 2.33, "learning_rate": 3.147926759686706e-06, "logits/chosen": -2.008467435836792, "logits/rejected": -2.817182779312134, "logps/chosen": -90.50736236572266, "logps/rejected": -264.89886474609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.524980545043945, "rewards/margins": 9.335902214050293, "rewards/rejected": -14.860883712768555, "step": 14997 }, { "epoch": 2.33, "learning_rate": 3.147193319155558e-06, "logits/chosen": -2.374429941177368, "logits/rejected": -2.5243117809295654, "logps/chosen": -317.9465026855469, "logps/rejected": -423.9413757324219, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.337831497192383, "rewards/margins": 9.870856285095215, "rewards/rejected": -16.20868682861328, "step": 14998 }, { "epoch": 2.33, "learning_rate": 3.14645987862441e-06, "logits/chosen": -1.333102822303772, "logits/rejected": -2.6012561321258545, "logps/chosen": -270.31097412109375, "logps/rejected": -648.6137084960938, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -11.924619674682617, "rewards/margins": 5.071934700012207, "rewards/rejected": -16.99655532836914, "step": 14999 }, { "epoch": 2.33, "learning_rate": 3.145726438093262e-06, "logits/chosen": -1.7356956005096436, "logits/rejected": -2.5373356342315674, "logps/chosen": -100.59825134277344, "logps/rejected": -328.7707824707031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.286384344100952, "rewards/margins": 11.565317153930664, "rewards/rejected": -14.851701736450195, "step": 15000 }, { "epoch": 2.33, "learning_rate": 3.1449929975621142e-06, "logits/chosen": -2.329127788543701, "logits/rejected": -1.6179958581924438, "logps/chosen": -447.33197021484375, "logps/rejected": -409.380859375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.633852958679199, "rewards/margins": 9.703453063964844, "rewards/rejected": -15.337306022644043, "step": 15001 }, { "epoch": 2.33, "learning_rate": 3.144259557030966e-06, "logits/chosen": -1.2175042629241943, "logits/rejected": -1.6015738248825073, "logps/chosen": -184.63462829589844, "logps/rejected": -417.40216064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.71516752243042, "rewards/margins": 10.762179374694824, "rewards/rejected": -17.477346420288086, "step": 15002 }, { "epoch": 2.33, "learning_rate": 3.1435261164998184e-06, "logits/chosen": -2.5458762645721436, "logits/rejected": -2.5243680477142334, "logps/chosen": -308.321533203125, "logps/rejected": -376.33209228515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.398013591766357, "rewards/margins": 8.664793968200684, "rewards/rejected": -16.062807083129883, "step": 15003 }, { "epoch": 2.33, "learning_rate": 3.1427926759686707e-06, "logits/chosen": -0.8878369331359863, "logits/rejected": -2.710245132446289, "logps/chosen": -157.151123046875, "logps/rejected": -606.0247802734375, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -5.535979270935059, "rewards/margins": 5.994143486022949, "rewards/rejected": -11.530122756958008, "step": 15004 }, { "epoch": 2.33, "learning_rate": 3.142059235437523e-06, "logits/chosen": -2.592575788497925, "logits/rejected": -2.7104194164276123, "logps/chosen": -128.96060180664062, "logps/rejected": -352.7734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.5024471282958984, "rewards/margins": 12.365680694580078, "rewards/rejected": -15.868127822875977, "step": 15005 }, { "epoch": 2.33, "learning_rate": 3.141325794906375e-06, "logits/chosen": -2.4060187339782715, "logits/rejected": -1.1143866777420044, "logps/chosen": -542.8443603515625, "logps/rejected": -298.7720947265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.699487686157227, "rewards/margins": 9.543086051940918, "rewards/rejected": -17.24257469177246, "step": 15006 }, { "epoch": 2.33, "learning_rate": 3.140592354375227e-06, "logits/chosen": -0.5242010951042175, "logits/rejected": -1.8802739381790161, "logps/chosen": -123.52529907226562, "logps/rejected": -594.3651123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.912405014038086, "rewards/margins": 18.348970413208008, "rewards/rejected": -24.261375427246094, "step": 15007 }, { "epoch": 2.33, "learning_rate": 3.139858913844079e-06, "logits/chosen": -1.2422727346420288, "logits/rejected": -2.8216145038604736, "logps/chosen": -131.42112731933594, "logps/rejected": -491.52392578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.249560356140137, "rewards/margins": 10.119413375854492, "rewards/rejected": -18.368972778320312, "step": 15008 }, { "epoch": 2.33, "learning_rate": 3.139125473312931e-06, "logits/chosen": -1.3986303806304932, "logits/rejected": -2.4167892932891846, "logps/chosen": -202.43951416015625, "logps/rejected": -454.72222900390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.091396331787109, "rewards/margins": 9.96615219116211, "rewards/rejected": -15.057548522949219, "step": 15009 }, { "epoch": 2.33, "learning_rate": 3.1383920327817832e-06, "logits/chosen": -1.9964139461517334, "logits/rejected": -2.6551575660705566, "logps/chosen": -233.57794189453125, "logps/rejected": -244.39041137695312, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -3.958604335784912, "rewards/margins": 7.237361907958984, "rewards/rejected": -11.195965766906738, "step": 15010 }, { "epoch": 2.33, "learning_rate": 3.137658592250635e-06, "logits/chosen": -1.2724825143814087, "logits/rejected": -2.445436477661133, "logps/chosen": -220.09564208984375, "logps/rejected": -493.3037109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.92087173461914, "rewards/margins": 12.186100959777832, "rewards/rejected": -21.10697364807129, "step": 15011 }, { "epoch": 2.33, "learning_rate": 3.136925151719488e-06, "logits/chosen": -1.966752290725708, "logits/rejected": -2.293890953063965, "logps/chosen": -200.24627685546875, "logps/rejected": -372.75860595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0038299560546875, "rewards/margins": 9.98385238647461, "rewards/rejected": -16.987682342529297, "step": 15012 }, { "epoch": 2.33, "learning_rate": 3.1361917111883397e-06, "logits/chosen": -2.7008938789367676, "logits/rejected": -3.0198886394500732, "logps/chosen": -667.9232177734375, "logps/rejected": -734.6630249023438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.840001106262207, "rewards/margins": 13.440013885498047, "rewards/rejected": -17.28001594543457, "step": 15013 }, { "epoch": 2.33, "learning_rate": 3.135458270657192e-06, "logits/chosen": -2.290442943572998, "logits/rejected": -2.528839588165283, "logps/chosen": -103.76637268066406, "logps/rejected": -224.5570068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.844022274017334, "rewards/margins": 11.324800491333008, "rewards/rejected": -14.1688232421875, "step": 15014 }, { "epoch": 2.34, "learning_rate": 3.134724830126044e-06, "logits/chosen": -1.8131425380706787, "logits/rejected": -2.6744518280029297, "logps/chosen": -304.9444274902344, "logps/rejected": -428.3216552734375, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -5.653228282928467, "rewards/margins": 4.965198516845703, "rewards/rejected": -10.618427276611328, "step": 15015 }, { "epoch": 2.34, "learning_rate": 3.133991389594896e-06, "logits/chosen": -2.0085525512695312, "logits/rejected": -3.000525951385498, "logps/chosen": -109.7937240600586, "logps/rejected": -503.3367614746094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.467668533325195, "rewards/margins": 8.51877212524414, "rewards/rejected": -16.986440658569336, "step": 15016 }, { "epoch": 2.34, "learning_rate": 3.133257949063748e-06, "logits/chosen": -2.7089502811431885, "logits/rejected": -2.711148500442505, "logps/chosen": -486.9952087402344, "logps/rejected": -489.6739501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.672299385070801, "rewards/margins": 11.883526802062988, "rewards/rejected": -18.55582618713379, "step": 15017 }, { "epoch": 2.34, "learning_rate": 3.1325245085326e-06, "logits/chosen": -0.8787485957145691, "logits/rejected": -2.7876291275024414, "logps/chosen": -110.29917907714844, "logps/rejected": -780.6967163085938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.237399101257324, "rewards/margins": 15.880820274353027, "rewards/rejected": -22.11821937561035, "step": 15018 }, { "epoch": 2.34, "learning_rate": 3.1317910680014523e-06, "logits/chosen": -2.093498468399048, "logits/rejected": -2.6973159313201904, "logps/chosen": -409.6424865722656, "logps/rejected": -389.02825927734375, "loss": 1.0692, "rewards/accuracies": 0.5, "rewards/chosen": -9.609450340270996, "rewards/margins": 6.0291829109191895, "rewards/rejected": -15.638632774353027, "step": 15019 }, { "epoch": 2.34, "learning_rate": 3.1310576274703046e-06, "logits/chosen": -3.0963637828826904, "logits/rejected": -2.520981550216675, "logps/chosen": -266.6895751953125, "logps/rejected": -158.3719940185547, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.768857002258301, "rewards/margins": 8.874198913574219, "rewards/rejected": -12.64305591583252, "step": 15020 }, { "epoch": 2.34, "learning_rate": 3.130324186939157e-06, "logits/chosen": -2.68515682220459, "logits/rejected": -2.6188430786132812, "logps/chosen": -549.8541259765625, "logps/rejected": -470.90313720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.925015449523926, "rewards/margins": 11.032434463500977, "rewards/rejected": -15.957449913024902, "step": 15021 }, { "epoch": 2.34, "learning_rate": 3.1295907464080087e-06, "logits/chosen": -2.653142213821411, "logits/rejected": -2.4758245944976807, "logps/chosen": -317.5506896972656, "logps/rejected": -348.016845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.705775737762451, "rewards/margins": 10.720754623413086, "rewards/rejected": -15.426530838012695, "step": 15022 }, { "epoch": 2.34, "learning_rate": 3.128857305876861e-06, "logits/chosen": -2.0791006088256836, "logits/rejected": -2.5918729305267334, "logps/chosen": -655.4764404296875, "logps/rejected": -684.4219970703125, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -9.249152183532715, "rewards/margins": 6.334742069244385, "rewards/rejected": -15.583894729614258, "step": 15023 }, { "epoch": 2.34, "learning_rate": 3.128123865345713e-06, "logits/chosen": -1.89528226852417, "logits/rejected": -2.643784999847412, "logps/chosen": -411.66656494140625, "logps/rejected": -577.048828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.599170684814453, "rewards/margins": 10.399150848388672, "rewards/rejected": -20.998321533203125, "step": 15024 }, { "epoch": 2.34, "learning_rate": 3.1273904248145652e-06, "logits/chosen": -2.78523850440979, "logits/rejected": -1.1347182989120483, "logps/chosen": -234.35751342773438, "logps/rejected": -253.82882690429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.887042284011841, "rewards/margins": 12.407200813293457, "rewards/rejected": -16.29424285888672, "step": 15025 }, { "epoch": 2.34, "learning_rate": 3.126656984283417e-06, "logits/chosen": -2.526210069656372, "logits/rejected": -2.896519660949707, "logps/chosen": -108.56576538085938, "logps/rejected": -446.35491943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.560765266418457, "rewards/margins": 11.819610595703125, "rewards/rejected": -19.3803768157959, "step": 15026 }, { "epoch": 2.34, "learning_rate": 3.1259235437522694e-06, "logits/chosen": -2.563652992248535, "logits/rejected": -2.378631114959717, "logps/chosen": -224.29022216796875, "logps/rejected": -309.7747802734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.956906318664551, "rewards/margins": 6.89180850982666, "rewards/rejected": -12.848714828491211, "step": 15027 }, { "epoch": 2.34, "learning_rate": 3.1251901032211213e-06, "logits/chosen": -2.730093479156494, "logits/rejected": -2.634786605834961, "logps/chosen": -317.9368591308594, "logps/rejected": -403.1372375488281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.48944091796875, "rewards/margins": 10.96255111694336, "rewards/rejected": -16.45199203491211, "step": 15028 }, { "epoch": 2.34, "learning_rate": 3.1244566626899736e-06, "logits/chosen": -0.8246602416038513, "logits/rejected": -2.259437322616577, "logps/chosen": -110.83290100097656, "logps/rejected": -632.268310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.392429351806641, "rewards/margins": 14.650238037109375, "rewards/rejected": -21.042667388916016, "step": 15029 }, { "epoch": 2.34, "learning_rate": 3.123723222158826e-06, "logits/chosen": -2.68705415725708, "logits/rejected": -2.0241622924804688, "logps/chosen": -357.5836181640625, "logps/rejected": -329.14794921875, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -5.293837070465088, "rewards/margins": 6.951173782348633, "rewards/rejected": -12.245010375976562, "step": 15030 }, { "epoch": 2.34, "learning_rate": 3.1229897816276778e-06, "logits/chosen": -2.7695460319519043, "logits/rejected": -1.8675947189331055, "logps/chosen": -271.17572021484375, "logps/rejected": -202.70248413085938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.666574001312256, "rewards/margins": 7.541599750518799, "rewards/rejected": -11.208173751831055, "step": 15031 }, { "epoch": 2.34, "learning_rate": 3.12225634109653e-06, "logits/chosen": -2.8062989711761475, "logits/rejected": -2.569279432296753, "logps/chosen": -681.2831420898438, "logps/rejected": -673.5968627929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.403873443603516, "rewards/margins": 10.976024627685547, "rewards/rejected": -19.379898071289062, "step": 15032 }, { "epoch": 2.34, "learning_rate": 3.121522900565382e-06, "logits/chosen": -1.2296905517578125, "logits/rejected": -2.539764404296875, "logps/chosen": -171.48312377929688, "logps/rejected": -378.710693359375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.428020477294922, "rewards/margins": 7.562286376953125, "rewards/rejected": -12.990306854248047, "step": 15033 }, { "epoch": 2.34, "learning_rate": 3.1207894600342342e-06, "logits/chosen": -2.859937906265259, "logits/rejected": -1.769043207168579, "logps/chosen": -544.939208984375, "logps/rejected": -302.4270324707031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.042616128921509, "rewards/margins": 9.992128372192383, "rewards/rejected": -13.034745216369629, "step": 15034 }, { "epoch": 2.34, "learning_rate": 3.120056019503086e-06, "logits/chosen": -2.8339076042175293, "logits/rejected": -2.369774580001831, "logps/chosen": -557.9359741210938, "logps/rejected": -501.8270263671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.019431114196777, "rewards/margins": 11.78884220123291, "rewards/rejected": -16.808273315429688, "step": 15035 }, { "epoch": 2.34, "learning_rate": 3.1193225789719384e-06, "logits/chosen": -2.447705030441284, "logits/rejected": -2.5055973529815674, "logps/chosen": -148.23040771484375, "logps/rejected": -221.966552734375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.364525318145752, "rewards/margins": 7.763687610626221, "rewards/rejected": -12.128212928771973, "step": 15036 }, { "epoch": 2.34, "learning_rate": 3.1185891384407903e-06, "logits/chosen": -2.796009063720703, "logits/rejected": -2.1298716068267822, "logps/chosen": -448.3164367675781, "logps/rejected": -271.9560546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.485891819000244, "rewards/margins": 9.453018188476562, "rewards/rejected": -12.938910484313965, "step": 15037 }, { "epoch": 2.34, "learning_rate": 3.1178556979096426e-06, "logits/chosen": -1.6200015544891357, "logits/rejected": -2.7063658237457275, "logps/chosen": -412.8521728515625, "logps/rejected": -628.168701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2120795249938965, "rewards/margins": 12.457502365112305, "rewards/rejected": -17.66958236694336, "step": 15038 }, { "epoch": 2.34, "learning_rate": 3.117122257378495e-06, "logits/chosen": -2.081480026245117, "logits/rejected": -2.916438579559326, "logps/chosen": -474.74737548828125, "logps/rejected": -982.9141235351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.011832237243652, "rewards/margins": 12.196304321289062, "rewards/rejected": -20.20813751220703, "step": 15039 }, { "epoch": 2.34, "learning_rate": 3.1163888168473468e-06, "logits/chosen": -2.6198291778564453, "logits/rejected": -2.7538280487060547, "logps/chosen": -323.27313232421875, "logps/rejected": -484.1484069824219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.437632083892822, "rewards/margins": 13.606720924377441, "rewards/rejected": -20.044353485107422, "step": 15040 }, { "epoch": 2.34, "learning_rate": 3.115655376316199e-06, "logits/chosen": -2.0950076580047607, "logits/rejected": -2.550291061401367, "logps/chosen": -142.82876586914062, "logps/rejected": -273.3040466308594, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -7.814332008361816, "rewards/margins": 9.704456329345703, "rewards/rejected": -17.518789291381836, "step": 15041 }, { "epoch": 2.34, "learning_rate": 3.114921935785051e-06, "logits/chosen": -1.8825212717056274, "logits/rejected": -2.6106886863708496, "logps/chosen": -206.14132690429688, "logps/rejected": -412.4073486328125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -7.467164516448975, "rewards/margins": 6.284834384918213, "rewards/rejected": -13.751998901367188, "step": 15042 }, { "epoch": 2.34, "learning_rate": 3.1141884952539033e-06, "logits/chosen": -2.3935258388519287, "logits/rejected": -2.0356132984161377, "logps/chosen": -442.36236572265625, "logps/rejected": -528.2952880859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -14.09435749053955, "rewards/margins": 8.393658638000488, "rewards/rejected": -22.48801612854004, "step": 15043 }, { "epoch": 2.34, "learning_rate": 3.113455054722755e-06, "logits/chosen": -2.6494076251983643, "logits/rejected": -2.8807778358459473, "logps/chosen": -136.770751953125, "logps/rejected": -183.89263916015625, "loss": 0.9228, "rewards/accuracies": 0.5, "rewards/chosen": -9.248495101928711, "rewards/margins": 2.8752801418304443, "rewards/rejected": -12.123775482177734, "step": 15044 }, { "epoch": 2.34, "learning_rate": 3.1127216141916074e-06, "logits/chosen": -2.431518316268921, "logits/rejected": -1.3962562084197998, "logps/chosen": -422.2487487792969, "logps/rejected": -358.9339294433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2956414222717285, "rewards/margins": 12.24561595916748, "rewards/rejected": -18.541257858276367, "step": 15045 }, { "epoch": 2.34, "learning_rate": 3.1119881736604598e-06, "logits/chosen": -2.1860766410827637, "logits/rejected": -2.7984371185302734, "logps/chosen": -171.32469177246094, "logps/rejected": -255.98316955566406, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.689860820770264, "rewards/margins": 6.827382564544678, "rewards/rejected": -11.517243385314941, "step": 15046 }, { "epoch": 2.34, "learning_rate": 3.111254733129312e-06, "logits/chosen": -2.5709002017974854, "logits/rejected": -2.8439383506774902, "logps/chosen": -160.59774780273438, "logps/rejected": -289.99053955078125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -8.11473274230957, "rewards/margins": 5.827296257019043, "rewards/rejected": -13.942028045654297, "step": 15047 }, { "epoch": 2.34, "learning_rate": 3.110521292598164e-06, "logits/chosen": -1.7377265691757202, "logits/rejected": -2.6802778244018555, "logps/chosen": -107.16328430175781, "logps/rejected": -492.75140380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.138099670410156, "rewards/margins": 15.678985595703125, "rewards/rejected": -21.81708526611328, "step": 15048 }, { "epoch": 2.34, "learning_rate": 3.109787852067016e-06, "logits/chosen": -2.8007137775421143, "logits/rejected": -2.4627203941345215, "logps/chosen": -257.9056701660156, "logps/rejected": -250.6964874267578, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -0.032009124755859375, "rewards/margins": 6.613770484924316, "rewards/rejected": -6.645779609680176, "step": 15049 }, { "epoch": 2.34, "learning_rate": 3.109054411535868e-06, "logits/chosen": -2.6283037662506104, "logits/rejected": -2.9579312801361084, "logps/chosen": -139.5176239013672, "logps/rejected": -395.0845947265625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -8.088065147399902, "rewards/margins": 6.561993598937988, "rewards/rejected": -14.65005874633789, "step": 15050 }, { "epoch": 2.34, "learning_rate": 3.10832097100472e-06, "logits/chosen": -1.503244400024414, "logits/rejected": -2.649092197418213, "logps/chosen": -330.38287353515625, "logps/rejected": -516.7418212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.140994071960449, "rewards/margins": 10.110234260559082, "rewards/rejected": -17.25122833251953, "step": 15051 }, { "epoch": 2.34, "learning_rate": 3.1075875304735723e-06, "logits/chosen": -2.712191104888916, "logits/rejected": -2.923964262008667, "logps/chosen": -461.9681091308594, "logps/rejected": -612.3899536132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.432071208953857, "rewards/margins": 13.748298645019531, "rewards/rejected": -19.180370330810547, "step": 15052 }, { "epoch": 2.34, "learning_rate": 3.106854089942424e-06, "logits/chosen": -2.5471763610839844, "logits/rejected": -2.615854501724243, "logps/chosen": -316.1374816894531, "logps/rejected": -384.27630615234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.8775672912597656, "rewards/margins": 10.761568069458008, "rewards/rejected": -13.639135360717773, "step": 15053 }, { "epoch": 2.34, "learning_rate": 3.1061206494112765e-06, "logits/chosen": -2.489356756210327, "logits/rejected": -2.683515787124634, "logps/chosen": -194.52183532714844, "logps/rejected": -254.75494384765625, "loss": 0.1323, "rewards/accuracies": 1.0, "rewards/chosen": -7.417227268218994, "rewards/margins": 7.119110107421875, "rewards/rejected": -14.536336898803711, "step": 15054 }, { "epoch": 2.34, "learning_rate": 3.1053872088801288e-06, "logits/chosen": -2.609619617462158, "logits/rejected": -2.3523142337799072, "logps/chosen": -189.75245666503906, "logps/rejected": -170.12881469726562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.086510181427002, "rewards/margins": 8.082810401916504, "rewards/rejected": -12.169321060180664, "step": 15055 }, { "epoch": 2.34, "learning_rate": 3.104653768348981e-06, "logits/chosen": -2.8937976360321045, "logits/rejected": -2.866842269897461, "logps/chosen": -189.77606201171875, "logps/rejected": -236.41395568847656, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -7.169628620147705, "rewards/margins": 6.63785982131958, "rewards/rejected": -13.807488441467285, "step": 15056 }, { "epoch": 2.34, "learning_rate": 3.103920327817833e-06, "logits/chosen": -2.525496244430542, "logits/rejected": -2.7236056327819824, "logps/chosen": -444.6537780761719, "logps/rejected": -438.45709228515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.9840989112854, "rewards/margins": 9.284878730773926, "rewards/rejected": -15.268978118896484, "step": 15057 }, { "epoch": 2.34, "learning_rate": 3.103186887286685e-06, "logits/chosen": -1.7615838050842285, "logits/rejected": -2.736391067504883, "logps/chosen": -311.6722106933594, "logps/rejected": -281.7525329589844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.504038333892822, "rewards/margins": 8.425464630126953, "rewards/rejected": -13.929502487182617, "step": 15058 }, { "epoch": 2.34, "learning_rate": 3.102453446755537e-06, "logits/chosen": -1.7759943008422852, "logits/rejected": -2.998276948928833, "logps/chosen": -121.79280090332031, "logps/rejected": -406.79083251953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.815491199493408, "rewards/margins": 10.460041046142578, "rewards/rejected": -17.275531768798828, "step": 15059 }, { "epoch": 2.34, "learning_rate": 3.101720006224389e-06, "logits/chosen": -1.69757878780365, "logits/rejected": -2.6197736263275146, "logps/chosen": -154.14456176757812, "logps/rejected": -374.27423095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.500743865966797, "rewards/margins": 12.215415954589844, "rewards/rejected": -18.71615982055664, "step": 15060 }, { "epoch": 2.34, "learning_rate": 3.1009865656932413e-06, "logits/chosen": -2.991391897201538, "logits/rejected": -2.454742908477783, "logps/chosen": -302.7793884277344, "logps/rejected": -231.8355712890625, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -4.886404514312744, "rewards/margins": 4.986208438873291, "rewards/rejected": -9.872612953186035, "step": 15061 }, { "epoch": 2.34, "learning_rate": 3.100253125162093e-06, "logits/chosen": -2.180342674255371, "logits/rejected": -2.609872341156006, "logps/chosen": -134.73907470703125, "logps/rejected": -288.9293212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.10575008392334, "rewards/margins": 10.834199905395508, "rewards/rejected": -15.939949035644531, "step": 15062 }, { "epoch": 2.34, "learning_rate": 3.099519684630946e-06, "logits/chosen": -2.564474582672119, "logits/rejected": -2.673840284347534, "logps/chosen": -572.0941162109375, "logps/rejected": -734.8739013671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.575838088989258, "rewards/margins": 12.69737720489502, "rewards/rejected": -20.273216247558594, "step": 15063 }, { "epoch": 2.34, "learning_rate": 3.098786244099798e-06, "logits/chosen": -2.387089729309082, "logits/rejected": -2.0346224308013916, "logps/chosen": -212.17860412597656, "logps/rejected": -188.06759643554688, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -5.646378993988037, "rewards/margins": 5.607869625091553, "rewards/rejected": -11.25424861907959, "step": 15064 }, { "epoch": 2.34, "learning_rate": 3.09805280356865e-06, "logits/chosen": -2.980429172515869, "logits/rejected": -3.017664909362793, "logps/chosen": -287.3126220703125, "logps/rejected": -323.60638427734375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -8.937129974365234, "rewards/margins": 8.582101821899414, "rewards/rejected": -17.51923179626465, "step": 15065 }, { "epoch": 2.34, "learning_rate": 3.097319363037502e-06, "logits/chosen": -2.9810290336608887, "logits/rejected": -3.163982391357422, "logps/chosen": -824.437744140625, "logps/rejected": -743.9087524414062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.809118747711182, "rewards/margins": 11.342340469360352, "rewards/rejected": -17.151458740234375, "step": 15066 }, { "epoch": 2.34, "learning_rate": 3.0965859225063543e-06, "logits/chosen": -2.021670341491699, "logits/rejected": -2.6873257160186768, "logps/chosen": -293.94512939453125, "logps/rejected": -399.71728515625, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -6.569780349731445, "rewards/margins": 7.711842060089111, "rewards/rejected": -14.281621932983398, "step": 15067 }, { "epoch": 2.34, "learning_rate": 3.095852481975206e-06, "logits/chosen": -1.9036107063293457, "logits/rejected": -2.5175392627716064, "logps/chosen": -147.28878784179688, "logps/rejected": -386.6023864746094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.364445209503174, "rewards/margins": 13.164525985717773, "rewards/rejected": -18.52897071838379, "step": 15068 }, { "epoch": 2.34, "learning_rate": 3.095119041444058e-06, "logits/chosen": -2.823385238647461, "logits/rejected": -1.9527957439422607, "logps/chosen": -758.9082641601562, "logps/rejected": -493.85968017578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.380840301513672, "rewards/margins": 9.593757629394531, "rewards/rejected": -14.97459888458252, "step": 15069 }, { "epoch": 2.34, "learning_rate": 3.0943856009129103e-06, "logits/chosen": -1.7995171546936035, "logits/rejected": -2.6393020153045654, "logps/chosen": -281.80780029296875, "logps/rejected": -430.75, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.375890254974365, "rewards/margins": 8.394472122192383, "rewards/rejected": -13.770362854003906, "step": 15070 }, { "epoch": 2.34, "learning_rate": 3.0936521603817626e-06, "logits/chosen": -2.640507936477661, "logits/rejected": -2.655395269393921, "logps/chosen": -221.6559295654297, "logps/rejected": -278.3095397949219, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -8.46103286743164, "rewards/margins": 5.027222633361816, "rewards/rejected": -13.48825454711914, "step": 15071 }, { "epoch": 2.34, "learning_rate": 3.092918719850615e-06, "logits/chosen": -2.5695972442626953, "logits/rejected": -2.9704668521881104, "logps/chosen": -91.57963562011719, "logps/rejected": -344.1004638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.1962809562683105, "rewards/margins": 9.452738761901855, "rewards/rejected": -15.649019241333008, "step": 15072 }, { "epoch": 2.34, "learning_rate": 3.092185279319467e-06, "logits/chosen": -2.0958034992218018, "logits/rejected": -2.43765926361084, "logps/chosen": -377.28692626953125, "logps/rejected": -493.1423034667969, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -10.01639175415039, "rewards/margins": 6.3438568115234375, "rewards/rejected": -16.360248565673828, "step": 15073 }, { "epoch": 2.34, "learning_rate": 3.091451838788319e-06, "logits/chosen": -2.618004083633423, "logits/rejected": -2.733238697052002, "logps/chosen": -153.1279296875, "logps/rejected": -488.96533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0748677253723145, "rewards/margins": 13.192488670349121, "rewards/rejected": -18.267356872558594, "step": 15074 }, { "epoch": 2.34, "learning_rate": 3.090718398257171e-06, "logits/chosen": -1.7263318300247192, "logits/rejected": -2.505641222000122, "logps/chosen": -198.07791137695312, "logps/rejected": -457.15069580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.594817161560059, "rewards/margins": 9.388985633850098, "rewards/rejected": -16.983802795410156, "step": 15075 }, { "epoch": 2.34, "learning_rate": 3.0899849577260233e-06, "logits/chosen": -1.6086945533752441, "logits/rejected": -2.8458516597747803, "logps/chosen": -139.0063934326172, "logps/rejected": -348.74090576171875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.100311279296875, "rewards/margins": 7.044672966003418, "rewards/rejected": -14.144984245300293, "step": 15076 }, { "epoch": 2.34, "learning_rate": 3.089251517194875e-06, "logits/chosen": -0.48772066831588745, "logits/rejected": -2.300427198410034, "logps/chosen": -178.80477905273438, "logps/rejected": -523.2075805664062, "loss": 0.3497, "rewards/accuracies": 0.5, "rewards/chosen": -8.905786514282227, "rewards/margins": 12.375144004821777, "rewards/rejected": -21.280929565429688, "step": 15077 }, { "epoch": 2.34, "learning_rate": 3.088518076663727e-06, "logits/chosen": -2.3900506496429443, "logits/rejected": -2.8205246925354004, "logps/chosen": -172.71762084960938, "logps/rejected": -396.8587646484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.908640384674072, "rewards/margins": 9.60965633392334, "rewards/rejected": -16.518295288085938, "step": 15078 }, { "epoch": 2.35, "learning_rate": 3.0877846361325794e-06, "logits/chosen": -2.717223882675171, "logits/rejected": -2.5805888175964355, "logps/chosen": -252.2542266845703, "logps/rejected": -298.0850830078125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.88897705078125, "rewards/margins": 7.363115310668945, "rewards/rejected": -12.252092361450195, "step": 15079 }, { "epoch": 2.35, "learning_rate": 3.0870511956014317e-06, "logits/chosen": -2.958918809890747, "logits/rejected": -3.1449384689331055, "logps/chosen": -416.92218017578125, "logps/rejected": -560.4039916992188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.054883003234863, "rewards/margins": 14.69425106048584, "rewards/rejected": -24.749134063720703, "step": 15080 }, { "epoch": 2.35, "learning_rate": 3.086317755070284e-06, "logits/chosen": -2.6956357955932617, "logits/rejected": -2.694258213043213, "logps/chosen": -231.65496826171875, "logps/rejected": -438.8934020996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6067962646484375, "rewards/margins": 12.103643417358398, "rewards/rejected": -18.710439682006836, "step": 15081 }, { "epoch": 2.35, "learning_rate": 3.085584314539136e-06, "logits/chosen": -2.6887102127075195, "logits/rejected": -2.6545982360839844, "logps/chosen": -405.7806396484375, "logps/rejected": -440.24713134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.924992561340332, "rewards/margins": 10.279045104980469, "rewards/rejected": -14.2040376663208, "step": 15082 }, { "epoch": 2.35, "learning_rate": 3.084850874007988e-06, "logits/chosen": -2.695493459701538, "logits/rejected": -2.7353522777557373, "logps/chosen": -178.99620056152344, "logps/rejected": -215.2086944580078, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -5.651281833648682, "rewards/margins": 5.7482194900512695, "rewards/rejected": -11.39950180053711, "step": 15083 }, { "epoch": 2.35, "learning_rate": 3.08411743347684e-06, "logits/chosen": -2.8111231327056885, "logits/rejected": -2.8930206298828125, "logps/chosen": -79.38441467285156, "logps/rejected": -241.16163635253906, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.804577827453613, "rewards/margins": 8.643997192382812, "rewards/rejected": -13.448575973510742, "step": 15084 }, { "epoch": 2.35, "learning_rate": 3.0833839929456923e-06, "logits/chosen": -2.5807480812072754, "logits/rejected": -2.624152421951294, "logps/chosen": -79.30984497070312, "logps/rejected": -153.23672485351562, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.905704975128174, "rewards/margins": 7.577317237854004, "rewards/rejected": -11.48302173614502, "step": 15085 }, { "epoch": 2.35, "learning_rate": 3.082650552414544e-06, "logits/chosen": -2.7872250080108643, "logits/rejected": -1.829958438873291, "logps/chosen": -284.2762451171875, "logps/rejected": -386.97320556640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.201226711273193, "rewards/margins": 13.115119934082031, "rewards/rejected": -17.316347122192383, "step": 15086 }, { "epoch": 2.35, "learning_rate": 3.081917111883396e-06, "logits/chosen": -1.4928796291351318, "logits/rejected": -2.909672260284424, "logps/chosen": -161.70367431640625, "logps/rejected": -564.9891967773438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.42296028137207, "rewards/margins": 10.18004035949707, "rewards/rejected": -19.60300064086914, "step": 15087 }, { "epoch": 2.35, "learning_rate": 3.081183671352249e-06, "logits/chosen": -2.445978879928589, "logits/rejected": -2.5943119525909424, "logps/chosen": -586.6727294921875, "logps/rejected": -580.1106567382812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.819322109222412, "rewards/margins": 12.799958229064941, "rewards/rejected": -17.619279861450195, "step": 15088 }, { "epoch": 2.35, "learning_rate": 3.0804502308211007e-06, "logits/chosen": -1.6394604444503784, "logits/rejected": -2.8760786056518555, "logps/chosen": -384.613037109375, "logps/rejected": -551.6290283203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.513297080993652, "rewards/margins": 9.473039627075195, "rewards/rejected": -14.986336708068848, "step": 15089 }, { "epoch": 2.35, "learning_rate": 3.079716790289953e-06, "logits/chosen": -2.9354088306427, "logits/rejected": -3.038539171218872, "logps/chosen": -124.68888854980469, "logps/rejected": -235.8717803955078, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -9.876774787902832, "rewards/margins": 5.467626571655273, "rewards/rejected": -15.344401359558105, "step": 15090 }, { "epoch": 2.35, "learning_rate": 3.078983349758805e-06, "logits/chosen": -1.6770596504211426, "logits/rejected": -2.613342046737671, "logps/chosen": -243.72003173828125, "logps/rejected": -503.9671936035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.475431442260742, "rewards/margins": 12.8427734375, "rewards/rejected": -22.318204879760742, "step": 15091 }, { "epoch": 2.35, "learning_rate": 3.078249909227657e-06, "logits/chosen": -2.532855749130249, "logits/rejected": -1.9718674421310425, "logps/chosen": -366.51898193359375, "logps/rejected": -297.7073059082031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.419694423675537, "rewards/margins": 9.203646659851074, "rewards/rejected": -12.62334156036377, "step": 15092 }, { "epoch": 2.35, "learning_rate": 3.077516468696509e-06, "logits/chosen": -1.9001774787902832, "logits/rejected": -2.5501139163970947, "logps/chosen": -262.834716796875, "logps/rejected": -513.0728759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.839489936828613, "rewards/margins": 13.626880645751953, "rewards/rejected": -18.466371536254883, "step": 15093 }, { "epoch": 2.35, "learning_rate": 3.0767830281653613e-06, "logits/chosen": -2.2460074424743652, "logits/rejected": -2.733044147491455, "logps/chosen": -388.940673828125, "logps/rejected": -427.504150390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.757277488708496, "rewards/margins": 9.116741180419922, "rewards/rejected": -15.874019622802734, "step": 15094 }, { "epoch": 2.35, "learning_rate": 3.0760495876342132e-06, "logits/chosen": -1.5886045694351196, "logits/rejected": -2.90562105178833, "logps/chosen": -126.01053619384766, "logps/rejected": -426.5262451171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.559601783752441, "rewards/margins": 9.544198989868164, "rewards/rejected": -18.103801727294922, "step": 15095 }, { "epoch": 2.35, "learning_rate": 3.0753161471030655e-06, "logits/chosen": -2.5872013568878174, "logits/rejected": -2.6669225692749023, "logps/chosen": -126.767578125, "logps/rejected": -240.96717834472656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.584998607635498, "rewards/margins": 8.076416015625, "rewards/rejected": -12.661415100097656, "step": 15096 }, { "epoch": 2.35, "learning_rate": 3.074582706571918e-06, "logits/chosen": -2.9306435585021973, "logits/rejected": -2.879136562347412, "logps/chosen": -130.26626586914062, "logps/rejected": -204.5653076171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.836057186126709, "rewards/margins": 8.100318908691406, "rewards/rejected": -14.936376571655273, "step": 15097 }, { "epoch": 2.35, "learning_rate": 3.0738492660407697e-06, "logits/chosen": -0.989536702632904, "logits/rejected": -2.153104782104492, "logps/chosen": -258.74041748046875, "logps/rejected": -747.7900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.080817699432373, "rewards/margins": 16.41958999633789, "rewards/rejected": -20.500408172607422, "step": 15098 }, { "epoch": 2.35, "learning_rate": 3.073115825509622e-06, "logits/chosen": -1.66680908203125, "logits/rejected": -2.4421656131744385, "logps/chosen": -122.77227783203125, "logps/rejected": -259.00244140625, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -8.131209373474121, "rewards/margins": 6.149482727050781, "rewards/rejected": -14.280692100524902, "step": 15099 }, { "epoch": 2.35, "learning_rate": 3.072382384978474e-06, "logits/chosen": -2.438796043395996, "logits/rejected": -2.2888128757476807, "logps/chosen": -435.33343505859375, "logps/rejected": -516.5189208984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.896627426147461, "rewards/margins": 10.602849960327148, "rewards/rejected": -18.49947738647461, "step": 15100 }, { "epoch": 2.35, "learning_rate": 3.071648944447326e-06, "logits/chosen": -2.6176936626434326, "logits/rejected": -1.7483404874801636, "logps/chosen": -206.17486572265625, "logps/rejected": -157.34832763671875, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/chosen": -4.601614952087402, "rewards/margins": 4.897895812988281, "rewards/rejected": -9.49951171875, "step": 15101 }, { "epoch": 2.35, "learning_rate": 3.070915503916178e-06, "logits/chosen": -0.7211559414863586, "logits/rejected": -2.405540943145752, "logps/chosen": -177.97994995117188, "logps/rejected": -546.5394287109375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.275716781616211, "rewards/margins": 8.734804153442383, "rewards/rejected": -15.010520935058594, "step": 15102 }, { "epoch": 2.35, "learning_rate": 3.0701820633850304e-06, "logits/chosen": -2.427464485168457, "logits/rejected": -2.2147457599639893, "logps/chosen": -260.4694519042969, "logps/rejected": -402.1156005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.305414199829102, "rewards/margins": 12.012981414794922, "rewards/rejected": -16.318395614624023, "step": 15103 }, { "epoch": 2.35, "learning_rate": 3.0694486228538822e-06, "logits/chosen": -1.2455193996429443, "logits/rejected": -2.5581376552581787, "logps/chosen": -137.93592834472656, "logps/rejected": -278.535888671875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -7.434103965759277, "rewards/margins": 6.91342830657959, "rewards/rejected": -14.347532272338867, "step": 15104 }, { "epoch": 2.35, "learning_rate": 3.068715182322735e-06, "logits/chosen": -2.0086541175842285, "logits/rejected": -2.872312545776367, "logps/chosen": -127.63571166992188, "logps/rejected": -352.5233154296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.851644515991211, "rewards/margins": 9.879324913024902, "rewards/rejected": -16.730968475341797, "step": 15105 }, { "epoch": 2.35, "learning_rate": 3.067981741791587e-06, "logits/chosen": -1.783096432685852, "logits/rejected": -2.3343238830566406, "logps/chosen": -123.27630615234375, "logps/rejected": -229.60238647460938, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.729429721832275, "rewards/margins": 7.769682884216309, "rewards/rejected": -12.499113082885742, "step": 15106 }, { "epoch": 2.35, "learning_rate": 3.0672483012604387e-06, "logits/chosen": -1.6848984956741333, "logits/rejected": -2.7711033821105957, "logps/chosen": -286.9767761230469, "logps/rejected": -515.0325927734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.412283420562744, "rewards/margins": 8.100305557250977, "rewards/rejected": -13.512589454650879, "step": 15107 }, { "epoch": 2.35, "learning_rate": 3.066514860729291e-06, "logits/chosen": -1.6517363786697388, "logits/rejected": -2.7844390869140625, "logps/chosen": -173.75721740722656, "logps/rejected": -221.7837371826172, "loss": 2.3797, "rewards/accuracies": 0.5, "rewards/chosen": -9.894647598266602, "rewards/margins": 0.2506577968597412, "rewards/rejected": -10.145305633544922, "step": 15108 }, { "epoch": 2.35, "learning_rate": 3.065781420198143e-06, "logits/chosen": -0.8468056321144104, "logits/rejected": -2.59831166267395, "logps/chosen": -147.29290771484375, "logps/rejected": -416.742431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.292730331420898, "rewards/margins": 10.051504135131836, "rewards/rejected": -18.344234466552734, "step": 15109 }, { "epoch": 2.35, "learning_rate": 3.065047979666995e-06, "logits/chosen": -0.9827187061309814, "logits/rejected": -2.4744462966918945, "logps/chosen": -134.08078002929688, "logps/rejected": -575.396728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.212043762207031, "rewards/margins": 11.101495742797852, "rewards/rejected": -20.313539505004883, "step": 15110 }, { "epoch": 2.35, "learning_rate": 3.064314539135847e-06, "logits/chosen": -2.2398757934570312, "logits/rejected": -2.4420878887176514, "logps/chosen": -134.816162109375, "logps/rejected": -262.5126953125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -7.037077903747559, "rewards/margins": 6.540205001831055, "rewards/rejected": -13.577282905578613, "step": 15111 }, { "epoch": 2.35, "learning_rate": 3.0635810986046994e-06, "logits/chosen": -1.770758867263794, "logits/rejected": -2.803705930709839, "logps/chosen": -157.52256774902344, "logps/rejected": -526.0525512695312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.535965919494629, "rewards/margins": 9.253350257873535, "rewards/rejected": -15.789316177368164, "step": 15112 }, { "epoch": 2.35, "learning_rate": 3.0628476580735517e-06, "logits/chosen": -1.3795678615570068, "logits/rejected": -2.6750783920288086, "logps/chosen": -158.66510009765625, "logps/rejected": -381.2398681640625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -5.541957855224609, "rewards/margins": 7.439072132110596, "rewards/rejected": -12.981030464172363, "step": 15113 }, { "epoch": 2.35, "learning_rate": 3.062114217542404e-06, "logits/chosen": -1.482515573501587, "logits/rejected": -2.448695182800293, "logps/chosen": -314.707275390625, "logps/rejected": -399.3729248046875, "loss": 0.0907, "rewards/accuracies": 1.0, "rewards/chosen": -6.101268768310547, "rewards/margins": 5.680904865264893, "rewards/rejected": -11.782173156738281, "step": 15114 }, { "epoch": 2.35, "learning_rate": 3.061380777011256e-06, "logits/chosen": -2.375138521194458, "logits/rejected": -2.759737968444824, "logps/chosen": -94.31263732910156, "logps/rejected": -396.482421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.099530220031738, "rewards/margins": 9.125102996826172, "rewards/rejected": -13.224634170532227, "step": 15115 }, { "epoch": 2.35, "learning_rate": 3.060647336480108e-06, "logits/chosen": -1.9553418159484863, "logits/rejected": -2.644984722137451, "logps/chosen": -233.56472778320312, "logps/rejected": -415.04034423828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.422759056091309, "rewards/margins": 8.972646713256836, "rewards/rejected": -15.395406723022461, "step": 15116 }, { "epoch": 2.35, "learning_rate": 3.05991389594896e-06, "logits/chosen": -2.6093485355377197, "logits/rejected": -2.0075581073760986, "logps/chosen": -244.30538940429688, "logps/rejected": -206.88943481445312, "loss": 0.5733, "rewards/accuracies": 0.5, "rewards/chosen": -8.228854179382324, "rewards/margins": 2.147156238555908, "rewards/rejected": -10.37601089477539, "step": 15117 }, { "epoch": 2.35, "learning_rate": 3.059180455417812e-06, "logits/chosen": -2.7078499794006348, "logits/rejected": -2.272993803024292, "logps/chosen": -239.31784057617188, "logps/rejected": -394.1917724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.65264892578125, "rewards/margins": 14.89158821105957, "rewards/rejected": -20.544239044189453, "step": 15118 }, { "epoch": 2.35, "learning_rate": 3.0584470148866642e-06, "logits/chosen": -2.5475783348083496, "logits/rejected": -2.936300277709961, "logps/chosen": -244.68673706054688, "logps/rejected": -488.06671142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.371972560882568, "rewards/margins": 13.562923431396484, "rewards/rejected": -18.93489646911621, "step": 15119 }, { "epoch": 2.35, "learning_rate": 3.057713574355516e-06, "logits/chosen": -2.8966779708862305, "logits/rejected": -2.5618956089019775, "logps/chosen": -267.5501708984375, "logps/rejected": -360.67657470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7519755363464355, "rewards/margins": 12.2896146774292, "rewards/rejected": -17.041589736938477, "step": 15120 }, { "epoch": 2.35, "learning_rate": 3.0569801338243684e-06, "logits/chosen": -2.1148386001586914, "logits/rejected": -2.5972237586975098, "logps/chosen": -148.28799438476562, "logps/rejected": -409.860595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.037262916564941, "rewards/margins": 13.384221076965332, "rewards/rejected": -19.421483993530273, "step": 15121 }, { "epoch": 2.35, "learning_rate": 3.0562466932932207e-06, "logits/chosen": -1.817261815071106, "logits/rejected": -2.770036220550537, "logps/chosen": -334.2330322265625, "logps/rejected": -523.610107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.088765144348145, "rewards/margins": 15.644535064697266, "rewards/rejected": -23.733299255371094, "step": 15122 }, { "epoch": 2.35, "learning_rate": 3.055513252762073e-06, "logits/chosen": -2.7520315647125244, "logits/rejected": -2.086015462875366, "logps/chosen": -379.3948974609375, "logps/rejected": -330.68072509765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.882258415222168, "rewards/margins": 9.538185119628906, "rewards/rejected": -18.420442581176758, "step": 15123 }, { "epoch": 2.35, "learning_rate": 3.054779812230925e-06, "logits/chosen": -2.723912239074707, "logits/rejected": -2.3024168014526367, "logps/chosen": -396.69915771484375, "logps/rejected": -334.01995849609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -10.118581771850586, "rewards/margins": 7.288082599639893, "rewards/rejected": -17.40666389465332, "step": 15124 }, { "epoch": 2.35, "learning_rate": 3.054046371699777e-06, "logits/chosen": -2.276609182357788, "logits/rejected": -3.0154645442962646, "logps/chosen": -241.41281127929688, "logps/rejected": -538.6433715820312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.502214431762695, "rewards/margins": 9.895904541015625, "rewards/rejected": -18.39811897277832, "step": 15125 }, { "epoch": 2.35, "learning_rate": 3.053312931168629e-06, "logits/chosen": -2.755678415298462, "logits/rejected": -2.4197542667388916, "logps/chosen": -501.7864074707031, "logps/rejected": -406.26263427734375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -12.749284744262695, "rewards/margins": 6.152431011199951, "rewards/rejected": -18.901714324951172, "step": 15126 }, { "epoch": 2.35, "learning_rate": 3.052579490637481e-06, "logits/chosen": -2.791851758956909, "logits/rejected": -2.1747641563415527, "logps/chosen": -422.2020568847656, "logps/rejected": -350.8709716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.677091598510742, "rewards/margins": 9.313980102539062, "rewards/rejected": -15.991071701049805, "step": 15127 }, { "epoch": 2.35, "learning_rate": 3.0518460501063333e-06, "logits/chosen": -2.008054733276367, "logits/rejected": -2.975421190261841, "logps/chosen": -72.77684020996094, "logps/rejected": -311.9968566894531, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.598039150238037, "rewards/margins": 8.868243217468262, "rewards/rejected": -13.466282844543457, "step": 15128 }, { "epoch": 2.35, "learning_rate": 3.051112609575185e-06, "logits/chosen": -1.411230206489563, "logits/rejected": -2.6470253467559814, "logps/chosen": -279.0332336425781, "logps/rejected": -558.1174926757812, "loss": 0.1207, "rewards/accuracies": 1.0, "rewards/chosen": -7.474641799926758, "rewards/margins": 4.929388046264648, "rewards/rejected": -12.404029846191406, "step": 15129 }, { "epoch": 2.35, "learning_rate": 3.050379169044038e-06, "logits/chosen": -1.7926088571548462, "logits/rejected": -2.3020589351654053, "logps/chosen": -247.88284301757812, "logps/rejected": -322.4212646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2190728187561035, "rewards/margins": 11.54437255859375, "rewards/rejected": -17.763446807861328, "step": 15130 }, { "epoch": 2.35, "learning_rate": 3.0496457285128897e-06, "logits/chosen": -1.8296616077423096, "logits/rejected": -2.637782573699951, "logps/chosen": -189.66754150390625, "logps/rejected": -424.49652099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.028692245483398, "rewards/margins": 9.013225555419922, "rewards/rejected": -15.04191780090332, "step": 15131 }, { "epoch": 2.35, "learning_rate": 3.048912287981742e-06, "logits/chosen": -2.096773147583008, "logits/rejected": -1.0979732275009155, "logps/chosen": -341.0168151855469, "logps/rejected": -160.91197204589844, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -2.3772804737091064, "rewards/margins": 5.445734024047852, "rewards/rejected": -7.823014736175537, "step": 15132 }, { "epoch": 2.35, "learning_rate": 3.048178847450594e-06, "logits/chosen": -1.794663429260254, "logits/rejected": -2.8895857334136963, "logps/chosen": -270.21209716796875, "logps/rejected": -692.0668334960938, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -11.529777526855469, "rewards/margins": 7.577616214752197, "rewards/rejected": -19.107393264770508, "step": 15133 }, { "epoch": 2.35, "learning_rate": 3.0474454069194462e-06, "logits/chosen": -2.4053664207458496, "logits/rejected": -2.7433719635009766, "logps/chosen": -204.29006958007812, "logps/rejected": -366.331298828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.902796745300293, "rewards/margins": 12.722015380859375, "rewards/rejected": -21.624813079833984, "step": 15134 }, { "epoch": 2.35, "learning_rate": 3.046711966388298e-06, "logits/chosen": -2.0099503993988037, "logits/rejected": -2.620601177215576, "logps/chosen": -494.6205749511719, "logps/rejected": -641.6581420898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.323315620422363, "rewards/margins": 13.616483688354492, "rewards/rejected": -23.939800262451172, "step": 15135 }, { "epoch": 2.35, "learning_rate": 3.04597852585715e-06, "logits/chosen": -2.5650880336761475, "logits/rejected": -2.724130392074585, "logps/chosen": -146.47671508789062, "logps/rejected": -188.10260009765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.59628963470459, "rewards/margins": 6.572851181030273, "rewards/rejected": -12.169140815734863, "step": 15136 }, { "epoch": 2.35, "learning_rate": 3.0452450853260023e-06, "logits/chosen": -2.9998645782470703, "logits/rejected": -2.6664493083953857, "logps/chosen": -612.2554321289062, "logps/rejected": -714.832275390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.373493194580078, "rewards/margins": 8.77456283569336, "rewards/rejected": -19.148056030273438, "step": 15137 }, { "epoch": 2.35, "learning_rate": 3.0445116447948546e-06, "logits/chosen": -1.7114025354385376, "logits/rejected": -2.6065104007720947, "logps/chosen": -149.42344665527344, "logps/rejected": -559.9869384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.452754974365234, "rewards/margins": 13.83239459991455, "rewards/rejected": -20.28514862060547, "step": 15138 }, { "epoch": 2.35, "learning_rate": 3.043778204263707e-06, "logits/chosen": -3.0111958980560303, "logits/rejected": -2.853285551071167, "logps/chosen": -396.7227478027344, "logps/rejected": -584.453857421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.461151123046875, "rewards/margins": 10.355886459350586, "rewards/rejected": -16.81703758239746, "step": 15139 }, { "epoch": 2.35, "learning_rate": 3.0430447637325588e-06, "logits/chosen": -3.1595232486724854, "logits/rejected": -3.0831210613250732, "logps/chosen": -204.16954040527344, "logps/rejected": -160.25112915039062, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -5.896756172180176, "rewards/margins": 5.8809309005737305, "rewards/rejected": -11.777687072753906, "step": 15140 }, { "epoch": 2.35, "learning_rate": 3.042311323201411e-06, "logits/chosen": -2.5965240001678467, "logits/rejected": -2.018035411834717, "logps/chosen": -388.1612243652344, "logps/rejected": -541.2061157226562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.106539726257324, "rewards/margins": 9.837169647216797, "rewards/rejected": -18.943710327148438, "step": 15141 }, { "epoch": 2.35, "learning_rate": 3.041577882670263e-06, "logits/chosen": -1.044222354888916, "logits/rejected": -2.4742796421051025, "logps/chosen": -163.718505859375, "logps/rejected": -528.649658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.616171836853027, "rewards/margins": 12.032095909118652, "rewards/rejected": -18.64826774597168, "step": 15142 }, { "epoch": 2.36, "learning_rate": 3.0408444421391152e-06, "logits/chosen": -2.6733884811401367, "logits/rejected": -2.836742401123047, "logps/chosen": -375.7801513671875, "logps/rejected": -509.5586242675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.579263687133789, "rewards/margins": 12.280921936035156, "rewards/rejected": -20.860185623168945, "step": 15143 }, { "epoch": 2.36, "learning_rate": 3.040111001607967e-06, "logits/chosen": -1.6951793432235718, "logits/rejected": -2.7297966480255127, "logps/chosen": -146.3376007080078, "logps/rejected": -476.9291076660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.193873405456543, "rewards/margins": 13.173174858093262, "rewards/rejected": -18.367048263549805, "step": 15144 }, { "epoch": 2.36, "learning_rate": 3.0393775610768194e-06, "logits/chosen": -2.402371883392334, "logits/rejected": -2.552457571029663, "logps/chosen": -333.8156433105469, "logps/rejected": -429.3099060058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.919841766357422, "rewards/margins": 12.626768112182617, "rewards/rejected": -21.546607971191406, "step": 15145 }, { "epoch": 2.36, "learning_rate": 3.0386441205456713e-06, "logits/chosen": -1.2976946830749512, "logits/rejected": -2.793135166168213, "logps/chosen": -149.2840118408203, "logps/rejected": -530.807861328125, "loss": 0.2472, "rewards/accuracies": 1.0, "rewards/chosen": -10.599153518676758, "rewards/margins": 6.229343891143799, "rewards/rejected": -16.8284969329834, "step": 15146 }, { "epoch": 2.36, "learning_rate": 3.0379106800145236e-06, "logits/chosen": -1.2428947687149048, "logits/rejected": -2.7131268978118896, "logps/chosen": -172.4417724609375, "logps/rejected": -595.9224853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.712150812149048, "rewards/margins": 12.217083930969238, "rewards/rejected": -15.929235458374023, "step": 15147 }, { "epoch": 2.36, "learning_rate": 3.037177239483376e-06, "logits/chosen": -2.243936538696289, "logits/rejected": -2.82222580909729, "logps/chosen": -212.29025268554688, "logps/rejected": -316.3656921386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.746584892272949, "rewards/margins": 10.972782135009766, "rewards/rejected": -17.71936798095703, "step": 15148 }, { "epoch": 2.36, "learning_rate": 3.0364437989522278e-06, "logits/chosen": -2.6522862911224365, "logits/rejected": -2.0061769485473633, "logps/chosen": -276.3213806152344, "logps/rejected": -373.8990783691406, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -6.489981174468994, "rewards/margins": 6.986687183380127, "rewards/rejected": -13.476668357849121, "step": 15149 }, { "epoch": 2.36, "learning_rate": 3.03571035842108e-06, "logits/chosen": -2.0930769443511963, "logits/rejected": -2.5770397186279297, "logps/chosen": -164.9084014892578, "logps/rejected": -449.5740661621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.476061820983887, "rewards/margins": 14.345163345336914, "rewards/rejected": -21.821224212646484, "step": 15150 }, { "epoch": 2.36, "learning_rate": 3.034976917889932e-06, "logits/chosen": -2.6168603897094727, "logits/rejected": -2.7607054710388184, "logps/chosen": -105.25704956054688, "logps/rejected": -206.21841430664062, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.1835222244262695, "rewards/margins": 6.121931076049805, "rewards/rejected": -11.305453300476074, "step": 15151 }, { "epoch": 2.36, "learning_rate": 3.0342434773587843e-06, "logits/chosen": -2.091707229614258, "logits/rejected": -2.553104877471924, "logps/chosen": -193.69735717773438, "logps/rejected": -263.2171936035156, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.9606499671936035, "rewards/margins": 8.279134750366211, "rewards/rejected": -12.239784240722656, "step": 15152 }, { "epoch": 2.36, "learning_rate": 3.033510036827636e-06, "logits/chosen": -1.4609053134918213, "logits/rejected": -2.701366901397705, "logps/chosen": -184.33462524414062, "logps/rejected": -541.4386596679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.034660339355469, "rewards/margins": 12.437862396240234, "rewards/rejected": -20.472522735595703, "step": 15153 }, { "epoch": 2.36, "learning_rate": 3.0327765962964884e-06, "logits/chosen": -2.9497828483581543, "logits/rejected": -2.7285516262054443, "logps/chosen": -373.6104736328125, "logps/rejected": -348.6832580566406, "loss": 0.4741, "rewards/accuracies": 0.5, "rewards/chosen": -10.504119873046875, "rewards/margins": 4.469949722290039, "rewards/rejected": -14.974069595336914, "step": 15154 }, { "epoch": 2.36, "learning_rate": 3.0320431557653407e-06, "logits/chosen": -3.046177387237549, "logits/rejected": -3.0300581455230713, "logps/chosen": -392.0086669921875, "logps/rejected": -444.2967529296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.844395160675049, "rewards/margins": 10.017248153686523, "rewards/rejected": -13.861642837524414, "step": 15155 }, { "epoch": 2.36, "learning_rate": 3.0313097152341926e-06, "logits/chosen": -2.255398750305176, "logits/rejected": -1.2012863159179688, "logps/chosen": -163.78082275390625, "logps/rejected": -176.62167358398438, "loss": 0.2611, "rewards/accuracies": 1.0, "rewards/chosen": -6.446717262268066, "rewards/margins": 3.93864107131958, "rewards/rejected": -10.385358810424805, "step": 15156 }, { "epoch": 2.36, "learning_rate": 3.030576274703045e-06, "logits/chosen": -2.517615556716919, "logits/rejected": -2.930373430252075, "logps/chosen": -140.81512451171875, "logps/rejected": -416.474609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.510061264038086, "rewards/margins": 13.55768871307373, "rewards/rejected": -20.0677490234375, "step": 15157 }, { "epoch": 2.36, "learning_rate": 3.029842834171897e-06, "logits/chosen": -2.4796738624572754, "logits/rejected": -2.5779502391815186, "logps/chosen": -453.95355224609375, "logps/rejected": -601.4816284179688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.124502658843994, "rewards/margins": 14.958399772644043, "rewards/rejected": -19.082902908325195, "step": 15158 }, { "epoch": 2.36, "learning_rate": 3.029109393640749e-06, "logits/chosen": -2.672178030014038, "logits/rejected": -2.6621413230895996, "logps/chosen": -139.12637329101562, "logps/rejected": -225.89266967773438, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.741520881652832, "rewards/margins": 6.469441890716553, "rewards/rejected": -13.210962295532227, "step": 15159 }, { "epoch": 2.36, "learning_rate": 3.028375953109601e-06, "logits/chosen": -2.174470901489258, "logits/rejected": -2.5710949897766113, "logps/chosen": -233.99325561523438, "logps/rejected": -419.2563781738281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.773740291595459, "rewards/margins": 10.157164573669434, "rewards/rejected": -15.93090534210205, "step": 15160 }, { "epoch": 2.36, "learning_rate": 3.0276425125784533e-06, "logits/chosen": -2.624114990234375, "logits/rejected": -1.9319850206375122, "logps/chosen": -205.70953369140625, "logps/rejected": -410.8248291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.314523696899414, "rewards/margins": 11.995794296264648, "rewards/rejected": -18.310317993164062, "step": 15161 }, { "epoch": 2.36, "learning_rate": 3.026909072047305e-06, "logits/chosen": -2.5499236583709717, "logits/rejected": -2.143441677093506, "logps/chosen": -281.26800537109375, "logps/rejected": -327.62139892578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -9.751542091369629, "rewards/margins": 7.401165008544922, "rewards/rejected": -17.152706146240234, "step": 15162 }, { "epoch": 2.36, "learning_rate": 3.0261756315161575e-06, "logits/chosen": -0.7650918960571289, "logits/rejected": -2.431689739227295, "logps/chosen": -103.90005493164062, "logps/rejected": -391.3568115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.355799674987793, "rewards/margins": 11.596874237060547, "rewards/rejected": -17.952672958374023, "step": 15163 }, { "epoch": 2.36, "learning_rate": 3.0254421909850098e-06, "logits/chosen": -2.251211404800415, "logits/rejected": -2.8123035430908203, "logps/chosen": -694.7054443359375, "logps/rejected": -701.2402954101562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.60425090789795, "rewards/margins": 10.776910781860352, "rewards/rejected": -20.381160736083984, "step": 15164 }, { "epoch": 2.36, "learning_rate": 3.024708750453862e-06, "logits/chosen": -0.5226227045059204, "logits/rejected": -2.6258270740509033, "logps/chosen": -107.62445068359375, "logps/rejected": -377.08819580078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.960165500640869, "rewards/margins": 8.253715515136719, "rewards/rejected": -15.213881492614746, "step": 15165 }, { "epoch": 2.36, "learning_rate": 3.023975309922714e-06, "logits/chosen": -2.5741348266601562, "logits/rejected": -2.152453660964966, "logps/chosen": -481.021484375, "logps/rejected": -465.21917724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.861223220825195, "rewards/margins": 10.536698341369629, "rewards/rejected": -15.39792251586914, "step": 15166 }, { "epoch": 2.36, "learning_rate": 3.023241869391566e-06, "logits/chosen": -2.7819302082061768, "logits/rejected": -2.204838752746582, "logps/chosen": -210.42562866210938, "logps/rejected": -202.9838104248047, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -4.87186861038208, "rewards/margins": 6.100556373596191, "rewards/rejected": -10.97242546081543, "step": 15167 }, { "epoch": 2.36, "learning_rate": 3.022508428860418e-06, "logits/chosen": -2.6776463985443115, "logits/rejected": -1.5572987794876099, "logps/chosen": -279.427001953125, "logps/rejected": -272.32696533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.013849258422852, "rewards/margins": 11.725543975830078, "rewards/rejected": -16.73939323425293, "step": 15168 }, { "epoch": 2.36, "learning_rate": 3.02177498832927e-06, "logits/chosen": -2.215331792831421, "logits/rejected": -2.575139045715332, "logps/chosen": -350.0668029785156, "logps/rejected": -572.4232177734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.552605628967285, "rewards/margins": 10.489265441894531, "rewards/rejected": -20.0418701171875, "step": 15169 }, { "epoch": 2.36, "learning_rate": 3.0210415477981223e-06, "logits/chosen": -1.2563209533691406, "logits/rejected": -2.412952423095703, "logps/chosen": -140.32508850097656, "logps/rejected": -437.2464294433594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.76250171661377, "rewards/margins": 9.317188262939453, "rewards/rejected": -18.079689025878906, "step": 15170 }, { "epoch": 2.36, "learning_rate": 3.020308107266974e-06, "logits/chosen": -2.1119399070739746, "logits/rejected": -2.4107141494750977, "logps/chosen": -162.54403686523438, "logps/rejected": -312.40924072265625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -5.792633056640625, "rewards/margins": 9.138395309448242, "rewards/rejected": -14.931028366088867, "step": 15171 }, { "epoch": 2.36, "learning_rate": 3.019574666735827e-06, "logits/chosen": -1.6111751794815063, "logits/rejected": -2.1128082275390625, "logps/chosen": -308.0358581542969, "logps/rejected": -248.09188842773438, "loss": 0.2288, "rewards/accuracies": 1.0, "rewards/chosen": -7.984972953796387, "rewards/margins": 4.166522026062012, "rewards/rejected": -12.151494979858398, "step": 15172 }, { "epoch": 2.36, "learning_rate": 3.0188412262046788e-06, "logits/chosen": -2.1921002864837646, "logits/rejected": -3.161482572555542, "logps/chosen": -189.45387268066406, "logps/rejected": -526.8509521484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.664553165435791, "rewards/margins": 9.146714210510254, "rewards/rejected": -16.811267852783203, "step": 15173 }, { "epoch": 2.36, "learning_rate": 3.018107785673531e-06, "logits/chosen": -2.7118704319000244, "logits/rejected": -2.8159446716308594, "logps/chosen": -462.6719970703125, "logps/rejected": -485.122314453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.327205657958984, "rewards/margins": 8.81905746459961, "rewards/rejected": -18.146263122558594, "step": 15174 }, { "epoch": 2.36, "learning_rate": 3.017374345142383e-06, "logits/chosen": -2.9966835975646973, "logits/rejected": -2.5270140171051025, "logps/chosen": -200.31710815429688, "logps/rejected": -203.25894165039062, "loss": 0.9174, "rewards/accuracies": 0.5, "rewards/chosen": -7.5564470291137695, "rewards/margins": 5.412412643432617, "rewards/rejected": -12.968859672546387, "step": 15175 }, { "epoch": 2.36, "learning_rate": 3.016640904611235e-06, "logits/chosen": -2.1295485496520996, "logits/rejected": -2.680276870727539, "logps/chosen": -207.61068725585938, "logps/rejected": -507.8254699707031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.415796279907227, "rewards/margins": 10.854415893554688, "rewards/rejected": -19.270212173461914, "step": 15176 }, { "epoch": 2.36, "learning_rate": 3.015907464080087e-06, "logits/chosen": -2.559816837310791, "logits/rejected": -0.8351709842681885, "logps/chosen": -500.08343505859375, "logps/rejected": -206.82418823242188, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -6.9415082931518555, "rewards/margins": 4.46122932434082, "rewards/rejected": -11.402738571166992, "step": 15177 }, { "epoch": 2.36, "learning_rate": 3.015174023548939e-06, "logits/chosen": -2.876131296157837, "logits/rejected": -2.831226110458374, "logps/chosen": -588.85693359375, "logps/rejected": -514.4064331054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.733218669891357, "rewards/margins": 12.541431427001953, "rewards/rejected": -19.27465057373047, "step": 15178 }, { "epoch": 2.36, "learning_rate": 3.0144405830177913e-06, "logits/chosen": -1.5570502281188965, "logits/rejected": -2.4481377601623535, "logps/chosen": -217.48599243164062, "logps/rejected": -404.42584228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.167448997497559, "rewards/margins": 11.934669494628906, "rewards/rejected": -19.10211944580078, "step": 15179 }, { "epoch": 2.36, "learning_rate": 3.0137071424866436e-06, "logits/chosen": -2.7858755588531494, "logits/rejected": -2.459688901901245, "logps/chosen": -573.6912231445312, "logps/rejected": -754.4527587890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.097841262817383, "rewards/margins": 14.339797973632812, "rewards/rejected": -22.437639236450195, "step": 15180 }, { "epoch": 2.36, "learning_rate": 3.012973701955496e-06, "logits/chosen": -2.1758177280426025, "logits/rejected": -1.872266411781311, "logps/chosen": -626.4966430664062, "logps/rejected": -415.79876708984375, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -8.082021713256836, "rewards/margins": 7.883083343505859, "rewards/rejected": -15.965105056762695, "step": 15181 }, { "epoch": 2.36, "learning_rate": 3.012240261424348e-06, "logits/chosen": -2.7828660011291504, "logits/rejected": -2.6786251068115234, "logps/chosen": -161.09532165527344, "logps/rejected": -252.93789672851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.110335826873779, "rewards/margins": 10.304250717163086, "rewards/rejected": -16.41458511352539, "step": 15182 }, { "epoch": 2.36, "learning_rate": 3.0115068208932e-06, "logits/chosen": -2.416536331176758, "logits/rejected": -2.895833730697632, "logps/chosen": -307.5185241699219, "logps/rejected": -465.46795654296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.824161529541016, "rewards/margins": 8.42633056640625, "rewards/rejected": -16.250492095947266, "step": 15183 }, { "epoch": 2.36, "learning_rate": 3.010773380362052e-06, "logits/chosen": -2.876801013946533, "logits/rejected": -2.9943156242370605, "logps/chosen": -284.116943359375, "logps/rejected": -326.5191955566406, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": -9.601865768432617, "rewards/margins": 6.536495208740234, "rewards/rejected": -16.13836097717285, "step": 15184 }, { "epoch": 2.36, "learning_rate": 3.0100399398309043e-06, "logits/chosen": -2.462876558303833, "logits/rejected": -2.757129669189453, "logps/chosen": -221.53265380859375, "logps/rejected": -443.16387939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.497303009033203, "rewards/margins": 12.45946979522705, "rewards/rejected": -15.95677375793457, "step": 15185 }, { "epoch": 2.36, "learning_rate": 3.009306499299756e-06, "logits/chosen": -2.5862271785736084, "logits/rejected": -2.79134202003479, "logps/chosen": -149.96859741210938, "logps/rejected": -207.25039672851562, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.6960701942443848, "rewards/margins": 8.215559005737305, "rewards/rejected": -11.911628723144531, "step": 15186 }, { "epoch": 2.36, "learning_rate": 3.008573058768608e-06, "logits/chosen": -2.685422420501709, "logits/rejected": -1.1235986948013306, "logps/chosen": -357.2335205078125, "logps/rejected": -384.0782775878906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.5668487548828125, "rewards/margins": 12.41450023651123, "rewards/rejected": -17.98134994506836, "step": 15187 }, { "epoch": 2.36, "learning_rate": 3.0078396182374603e-06, "logits/chosen": -2.5298779010772705, "logits/rejected": -2.6768319606781006, "logps/chosen": -95.08377075195312, "logps/rejected": -239.27064514160156, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -5.8408894538879395, "rewards/margins": 7.683065414428711, "rewards/rejected": -13.523954391479492, "step": 15188 }, { "epoch": 2.36, "learning_rate": 3.0071061777063127e-06, "logits/chosen": -2.5391781330108643, "logits/rejected": -2.957735061645508, "logps/chosen": -292.8797912597656, "logps/rejected": -506.88134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.617584228515625, "rewards/margins": 12.740849494934082, "rewards/rejected": -19.35843276977539, "step": 15189 }, { "epoch": 2.36, "learning_rate": 3.006372737175165e-06, "logits/chosen": -1.4191755056381226, "logits/rejected": -2.5223922729492188, "logps/chosen": -176.28770446777344, "logps/rejected": -387.1485290527344, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -4.792090892791748, "rewards/margins": 8.25881576538086, "rewards/rejected": -13.050907135009766, "step": 15190 }, { "epoch": 2.36, "learning_rate": 3.005639296644017e-06, "logits/chosen": -2.2335691452026367, "logits/rejected": -2.7975051403045654, "logps/chosen": -182.86351013183594, "logps/rejected": -423.1920166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.608027935028076, "rewards/margins": 10.150431632995605, "rewards/rejected": -15.758459091186523, "step": 15191 }, { "epoch": 2.36, "learning_rate": 3.004905856112869e-06, "logits/chosen": -2.3482091426849365, "logits/rejected": -2.8907718658447266, "logps/chosen": -166.33169555664062, "logps/rejected": -417.76953125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.5205278396606445, "rewards/margins": 6.512822151184082, "rewards/rejected": -11.033349990844727, "step": 15192 }, { "epoch": 2.36, "learning_rate": 3.004172415581721e-06, "logits/chosen": -2.3433034420013428, "logits/rejected": -2.521984815597534, "logps/chosen": -197.9576416015625, "logps/rejected": -497.62127685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.295876502990723, "rewards/margins": 15.87757682800293, "rewards/rejected": -23.173452377319336, "step": 15193 }, { "epoch": 2.36, "learning_rate": 3.0034389750505733e-06, "logits/chosen": -2.004027843475342, "logits/rejected": -2.499100685119629, "logps/chosen": -221.11883544921875, "logps/rejected": -335.3147277832031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.216814041137695, "rewards/margins": 10.763603210449219, "rewards/rejected": -19.98041534423828, "step": 15194 }, { "epoch": 2.36, "learning_rate": 3.002705534519425e-06, "logits/chosen": -2.7960658073425293, "logits/rejected": -3.0462934970855713, "logps/chosen": -103.99189758300781, "logps/rejected": -251.06895446777344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.532045364379883, "rewards/margins": 9.397068977355957, "rewards/rejected": -15.92911434173584, "step": 15195 }, { "epoch": 2.36, "learning_rate": 3.001972093988277e-06, "logits/chosen": -2.432129144668579, "logits/rejected": -2.3290984630584717, "logps/chosen": -163.82763671875, "logps/rejected": -411.22918701171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.973451614379883, "rewards/margins": 12.351526260375977, "rewards/rejected": -17.32497787475586, "step": 15196 }, { "epoch": 2.36, "learning_rate": 3.00123865345713e-06, "logits/chosen": -2.2849228382110596, "logits/rejected": -2.783264398574829, "logps/chosen": -101.61388397216797, "logps/rejected": -219.07577514648438, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -6.406649589538574, "rewards/margins": 6.726439952850342, "rewards/rejected": -13.133090019226074, "step": 15197 }, { "epoch": 2.36, "learning_rate": 3.0005052129259817e-06, "logits/chosen": -2.2209632396698, "logits/rejected": -2.7216737270355225, "logps/chosen": -224.13926696777344, "logps/rejected": -445.4981689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.59128189086914, "rewards/margins": 11.86622428894043, "rewards/rejected": -20.45750617980957, "step": 15198 }, { "epoch": 2.36, "learning_rate": 2.999771772394834e-06, "logits/chosen": -2.6426382064819336, "logits/rejected": -1.7396923303604126, "logps/chosen": -470.05474853515625, "logps/rejected": -388.306884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.845465660095215, "rewards/margins": 12.707307815551758, "rewards/rejected": -19.552772521972656, "step": 15199 }, { "epoch": 2.36, "learning_rate": 2.999038331863686e-06, "logits/chosen": -2.743825912475586, "logits/rejected": -1.7380328178405762, "logps/chosen": -270.68865966796875, "logps/rejected": -237.43243408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.2297911643981934, "rewards/margins": 12.844951629638672, "rewards/rejected": -14.074743270874023, "step": 15200 }, { "epoch": 2.36, "learning_rate": 2.998304891332538e-06, "logits/chosen": -2.7096493244171143, "logits/rejected": -2.7113113403320312, "logps/chosen": -519.65185546875, "logps/rejected": -602.66015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.0001983642578125, "rewards/margins": 11.164764404296875, "rewards/rejected": -17.164962768554688, "step": 15201 }, { "epoch": 2.36, "learning_rate": 2.99757145080139e-06, "logits/chosen": -2.4405510425567627, "logits/rejected": -2.546858549118042, "logps/chosen": -379.216064453125, "logps/rejected": -555.01171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.226360321044922, "rewards/margins": 9.334602355957031, "rewards/rejected": -17.560962677001953, "step": 15202 }, { "epoch": 2.36, "learning_rate": 2.9968380102702423e-06, "logits/chosen": -2.35026478767395, "logits/rejected": -2.751650333404541, "logps/chosen": -208.38632202148438, "logps/rejected": -587.6961669921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.8503098487854, "rewards/margins": 10.241832733154297, "rewards/rejected": -15.092142105102539, "step": 15203 }, { "epoch": 2.36, "learning_rate": 2.9961045697390942e-06, "logits/chosen": -2.4786627292633057, "logits/rejected": -2.623945474624634, "logps/chosen": -211.544921875, "logps/rejected": -429.7322692871094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.932474136352539, "rewards/margins": 10.167154312133789, "rewards/rejected": -20.099628448486328, "step": 15204 }, { "epoch": 2.36, "learning_rate": 2.9953711292079465e-06, "logits/chosen": -2.7556262016296387, "logits/rejected": -2.5143394470214844, "logps/chosen": -266.6713562011719, "logps/rejected": -438.0703430175781, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.332758903503418, "rewards/margins": 10.266020774841309, "rewards/rejected": -17.598779678344727, "step": 15205 }, { "epoch": 2.36, "learning_rate": 2.994637688676799e-06, "logits/chosen": -1.7364975214004517, "logits/rejected": -2.7838358879089355, "logps/chosen": -94.45101165771484, "logps/rejected": -374.4874267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.393527507781982, "rewards/margins": 11.553943634033203, "rewards/rejected": -17.947471618652344, "step": 15206 }, { "epoch": 2.37, "learning_rate": 2.9939042481456507e-06, "logits/chosen": -1.299278736114502, "logits/rejected": -2.3800106048583984, "logps/chosen": -124.96917724609375, "logps/rejected": -314.618896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.864188194274902, "rewards/margins": 13.18214225769043, "rewards/rejected": -19.046329498291016, "step": 15207 }, { "epoch": 2.37, "learning_rate": 2.993170807614503e-06, "logits/chosen": -1.7855488061904907, "logits/rejected": -2.7037429809570312, "logps/chosen": -151.56443786621094, "logps/rejected": -422.74053955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.11772346496582, "rewards/margins": 11.563592910766602, "rewards/rejected": -16.681316375732422, "step": 15208 }, { "epoch": 2.37, "learning_rate": 2.992437367083355e-06, "logits/chosen": -2.1727232933044434, "logits/rejected": -2.868196487426758, "logps/chosen": -240.52035522460938, "logps/rejected": -541.0110473632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.654067993164062, "rewards/margins": 11.939579010009766, "rewards/rejected": -20.593647003173828, "step": 15209 }, { "epoch": 2.37, "learning_rate": 2.991703926552207e-06, "logits/chosen": -1.275275468826294, "logits/rejected": -2.479745388031006, "logps/chosen": -231.75271606445312, "logps/rejected": -297.729736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.6902719736099243, "rewards/margins": 14.23100471496582, "rewards/rejected": -15.921276092529297, "step": 15210 }, { "epoch": 2.37, "learning_rate": 2.990970486021059e-06, "logits/chosen": -3.0462779998779297, "logits/rejected": -2.868536949157715, "logps/chosen": -734.3375244140625, "logps/rejected": -621.7463989257812, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.108177185058594, "rewards/margins": 7.083226680755615, "rewards/rejected": -11.191404342651367, "step": 15211 }, { "epoch": 2.37, "learning_rate": 2.9902370454899114e-06, "logits/chosen": -2.460350751876831, "logits/rejected": -2.824505090713501, "logps/chosen": -127.76651763916016, "logps/rejected": -413.689453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.260499954223633, "rewards/margins": 9.076715469360352, "rewards/rejected": -13.337215423583984, "step": 15212 }, { "epoch": 2.37, "learning_rate": 2.9895036049587632e-06, "logits/chosen": -2.459432363510132, "logits/rejected": -1.9947435855865479, "logps/chosen": -516.4418334960938, "logps/rejected": -661.428466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.753713607788086, "rewards/margins": 14.03193187713623, "rewards/rejected": -18.78564453125, "step": 15213 }, { "epoch": 2.37, "learning_rate": 2.988770164427616e-06, "logits/chosen": -2.548962354660034, "logits/rejected": -2.627408266067505, "logps/chosen": -389.81683349609375, "logps/rejected": -618.2540283203125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.197379112243652, "rewards/margins": 9.118734359741211, "rewards/rejected": -16.31611442565918, "step": 15214 }, { "epoch": 2.37, "learning_rate": 2.988036723896468e-06, "logits/chosen": -2.6833205223083496, "logits/rejected": -2.838226795196533, "logps/chosen": -246.4134521484375, "logps/rejected": -505.8457336425781, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -8.255217552185059, "rewards/margins": 10.162971496582031, "rewards/rejected": -18.418188095092773, "step": 15215 }, { "epoch": 2.37, "learning_rate": 2.9873032833653197e-06, "logits/chosen": -2.7267353534698486, "logits/rejected": -2.443462371826172, "logps/chosen": -448.51483154296875, "logps/rejected": -622.668701171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.714468002319336, "rewards/margins": 13.034454345703125, "rewards/rejected": -19.74892234802246, "step": 15216 }, { "epoch": 2.37, "learning_rate": 2.986569842834172e-06, "logits/chosen": -1.8960254192352295, "logits/rejected": -2.8229029178619385, "logps/chosen": -290.71673583984375, "logps/rejected": -518.0234985351562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.427207946777344, "rewards/margins": 11.55252742767334, "rewards/rejected": -17.979736328125, "step": 15217 }, { "epoch": 2.37, "learning_rate": 2.985836402303024e-06, "logits/chosen": -1.4832582473754883, "logits/rejected": -2.3575778007507324, "logps/chosen": -150.1142120361328, "logps/rejected": -409.0108337402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4277238845825195, "rewards/margins": 11.608513832092285, "rewards/rejected": -17.036237716674805, "step": 15218 }, { "epoch": 2.37, "learning_rate": 2.985102961771876e-06, "logits/chosen": -1.7207627296447754, "logits/rejected": -2.834404468536377, "logps/chosen": -217.77293395996094, "logps/rejected": -597.3265380859375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -7.66135311126709, "rewards/margins": 8.445168495178223, "rewards/rejected": -16.106521606445312, "step": 15219 }, { "epoch": 2.37, "learning_rate": 2.984369521240728e-06, "logits/chosen": -2.9200851917266846, "logits/rejected": -3.1268022060394287, "logps/chosen": -230.4259033203125, "logps/rejected": -443.67547607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1326165199279785, "rewards/margins": 10.672811508178711, "rewards/rejected": -13.805427551269531, "step": 15220 }, { "epoch": 2.37, "learning_rate": 2.9836360807095804e-06, "logits/chosen": -3.0058774948120117, "logits/rejected": -2.684124708175659, "logps/chosen": -291.59381103515625, "logps/rejected": -257.0804443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.659372329711914, "rewards/margins": 12.36076545715332, "rewards/rejected": -17.020137786865234, "step": 15221 }, { "epoch": 2.37, "learning_rate": 2.9829026401784327e-06, "logits/chosen": -2.328218936920166, "logits/rejected": -2.7877280712127686, "logps/chosen": -295.9808654785156, "logps/rejected": -522.4318237304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.82816219329834, "rewards/margins": 14.123613357543945, "rewards/rejected": -21.9517765045166, "step": 15222 }, { "epoch": 2.37, "learning_rate": 2.982169199647285e-06, "logits/chosen": -1.936402678489685, "logits/rejected": -2.4892799854278564, "logps/chosen": -299.65570068359375, "logps/rejected": -617.957763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7461466789245605, "rewards/margins": 13.546756744384766, "rewards/rejected": -19.292903900146484, "step": 15223 }, { "epoch": 2.37, "learning_rate": 2.981435759116137e-06, "logits/chosen": -1.9636929035186768, "logits/rejected": -2.7612180709838867, "logps/chosen": -138.72784423828125, "logps/rejected": -432.4550476074219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.061156272888184, "rewards/margins": 10.684215545654297, "rewards/rejected": -17.745372772216797, "step": 15224 }, { "epoch": 2.37, "learning_rate": 2.9807023185849887e-06, "logits/chosen": -2.3641345500946045, "logits/rejected": -2.620072841644287, "logps/chosen": -72.74465942382812, "logps/rejected": -253.49270629882812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.698056697845459, "rewards/margins": 9.846867561340332, "rewards/rejected": -14.544923782348633, "step": 15225 }, { "epoch": 2.37, "learning_rate": 2.979968878053841e-06, "logits/chosen": -2.058328866958618, "logits/rejected": -2.8052847385406494, "logps/chosen": -185.70166015625, "logps/rejected": -452.040283203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.679826736450195, "rewards/margins": 7.7347731590271, "rewards/rejected": -14.414600372314453, "step": 15226 }, { "epoch": 2.37, "learning_rate": 2.979235437522693e-06, "logits/chosen": -2.687898635864258, "logits/rejected": -2.6344828605651855, "logps/chosen": -358.95599365234375, "logps/rejected": -532.9266357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.805485725402832, "rewards/margins": 13.705013275146484, "rewards/rejected": -22.510498046875, "step": 15227 }, { "epoch": 2.37, "learning_rate": 2.9785019969915452e-06, "logits/chosen": -2.5697054862976074, "logits/rejected": -1.926640272140503, "logps/chosen": -733.2066650390625, "logps/rejected": -720.3414306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.47490119934082, "rewards/margins": 13.113580703735352, "rewards/rejected": -24.588481903076172, "step": 15228 }, { "epoch": 2.37, "learning_rate": 2.977768556460397e-06, "logits/chosen": -1.9656912088394165, "logits/rejected": -2.8532474040985107, "logps/chosen": -261.7348327636719, "logps/rejected": -543.7916870117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.700564861297607, "rewards/margins": 14.109352111816406, "rewards/rejected": -18.809917449951172, "step": 15229 }, { "epoch": 2.37, "learning_rate": 2.9770351159292494e-06, "logits/chosen": -2.7185256481170654, "logits/rejected": -2.8515217304229736, "logps/chosen": -1284.83935546875, "logps/rejected": -769.15283203125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -11.808191299438477, "rewards/margins": 5.828791618347168, "rewards/rejected": -17.636981964111328, "step": 15230 }, { "epoch": 2.37, "learning_rate": 2.9763016753981017e-06, "logits/chosen": -2.63986873626709, "logits/rejected": -2.6066818237304688, "logps/chosen": -244.79367065429688, "logps/rejected": -436.17132568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.917593479156494, "rewards/margins": 10.957595825195312, "rewards/rejected": -18.87519073486328, "step": 15231 }, { "epoch": 2.37, "learning_rate": 2.975568234866954e-06, "logits/chosen": -2.5919978618621826, "logits/rejected": -1.9720357656478882, "logps/chosen": -254.87701416015625, "logps/rejected": -206.9817657470703, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -5.584743022918701, "rewards/margins": 5.727412700653076, "rewards/rejected": -11.312155723571777, "step": 15232 }, { "epoch": 2.37, "learning_rate": 2.974834794335806e-06, "logits/chosen": -2.4252495765686035, "logits/rejected": -2.7928152084350586, "logps/chosen": -1035.20166015625, "logps/rejected": -1009.0038452148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.519073486328125, "rewards/margins": 11.839405059814453, "rewards/rejected": -19.358478546142578, "step": 15233 }, { "epoch": 2.37, "learning_rate": 2.974101353804658e-06, "logits/chosen": -1.9593682289123535, "logits/rejected": -2.705976724624634, "logps/chosen": -497.57647705078125, "logps/rejected": -585.118408203125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -6.483132362365723, "rewards/margins": 9.058067321777344, "rewards/rejected": -15.54119873046875, "step": 15234 }, { "epoch": 2.37, "learning_rate": 2.97336791327351e-06, "logits/chosen": -2.4113543033599854, "logits/rejected": -0.7318307161331177, "logps/chosen": -316.8692626953125, "logps/rejected": -277.89813232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.382716178894043, "rewards/margins": 11.951919555664062, "rewards/rejected": -17.334636688232422, "step": 15235 }, { "epoch": 2.37, "learning_rate": 2.972634472742362e-06, "logits/chosen": -2.523176670074463, "logits/rejected": -2.761232852935791, "logps/chosen": -360.76788330078125, "logps/rejected": -469.122314453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.283361434936523, "rewards/margins": 9.298908233642578, "rewards/rejected": -15.582269668579102, "step": 15236 }, { "epoch": 2.37, "learning_rate": 2.9719010322112142e-06, "logits/chosen": -2.051210403442383, "logits/rejected": -2.495281934738159, "logps/chosen": -279.1663818359375, "logps/rejected": -480.08453369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.8903021812438965, "rewards/margins": 12.671910285949707, "rewards/rejected": -17.562211990356445, "step": 15237 }, { "epoch": 2.37, "learning_rate": 2.971167591680066e-06, "logits/chosen": -2.597581386566162, "logits/rejected": -1.8051176071166992, "logps/chosen": -253.60296630859375, "logps/rejected": -363.4026794433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.286261558532715, "rewards/margins": 12.397748947143555, "rewards/rejected": -18.684009552001953, "step": 15238 }, { "epoch": 2.37, "learning_rate": 2.9704341511489184e-06, "logits/chosen": -2.775935411453247, "logits/rejected": -2.0603880882263184, "logps/chosen": -196.7423858642578, "logps/rejected": -245.01791381835938, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/chosen": -6.400425434112549, "rewards/margins": 7.259973526000977, "rewards/rejected": -13.660398483276367, "step": 15239 }, { "epoch": 2.37, "learning_rate": 2.9697007106177707e-06, "logits/chosen": -0.7036027908325195, "logits/rejected": -1.9315651655197144, "logps/chosen": -360.95831298828125, "logps/rejected": -667.656005859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -9.847498893737793, "rewards/margins": 10.290814399719238, "rewards/rejected": -20.13831329345703, "step": 15240 }, { "epoch": 2.37, "learning_rate": 2.968967270086623e-06, "logits/chosen": -2.654881000518799, "logits/rejected": -1.4271996021270752, "logps/chosen": -665.2709350585938, "logps/rejected": -449.03118896484375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.308661460876465, "rewards/margins": 9.070253372192383, "rewards/rejected": -13.378914833068848, "step": 15241 }, { "epoch": 2.37, "learning_rate": 2.968233829555475e-06, "logits/chosen": -2.719240427017212, "logits/rejected": -1.7091403007507324, "logps/chosen": -585.7405395507812, "logps/rejected": -326.09619140625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.268637180328369, "rewards/margins": 7.290492534637451, "rewards/rejected": -13.55912971496582, "step": 15242 }, { "epoch": 2.37, "learning_rate": 2.967500389024327e-06, "logits/chosen": -2.5966439247131348, "logits/rejected": -2.2483246326446533, "logps/chosen": -528.7122802734375, "logps/rejected": -571.8140258789062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.708405494689941, "rewards/margins": 10.406152725219727, "rewards/rejected": -20.114559173583984, "step": 15243 }, { "epoch": 2.37, "learning_rate": 2.966766948493179e-06, "logits/chosen": -1.462162971496582, "logits/rejected": -2.7012858390808105, "logps/chosen": -114.63275909423828, "logps/rejected": -533.2403564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.722903728485107, "rewards/margins": 13.83790397644043, "rewards/rejected": -19.560808181762695, "step": 15244 }, { "epoch": 2.37, "learning_rate": 2.966033507962031e-06, "logits/chosen": -2.5414516925811768, "logits/rejected": -2.8211517333984375, "logps/chosen": -294.42315673828125, "logps/rejected": -459.23565673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.808574676513672, "rewards/margins": 11.004432678222656, "rewards/rejected": -18.813007354736328, "step": 15245 }, { "epoch": 2.37, "learning_rate": 2.9653000674308833e-06, "logits/chosen": -2.562007427215576, "logits/rejected": -3.0045175552368164, "logps/chosen": -160.36666870117188, "logps/rejected": -418.34234619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.660407543182373, "rewards/margins": 11.339065551757812, "rewards/rejected": -17.999473571777344, "step": 15246 }, { "epoch": 2.37, "learning_rate": 2.964566626899735e-06, "logits/chosen": -3.271982431411743, "logits/rejected": -3.2650206089019775, "logps/chosen": -81.96279907226562, "logps/rejected": -139.20751953125, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": -3.780761241912842, "rewards/margins": 5.5162787437438965, "rewards/rejected": -9.297039985656738, "step": 15247 }, { "epoch": 2.37, "learning_rate": 2.963833186368588e-06, "logits/chosen": -1.990691065788269, "logits/rejected": -2.7342617511749268, "logps/chosen": -159.9054718017578, "logps/rejected": -347.7773742675781, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -10.066758155822754, "rewards/margins": 8.033184051513672, "rewards/rejected": -18.09994125366211, "step": 15248 }, { "epoch": 2.37, "learning_rate": 2.9630997458374397e-06, "logits/chosen": -2.52933406829834, "logits/rejected": -2.6859025955200195, "logps/chosen": -115.7433090209961, "logps/rejected": -286.97991943359375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.833799362182617, "rewards/margins": 7.126911163330078, "rewards/rejected": -12.960710525512695, "step": 15249 }, { "epoch": 2.37, "learning_rate": 2.962366305306292e-06, "logits/chosen": -2.279796838760376, "logits/rejected": -2.4609272480010986, "logps/chosen": -416.95196533203125, "logps/rejected": -521.2887573242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.8857879638671875, "rewards/margins": 13.203808784484863, "rewards/rejected": -21.089595794677734, "step": 15250 }, { "epoch": 2.37, "learning_rate": 2.961632864775144e-06, "logits/chosen": -1.7007436752319336, "logits/rejected": -2.9468863010406494, "logps/chosen": -203.43592834472656, "logps/rejected": -517.70263671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.193960666656494, "rewards/margins": 11.423355102539062, "rewards/rejected": -18.61731719970703, "step": 15251 }, { "epoch": 2.37, "learning_rate": 2.9608994242439962e-06, "logits/chosen": -1.2150027751922607, "logits/rejected": -2.722229480743408, "logps/chosen": -255.635498046875, "logps/rejected": -529.24462890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.2777605056762695, "rewards/margins": 10.792787551879883, "rewards/rejected": -16.070547103881836, "step": 15252 }, { "epoch": 2.37, "learning_rate": 2.960165983712848e-06, "logits/chosen": -1.7750582695007324, "logits/rejected": -2.7918496131896973, "logps/chosen": -68.62379455566406, "logps/rejected": -393.56884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.161397933959961, "rewards/margins": 10.977072715759277, "rewards/rejected": -16.138471603393555, "step": 15253 }, { "epoch": 2.37, "learning_rate": 2.9594325431817004e-06, "logits/chosen": -2.6740241050720215, "logits/rejected": -1.8815795183181763, "logps/chosen": -262.6217956542969, "logps/rejected": -216.58395385742188, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -5.077790260314941, "rewards/margins": 5.4184465408325195, "rewards/rejected": -10.496236801147461, "step": 15254 }, { "epoch": 2.37, "learning_rate": 2.9586991026505523e-06, "logits/chosen": -2.9353652000427246, "logits/rejected": -2.6104156970977783, "logps/chosen": -239.83157348632812, "logps/rejected": -395.91729736328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.983564376831055, "rewards/margins": 10.117525100708008, "rewards/rejected": -17.101089477539062, "step": 15255 }, { "epoch": 2.37, "learning_rate": 2.9579656621194046e-06, "logits/chosen": -1.1963404417037964, "logits/rejected": -2.578401565551758, "logps/chosen": -188.20858764648438, "logps/rejected": -551.277099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.243730545043945, "rewards/margins": 9.41573429107666, "rewards/rejected": -16.659465789794922, "step": 15256 }, { "epoch": 2.37, "learning_rate": 2.957232221588257e-06, "logits/chosen": -2.876999855041504, "logits/rejected": -2.884380340576172, "logps/chosen": -466.1133117675781, "logps/rejected": -522.732177734375, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -6.694349765777588, "rewards/margins": 7.321516513824463, "rewards/rejected": -14.01586627960205, "step": 15257 }, { "epoch": 2.37, "learning_rate": 2.9564987810571088e-06, "logits/chosen": -2.0734543800354004, "logits/rejected": -2.6586124897003174, "logps/chosen": -454.5540771484375, "logps/rejected": -697.8037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.297482490539551, "rewards/margins": 13.864526748657227, "rewards/rejected": -20.162010192871094, "step": 15258 }, { "epoch": 2.37, "learning_rate": 2.955765340525961e-06, "logits/chosen": -2.3566792011260986, "logits/rejected": -2.61661434173584, "logps/chosen": -207.46836853027344, "logps/rejected": -434.60430908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.14243483543396, "rewards/margins": 15.690019607543945, "rewards/rejected": -18.832454681396484, "step": 15259 }, { "epoch": 2.37, "learning_rate": 2.955031899994813e-06, "logits/chosen": -2.429213047027588, "logits/rejected": -3.036684989929199, "logps/chosen": -520.5631713867188, "logps/rejected": -625.6314697265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.509655952453613, "rewards/margins": 9.42383861541748, "rewards/rejected": -14.933494567871094, "step": 15260 }, { "epoch": 2.37, "learning_rate": 2.9542984594636653e-06, "logits/chosen": -1.1823811531066895, "logits/rejected": -1.460829734802246, "logps/chosen": -322.78546142578125, "logps/rejected": -294.1706237792969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.508369445800781, "rewards/margins": 8.783074378967285, "rewards/rejected": -14.291443824768066, "step": 15261 }, { "epoch": 2.37, "learning_rate": 2.953565018932517e-06, "logits/chosen": -0.9561373591423035, "logits/rejected": -2.839644193649292, "logps/chosen": -78.89502716064453, "logps/rejected": -573.2671508789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.773763656616211, "rewards/margins": 10.376754760742188, "rewards/rejected": -16.1505184173584, "step": 15262 }, { "epoch": 2.37, "learning_rate": 2.9528315784013694e-06, "logits/chosen": -3.1156630516052246, "logits/rejected": -2.685938596725464, "logps/chosen": -428.1737060546875, "logps/rejected": -419.8045654296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.656527519226074, "rewards/margins": 8.723543167114258, "rewards/rejected": -18.38007164001465, "step": 15263 }, { "epoch": 2.37, "learning_rate": 2.9520981378702213e-06, "logits/chosen": -2.432968854904175, "logits/rejected": -2.6487934589385986, "logps/chosen": -135.30775451660156, "logps/rejected": -398.5326232910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.614380836486816, "rewards/margins": 12.937702178955078, "rewards/rejected": -17.552082061767578, "step": 15264 }, { "epoch": 2.37, "learning_rate": 2.9513646973390736e-06, "logits/chosen": -2.3342807292938232, "logits/rejected": -2.72926664352417, "logps/chosen": -163.4857635498047, "logps/rejected": -355.0068359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.439327716827393, "rewards/margins": 12.020007133483887, "rewards/rejected": -17.459335327148438, "step": 15265 }, { "epoch": 2.37, "learning_rate": 2.950631256807926e-06, "logits/chosen": -2.054229974746704, "logits/rejected": -2.6526644229888916, "logps/chosen": -149.8461151123047, "logps/rejected": -301.76953125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -6.100008487701416, "rewards/margins": 7.5723700523376465, "rewards/rejected": -13.672378540039062, "step": 15266 }, { "epoch": 2.37, "learning_rate": 2.949897816276778e-06, "logits/chosen": -2.3000030517578125, "logits/rejected": -2.908719062805176, "logps/chosen": -88.09770965576172, "logps/rejected": -279.6220703125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.961298942565918, "rewards/margins": 7.159893035888672, "rewards/rejected": -14.121192932128906, "step": 15267 }, { "epoch": 2.37, "learning_rate": 2.94916437574563e-06, "logits/chosen": -2.524407148361206, "logits/rejected": -2.0150389671325684, "logps/chosen": -357.20391845703125, "logps/rejected": -368.2053527832031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0373756885528564, "rewards/margins": 12.500810623168945, "rewards/rejected": -15.538186073303223, "step": 15268 }, { "epoch": 2.37, "learning_rate": 2.948430935214482e-06, "logits/chosen": -1.8195991516113281, "logits/rejected": -2.650646686553955, "logps/chosen": -68.975341796875, "logps/rejected": -275.9154052734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3643550872802734, "rewards/margins": 11.083059310913086, "rewards/rejected": -14.44741439819336, "step": 15269 }, { "epoch": 2.37, "learning_rate": 2.9476974946833343e-06, "logits/chosen": -2.4790594577789307, "logits/rejected": -2.597344398498535, "logps/chosen": -323.9307556152344, "logps/rejected": -364.6181640625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.99119758605957, "rewards/margins": 7.680013656616211, "rewards/rejected": -13.671211242675781, "step": 15270 }, { "epoch": 2.37, "learning_rate": 2.946964054152186e-06, "logits/chosen": -2.644953727722168, "logits/rejected": -2.707231283187866, "logps/chosen": -135.6956787109375, "logps/rejected": -411.65924072265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.419713973999023, "rewards/margins": 12.200498580932617, "rewards/rejected": -20.62021255493164, "step": 15271 }, { "epoch": 2.38, "learning_rate": 2.9462306136210385e-06, "logits/chosen": -2.7402729988098145, "logits/rejected": -3.017627239227295, "logps/chosen": -590.32861328125, "logps/rejected": -543.2459716796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.017034530639648, "rewards/margins": 9.620203018188477, "rewards/rejected": -15.637237548828125, "step": 15272 }, { "epoch": 2.38, "learning_rate": 2.9454971730898908e-06, "logits/chosen": -2.5651261806488037, "logits/rejected": -2.8415513038635254, "logps/chosen": -187.4873046875, "logps/rejected": -291.533935546875, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -8.034351348876953, "rewards/margins": 6.300693988800049, "rewards/rejected": -14.335044860839844, "step": 15273 }, { "epoch": 2.38, "learning_rate": 2.9447637325587426e-06, "logits/chosen": -1.7719993591308594, "logits/rejected": -2.7416980266571045, "logps/chosen": -443.2374267578125, "logps/rejected": -640.201171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.93443489074707, "rewards/margins": 9.846244812011719, "rewards/rejected": -14.780679702758789, "step": 15274 }, { "epoch": 2.38, "learning_rate": 2.944030292027595e-06, "logits/chosen": -2.3321826457977295, "logits/rejected": -2.6495471000671387, "logps/chosen": -783.146484375, "logps/rejected": -765.482177734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.824816703796387, "rewards/margins": 10.027172088623047, "rewards/rejected": -17.85198974609375, "step": 15275 }, { "epoch": 2.38, "learning_rate": 2.943296851496447e-06, "logits/chosen": -2.632068634033203, "logits/rejected": -2.6783440113067627, "logps/chosen": -147.4165496826172, "logps/rejected": -192.7201690673828, "loss": 0.4727, "rewards/accuracies": 0.5, "rewards/chosen": -10.063348770141602, "rewards/margins": 4.052241802215576, "rewards/rejected": -14.115591049194336, "step": 15276 }, { "epoch": 2.38, "learning_rate": 2.942563410965299e-06, "logits/chosen": -1.936956524848938, "logits/rejected": -2.8042190074920654, "logps/chosen": -154.87310791015625, "logps/rejected": -369.05023193359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.190822124481201, "rewards/margins": 8.055760383605957, "rewards/rejected": -15.24658203125, "step": 15277 }, { "epoch": 2.38, "learning_rate": 2.941829970434151e-06, "logits/chosen": -2.317988634109497, "logits/rejected": -1.8963007926940918, "logps/chosen": -285.203125, "logps/rejected": -349.51068115234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.191972732543945, "rewards/margins": 8.960421562194824, "rewards/rejected": -15.15239429473877, "step": 15278 }, { "epoch": 2.38, "learning_rate": 2.9410965299030033e-06, "logits/chosen": -2.1545205116271973, "logits/rejected": -3.0618276596069336, "logps/chosen": -177.32769775390625, "logps/rejected": -510.716552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.941926956176758, "rewards/margins": 10.289030075073242, "rewards/rejected": -14.23095703125, "step": 15279 }, { "epoch": 2.38, "learning_rate": 2.940363089371855e-06, "logits/chosen": -2.7471044063568115, "logits/rejected": -2.4460537433624268, "logps/chosen": -368.99432373046875, "logps/rejected": -302.03802490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.415616512298584, "rewards/margins": 13.61996841430664, "rewards/rejected": -16.035585403442383, "step": 15280 }, { "epoch": 2.38, "learning_rate": 2.9396296488407075e-06, "logits/chosen": -2.4537699222564697, "logits/rejected": -1.7940374612808228, "logps/chosen": -832.1273193359375, "logps/rejected": -803.4432373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.638930797576904, "rewards/margins": 18.969158172607422, "rewards/rejected": -24.608089447021484, "step": 15281 }, { "epoch": 2.38, "learning_rate": 2.9388962083095598e-06, "logits/chosen": -2.7367560863494873, "logits/rejected": -2.838667631149292, "logps/chosen": -146.14453125, "logps/rejected": -307.2633056640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.467363357543945, "rewards/margins": 10.422209739685059, "rewards/rejected": -16.889572143554688, "step": 15282 }, { "epoch": 2.38, "learning_rate": 2.938162767778412e-06, "logits/chosen": -2.9174954891204834, "logits/rejected": -2.680222511291504, "logps/chosen": -319.63818359375, "logps/rejected": -422.5691223144531, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.822680711746216, "rewards/margins": 9.956846237182617, "rewards/rejected": -13.77952766418457, "step": 15283 }, { "epoch": 2.38, "learning_rate": 2.937429327247264e-06, "logits/chosen": -1.8874642848968506, "logits/rejected": -2.2941503524780273, "logps/chosen": -287.3682861328125, "logps/rejected": -424.5101013183594, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.053252220153809, "rewards/margins": 8.378273963928223, "rewards/rejected": -16.43152618408203, "step": 15284 }, { "epoch": 2.38, "learning_rate": 2.936695886716116e-06, "logits/chosen": -2.592428684234619, "logits/rejected": -2.2513256072998047, "logps/chosen": -470.7859191894531, "logps/rejected": -401.02142333984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.197846412658691, "rewards/margins": 8.726078033447266, "rewards/rejected": -15.923925399780273, "step": 15285 }, { "epoch": 2.38, "learning_rate": 2.935962446184968e-06, "logits/chosen": -1.620938777923584, "logits/rejected": -1.6493743658065796, "logps/chosen": -406.85321044921875, "logps/rejected": -439.1604919433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.196521759033203, "rewards/margins": 10.82276439666748, "rewards/rejected": -19.019287109375, "step": 15286 }, { "epoch": 2.38, "learning_rate": 2.93522900565382e-06, "logits/chosen": -2.635342836380005, "logits/rejected": -2.4264585971832275, "logps/chosen": -129.65087890625, "logps/rejected": -190.1216278076172, "loss": 0.0269, "rewards/accuracies": 1.0, "rewards/chosen": -8.703166007995605, "rewards/margins": 3.775864839553833, "rewards/rejected": -12.47903060913086, "step": 15287 }, { "epoch": 2.38, "learning_rate": 2.9344955651226723e-06, "logits/chosen": -1.8985469341278076, "logits/rejected": -2.780762195587158, "logps/chosen": -179.62002563476562, "logps/rejected": -417.52032470703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.64715576171875, "rewards/margins": 8.675369262695312, "rewards/rejected": -15.322525024414062, "step": 15288 }, { "epoch": 2.38, "learning_rate": 2.933762124591524e-06, "logits/chosen": -0.8127074241638184, "logits/rejected": -1.0313936471939087, "logps/chosen": -224.69378662109375, "logps/rejected": -466.7374267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.291438579559326, "rewards/margins": 13.672674179077148, "rewards/rejected": -18.964113235473633, "step": 15289 }, { "epoch": 2.38, "learning_rate": 2.933028684060377e-06, "logits/chosen": -2.971196413040161, "logits/rejected": -2.136101484298706, "logps/chosen": -163.22219848632812, "logps/rejected": -156.67233276367188, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.833381652832031, "rewards/margins": 6.783201694488525, "rewards/rejected": -12.616582870483398, "step": 15290 }, { "epoch": 2.38, "learning_rate": 2.932295243529229e-06, "logits/chosen": -1.4402085542678833, "logits/rejected": -2.2788095474243164, "logps/chosen": -116.7742919921875, "logps/rejected": -283.61199951171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -3.583956241607666, "rewards/margins": 9.469311714172363, "rewards/rejected": -13.053267478942871, "step": 15291 }, { "epoch": 2.38, "learning_rate": 2.931561802998081e-06, "logits/chosen": -2.8165576457977295, "logits/rejected": -1.8337783813476562, "logps/chosen": -311.1761474609375, "logps/rejected": -344.7150573730469, "loss": 0.0776, "rewards/accuracies": 1.0, "rewards/chosen": -7.12515926361084, "rewards/margins": 7.031209945678711, "rewards/rejected": -14.156370162963867, "step": 15292 }, { "epoch": 2.38, "learning_rate": 2.930828362466933e-06, "logits/chosen": -2.8352277278900146, "logits/rejected": -2.9921913146972656, "logps/chosen": -125.08399963378906, "logps/rejected": -447.1739501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.917924404144287, "rewards/margins": 16.617996215820312, "rewards/rejected": -21.535921096801758, "step": 15293 }, { "epoch": 2.38, "learning_rate": 2.930094921935785e-06, "logits/chosen": -2.7528774738311768, "logits/rejected": -2.9276952743530273, "logps/chosen": -116.9112319946289, "logps/rejected": -253.01339721679688, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.372430801391602, "rewards/margins": 7.7205705642700195, "rewards/rejected": -13.093000411987305, "step": 15294 }, { "epoch": 2.38, "learning_rate": 2.929361481404637e-06, "logits/chosen": -2.9895710945129395, "logits/rejected": -2.5689139366149902, "logps/chosen": -627.0957641601562, "logps/rejected": -281.2611083984375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -8.626914978027344, "rewards/margins": 5.758996486663818, "rewards/rejected": -14.385910987854004, "step": 15295 }, { "epoch": 2.38, "learning_rate": 2.928628040873489e-06, "logits/chosen": -1.5835824012756348, "logits/rejected": -1.9659438133239746, "logps/chosen": -448.9029541015625, "logps/rejected": -435.0110778808594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.9599289894104, "rewards/margins": 13.108975410461426, "rewards/rejected": -19.068904876708984, "step": 15296 }, { "epoch": 2.38, "learning_rate": 2.9278946003423413e-06, "logits/chosen": -1.3666326999664307, "logits/rejected": -2.590379476547241, "logps/chosen": -125.52693176269531, "logps/rejected": -420.3777160644531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.462803363800049, "rewards/margins": 8.41745662689209, "rewards/rejected": -13.880260467529297, "step": 15297 }, { "epoch": 2.38, "learning_rate": 2.9271611598111936e-06, "logits/chosen": -1.8368586301803589, "logits/rejected": -2.580204486846924, "logps/chosen": -96.9649429321289, "logps/rejected": -404.1279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.509035110473633, "rewards/margins": 10.456561088562012, "rewards/rejected": -15.965596199035645, "step": 15298 }, { "epoch": 2.38, "learning_rate": 2.926427719280046e-06, "logits/chosen": -2.160418748855591, "logits/rejected": -2.7001631259918213, "logps/chosen": -212.07321166992188, "logps/rejected": -301.697265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.248966217041016, "rewards/margins": 9.097481727600098, "rewards/rejected": -13.346447944641113, "step": 15299 }, { "epoch": 2.38, "learning_rate": 2.925694278748898e-06, "logits/chosen": -2.6480002403259277, "logits/rejected": -3.031233072280884, "logps/chosen": -262.9903869628906, "logps/rejected": -599.360107421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.924920082092285, "rewards/margins": 12.864812850952148, "rewards/rejected": -18.789731979370117, "step": 15300 }, { "epoch": 2.38, "learning_rate": 2.92496083821775e-06, "logits/chosen": -2.9609174728393555, "logits/rejected": -2.891721248626709, "logps/chosen": -249.4443817138672, "logps/rejected": -233.63104248046875, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -4.370619297027588, "rewards/margins": 6.926385879516602, "rewards/rejected": -11.297004699707031, "step": 15301 }, { "epoch": 2.38, "learning_rate": 2.924227397686602e-06, "logits/chosen": -2.8994107246398926, "logits/rejected": -2.2228634357452393, "logps/chosen": -146.48681640625, "logps/rejected": -223.11587524414062, "loss": 0.0208, "rewards/accuracies": 1.0, "rewards/chosen": -5.487077713012695, "rewards/margins": 8.242475509643555, "rewards/rejected": -13.72955322265625, "step": 15302 }, { "epoch": 2.38, "learning_rate": 2.9234939571554543e-06, "logits/chosen": -2.1050217151641846, "logits/rejected": -2.426896572113037, "logps/chosen": -289.6680603027344, "logps/rejected": -488.1739196777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.891777038574219, "rewards/margins": 11.738025665283203, "rewards/rejected": -17.629802703857422, "step": 15303 }, { "epoch": 2.38, "learning_rate": 2.922760516624306e-06, "logits/chosen": -2.6361286640167236, "logits/rejected": -1.4350049495697021, "logps/chosen": -234.4275360107422, "logps/rejected": -434.9056091308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.758523941040039, "rewards/margins": 12.688648223876953, "rewards/rejected": -15.447172164916992, "step": 15304 }, { "epoch": 2.38, "learning_rate": 2.922027076093158e-06, "logits/chosen": -2.4121198654174805, "logits/rejected": -2.6293110847473145, "logps/chosen": -86.40086364746094, "logps/rejected": -238.22708129882812, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.445385456085205, "rewards/margins": 8.855470657348633, "rewards/rejected": -14.300856590270996, "step": 15305 }, { "epoch": 2.38, "learning_rate": 2.9212936355620104e-06, "logits/chosen": -2.319246768951416, "logits/rejected": -2.696988582611084, "logps/chosen": -134.5458221435547, "logps/rejected": -397.87249755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.411664009094238, "rewards/margins": 10.781614303588867, "rewards/rejected": -15.193277359008789, "step": 15306 }, { "epoch": 2.38, "learning_rate": 2.9205601950308627e-06, "logits/chosen": -2.5192580223083496, "logits/rejected": -2.81363844871521, "logps/chosen": -170.12542724609375, "logps/rejected": -317.463134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.087972640991211, "rewards/margins": 9.698336601257324, "rewards/rejected": -18.78630828857422, "step": 15307 }, { "epoch": 2.38, "learning_rate": 2.919826754499715e-06, "logits/chosen": -2.75003719329834, "logits/rejected": -1.685192584991455, "logps/chosen": -396.37811279296875, "logps/rejected": -315.6544494628906, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.648282051086426, "rewards/margins": 8.030061721801758, "rewards/rejected": -11.678342819213867, "step": 15308 }, { "epoch": 2.38, "learning_rate": 2.919093313968567e-06, "logits/chosen": -1.6801493167877197, "logits/rejected": -2.600804567337036, "logps/chosen": -256.64044189453125, "logps/rejected": -336.82537841796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.986681938171387, "rewards/margins": 9.2279634475708, "rewards/rejected": -14.214645385742188, "step": 15309 }, { "epoch": 2.38, "learning_rate": 2.918359873437419e-06, "logits/chosen": -1.97882080078125, "logits/rejected": -2.847929000854492, "logps/chosen": -258.31689453125, "logps/rejected": -447.4220275878906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.606578826904297, "rewards/margins": 9.277621269226074, "rewards/rejected": -17.884199142456055, "step": 15310 }, { "epoch": 2.38, "learning_rate": 2.917626432906271e-06, "logits/chosen": -2.617058515548706, "logits/rejected": -2.858299970626831, "logps/chosen": -205.32907104492188, "logps/rejected": -391.8406982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.498547554016113, "rewards/margins": 10.09765625, "rewards/rejected": -17.59620475769043, "step": 15311 }, { "epoch": 2.38, "learning_rate": 2.9168929923751233e-06, "logits/chosen": -3.0641372203826904, "logits/rejected": -2.5497350692749023, "logps/chosen": -609.7305908203125, "logps/rejected": -537.3944091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 0.6417908072471619, "rewards/margins": 15.293722152709961, "rewards/rejected": -14.651930809020996, "step": 15312 }, { "epoch": 2.38, "learning_rate": 2.916159551843975e-06, "logits/chosen": -1.2626782655715942, "logits/rejected": -2.4412569999694824, "logps/chosen": -178.4326629638672, "logps/rejected": -481.9488830566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.01099967956543, "rewards/margins": 11.263652801513672, "rewards/rejected": -16.2746524810791, "step": 15313 }, { "epoch": 2.38, "learning_rate": 2.915426111312827e-06, "logits/chosen": -1.8015220165252686, "logits/rejected": -2.5162837505340576, "logps/chosen": -210.11181640625, "logps/rejected": -458.8274841308594, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.268765926361084, "rewards/margins": 12.350452423095703, "rewards/rejected": -16.619218826293945, "step": 15314 }, { "epoch": 2.38, "learning_rate": 2.91469267078168e-06, "logits/chosen": -2.3221452236175537, "logits/rejected": -1.1930122375488281, "logps/chosen": -225.04244995117188, "logps/rejected": -376.8883056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.880974292755127, "rewards/margins": 13.084219932556152, "rewards/rejected": -15.965194702148438, "step": 15315 }, { "epoch": 2.38, "learning_rate": 2.9139592302505317e-06, "logits/chosen": -2.1980674266815186, "logits/rejected": -2.8076343536376953, "logps/chosen": -156.86849975585938, "logps/rejected": -398.357666015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.470003128051758, "rewards/margins": 9.796653747558594, "rewards/rejected": -16.26665687561035, "step": 15316 }, { "epoch": 2.38, "learning_rate": 2.913225789719384e-06, "logits/chosen": -0.9638875126838684, "logits/rejected": -2.167407751083374, "logps/chosen": -172.1822967529297, "logps/rejected": -525.311279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1497368812561035, "rewards/margins": 11.584049224853516, "rewards/rejected": -18.73378562927246, "step": 15317 }, { "epoch": 2.38, "learning_rate": 2.912492349188236e-06, "logits/chosen": -2.6480417251586914, "logits/rejected": -2.7587621212005615, "logps/chosen": -125.66986846923828, "logps/rejected": -154.7498779296875, "loss": 1.1107, "rewards/accuracies": 0.5, "rewards/chosen": -6.145949840545654, "rewards/margins": 6.213188171386719, "rewards/rejected": -12.359138488769531, "step": 15318 }, { "epoch": 2.38, "learning_rate": 2.911758908657088e-06, "logits/chosen": -1.4507875442504883, "logits/rejected": -2.6890921592712402, "logps/chosen": -105.55545043945312, "logps/rejected": -480.84295654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.800036430358887, "rewards/margins": 16.59575080871582, "rewards/rejected": -21.39578628540039, "step": 15319 }, { "epoch": 2.38, "learning_rate": 2.91102546812594e-06, "logits/chosen": -2.5182695388793945, "logits/rejected": -2.804457902908325, "logps/chosen": -560.2801513671875, "logps/rejected": -667.30810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.44061279296875, "rewards/margins": 13.826610565185547, "rewards/rejected": -19.267223358154297, "step": 15320 }, { "epoch": 2.38, "learning_rate": 2.9102920275947923e-06, "logits/chosen": -2.3431482315063477, "logits/rejected": -2.2909903526306152, "logps/chosen": -463.56329345703125, "logps/rejected": -712.6773681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.65994644165039, "rewards/margins": 11.103103637695312, "rewards/rejected": -21.763050079345703, "step": 15321 }, { "epoch": 2.38, "learning_rate": 2.9095585870636442e-06, "logits/chosen": -2.525028705596924, "logits/rejected": -2.464452028274536, "logps/chosen": -561.743896484375, "logps/rejected": -584.7828369140625, "loss": 0.049, "rewards/accuracies": 1.0, "rewards/chosen": -10.926855087280273, "rewards/margins": 6.592915058135986, "rewards/rejected": -17.5197696685791, "step": 15322 }, { "epoch": 2.38, "learning_rate": 2.9088251465324965e-06, "logits/chosen": -2.4520931243896484, "logits/rejected": -2.0644564628601074, "logps/chosen": -407.1827697753906, "logps/rejected": -591.6890258789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.029547214508057, "rewards/margins": 14.541210174560547, "rewards/rejected": -21.570756912231445, "step": 15323 }, { "epoch": 2.38, "learning_rate": 2.908091706001349e-06, "logits/chosen": -3.0611915588378906, "logits/rejected": -2.9355685710906982, "logps/chosen": -200.49368286132812, "logps/rejected": -305.89666748046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.4036331176757812, "rewards/margins": 11.102468490600586, "rewards/rejected": -14.506101608276367, "step": 15324 }, { "epoch": 2.38, "learning_rate": 2.9073582654702007e-06, "logits/chosen": -2.598156690597534, "logits/rejected": -2.3156845569610596, "logps/chosen": -235.09945678710938, "logps/rejected": -353.79193115234375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -5.8039164543151855, "rewards/margins": 8.35001277923584, "rewards/rejected": -14.153928756713867, "step": 15325 }, { "epoch": 2.38, "learning_rate": 2.906624824939053e-06, "logits/chosen": -1.609580397605896, "logits/rejected": -2.7233245372772217, "logps/chosen": -520.7742309570312, "logps/rejected": -549.2587280273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.696632385253906, "rewards/margins": 11.761432647705078, "rewards/rejected": -21.458065032958984, "step": 15326 }, { "epoch": 2.38, "learning_rate": 2.905891384407905e-06, "logits/chosen": -2.6622209548950195, "logits/rejected": -2.9747517108917236, "logps/chosen": -213.7100830078125, "logps/rejected": -340.0237121582031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.024594306945801, "rewards/margins": 9.644587516784668, "rewards/rejected": -14.669181823730469, "step": 15327 }, { "epoch": 2.38, "learning_rate": 2.905157943876757e-06, "logits/chosen": -2.348724842071533, "logits/rejected": -2.9616634845733643, "logps/chosen": -109.65386962890625, "logps/rejected": -377.9539794921875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -7.743372440338135, "rewards/margins": 6.33173942565918, "rewards/rejected": -14.075111389160156, "step": 15328 }, { "epoch": 2.38, "learning_rate": 2.904424503345609e-06, "logits/chosen": -2.2403995990753174, "logits/rejected": -2.84123158454895, "logps/chosen": -348.296875, "logps/rejected": -475.9774169921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.020753860473633, "rewards/margins": 8.28010368347168, "rewards/rejected": -15.300857543945312, "step": 15329 }, { "epoch": 2.38, "learning_rate": 2.9036910628144614e-06, "logits/chosen": -2.49430775642395, "logits/rejected": -2.8796732425689697, "logps/chosen": -121.31813049316406, "logps/rejected": -294.02349853515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.819839000701904, "rewards/margins": 9.932605743408203, "rewards/rejected": -14.752445220947266, "step": 15330 }, { "epoch": 2.38, "learning_rate": 2.9029576222833132e-06, "logits/chosen": -2.52013897895813, "logits/rejected": -2.6534454822540283, "logps/chosen": -113.53279876708984, "logps/rejected": -272.7597351074219, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.962398529052734, "rewards/margins": 9.24062728881836, "rewards/rejected": -14.203025817871094, "step": 15331 }, { "epoch": 2.38, "learning_rate": 2.902224181752166e-06, "logits/chosen": -2.657533884048462, "logits/rejected": -1.973213791847229, "logps/chosen": -285.7740783691406, "logps/rejected": -198.99935913085938, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -6.436756134033203, "rewards/margins": 6.728928565979004, "rewards/rejected": -13.165684700012207, "step": 15332 }, { "epoch": 2.38, "learning_rate": 2.901490741221018e-06, "logits/chosen": -2.9741458892822266, "logits/rejected": -2.9711415767669678, "logps/chosen": -424.89385986328125, "logps/rejected": -447.8253479003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9750847816467285, "rewards/margins": 10.984193801879883, "rewards/rejected": -15.95927906036377, "step": 15333 }, { "epoch": 2.38, "learning_rate": 2.9007573006898697e-06, "logits/chosen": -2.6360819339752197, "logits/rejected": -2.670186758041382, "logps/chosen": -134.55633544921875, "logps/rejected": -188.49542236328125, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -4.202481746673584, "rewards/margins": 5.806794166564941, "rewards/rejected": -10.009275436401367, "step": 15334 }, { "epoch": 2.38, "learning_rate": 2.900023860158722e-06, "logits/chosen": -2.8939995765686035, "logits/rejected": -3.1374351978302, "logps/chosen": -82.02152252197266, "logps/rejected": -285.7005615234375, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -6.083286285400391, "rewards/margins": 9.78777027130127, "rewards/rejected": -15.87105655670166, "step": 15335 }, { "epoch": 2.39, "learning_rate": 2.899290419627574e-06, "logits/chosen": -2.6596715450286865, "logits/rejected": -2.5200536251068115, "logps/chosen": -653.852783203125, "logps/rejected": -571.175048828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.2560200691223145, "rewards/margins": 10.513338088989258, "rewards/rejected": -14.769357681274414, "step": 15336 }, { "epoch": 2.39, "learning_rate": 2.8985569790964262e-06, "logits/chosen": -2.771501302719116, "logits/rejected": -2.091275930404663, "logps/chosen": -359.17694091796875, "logps/rejected": -329.5960998535156, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/chosen": -5.626774787902832, "rewards/margins": 5.295927047729492, "rewards/rejected": -10.922700881958008, "step": 15337 }, { "epoch": 2.39, "learning_rate": 2.897823538565278e-06, "logits/chosen": -2.1490988731384277, "logits/rejected": -2.570542097091675, "logps/chosen": -349.3795471191406, "logps/rejected": -316.4620056152344, "loss": 0.2291, "rewards/accuracies": 1.0, "rewards/chosen": -8.856618881225586, "rewards/margins": 4.910876274108887, "rewards/rejected": -13.767494201660156, "step": 15338 }, { "epoch": 2.39, "learning_rate": 2.8970900980341304e-06, "logits/chosen": -2.525362730026245, "logits/rejected": -1.809010624885559, "logps/chosen": -237.68519592285156, "logps/rejected": -336.96612548828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.215877532958984, "rewards/margins": 8.449058532714844, "rewards/rejected": -13.664936065673828, "step": 15339 }, { "epoch": 2.39, "learning_rate": 2.8963566575029827e-06, "logits/chosen": -2.6272518634796143, "logits/rejected": -2.255197525024414, "logps/chosen": -291.4136047363281, "logps/rejected": -230.27528381347656, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.28232479095459, "rewards/margins": 6.941840171813965, "rewards/rejected": -12.224164962768555, "step": 15340 }, { "epoch": 2.39, "learning_rate": 2.895623216971835e-06, "logits/chosen": -1.2290282249450684, "logits/rejected": -1.5541735887527466, "logps/chosen": -324.962158203125, "logps/rejected": -397.65167236328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.522619724273682, "rewards/margins": 10.060049057006836, "rewards/rejected": -15.58266830444336, "step": 15341 }, { "epoch": 2.39, "learning_rate": 2.894889776440687e-06, "logits/chosen": -1.7963004112243652, "logits/rejected": -2.245966672897339, "logps/chosen": -152.7709503173828, "logps/rejected": -238.7327880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.4982895851135254, "rewards/margins": 10.039130210876465, "rewards/rejected": -11.537420272827148, "step": 15342 }, { "epoch": 2.39, "learning_rate": 2.8941563359095388e-06, "logits/chosen": -2.3649299144744873, "logits/rejected": -2.6646652221679688, "logps/chosen": -109.20030212402344, "logps/rejected": -276.08953857421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.4649553298950195, "rewards/margins": 7.451031684875488, "rewards/rejected": -13.915987014770508, "step": 15343 }, { "epoch": 2.39, "learning_rate": 2.893422895378391e-06, "logits/chosen": -2.8112597465515137, "logits/rejected": -2.819322347640991, "logps/chosen": -190.6479949951172, "logps/rejected": -431.7724304199219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.3762922286987305, "rewards/margins": 12.355463981628418, "rewards/rejected": -18.73175621032715, "step": 15344 }, { "epoch": 2.39, "learning_rate": 2.892689454847243e-06, "logits/chosen": -2.569892644882202, "logits/rejected": -2.5892951488494873, "logps/chosen": -386.6382141113281, "logps/rejected": -466.94677734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.326096534729004, "rewards/margins": 8.212797164916992, "rewards/rejected": -16.53889274597168, "step": 15345 }, { "epoch": 2.39, "learning_rate": 2.8919560143160952e-06, "logits/chosen": -2.1551554203033447, "logits/rejected": -2.5480241775512695, "logps/chosen": -131.343017578125, "logps/rejected": -267.7564697265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.8430298566818237, "rewards/margins": 11.354933738708496, "rewards/rejected": -13.19796371459961, "step": 15346 }, { "epoch": 2.39, "learning_rate": 2.891222573784947e-06, "logits/chosen": -2.976109743118286, "logits/rejected": -3.312488555908203, "logps/chosen": -81.06114196777344, "logps/rejected": -223.39833068847656, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -6.804531097412109, "rewards/margins": 5.475238800048828, "rewards/rejected": -12.279769897460938, "step": 15347 }, { "epoch": 2.39, "learning_rate": 2.8904891332537994e-06, "logits/chosen": -2.695610284805298, "logits/rejected": -2.808871269226074, "logps/chosen": -126.78889465332031, "logps/rejected": -284.63739013671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -9.03264331817627, "rewards/margins": 7.935964584350586, "rewards/rejected": -16.968608856201172, "step": 15348 }, { "epoch": 2.39, "learning_rate": 2.8897556927226517e-06, "logits/chosen": -2.3113534450531006, "logits/rejected": -2.767000913619995, "logps/chosen": -164.39910888671875, "logps/rejected": -437.95697021484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.58013391494751, "rewards/margins": 9.887819290161133, "rewards/rejected": -14.467952728271484, "step": 15349 }, { "epoch": 2.39, "learning_rate": 2.889022252191504e-06, "logits/chosen": -2.4491872787475586, "logits/rejected": -2.7281243801116943, "logps/chosen": -492.06475830078125, "logps/rejected": -552.7702026367188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.94351053237915, "rewards/margins": 11.293536186218262, "rewards/rejected": -17.23704719543457, "step": 15350 }, { "epoch": 2.39, "learning_rate": 2.888288811660356e-06, "logits/chosen": -1.2487200498580933, "logits/rejected": -2.562407970428467, "logps/chosen": -385.12652587890625, "logps/rejected": -812.0120849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.095628261566162, "rewards/margins": 13.147457122802734, "rewards/rejected": -19.243085861206055, "step": 15351 }, { "epoch": 2.39, "learning_rate": 2.887555371129208e-06, "logits/chosen": -2.799964427947998, "logits/rejected": -2.112576961517334, "logps/chosen": -203.23348999023438, "logps/rejected": -168.0523223876953, "loss": 0.1497, "rewards/accuracies": 1.0, "rewards/chosen": -2.604104518890381, "rewards/margins": 7.143332481384277, "rewards/rejected": -9.7474365234375, "step": 15352 }, { "epoch": 2.39, "learning_rate": 2.88682193059806e-06, "logits/chosen": -2.174874782562256, "logits/rejected": -2.9080865383148193, "logps/chosen": -260.2859802246094, "logps/rejected": -544.4134521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.980463981628418, "rewards/margins": 10.226899147033691, "rewards/rejected": -17.20736312866211, "step": 15353 }, { "epoch": 2.39, "learning_rate": 2.886088490066912e-06, "logits/chosen": -2.571607828140259, "logits/rejected": -2.4221982955932617, "logps/chosen": -460.75665283203125, "logps/rejected": -464.7921142578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.966052532196045, "rewards/margins": 9.811805725097656, "rewards/rejected": -13.77785873413086, "step": 15354 }, { "epoch": 2.39, "learning_rate": 2.8853550495357643e-06, "logits/chosen": -1.6562962532043457, "logits/rejected": -2.4111437797546387, "logps/chosen": -242.95860290527344, "logps/rejected": -401.9854736328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.384382247924805, "rewards/margins": 10.578302383422852, "rewards/rejected": -14.962684631347656, "step": 15355 }, { "epoch": 2.39, "learning_rate": 2.884621609004616e-06, "logits/chosen": -2.5904910564422607, "logits/rejected": -1.8004653453826904, "logps/chosen": -315.2592468261719, "logps/rejected": -300.09893798828125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.467735290527344, "rewards/margins": 11.26333999633789, "rewards/rejected": -15.731075286865234, "step": 15356 }, { "epoch": 2.39, "learning_rate": 2.883888168473469e-06, "logits/chosen": -1.9591373205184937, "logits/rejected": -2.5892677307128906, "logps/chosen": -225.04415893554688, "logps/rejected": -528.0479125976562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.688215732574463, "rewards/margins": 12.676980018615723, "rewards/rejected": -19.365196228027344, "step": 15357 }, { "epoch": 2.39, "learning_rate": 2.8831547279423207e-06, "logits/chosen": -2.6464250087738037, "logits/rejected": -2.7036354541778564, "logps/chosen": -162.9072723388672, "logps/rejected": -213.36767578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.925149440765381, "rewards/margins": 9.39299201965332, "rewards/rejected": -13.31814193725586, "step": 15358 }, { "epoch": 2.39, "learning_rate": 2.882421287411173e-06, "logits/chosen": -2.741558074951172, "logits/rejected": -2.7051568031311035, "logps/chosen": -224.26611328125, "logps/rejected": -419.81011962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8004424571990967, "rewards/margins": 10.22458267211914, "rewards/rejected": -13.0250244140625, "step": 15359 }, { "epoch": 2.39, "learning_rate": 2.881687846880025e-06, "logits/chosen": -2.093435049057007, "logits/rejected": -3.066610097885132, "logps/chosen": -144.3231964111328, "logps/rejected": -380.4206237792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.676758766174316, "rewards/margins": 11.953118324279785, "rewards/rejected": -17.6298770904541, "step": 15360 }, { "epoch": 2.39, "learning_rate": 2.8809544063488772e-06, "logits/chosen": -2.645601749420166, "logits/rejected": -2.774857521057129, "logps/chosen": -185.27207946777344, "logps/rejected": -296.2645263671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.763457298278809, "rewards/margins": 8.27621841430664, "rewards/rejected": -13.039676666259766, "step": 15361 }, { "epoch": 2.39, "learning_rate": 2.880220965817729e-06, "logits/chosen": -2.6450088024139404, "logits/rejected": -2.5676445960998535, "logps/chosen": -410.8765869140625, "logps/rejected": -410.66082763671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.36421537399292, "rewards/margins": 11.313555717468262, "rewards/rejected": -16.677770614624023, "step": 15362 }, { "epoch": 2.39, "learning_rate": 2.879487525286581e-06, "logits/chosen": -1.1723273992538452, "logits/rejected": -2.6996874809265137, "logps/chosen": -148.15679931640625, "logps/rejected": -330.2841796875, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -5.8867998123168945, "rewards/margins": 6.515266418457031, "rewards/rejected": -12.402067184448242, "step": 15363 }, { "epoch": 2.39, "learning_rate": 2.8787540847554333e-06, "logits/chosen": -1.3919956684112549, "logits/rejected": -2.2705042362213135, "logps/chosen": -258.14935302734375, "logps/rejected": -602.8648681640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.64021110534668, "rewards/margins": 14.469893455505371, "rewards/rejected": -20.110105514526367, "step": 15364 }, { "epoch": 2.39, "learning_rate": 2.8780206442242856e-06, "logits/chosen": -2.826138496398926, "logits/rejected": -2.040904998779297, "logps/chosen": -464.1327819824219, "logps/rejected": -481.5828552246094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.31151008605957, "rewards/margins": 10.034472465515137, "rewards/rejected": -15.345982551574707, "step": 15365 }, { "epoch": 2.39, "learning_rate": 2.877287203693138e-06, "logits/chosen": -2.5695011615753174, "logits/rejected": -1.7245370149612427, "logps/chosen": -398.729248046875, "logps/rejected": -385.1248474121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.573793411254883, "rewards/margins": 10.410541534423828, "rewards/rejected": -20.98433494567871, "step": 15366 }, { "epoch": 2.39, "learning_rate": 2.8765537631619898e-06, "logits/chosen": -1.7014727592468262, "logits/rejected": -2.600174903869629, "logps/chosen": -117.93423461914062, "logps/rejected": -381.7020263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.702662467956543, "rewards/margins": 10.098785400390625, "rewards/rejected": -16.801448822021484, "step": 15367 }, { "epoch": 2.39, "learning_rate": 2.875820322630842e-06, "logits/chosen": -1.6447418928146362, "logits/rejected": -2.733800172805786, "logps/chosen": -89.33309936523438, "logps/rejected": -615.2052001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.249950885772705, "rewards/margins": 17.357574462890625, "rewards/rejected": -22.607524871826172, "step": 15368 }, { "epoch": 2.39, "learning_rate": 2.875086882099694e-06, "logits/chosen": -2.611694812774658, "logits/rejected": -1.717449426651001, "logps/chosen": -252.8131103515625, "logps/rejected": -210.01507568359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -0.963358998298645, "rewards/margins": 10.435565948486328, "rewards/rejected": -11.398923873901367, "step": 15369 }, { "epoch": 2.39, "learning_rate": 2.8743534415685462e-06, "logits/chosen": -2.9677252769470215, "logits/rejected": -2.494015693664551, "logps/chosen": -850.8970947265625, "logps/rejected": -534.7623901367188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.044888019561768, "rewards/margins": 10.171117782592773, "rewards/rejected": -17.216005325317383, "step": 15370 }, { "epoch": 2.39, "learning_rate": 2.873620001037398e-06, "logits/chosen": -2.1746768951416016, "logits/rejected": -2.87056565284729, "logps/chosen": -271.6683044433594, "logps/rejected": -454.02301025390625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.4742231369018555, "rewards/margins": 9.51755142211914, "rewards/rejected": -14.99177360534668, "step": 15371 }, { "epoch": 2.39, "learning_rate": 2.8728865605062504e-06, "logits/chosen": -2.5186774730682373, "logits/rejected": -2.788625478744507, "logps/chosen": -282.3566589355469, "logps/rejected": -394.84765625, "loss": 0.0456, "rewards/accuracies": 1.0, "rewards/chosen": -9.878555297851562, "rewards/margins": 5.460845470428467, "rewards/rejected": -15.339400291442871, "step": 15372 }, { "epoch": 2.39, "learning_rate": 2.8721531199751023e-06, "logits/chosen": -2.6008009910583496, "logits/rejected": -2.1722412109375, "logps/chosen": -306.9713134765625, "logps/rejected": -383.8962707519531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.5946044921875, "rewards/margins": 9.557842254638672, "rewards/rejected": -17.152446746826172, "step": 15373 }, { "epoch": 2.39, "learning_rate": 2.8714196794439546e-06, "logits/chosen": -0.9871020913124084, "logits/rejected": -2.3891103267669678, "logps/chosen": -122.43701171875, "logps/rejected": -494.24560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.071495056152344, "rewards/margins": 14.569422721862793, "rewards/rejected": -21.640918731689453, "step": 15374 }, { "epoch": 2.39, "learning_rate": 2.870686238912807e-06, "logits/chosen": -2.422828435897827, "logits/rejected": -1.1424394845962524, "logps/chosen": -251.05850219726562, "logps/rejected": -190.43682861328125, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -5.870090007781982, "rewards/margins": 7.356126308441162, "rewards/rejected": -13.226216316223145, "step": 15375 }, { "epoch": 2.39, "learning_rate": 2.8699527983816588e-06, "logits/chosen": -0.9369763135910034, "logits/rejected": -2.5965497493743896, "logps/chosen": -132.24456787109375, "logps/rejected": -568.941650390625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -6.014092922210693, "rewards/margins": 11.992918014526367, "rewards/rejected": -18.00701141357422, "step": 15376 }, { "epoch": 2.39, "learning_rate": 2.869219357850511e-06, "logits/chosen": -2.4976141452789307, "logits/rejected": -1.2796196937561035, "logps/chosen": -237.64605712890625, "logps/rejected": -262.05035400390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -9.346735000610352, "rewards/margins": 7.310525894165039, "rewards/rejected": -16.65726089477539, "step": 15377 }, { "epoch": 2.39, "learning_rate": 2.868485917319363e-06, "logits/chosen": -2.5161571502685547, "logits/rejected": -2.938417673110962, "logps/chosen": -95.90758514404297, "logps/rejected": -298.28887939453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.426295280456543, "rewards/margins": 7.80613899230957, "rewards/rejected": -12.232434272766113, "step": 15378 }, { "epoch": 2.39, "learning_rate": 2.8677524767882153e-06, "logits/chosen": -2.2221944332122803, "logits/rejected": -2.727848768234253, "logps/chosen": -166.0297088623047, "logps/rejected": -505.9996032714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0762434005737305, "rewards/margins": 12.58180046081543, "rewards/rejected": -19.658044815063477, "step": 15379 }, { "epoch": 2.39, "learning_rate": 2.867019036257067e-06, "logits/chosen": -2.687213659286499, "logits/rejected": -1.834869146347046, "logps/chosen": -692.1961669921875, "logps/rejected": -425.8962097167969, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -7.1110944747924805, "rewards/margins": 8.139924049377441, "rewards/rejected": -15.251018524169922, "step": 15380 }, { "epoch": 2.39, "learning_rate": 2.8662855957259194e-06, "logits/chosen": -2.445026397705078, "logits/rejected": -1.66160249710083, "logps/chosen": -508.26678466796875, "logps/rejected": -493.7292175292969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.4864397048950195, "rewards/margins": 11.263336181640625, "rewards/rejected": -16.749774932861328, "step": 15381 }, { "epoch": 2.39, "learning_rate": 2.8655521551947717e-06, "logits/chosen": -2.6018025875091553, "logits/rejected": -2.801567554473877, "logps/chosen": -382.4580078125, "logps/rejected": -418.68011474609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.5518217086792, "rewards/margins": 9.271717071533203, "rewards/rejected": -17.82353973388672, "step": 15382 }, { "epoch": 2.39, "learning_rate": 2.8648187146636236e-06, "logits/chosen": -1.6694971323013306, "logits/rejected": -2.3692734241485596, "logps/chosen": -174.88287353515625, "logps/rejected": -437.64898681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.289212703704834, "rewards/margins": 16.283227920532227, "rewards/rejected": -21.57244110107422, "step": 15383 }, { "epoch": 2.39, "learning_rate": 2.864085274132476e-06, "logits/chosen": -2.0048320293426514, "logits/rejected": -2.5678324699401855, "logps/chosen": -113.32449340820312, "logps/rejected": -360.96185302734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.3985161781311035, "rewards/margins": 9.2904052734375, "rewards/rejected": -16.688920974731445, "step": 15384 }, { "epoch": 2.39, "learning_rate": 2.863351833601328e-06, "logits/chosen": -2.1182711124420166, "logits/rejected": -2.4691390991210938, "logps/chosen": -290.17138671875, "logps/rejected": -523.1688842773438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.798492431640625, "rewards/margins": 16.644102096557617, "rewards/rejected": -23.442594528198242, "step": 15385 }, { "epoch": 2.39, "learning_rate": 2.86261839307018e-06, "logits/chosen": -2.3030622005462646, "logits/rejected": -2.660189628601074, "logps/chosen": -115.23246002197266, "logps/rejected": -294.5610046386719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.930033206939697, "rewards/margins": 9.60225772857666, "rewards/rejected": -16.532291412353516, "step": 15386 }, { "epoch": 2.39, "learning_rate": 2.861884952539032e-06, "logits/chosen": -1.6721999645233154, "logits/rejected": -2.924959182739258, "logps/chosen": -175.33743286132812, "logps/rejected": -558.489501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.456002235412598, "rewards/margins": 14.299966812133789, "rewards/rejected": -19.75596809387207, "step": 15387 }, { "epoch": 2.39, "learning_rate": 2.8611515120078843e-06, "logits/chosen": -2.897684335708618, "logits/rejected": -2.6271958351135254, "logps/chosen": -628.4237060546875, "logps/rejected": -441.63934326171875, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -7.617735862731934, "rewards/margins": 7.121402263641357, "rewards/rejected": -14.739137649536133, "step": 15388 }, { "epoch": 2.39, "learning_rate": 2.860418071476736e-06, "logits/chosen": -2.335177421569824, "logits/rejected": -2.2655537128448486, "logps/chosen": -211.84823608398438, "logps/rejected": -324.4259033203125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.784979343414307, "rewards/margins": 8.991658210754395, "rewards/rejected": -13.77663803100586, "step": 15389 }, { "epoch": 2.39, "learning_rate": 2.8596846309455885e-06, "logits/chosen": -2.512439727783203, "logits/rejected": -2.685668468475342, "logps/chosen": -155.18328857421875, "logps/rejected": -263.146728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.494863033294678, "rewards/margins": 10.848112106323242, "rewards/rejected": -15.342975616455078, "step": 15390 }, { "epoch": 2.39, "learning_rate": 2.8589511904144408e-06, "logits/chosen": -2.08451771736145, "logits/rejected": -2.7155494689941406, "logps/chosen": -365.49041748046875, "logps/rejected": -550.8134765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -13.98576545715332, "rewards/margins": 9.293724060058594, "rewards/rejected": -23.279489517211914, "step": 15391 }, { "epoch": 2.39, "learning_rate": 2.8582177498832926e-06, "logits/chosen": -2.8168160915374756, "logits/rejected": -3.0193450450897217, "logps/chosen": -178.9163818359375, "logps/rejected": -279.7945251464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6445159912109375, "rewards/margins": 10.60142707824707, "rewards/rejected": -17.245941162109375, "step": 15392 }, { "epoch": 2.39, "learning_rate": 2.857484309352145e-06, "logits/chosen": -1.6670717000961304, "logits/rejected": -2.646949291229248, "logps/chosen": -337.9726257324219, "logps/rejected": -723.2053833007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.799702167510986, "rewards/margins": 17.676891326904297, "rewards/rejected": -24.476594924926758, "step": 15393 }, { "epoch": 2.39, "learning_rate": 2.856750868820997e-06, "logits/chosen": -2.74115252494812, "logits/rejected": -1.8688979148864746, "logps/chosen": -273.70367431640625, "logps/rejected": -141.6716766357422, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -5.179568290710449, "rewards/margins": 4.066700458526611, "rewards/rejected": -9.246269226074219, "step": 15394 }, { "epoch": 2.39, "learning_rate": 2.856017428289849e-06, "logits/chosen": -2.289867401123047, "logits/rejected": -2.7385177612304688, "logps/chosen": -273.5576171875, "logps/rejected": -386.5905456542969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.7030792236328125, "rewards/margins": 9.540772438049316, "rewards/rejected": -15.243851661682129, "step": 15395 }, { "epoch": 2.39, "learning_rate": 2.855283987758701e-06, "logits/chosen": -2.5687925815582275, "logits/rejected": -2.123094320297241, "logps/chosen": -146.61578369140625, "logps/rejected": -143.94293212890625, "loss": 0.1563, "rewards/accuracies": 1.0, "rewards/chosen": -7.132391929626465, "rewards/margins": 3.90042781829834, "rewards/rejected": -11.032819747924805, "step": 15396 }, { "epoch": 2.39, "learning_rate": 2.8545505472275533e-06, "logits/chosen": -2.6209499835968018, "logits/rejected": -2.003708839416504, "logps/chosen": -385.971923828125, "logps/rejected": -349.3419189453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -10.413529396057129, "rewards/margins": 7.035160064697266, "rewards/rejected": -17.448688507080078, "step": 15397 }, { "epoch": 2.39, "learning_rate": 2.853817106696405e-06, "logits/chosen": -1.9510632753372192, "logits/rejected": -2.432049512863159, "logps/chosen": -314.77337646484375, "logps/rejected": -507.3858337402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.376567840576172, "rewards/margins": 12.174575805664062, "rewards/rejected": -20.551143646240234, "step": 15398 }, { "epoch": 2.39, "learning_rate": 2.853083666165258e-06, "logits/chosen": -2.4594266414642334, "logits/rejected": -2.2160351276397705, "logps/chosen": -225.36265563964844, "logps/rejected": -306.17864990234375, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -9.160452842712402, "rewards/margins": 4.425406455993652, "rewards/rejected": -13.585859298706055, "step": 15399 }, { "epoch": 2.4, "learning_rate": 2.85235022563411e-06, "logits/chosen": -1.8387184143066406, "logits/rejected": -2.5212788581848145, "logps/chosen": -159.09121704101562, "logps/rejected": -299.31903076171875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.316763401031494, "rewards/margins": 7.692628383636475, "rewards/rejected": -14.009391784667969, "step": 15400 }, { "epoch": 2.4, "learning_rate": 2.851616785102962e-06, "logits/chosen": -1.9910314083099365, "logits/rejected": -2.667689085006714, "logps/chosen": -97.81211853027344, "logps/rejected": -432.64617919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9589009284973145, "rewards/margins": 15.377225875854492, "rewards/rejected": -19.33612823486328, "step": 15401 }, { "epoch": 2.4, "learning_rate": 2.850883344571814e-06, "logits/chosen": -2.6625170707702637, "logits/rejected": -2.8552262783050537, "logps/chosen": -196.95407104492188, "logps/rejected": -325.3639831542969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.003274917602539, "rewards/margins": 9.66571044921875, "rewards/rejected": -16.66898536682129, "step": 15402 }, { "epoch": 2.4, "learning_rate": 2.850149904040666e-06, "logits/chosen": -2.5981132984161377, "logits/rejected": -3.075115203857422, "logps/chosen": -121.78843688964844, "logps/rejected": -342.7113952636719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.430197238922119, "rewards/margins": 9.651849746704102, "rewards/rejected": -16.082046508789062, "step": 15403 }, { "epoch": 2.4, "learning_rate": 2.849416463509518e-06, "logits/chosen": -2.369443655014038, "logits/rejected": -2.802947998046875, "logps/chosen": -184.80104064941406, "logps/rejected": -431.7976989746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.677194595336914, "rewards/margins": 11.82470703125, "rewards/rejected": -19.501901626586914, "step": 15404 }, { "epoch": 2.4, "learning_rate": 2.84868302297837e-06, "logits/chosen": -2.5960710048675537, "logits/rejected": -3.0011258125305176, "logps/chosen": -237.531982421875, "logps/rejected": -564.7219848632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.179140567779541, "rewards/margins": 11.357318878173828, "rewards/rejected": -16.536460876464844, "step": 15405 }, { "epoch": 2.4, "learning_rate": 2.8479495824472223e-06, "logits/chosen": -2.4387035369873047, "logits/rejected": -2.6737918853759766, "logps/chosen": -127.41673278808594, "logps/rejected": -301.9096374511719, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.282888412475586, "rewards/margins": 9.541171073913574, "rewards/rejected": -16.824058532714844, "step": 15406 }, { "epoch": 2.4, "learning_rate": 2.8472161419160746e-06, "logits/chosen": -2.014277696609497, "logits/rejected": -2.6485865116119385, "logps/chosen": -223.1385040283203, "logps/rejected": -393.1401062011719, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -8.293951034545898, "rewards/margins": 6.245736122131348, "rewards/rejected": -14.539688110351562, "step": 15407 }, { "epoch": 2.4, "learning_rate": 2.846482701384927e-06, "logits/chosen": -2.8815712928771973, "logits/rejected": -2.77085018157959, "logps/chosen": -224.9043731689453, "logps/rejected": -246.41758728027344, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.166355609893799, "rewards/margins": 6.757734298706055, "rewards/rejected": -12.924089431762695, "step": 15408 }, { "epoch": 2.4, "learning_rate": 2.845749260853779e-06, "logits/chosen": -0.9937775731086731, "logits/rejected": -1.2336055040359497, "logps/chosen": -157.4738006591797, "logps/rejected": -482.7054443359375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.327106475830078, "rewards/margins": 15.083854675292969, "rewards/rejected": -20.410961151123047, "step": 15409 }, { "epoch": 2.4, "learning_rate": 2.845015820322631e-06, "logits/chosen": -2.058000326156616, "logits/rejected": -2.8392174243927, "logps/chosen": -102.51881408691406, "logps/rejected": -351.540771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.292820930480957, "rewards/margins": 14.448386192321777, "rewards/rejected": -19.741207122802734, "step": 15410 }, { "epoch": 2.4, "learning_rate": 2.844282379791483e-06, "logits/chosen": -1.0606863498687744, "logits/rejected": -2.0232954025268555, "logps/chosen": -146.42315673828125, "logps/rejected": -467.0902099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.196100234985352, "rewards/margins": 15.10544490814209, "rewards/rejected": -21.301544189453125, "step": 15411 }, { "epoch": 2.4, "learning_rate": 2.843548939260335e-06, "logits/chosen": -2.637315511703491, "logits/rejected": -1.400138020515442, "logps/chosen": -164.43051147460938, "logps/rejected": -275.6811828613281, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -3.5567634105682373, "rewards/margins": 9.957281112670898, "rewards/rejected": -13.514043807983398, "step": 15412 }, { "epoch": 2.4, "learning_rate": 2.842815498729187e-06, "logits/chosen": -1.7369515895843506, "logits/rejected": -2.782439947128296, "logps/chosen": -133.87835693359375, "logps/rejected": -481.6949462890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.068401336669922, "rewards/margins": 11.31409740447998, "rewards/rejected": -19.38249969482422, "step": 15413 }, { "epoch": 2.4, "learning_rate": 2.842082058198039e-06, "logits/chosen": -2.9468255043029785, "logits/rejected": -2.6483683586120605, "logps/chosen": -793.9505004882812, "logps/rejected": -757.7070922851562, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.052329063415527, "rewards/margins": 11.220296859741211, "rewards/rejected": -20.272624969482422, "step": 15414 }, { "epoch": 2.4, "learning_rate": 2.8413486176668914e-06, "logits/chosen": -2.211475372314453, "logits/rejected": -2.657381296157837, "logps/chosen": -153.6338348388672, "logps/rejected": -344.88360595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4976959228515625, "rewards/margins": 15.471078872680664, "rewards/rejected": -19.968774795532227, "step": 15415 }, { "epoch": 2.4, "learning_rate": 2.8406151771357437e-06, "logits/chosen": -2.4977331161499023, "logits/rejected": -2.5904884338378906, "logps/chosen": -222.69772338867188, "logps/rejected": -357.81451416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.944880485534668, "rewards/margins": 12.198607444763184, "rewards/rejected": -17.14348793029785, "step": 15416 }, { "epoch": 2.4, "learning_rate": 2.839881736604596e-06, "logits/chosen": -2.528319835662842, "logits/rejected": -2.537348508834839, "logps/chosen": -152.4598388671875, "logps/rejected": -291.6358337402344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.130853652954102, "rewards/margins": 9.246131896972656, "rewards/rejected": -17.376985549926758, "step": 15417 }, { "epoch": 2.4, "learning_rate": 2.839148296073448e-06, "logits/chosen": -2.622408390045166, "logits/rejected": -2.694124937057495, "logps/chosen": -343.5093994140625, "logps/rejected": -490.4571533203125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.270614624023438, "rewards/margins": 12.693244934082031, "rewards/rejected": -20.96385955810547, "step": 15418 }, { "epoch": 2.4, "learning_rate": 2.8384148555423e-06, "logits/chosen": -0.8409698009490967, "logits/rejected": -2.3713886737823486, "logps/chosen": -261.4226989746094, "logps/rejected": -304.8855285644531, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -6.5383195877075195, "rewards/margins": 5.655401229858398, "rewards/rejected": -12.193720817565918, "step": 15419 }, { "epoch": 2.4, "learning_rate": 2.837681415011152e-06, "logits/chosen": -1.7967681884765625, "logits/rejected": -2.548977851867676, "logps/chosen": -110.426513671875, "logps/rejected": -392.9844055175781, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -5.388364791870117, "rewards/margins": 8.65371036529541, "rewards/rejected": -14.042075157165527, "step": 15420 }, { "epoch": 2.4, "learning_rate": 2.8369479744800043e-06, "logits/chosen": -2.533564329147339, "logits/rejected": -1.7259711027145386, "logps/chosen": -213.23085021972656, "logps/rejected": -236.8905487060547, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.267717361450195, "rewards/margins": 11.677291870117188, "rewards/rejected": -16.945009231567383, "step": 15421 }, { "epoch": 2.4, "learning_rate": 2.836214533948856e-06, "logits/chosen": -2.077906370162964, "logits/rejected": -2.8181519508361816, "logps/chosen": -477.4696350097656, "logps/rejected": -631.629150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.289640426635742, "rewards/margins": 12.242277145385742, "rewards/rejected": -18.531917572021484, "step": 15422 }, { "epoch": 2.4, "learning_rate": 2.835481093417708e-06, "logits/chosen": -2.6733808517456055, "logits/rejected": -2.7570853233337402, "logps/chosen": -690.04736328125, "logps/rejected": -910.5995483398438, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -8.027678489685059, "rewards/margins": 8.823896408081055, "rewards/rejected": -16.851573944091797, "step": 15423 }, { "epoch": 2.4, "learning_rate": 2.8347476528865604e-06, "logits/chosen": -2.485422134399414, "logits/rejected": -2.631009817123413, "logps/chosen": -319.53277587890625, "logps/rejected": -465.58660888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.22709846496582, "rewards/margins": 11.321015357971191, "rewards/rejected": -21.548114776611328, "step": 15424 }, { "epoch": 2.4, "learning_rate": 2.8340142123554127e-06, "logits/chosen": -2.3170268535614014, "logits/rejected": -2.7201144695281982, "logps/chosen": -422.931640625, "logps/rejected": -446.587890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.489311218261719, "rewards/margins": 9.962106704711914, "rewards/rejected": -17.451417922973633, "step": 15425 }, { "epoch": 2.4, "learning_rate": 2.833280771824265e-06, "logits/chosen": -1.199931263923645, "logits/rejected": -2.572319507598877, "logps/chosen": -133.8117218017578, "logps/rejected": -457.5310363769531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.1494550704956055, "rewards/margins": 10.398307800292969, "rewards/rejected": -15.547762870788574, "step": 15426 }, { "epoch": 2.4, "learning_rate": 2.832547331293117e-06, "logits/chosen": -2.6968562602996826, "logits/rejected": -2.7005844116210938, "logps/chosen": -187.02001953125, "logps/rejected": -263.41253662109375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.38867712020874, "rewards/margins": 7.456230640411377, "rewards/rejected": -13.844907760620117, "step": 15427 }, { "epoch": 2.4, "learning_rate": 2.831813890761969e-06, "logits/chosen": -2.6582438945770264, "logits/rejected": -1.8515064716339111, "logps/chosen": -952.4503173828125, "logps/rejected": -622.145263671875, "loss": 1.7539, "rewards/accuracies": 0.5, "rewards/chosen": -16.192779541015625, "rewards/margins": 4.846282005310059, "rewards/rejected": -21.0390625, "step": 15428 }, { "epoch": 2.4, "learning_rate": 2.831080450230821e-06, "logits/chosen": -2.958340883255005, "logits/rejected": -2.0987493991851807, "logps/chosen": -345.6669006347656, "logps/rejected": -265.8234558105469, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.959092617034912, "rewards/margins": 6.256235122680664, "rewards/rejected": -12.215328216552734, "step": 15429 }, { "epoch": 2.4, "learning_rate": 2.8303470096996733e-06, "logits/chosen": -2.7366487979888916, "logits/rejected": -2.5490469932556152, "logps/chosen": -197.92144775390625, "logps/rejected": -174.42762756347656, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.3233444690704346, "rewards/margins": 7.319551467895508, "rewards/rejected": -10.64289665222168, "step": 15430 }, { "epoch": 2.4, "learning_rate": 2.8296135691685252e-06, "logits/chosen": -2.095851421356201, "logits/rejected": -2.6070613861083984, "logps/chosen": -267.45086669921875, "logps/rejected": -535.0552978515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.602499961853027, "rewards/margins": 10.716121673583984, "rewards/rejected": -18.318622589111328, "step": 15431 }, { "epoch": 2.4, "learning_rate": 2.828880128637377e-06, "logits/chosen": -1.5910234451293945, "logits/rejected": -2.6675069332122803, "logps/chosen": -357.6855163574219, "logps/rejected": -565.2845458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.8910017013549805, "rewards/margins": 11.61291790008545, "rewards/rejected": -17.50391960144043, "step": 15432 }, { "epoch": 2.4, "learning_rate": 2.82814668810623e-06, "logits/chosen": -1.4452593326568604, "logits/rejected": -2.334233283996582, "logps/chosen": -238.85296630859375, "logps/rejected": -558.9868774414062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.724584579467773, "rewards/margins": 13.508272171020508, "rewards/rejected": -21.23285675048828, "step": 15433 }, { "epoch": 2.4, "learning_rate": 2.8274132475750817e-06, "logits/chosen": -2.110527276992798, "logits/rejected": -2.6340723037719727, "logps/chosen": -160.493408203125, "logps/rejected": -328.74334716796875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -4.348567008972168, "rewards/margins": 9.529321670532227, "rewards/rejected": -13.877889633178711, "step": 15434 }, { "epoch": 2.4, "learning_rate": 2.826679807043934e-06, "logits/chosen": -2.7988345623016357, "logits/rejected": -3.0133092403411865, "logps/chosen": -370.89825439453125, "logps/rejected": -417.2953796386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.210718154907227, "rewards/margins": 13.04549789428711, "rewards/rejected": -25.256216049194336, "step": 15435 }, { "epoch": 2.4, "learning_rate": 2.825946366512786e-06, "logits/chosen": -1.9501923322677612, "logits/rejected": -2.7102551460266113, "logps/chosen": -190.687255859375, "logps/rejected": -368.50372314453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.474539756774902, "rewards/margins": 7.985842227935791, "rewards/rejected": -13.460382461547852, "step": 15436 }, { "epoch": 2.4, "learning_rate": 2.825212925981638e-06, "logits/chosen": -2.4129087924957275, "logits/rejected": -3.0287582874298096, "logps/chosen": -292.9196472167969, "logps/rejected": -764.6494140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -11.851550102233887, "rewards/margins": 9.9873628616333, "rewards/rejected": -21.838912963867188, "step": 15437 }, { "epoch": 2.4, "learning_rate": 2.82447948545049e-06, "logits/chosen": -1.630881428718567, "logits/rejected": -2.6984026432037354, "logps/chosen": -203.71714782714844, "logps/rejected": -502.0526123046875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -6.604961395263672, "rewards/margins": 11.552775382995605, "rewards/rejected": -18.157737731933594, "step": 15438 }, { "epoch": 2.4, "learning_rate": 2.8237460449193424e-06, "logits/chosen": -2.7267844676971436, "logits/rejected": -2.997427225112915, "logps/chosen": -284.7056884765625, "logps/rejected": -550.383544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.11084508895874, "rewards/margins": 10.51634407043457, "rewards/rejected": -15.627189636230469, "step": 15439 }, { "epoch": 2.4, "learning_rate": 2.8230126043881942e-06, "logits/chosen": -2.407773971557617, "logits/rejected": -2.82334041595459, "logps/chosen": -735.3128051757812, "logps/rejected": -885.8062744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.071568489074707, "rewards/margins": 13.716629028320312, "rewards/rejected": -22.788196563720703, "step": 15440 }, { "epoch": 2.4, "learning_rate": 2.8222791638570465e-06, "logits/chosen": -2.74061918258667, "logits/rejected": -3.0623505115509033, "logps/chosen": -483.8470764160156, "logps/rejected": -491.1128234863281, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -10.262981414794922, "rewards/margins": 9.376216888427734, "rewards/rejected": -19.639198303222656, "step": 15441 }, { "epoch": 2.4, "learning_rate": 2.821545723325899e-06, "logits/chosen": -2.922968626022339, "logits/rejected": -2.8473141193389893, "logps/chosen": -893.1336669921875, "logps/rejected": -570.0491333007812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.765547275543213, "rewards/margins": 9.106704711914062, "rewards/rejected": -14.872251510620117, "step": 15442 }, { "epoch": 2.4, "learning_rate": 2.8208122827947507e-06, "logits/chosen": -2.6696839332580566, "logits/rejected": -1.929089069366455, "logps/chosen": -509.7146301269531, "logps/rejected": -418.1778564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.286264419555664, "rewards/margins": 9.672069549560547, "rewards/rejected": -19.95833396911621, "step": 15443 }, { "epoch": 2.4, "learning_rate": 2.820078842263603e-06, "logits/chosen": -2.5835213661193848, "logits/rejected": -2.7180886268615723, "logps/chosen": -287.0561218261719, "logps/rejected": -363.6536865234375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -11.881933212280273, "rewards/margins": 5.774074554443359, "rewards/rejected": -17.656007766723633, "step": 15444 }, { "epoch": 2.4, "learning_rate": 2.819345401732455e-06, "logits/chosen": -2.507242202758789, "logits/rejected": -2.6188220977783203, "logps/chosen": -593.9652709960938, "logps/rejected": -631.2056274414062, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.2125139236450195, "rewards/margins": 8.135870933532715, "rewards/rejected": -15.348384857177734, "step": 15445 }, { "epoch": 2.4, "learning_rate": 2.818611961201307e-06, "logits/chosen": -1.8159183263778687, "logits/rejected": -2.708890676498413, "logps/chosen": -79.25433349609375, "logps/rejected": -326.8647766113281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.40629768371582, "rewards/margins": 9.771108627319336, "rewards/rejected": -16.177406311035156, "step": 15446 }, { "epoch": 2.4, "learning_rate": 2.817878520670159e-06, "logits/chosen": -1.6713943481445312, "logits/rejected": -2.1887667179107666, "logps/chosen": -183.0835723876953, "logps/rejected": -389.59368896484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.416797637939453, "rewards/margins": 8.826225280761719, "rewards/rejected": -16.243022918701172, "step": 15447 }, { "epoch": 2.4, "learning_rate": 2.8171450801390114e-06, "logits/chosen": -2.4626142978668213, "logits/rejected": -1.9525595903396606, "logps/chosen": -546.7451171875, "logps/rejected": -469.5216369628906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.390690803527832, "rewards/margins": 9.108495712280273, "rewards/rejected": -17.499187469482422, "step": 15448 }, { "epoch": 2.4, "learning_rate": 2.8164116396078633e-06, "logits/chosen": -2.523054838180542, "logits/rejected": -2.2684664726257324, "logps/chosen": -777.0986328125, "logps/rejected": -647.2388916015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.78629732131958, "rewards/margins": 8.909940719604492, "rewards/rejected": -14.69623851776123, "step": 15449 }, { "epoch": 2.4, "learning_rate": 2.815678199076716e-06, "logits/chosen": -2.535301446914673, "logits/rejected": -2.6111257076263428, "logps/chosen": -350.9910888671875, "logps/rejected": -726.8975830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.166112899780273, "rewards/margins": 13.392091751098633, "rewards/rejected": -21.558204650878906, "step": 15450 }, { "epoch": 2.4, "learning_rate": 2.814944758545568e-06, "logits/chosen": -2.5603864192962646, "logits/rejected": -1.96628999710083, "logps/chosen": -246.98170471191406, "logps/rejected": -233.7749481201172, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.914787292480469, "rewards/margins": 8.298694610595703, "rewards/rejected": -13.213481903076172, "step": 15451 }, { "epoch": 2.4, "learning_rate": 2.8142113180144197e-06, "logits/chosen": -2.6625149250030518, "logits/rejected": -2.9095418453216553, "logps/chosen": -335.8629455566406, "logps/rejected": -459.1216125488281, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -5.614656448364258, "rewards/margins": 12.273847579956055, "rewards/rejected": -17.888504028320312, "step": 15452 }, { "epoch": 2.4, "learning_rate": 2.813477877483272e-06, "logits/chosen": -2.8599255084991455, "logits/rejected": -2.160369396209717, "logps/chosen": -375.9847717285156, "logps/rejected": -166.98336791992188, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -2.2923049926757812, "rewards/margins": 5.881683349609375, "rewards/rejected": -8.173988342285156, "step": 15453 }, { "epoch": 2.4, "learning_rate": 2.812744436952124e-06, "logits/chosen": -1.8203587532043457, "logits/rejected": -2.1809639930725098, "logps/chosen": -534.80126953125, "logps/rejected": -684.7156982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.37321662902832, "rewards/margins": 14.432451248168945, "rewards/rejected": -22.805667877197266, "step": 15454 }, { "epoch": 2.4, "learning_rate": 2.8120109964209762e-06, "logits/chosen": -1.853362798690796, "logits/rejected": -2.788456678390503, "logps/chosen": -118.09105682373047, "logps/rejected": -377.2452697753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.894618988037109, "rewards/margins": 10.97647476196289, "rewards/rejected": -16.87109375, "step": 15455 }, { "epoch": 2.4, "learning_rate": 2.811277555889828e-06, "logits/chosen": -1.208338975906372, "logits/rejected": -2.479613780975342, "logps/chosen": -149.8299560546875, "logps/rejected": -575.0477294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.790557861328125, "rewards/margins": 18.49980926513672, "rewards/rejected": -27.290367126464844, "step": 15456 }, { "epoch": 2.4, "learning_rate": 2.8105441153586804e-06, "logits/chosen": -2.0342657566070557, "logits/rejected": -2.170720338821411, "logps/chosen": -267.6961669921875, "logps/rejected": -500.21173095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.010332107543945, "rewards/margins": 15.575997352600098, "rewards/rejected": -22.58633041381836, "step": 15457 }, { "epoch": 2.4, "learning_rate": 2.8098106748275327e-06, "logits/chosen": -2.702712297439575, "logits/rejected": -2.256809711456299, "logps/chosen": -229.28163146972656, "logps/rejected": -364.0317687988281, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.4335246086120605, "rewards/margins": 11.846999168395996, "rewards/rejected": -17.2805233001709, "step": 15458 }, { "epoch": 2.4, "learning_rate": 2.809077234296385e-06, "logits/chosen": -2.6758933067321777, "logits/rejected": -2.091473340988159, "logps/chosen": -292.71844482421875, "logps/rejected": -141.60690307617188, "loss": 0.2421, "rewards/accuracies": 1.0, "rewards/chosen": -4.791316986083984, "rewards/margins": 2.7390496730804443, "rewards/rejected": -7.530366897583008, "step": 15459 }, { "epoch": 2.4, "learning_rate": 2.808343793765237e-06, "logits/chosen": -1.3360838890075684, "logits/rejected": -1.7643018960952759, "logps/chosen": -109.66375732421875, "logps/rejected": -355.1929626464844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.071149826049805, "rewards/margins": 10.723716735839844, "rewards/rejected": -15.794866561889648, "step": 15460 }, { "epoch": 2.4, "learning_rate": 2.8076103532340888e-06, "logits/chosen": -1.7791210412979126, "logits/rejected": -1.621160864830017, "logps/chosen": -486.9699401855469, "logps/rejected": -598.1214599609375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -6.42724609375, "rewards/margins": 9.462141036987305, "rewards/rejected": -15.889388084411621, "step": 15461 }, { "epoch": 2.4, "learning_rate": 2.806876912702941e-06, "logits/chosen": -2.843973398208618, "logits/rejected": -2.6992766857147217, "logps/chosen": -387.3060302734375, "logps/rejected": -582.805419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3933563232421875, "rewards/margins": 18.012683868408203, "rewards/rejected": -23.40604019165039, "step": 15462 }, { "epoch": 2.4, "learning_rate": 2.806143472171793e-06, "logits/chosen": -2.2567694187164307, "logits/rejected": -2.636976957321167, "logps/chosen": -187.7049102783203, "logps/rejected": -454.94287109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.61703109741211, "rewards/margins": 8.725421905517578, "rewards/rejected": -19.342453002929688, "step": 15463 }, { "epoch": 2.4, "learning_rate": 2.8054100316406452e-06, "logits/chosen": -2.5279910564422607, "logits/rejected": -2.5926363468170166, "logps/chosen": -215.16566467285156, "logps/rejected": -346.3074951171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.4529924392700195, "rewards/margins": 8.71741008758545, "rewards/rejected": -16.17040252685547, "step": 15464 }, { "epoch": 2.41, "learning_rate": 2.804676591109497e-06, "logits/chosen": -2.392657518386841, "logits/rejected": -2.6975297927856445, "logps/chosen": -856.5930786132812, "logps/rejected": -931.090087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.05987548828125, "rewards/margins": 15.272624969482422, "rewards/rejected": -21.332500457763672, "step": 15465 }, { "epoch": 2.41, "learning_rate": 2.8039431505783494e-06, "logits/chosen": -2.3311972618103027, "logits/rejected": -2.7820687294006348, "logps/chosen": -211.21913146972656, "logps/rejected": -470.70989990234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.791382312774658, "rewards/margins": 10.168917655944824, "rewards/rejected": -14.960299491882324, "step": 15466 }, { "epoch": 2.41, "learning_rate": 2.8032097100472017e-06, "logits/chosen": -2.415248394012451, "logits/rejected": -2.639084577560425, "logps/chosen": -230.67547607421875, "logps/rejected": -351.20306396484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.122482776641846, "rewards/margins": 7.152105808258057, "rewards/rejected": -13.274588584899902, "step": 15467 }, { "epoch": 2.41, "learning_rate": 2.802476269516054e-06, "logits/chosen": -3.091433525085449, "logits/rejected": -3.1568048000335693, "logps/chosen": -131.9261474609375, "logps/rejected": -364.8441162109375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -6.996870994567871, "rewards/margins": 9.614421844482422, "rewards/rejected": -16.61129379272461, "step": 15468 }, { "epoch": 2.41, "learning_rate": 2.801742828984906e-06, "logits/chosen": -1.8335286378860474, "logits/rejected": -2.3401684761047363, "logps/chosen": -191.35272216796875, "logps/rejected": -310.5020751953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.873950004577637, "rewards/margins": 8.830276489257812, "rewards/rejected": -15.70422649383545, "step": 15469 }, { "epoch": 2.41, "learning_rate": 2.8010093884537582e-06, "logits/chosen": -2.474496603012085, "logits/rejected": -2.567272186279297, "logps/chosen": -131.102294921875, "logps/rejected": -306.4617004394531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.647938251495361, "rewards/margins": 11.839518547058105, "rewards/rejected": -16.487457275390625, "step": 15470 }, { "epoch": 2.41, "learning_rate": 2.80027594792261e-06, "logits/chosen": -1.7472293376922607, "logits/rejected": -1.2504948377609253, "logps/chosen": -378.21075439453125, "logps/rejected": -352.818603515625, "loss": 0.0684, "rewards/accuracies": 1.0, "rewards/chosen": -7.159401893615723, "rewards/margins": 4.611632347106934, "rewards/rejected": -11.771034240722656, "step": 15471 }, { "epoch": 2.41, "learning_rate": 2.799542507391462e-06, "logits/chosen": -2.583634376525879, "logits/rejected": -2.719193458557129, "logps/chosen": -648.5850830078125, "logps/rejected": -747.5316772460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.36126708984375, "rewards/margins": 15.61759090423584, "rewards/rejected": -19.978858947753906, "step": 15472 }, { "epoch": 2.41, "learning_rate": 2.7988090668603143e-06, "logits/chosen": -2.5954880714416504, "logits/rejected": -3.0487112998962402, "logps/chosen": -118.82025146484375, "logps/rejected": -315.79742431640625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.50877571105957, "rewards/margins": 9.778809547424316, "rewards/rejected": -15.287585258483887, "step": 15473 }, { "epoch": 2.41, "learning_rate": 2.798075626329166e-06, "logits/chosen": -1.7443714141845703, "logits/rejected": -2.5947465896606445, "logps/chosen": -251.98806762695312, "logps/rejected": -666.0864868164062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.94051456451416, "rewards/margins": 15.33944320678711, "rewards/rejected": -20.279956817626953, "step": 15474 }, { "epoch": 2.41, "learning_rate": 2.797342185798019e-06, "logits/chosen": -0.9497764706611633, "logits/rejected": -1.9858880043029785, "logps/chosen": -194.4788818359375, "logps/rejected": -467.681396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.731775283813477, "rewards/margins": 12.24976921081543, "rewards/rejected": -20.981544494628906, "step": 15475 }, { "epoch": 2.41, "learning_rate": 2.7966087452668708e-06, "logits/chosen": -2.330977439880371, "logits/rejected": -2.8206946849823, "logps/chosen": -155.8550262451172, "logps/rejected": -371.149169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.890869140625, "rewards/margins": 12.04132080078125, "rewards/rejected": -17.93218994140625, "step": 15476 }, { "epoch": 2.41, "learning_rate": 2.795875304735723e-06, "logits/chosen": -2.4895856380462646, "logits/rejected": -2.3860812187194824, "logps/chosen": -202.1336212158203, "logps/rejected": -354.4463195800781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.808597564697266, "rewards/margins": 9.931447982788086, "rewards/rejected": -19.74004554748535, "step": 15477 }, { "epoch": 2.41, "learning_rate": 2.795141864204575e-06, "logits/chosen": -2.969945192337036, "logits/rejected": -3.0824358463287354, "logps/chosen": -120.1226806640625, "logps/rejected": -214.78012084960938, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -6.678679466247559, "rewards/margins": 6.431644439697266, "rewards/rejected": -13.11032485961914, "step": 15478 }, { "epoch": 2.41, "learning_rate": 2.7944084236734272e-06, "logits/chosen": -1.6581895351409912, "logits/rejected": -2.603731632232666, "logps/chosen": -164.72659301757812, "logps/rejected": -263.5568542480469, "loss": 3.2701, "rewards/accuracies": 0.5, "rewards/chosen": -9.056079864501953, "rewards/margins": 4.3821916580200195, "rewards/rejected": -13.438271522521973, "step": 15479 }, { "epoch": 2.41, "learning_rate": 2.793674983142279e-06, "logits/chosen": -1.9549363851547241, "logits/rejected": -2.7040841579437256, "logps/chosen": -374.35589599609375, "logps/rejected": -615.7322998046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.214532852172852, "rewards/margins": 9.92328929901123, "rewards/rejected": -21.137821197509766, "step": 15480 }, { "epoch": 2.41, "learning_rate": 2.792941542611131e-06, "logits/chosen": -2.510897397994995, "logits/rejected": -2.6778886318206787, "logps/chosen": -516.4913940429688, "logps/rejected": -404.1234130859375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -9.260164260864258, "rewards/margins": 4.9184889793396, "rewards/rejected": -14.178653717041016, "step": 15481 }, { "epoch": 2.41, "learning_rate": 2.7922081020799833e-06, "logits/chosen": -2.5032107830047607, "logits/rejected": -2.4835102558135986, "logps/chosen": -206.65853881835938, "logps/rejected": -295.95147705078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.737561225891113, "rewards/margins": 8.806446075439453, "rewards/rejected": -15.54400634765625, "step": 15482 }, { "epoch": 2.41, "learning_rate": 2.7914746615488356e-06, "logits/chosen": -2.0417122840881348, "logits/rejected": -2.7851881980895996, "logps/chosen": -271.4293212890625, "logps/rejected": -511.2947998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.587602615356445, "rewards/margins": 11.344137191772461, "rewards/rejected": -18.931739807128906, "step": 15483 }, { "epoch": 2.41, "learning_rate": 2.790741221017688e-06, "logits/chosen": -3.1142914295196533, "logits/rejected": -2.555299758911133, "logps/chosen": -308.03369140625, "logps/rejected": -232.36395263671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.743707656860352, "rewards/margins": 8.251456260681152, "rewards/rejected": -13.995163917541504, "step": 15484 }, { "epoch": 2.41, "learning_rate": 2.7900077804865398e-06, "logits/chosen": -2.699195623397827, "logits/rejected": -3.3467679023742676, "logps/chosen": -88.22901916503906, "logps/rejected": -324.78851318359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.818052291870117, "rewards/margins": 7.337924957275391, "rewards/rejected": -13.155977249145508, "step": 15485 }, { "epoch": 2.41, "learning_rate": 2.789274339955392e-06, "logits/chosen": -2.172980546951294, "logits/rejected": -2.770442008972168, "logps/chosen": -181.05032348632812, "logps/rejected": -409.0516357421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.865578651428223, "rewards/margins": 10.146799087524414, "rewards/rejected": -18.01237678527832, "step": 15486 }, { "epoch": 2.41, "learning_rate": 2.788540899424244e-06, "logits/chosen": -2.046872138977051, "logits/rejected": -2.2466673851013184, "logps/chosen": -203.26885986328125, "logps/rejected": -304.50408935546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.912209510803223, "rewards/margins": 11.375693321228027, "rewards/rejected": -17.28790283203125, "step": 15487 }, { "epoch": 2.41, "learning_rate": 2.7878074588930963e-06, "logits/chosen": -2.443525791168213, "logits/rejected": -2.6659512519836426, "logps/chosen": -333.80859375, "logps/rejected": -608.9793701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.875254154205322, "rewards/margins": 16.98145294189453, "rewards/rejected": -21.856704711914062, "step": 15488 }, { "epoch": 2.41, "learning_rate": 2.787074018361948e-06, "logits/chosen": -1.998765230178833, "logits/rejected": -2.399303913116455, "logps/chosen": -341.2774658203125, "logps/rejected": -483.93328857421875, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -12.532405853271484, "rewards/margins": 10.127323150634766, "rewards/rejected": -22.65972900390625, "step": 15489 }, { "epoch": 2.41, "learning_rate": 2.7863405778308004e-06, "logits/chosen": -2.3373374938964844, "logits/rejected": -2.7505040168762207, "logps/chosen": -189.20050048828125, "logps/rejected": -328.8951416015625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.04624605178833, "rewards/margins": 6.846944808959961, "rewards/rejected": -11.893190383911133, "step": 15490 }, { "epoch": 2.41, "learning_rate": 2.7856071372996523e-06, "logits/chosen": -1.9161479473114014, "logits/rejected": -2.9159679412841797, "logps/chosen": -592.4729614257812, "logps/rejected": -846.94287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.561337471008301, "rewards/margins": 15.582609176635742, "rewards/rejected": -21.143945693969727, "step": 15491 }, { "epoch": 2.41, "learning_rate": 2.7848736967685046e-06, "logits/chosen": -1.8513644933700562, "logits/rejected": -2.6967849731445312, "logps/chosen": -396.564208984375, "logps/rejected": -560.094970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.741445541381836, "rewards/margins": 11.077095031738281, "rewards/rejected": -19.818540573120117, "step": 15492 }, { "epoch": 2.41, "learning_rate": 2.784140256237357e-06, "logits/chosen": -2.202014684677124, "logits/rejected": -2.857754945755005, "logps/chosen": -648.4482421875, "logps/rejected": -689.67431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.78683614730835, "rewards/margins": 10.749504089355469, "rewards/rejected": -15.536340713500977, "step": 15493 }, { "epoch": 2.41, "learning_rate": 2.783406815706209e-06, "logits/chosen": -2.811602830886841, "logits/rejected": -1.564565896987915, "logps/chosen": -247.71807861328125, "logps/rejected": -215.14498901367188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.3713912963867188, "rewards/margins": 10.57575511932373, "rewards/rejected": -13.947147369384766, "step": 15494 }, { "epoch": 2.41, "learning_rate": 2.782673375175061e-06, "logits/chosen": -2.6036736965179443, "logits/rejected": -2.622072696685791, "logps/chosen": -109.20198822021484, "logps/rejected": -134.4390411376953, "loss": 0.4666, "rewards/accuracies": 0.5, "rewards/chosen": -5.208014488220215, "rewards/margins": 5.054664611816406, "rewards/rejected": -10.262679100036621, "step": 15495 }, { "epoch": 2.41, "learning_rate": 2.781939934643913e-06, "logits/chosen": -2.551386833190918, "logits/rejected": -2.7870054244995117, "logps/chosen": -135.28372192382812, "logps/rejected": -307.55633544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.676952362060547, "rewards/margins": 9.182119369506836, "rewards/rejected": -17.859071731567383, "step": 15496 }, { "epoch": 2.41, "learning_rate": 2.7812064941127653e-06, "logits/chosen": -2.561352014541626, "logits/rejected": -2.8801262378692627, "logps/chosen": -161.66656494140625, "logps/rejected": -315.2289733886719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -11.626867294311523, "rewards/margins": 7.304533958435059, "rewards/rejected": -18.931400299072266, "step": 15497 }, { "epoch": 2.41, "learning_rate": 2.780473053581617e-06, "logits/chosen": -2.8610870838165283, "logits/rejected": -2.694979429244995, "logps/chosen": -648.64990234375, "logps/rejected": -1013.998779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.870810031890869, "rewards/margins": 14.97547721862793, "rewards/rejected": -20.84628677368164, "step": 15498 }, { "epoch": 2.41, "learning_rate": 2.7797396130504695e-06, "logits/chosen": -1.715645670890808, "logits/rejected": -2.5783543586730957, "logps/chosen": -613.5294799804688, "logps/rejected": -641.635986328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.572525978088379, "rewards/margins": 10.547271728515625, "rewards/rejected": -19.119796752929688, "step": 15499 }, { "epoch": 2.41, "learning_rate": 2.7790061725193218e-06, "logits/chosen": -2.8999574184417725, "logits/rejected": -2.9220950603485107, "logps/chosen": -470.8406982421875, "logps/rejected": -621.4293212890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.914102554321289, "rewards/margins": 11.99148941040039, "rewards/rejected": -16.90559196472168, "step": 15500 }, { "epoch": 2.41, "learning_rate": 2.7782727319881736e-06, "logits/chosen": -1.5825148820877075, "logits/rejected": -2.3805811405181885, "logps/chosen": -160.15814208984375, "logps/rejected": -277.97845458984375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -7.589626312255859, "rewards/margins": 7.204602241516113, "rewards/rejected": -14.794229507446289, "step": 15501 }, { "epoch": 2.41, "learning_rate": 2.777539291457026e-06, "logits/chosen": -1.8599244356155396, "logits/rejected": -2.785155773162842, "logps/chosen": -222.96197509765625, "logps/rejected": -541.3966674804688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.090330123901367, "rewards/margins": 9.993152618408203, "rewards/rejected": -19.08348274230957, "step": 15502 }, { "epoch": 2.41, "learning_rate": 2.776805850925878e-06, "logits/chosen": -2.677201747894287, "logits/rejected": -2.2920279502868652, "logps/chosen": -288.928955078125, "logps/rejected": -351.5774230957031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.81967306137085, "rewards/margins": 10.818717002868652, "rewards/rejected": -18.638389587402344, "step": 15503 }, { "epoch": 2.41, "learning_rate": 2.77607241039473e-06, "logits/chosen": -2.1620893478393555, "logits/rejected": -2.3553178310394287, "logps/chosen": -141.79644775390625, "logps/rejected": -312.2924499511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.695557594299316, "rewards/margins": 11.618778228759766, "rewards/rejected": -16.314334869384766, "step": 15504 }, { "epoch": 2.41, "learning_rate": 2.775338969863582e-06, "logits/chosen": -2.0275766849517822, "logits/rejected": -2.6110191345214844, "logps/chosen": -165.9232635498047, "logps/rejected": -431.2572937011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.573849201202393, "rewards/margins": 11.09282398223877, "rewards/rejected": -17.666671752929688, "step": 15505 }, { "epoch": 2.41, "learning_rate": 2.7746055293324343e-06, "logits/chosen": -2.5160233974456787, "logits/rejected": -2.2416043281555176, "logps/chosen": -554.2083740234375, "logps/rejected": -709.4561767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.119391441345215, "rewards/margins": 14.444231986999512, "rewards/rejected": -19.563623428344727, "step": 15506 }, { "epoch": 2.41, "learning_rate": 2.773872088801286e-06, "logits/chosen": -2.6429619789123535, "logits/rejected": -3.023540735244751, "logps/chosen": -101.46734619140625, "logps/rejected": -358.16217041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3953371047973633, "rewards/margins": 11.676897048950195, "rewards/rejected": -15.072234153747559, "step": 15507 }, { "epoch": 2.41, "learning_rate": 2.7731386482701385e-06, "logits/chosen": -2.527923822402954, "logits/rejected": -1.7978452444076538, "logps/chosen": -194.13485717773438, "logps/rejected": -236.74342346191406, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -5.157771110534668, "rewards/margins": 7.314061164855957, "rewards/rejected": -12.471832275390625, "step": 15508 }, { "epoch": 2.41, "learning_rate": 2.7724052077389908e-06, "logits/chosen": -1.0490423440933228, "logits/rejected": -1.8848695755004883, "logps/chosen": -261.6282958984375, "logps/rejected": -489.84991455078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.232686996459961, "rewards/margins": 11.87977409362793, "rewards/rejected": -18.11246109008789, "step": 15509 }, { "epoch": 2.41, "learning_rate": 2.7716717672078427e-06, "logits/chosen": -1.715103030204773, "logits/rejected": -2.887866497039795, "logps/chosen": -132.44285583496094, "logps/rejected": -703.1251220703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.57840347290039, "rewards/margins": 14.813817024230957, "rewards/rejected": -23.39221954345703, "step": 15510 }, { "epoch": 2.41, "learning_rate": 2.770938326676695e-06, "logits/chosen": -2.475097179412842, "logits/rejected": -2.8182199001312256, "logps/chosen": -499.6629638671875, "logps/rejected": -411.19403076171875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -10.722496032714844, "rewards/margins": 8.340984344482422, "rewards/rejected": -19.063480377197266, "step": 15511 }, { "epoch": 2.41, "learning_rate": 2.770204886145547e-06, "logits/chosen": -2.6236672401428223, "logits/rejected": -2.758697032928467, "logps/chosen": -207.41018676757812, "logps/rejected": -366.957275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.727921485900879, "rewards/margins": 9.699061393737793, "rewards/rejected": -14.426982879638672, "step": 15512 }, { "epoch": 2.41, "learning_rate": 2.769471445614399e-06, "logits/chosen": -2.0292978286743164, "logits/rejected": -2.3980355262756348, "logps/chosen": -731.2228393554688, "logps/rejected": -780.020751953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.309904098510742, "rewards/margins": 13.646563529968262, "rewards/rejected": -23.95646858215332, "step": 15513 }, { "epoch": 2.41, "learning_rate": 2.768738005083251e-06, "logits/chosen": -1.853147268295288, "logits/rejected": -2.6561875343322754, "logps/chosen": -139.27801513671875, "logps/rejected": -366.60980224609375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -8.031061172485352, "rewards/margins": 6.659836292266846, "rewards/rejected": -14.690896987915039, "step": 15514 }, { "epoch": 2.41, "learning_rate": 2.7680045645521033e-06, "logits/chosen": -2.334881067276001, "logits/rejected": -2.401623249053955, "logps/chosen": -290.8466491699219, "logps/rejected": -506.7825927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.417057991027832, "rewards/margins": 15.131942749023438, "rewards/rejected": -24.548999786376953, "step": 15515 }, { "epoch": 2.41, "learning_rate": 2.767271124020955e-06, "logits/chosen": -2.2036447525024414, "logits/rejected": -1.3561365604400635, "logps/chosen": -444.21636962890625, "logps/rejected": -375.6412048339844, "loss": 0.0401, "rewards/accuracies": 1.0, "rewards/chosen": -6.93899393081665, "rewards/margins": 8.810647964477539, "rewards/rejected": -15.749641418457031, "step": 15516 }, { "epoch": 2.41, "learning_rate": 2.766537683489808e-06, "logits/chosen": -2.3066701889038086, "logits/rejected": -2.506258726119995, "logps/chosen": -220.41476440429688, "logps/rejected": -412.4495849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.662632942199707, "rewards/margins": 10.739295959472656, "rewards/rejected": -20.401927947998047, "step": 15517 }, { "epoch": 2.41, "learning_rate": 2.76580424295866e-06, "logits/chosen": -2.361316442489624, "logits/rejected": -2.4701592922210693, "logps/chosen": -525.327880859375, "logps/rejected": -376.9327392578125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.449059963226318, "rewards/margins": 6.73184871673584, "rewards/rejected": -11.180909156799316, "step": 15518 }, { "epoch": 2.41, "learning_rate": 2.765070802427512e-06, "logits/chosen": -2.221810817718506, "logits/rejected": -3.0156784057617188, "logps/chosen": -154.08485412597656, "logps/rejected": -468.9779052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.513833045959473, "rewards/margins": 14.312891960144043, "rewards/rejected": -20.826725006103516, "step": 15519 }, { "epoch": 2.41, "learning_rate": 2.764337361896364e-06, "logits/chosen": -2.148437738418579, "logits/rejected": -2.9131810665130615, "logps/chosen": -104.41384887695312, "logps/rejected": -237.65122985839844, "loss": 0.1357, "rewards/accuracies": 1.0, "rewards/chosen": -8.899892807006836, "rewards/margins": 3.210838794708252, "rewards/rejected": -12.11073112487793, "step": 15520 }, { "epoch": 2.41, "learning_rate": 2.763603921365216e-06, "logits/chosen": -2.6857075691223145, "logits/rejected": -1.7425251007080078, "logps/chosen": -417.77447509765625, "logps/rejected": -346.1282958984375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.639799118041992, "rewards/margins": 9.478036880493164, "rewards/rejected": -15.117835998535156, "step": 15521 }, { "epoch": 2.41, "learning_rate": 2.762870480834068e-06, "logits/chosen": -1.7703577280044556, "logits/rejected": -3.0472819805145264, "logps/chosen": -272.54412841796875, "logps/rejected": -496.32232666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.5527925491333, "rewards/margins": 10.87517261505127, "rewards/rejected": -19.42796516418457, "step": 15522 }, { "epoch": 2.41, "learning_rate": 2.76213704030292e-06, "logits/chosen": -2.727428913116455, "logits/rejected": -2.991196632385254, "logps/chosen": -126.86054992675781, "logps/rejected": -424.1326904296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.430482864379883, "rewards/margins": 9.197969436645508, "rewards/rejected": -14.62845230102539, "step": 15523 }, { "epoch": 2.41, "learning_rate": 2.7614035997717723e-06, "logits/chosen": -2.6859941482543945, "logits/rejected": -2.650601625442505, "logps/chosen": -445.2922058105469, "logps/rejected": -456.64105224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.857027769088745, "rewards/margins": 12.206644058227539, "rewards/rejected": -15.063671112060547, "step": 15524 }, { "epoch": 2.41, "learning_rate": 2.7606701592406246e-06, "logits/chosen": -2.782198905944824, "logits/rejected": -2.242479085922241, "logps/chosen": -265.8017578125, "logps/rejected": -282.8149108886719, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -6.726809978485107, "rewards/margins": 4.949018478393555, "rewards/rejected": -11.67582893371582, "step": 15525 }, { "epoch": 2.41, "learning_rate": 2.759936718709477e-06, "logits/chosen": -1.7365663051605225, "logits/rejected": -2.9142794609069824, "logps/chosen": -149.34056091308594, "logps/rejected": -527.6423950195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.87293815612793, "rewards/margins": 15.924629211425781, "rewards/rejected": -20.79756736755371, "step": 15526 }, { "epoch": 2.41, "learning_rate": 2.759203278178329e-06, "logits/chosen": -1.9341213703155518, "logits/rejected": -2.6981899738311768, "logps/chosen": -288.24383544921875, "logps/rejected": -399.031982421875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -8.13637924194336, "rewards/margins": 6.704526901245117, "rewards/rejected": -14.840906143188477, "step": 15527 }, { "epoch": 2.41, "learning_rate": 2.758469837647181e-06, "logits/chosen": -1.737062692642212, "logits/rejected": -2.535186529159546, "logps/chosen": -228.8163604736328, "logps/rejected": -596.0928955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.567960739135742, "rewards/margins": 14.929705619812012, "rewards/rejected": -21.497665405273438, "step": 15528 }, { "epoch": 2.42, "learning_rate": 2.757736397116033e-06, "logits/chosen": -2.611541509628296, "logits/rejected": -2.6775388717651367, "logps/chosen": -282.75335693359375, "logps/rejected": -279.316650390625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.667057991027832, "rewards/margins": 9.868154525756836, "rewards/rejected": -16.53521156311035, "step": 15529 }, { "epoch": 2.42, "learning_rate": 2.757002956584885e-06, "logits/chosen": -2.5856425762176514, "logits/rejected": -2.29746150970459, "logps/chosen": -779.7774658203125, "logps/rejected": -614.3133544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.674389839172363, "rewards/margins": 11.664475440979004, "rewards/rejected": -19.338865280151367, "step": 15530 }, { "epoch": 2.42, "learning_rate": 2.756269516053737e-06, "logits/chosen": -2.2233827114105225, "logits/rejected": -2.6185238361358643, "logps/chosen": -525.2953491210938, "logps/rejected": -725.1343994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.306807041168213, "rewards/margins": 13.033618927001953, "rewards/rejected": -20.34042739868164, "step": 15531 }, { "epoch": 2.42, "learning_rate": 2.755536075522589e-06, "logits/chosen": -1.754814863204956, "logits/rejected": -2.8438327312469482, "logps/chosen": -328.121337890625, "logps/rejected": -345.817626953125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.718391418457031, "rewards/margins": 7.093624114990234, "rewards/rejected": -12.812015533447266, "step": 15532 }, { "epoch": 2.42, "learning_rate": 2.7548026349914414e-06, "logits/chosen": -2.7340333461761475, "logits/rejected": -2.8546829223632812, "logps/chosen": -163.32171630859375, "logps/rejected": -363.9683837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.06636905670166, "rewards/margins": 12.92465591430664, "rewards/rejected": -17.991025924682617, "step": 15533 }, { "epoch": 2.42, "learning_rate": 2.7540691944602937e-06, "logits/chosen": -2.7889068126678467, "logits/rejected": -2.2856695652008057, "logps/chosen": -588.534912109375, "logps/rejected": -536.6761474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.9970855712890625, "rewards/margins": 12.359448432922363, "rewards/rejected": -20.35653305053711, "step": 15534 }, { "epoch": 2.42, "learning_rate": 2.753335753929146e-06, "logits/chosen": -2.6427104473114014, "logits/rejected": -2.8501453399658203, "logps/chosen": -173.2987060546875, "logps/rejected": -263.2619323730469, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/chosen": -7.198825836181641, "rewards/margins": 5.178595066070557, "rewards/rejected": -12.377420425415039, "step": 15535 }, { "epoch": 2.42, "learning_rate": 2.752602313397998e-06, "logits/chosen": -2.7566401958465576, "logits/rejected": -1.6747469902038574, "logps/chosen": -402.6700134277344, "logps/rejected": -381.249267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.003720760345459, "rewards/margins": 11.750141143798828, "rewards/rejected": -17.753862380981445, "step": 15536 }, { "epoch": 2.42, "learning_rate": 2.75186887286685e-06, "logits/chosen": -2.9031083583831787, "logits/rejected": -3.20621395111084, "logps/chosen": -132.81341552734375, "logps/rejected": -395.3504333496094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.029674053192139, "rewards/margins": 9.558629989624023, "rewards/rejected": -13.588303565979004, "step": 15537 }, { "epoch": 2.42, "learning_rate": 2.751135432335702e-06, "logits/chosen": -2.796644926071167, "logits/rejected": -2.6540334224700928, "logps/chosen": -158.2749786376953, "logps/rejected": -223.36129760742188, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.16589879989624, "rewards/margins": 7.758092880249023, "rewards/rejected": -11.923992156982422, "step": 15538 }, { "epoch": 2.42, "learning_rate": 2.7504019918045543e-06, "logits/chosen": -2.1590707302093506, "logits/rejected": -2.7567009925842285, "logps/chosen": -209.62660217285156, "logps/rejected": -425.49127197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.277716636657715, "rewards/margins": 11.190779685974121, "rewards/rejected": -18.468496322631836, "step": 15539 }, { "epoch": 2.42, "learning_rate": 2.749668551273406e-06, "logits/chosen": -2.5846893787384033, "logits/rejected": -2.751675844192505, "logps/chosen": -101.69902038574219, "logps/rejected": -237.59384155273438, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.042078495025635, "rewards/margins": 8.429582595825195, "rewards/rejected": -14.471660614013672, "step": 15540 }, { "epoch": 2.42, "learning_rate": 2.748935110742258e-06, "logits/chosen": -2.713935613632202, "logits/rejected": -2.4529528617858887, "logps/chosen": -228.6788787841797, "logps/rejected": -223.35745239257812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.924501419067383, "rewards/margins": 9.40152359008789, "rewards/rejected": -14.326025009155273, "step": 15541 }, { "epoch": 2.42, "learning_rate": 2.748201670211111e-06, "logits/chosen": -1.9850112199783325, "logits/rejected": -2.297823905944824, "logps/chosen": -516.403564453125, "logps/rejected": -1524.6895751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.67465591430664, "rewards/margins": 16.455942153930664, "rewards/rejected": -30.130596160888672, "step": 15542 }, { "epoch": 2.42, "learning_rate": 2.7474682296799627e-06, "logits/chosen": -2.612553834915161, "logits/rejected": -2.7770543098449707, "logps/chosen": -95.84707641601562, "logps/rejected": -275.7369079589844, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.353557109832764, "rewards/margins": 11.53947639465332, "rewards/rejected": -15.893033981323242, "step": 15543 }, { "epoch": 2.42, "learning_rate": 2.746734789148815e-06, "logits/chosen": -1.9478540420532227, "logits/rejected": -2.608311176300049, "logps/chosen": -160.45254516601562, "logps/rejected": -429.43560791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.307378768920898, "rewards/margins": 13.152347564697266, "rewards/rejected": -17.459726333618164, "step": 15544 }, { "epoch": 2.42, "learning_rate": 2.746001348617667e-06, "logits/chosen": -2.821132183074951, "logits/rejected": -2.4263534545898438, "logps/chosen": -609.6858520507812, "logps/rejected": -542.394287109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.0973052978515625, "rewards/margins": 10.134540557861328, "rewards/rejected": -16.23184585571289, "step": 15545 }, { "epoch": 2.42, "learning_rate": 2.745267908086519e-06, "logits/chosen": -2.689138412475586, "logits/rejected": -2.8397486209869385, "logps/chosen": -249.72169494628906, "logps/rejected": -408.79754638671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -9.250448226928711, "rewards/margins": 7.427231788635254, "rewards/rejected": -16.67768096923828, "step": 15546 }, { "epoch": 2.42, "learning_rate": 2.744534467555371e-06, "logits/chosen": -2.6059467792510986, "logits/rejected": -2.6387600898742676, "logps/chosen": -126.7254638671875, "logps/rejected": -274.4685363769531, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.938292026519775, "rewards/margins": 6.593765735626221, "rewards/rejected": -13.532057762145996, "step": 15547 }, { "epoch": 2.42, "learning_rate": 2.7438010270242234e-06, "logits/chosen": -2.786303758621216, "logits/rejected": -1.942618489265442, "logps/chosen": -619.549072265625, "logps/rejected": -314.8243713378906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3931832313537598, "rewards/margins": 10.034017562866211, "rewards/rejected": -13.427200317382812, "step": 15548 }, { "epoch": 2.42, "learning_rate": 2.7430675864930752e-06, "logits/chosen": -2.1768062114715576, "logits/rejected": -2.6840453147888184, "logps/chosen": -353.9871520996094, "logps/rejected": -649.9912109375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -6.0015974044799805, "rewards/margins": 8.531167030334473, "rewards/rejected": -14.532764434814453, "step": 15549 }, { "epoch": 2.42, "learning_rate": 2.7423341459619275e-06, "logits/chosen": -2.7283482551574707, "logits/rejected": -2.84476375579834, "logps/chosen": -678.5968627929688, "logps/rejected": -553.02099609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.8043012619018555, "rewards/margins": 8.739839553833008, "rewards/rejected": -14.544140815734863, "step": 15550 }, { "epoch": 2.42, "learning_rate": 2.74160070543078e-06, "logits/chosen": -2.55311918258667, "logits/rejected": -2.0785605907440186, "logps/chosen": -452.46173095703125, "logps/rejected": -467.1745910644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.862905502319336, "rewards/margins": 9.227912902832031, "rewards/rejected": -16.090818405151367, "step": 15551 }, { "epoch": 2.42, "learning_rate": 2.7408672648996317e-06, "logits/chosen": -2.3142738342285156, "logits/rejected": -2.715937376022339, "logps/chosen": -124.39817810058594, "logps/rejected": -303.8119812011719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.306565284729004, "rewards/margins": 12.691045761108398, "rewards/rejected": -17.997610092163086, "step": 15552 }, { "epoch": 2.42, "learning_rate": 2.740133824368484e-06, "logits/chosen": -2.819222927093506, "logits/rejected": -2.802048921585083, "logps/chosen": -344.0845947265625, "logps/rejected": -374.7716064453125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.417750358581543, "rewards/margins": 9.32769775390625, "rewards/rejected": -16.745447158813477, "step": 15553 }, { "epoch": 2.42, "learning_rate": 2.739400383837336e-06, "logits/chosen": -1.567216157913208, "logits/rejected": -2.638932704925537, "logps/chosen": -95.99383544921875, "logps/rejected": -352.92083740234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.051026344299316, "rewards/margins": 10.286511421203613, "rewards/rejected": -15.33753776550293, "step": 15554 }, { "epoch": 2.42, "learning_rate": 2.738666943306188e-06, "logits/chosen": -2.7529420852661133, "logits/rejected": -2.6109118461608887, "logps/chosen": -597.6029052734375, "logps/rejected": -650.712158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.291131973266602, "rewards/margins": 15.850541114807129, "rewards/rejected": -26.141674041748047, "step": 15555 }, { "epoch": 2.42, "learning_rate": 2.73793350277504e-06, "logits/chosen": -2.315537214279175, "logits/rejected": -2.8056023120880127, "logps/chosen": -152.0098876953125, "logps/rejected": -340.14678955078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.495787620544434, "rewards/margins": 9.377555847167969, "rewards/rejected": -16.87334442138672, "step": 15556 }, { "epoch": 2.42, "learning_rate": 2.7372000622438924e-06, "logits/chosen": -2.8550727367401123, "logits/rejected": -2.9991495609283447, "logps/chosen": -116.85538482666016, "logps/rejected": -370.4143981933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3520050048828125, "rewards/margins": 13.332306861877441, "rewards/rejected": -18.684310913085938, "step": 15557 }, { "epoch": 2.42, "learning_rate": 2.7364666217127443e-06, "logits/chosen": -2.8030154705047607, "logits/rejected": -1.9980578422546387, "logps/chosen": -568.1116333007812, "logps/rejected": -464.119140625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.666496276855469, "rewards/margins": 9.860597610473633, "rewards/rejected": -15.527093887329102, "step": 15558 }, { "epoch": 2.42, "learning_rate": 2.735733181181597e-06, "logits/chosen": -2.2212741374969482, "logits/rejected": -2.469507932662964, "logps/chosen": -123.62401580810547, "logps/rejected": -207.4833984375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -4.758604049682617, "rewards/margins": 7.497776031494141, "rewards/rejected": -12.256380081176758, "step": 15559 }, { "epoch": 2.42, "learning_rate": 2.734999740650449e-06, "logits/chosen": -2.44779896736145, "logits/rejected": -2.2894842624664307, "logps/chosen": -386.19793701171875, "logps/rejected": -411.05206298828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.186094284057617, "rewards/margins": 10.201988220214844, "rewards/rejected": -17.388084411621094, "step": 15560 }, { "epoch": 2.42, "learning_rate": 2.7342663001193007e-06, "logits/chosen": -2.5546820163726807, "logits/rejected": -1.5883864164352417, "logps/chosen": -857.3814697265625, "logps/rejected": -531.1856079101562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.865578651428223, "rewards/margins": 10.553892135620117, "rewards/rejected": -16.419469833374023, "step": 15561 }, { "epoch": 2.42, "learning_rate": 2.733532859588153e-06, "logits/chosen": -2.985949993133545, "logits/rejected": -2.7632694244384766, "logps/chosen": -198.6410675048828, "logps/rejected": -320.1060485839844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.7983717918396, "rewards/margins": 9.853611946105957, "rewards/rejected": -14.651983261108398, "step": 15562 }, { "epoch": 2.42, "learning_rate": 2.732799419057005e-06, "logits/chosen": -1.2188607454299927, "logits/rejected": -2.3148093223571777, "logps/chosen": -95.56426239013672, "logps/rejected": -457.6934509277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.360847473144531, "rewards/margins": 13.13364315032959, "rewards/rejected": -18.494489669799805, "step": 15563 }, { "epoch": 2.42, "learning_rate": 2.7320659785258572e-06, "logits/chosen": -2.6293628215789795, "logits/rejected": -2.7052693367004395, "logps/chosen": -211.19232177734375, "logps/rejected": -346.35693359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.208868026733398, "rewards/margins": 9.65428352355957, "rewards/rejected": -14.863151550292969, "step": 15564 }, { "epoch": 2.42, "learning_rate": 2.731332537994709e-06, "logits/chosen": -2.6191813945770264, "logits/rejected": -2.671705961227417, "logps/chosen": -208.84478759765625, "logps/rejected": -319.3811950683594, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/chosen": -9.174478530883789, "rewards/margins": 6.001311779022217, "rewards/rejected": -15.175790786743164, "step": 15565 }, { "epoch": 2.42, "learning_rate": 2.7305990974635614e-06, "logits/chosen": -2.7997000217437744, "logits/rejected": -2.220132827758789, "logps/chosen": -290.3874816894531, "logps/rejected": -276.009033203125, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/chosen": -10.082709312438965, "rewards/margins": 5.540644645690918, "rewards/rejected": -15.623353958129883, "step": 15566 }, { "epoch": 2.42, "learning_rate": 2.7298656569324137e-06, "logits/chosen": -2.4992709159851074, "logits/rejected": -2.9493138790130615, "logps/chosen": -177.2696990966797, "logps/rejected": -272.21881103515625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -9.928328514099121, "rewards/margins": 6.381317138671875, "rewards/rejected": -16.309646606445312, "step": 15567 }, { "epoch": 2.42, "learning_rate": 2.729132216401266e-06, "logits/chosen": -2.2050514221191406, "logits/rejected": -2.822183847427368, "logps/chosen": -133.1538543701172, "logps/rejected": -470.9610595703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.663912773132324, "rewards/margins": 14.630346298217773, "rewards/rejected": -22.294260025024414, "step": 15568 }, { "epoch": 2.42, "learning_rate": 2.728398775870118e-06, "logits/chosen": -2.799030303955078, "logits/rejected": -2.0867490768432617, "logps/chosen": -243.42001342773438, "logps/rejected": -335.3365478515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.3053059577941895, "rewards/margins": 11.836408615112305, "rewards/rejected": -17.14171600341797, "step": 15569 }, { "epoch": 2.42, "learning_rate": 2.7276653353389698e-06, "logits/chosen": -2.1470565795898438, "logits/rejected": -2.6863529682159424, "logps/chosen": -231.95237731933594, "logps/rejected": -494.8065490722656, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -7.896293640136719, "rewards/margins": 7.365798473358154, "rewards/rejected": -15.262092590332031, "step": 15570 }, { "epoch": 2.42, "learning_rate": 2.726931894807822e-06, "logits/chosen": -2.0483779907226562, "logits/rejected": -2.9123027324676514, "logps/chosen": -235.80792236328125, "logps/rejected": -534.6851806640625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.908050537109375, "rewards/margins": 11.42697525024414, "rewards/rejected": -20.335025787353516, "step": 15571 }, { "epoch": 2.42, "learning_rate": 2.726198454276674e-06, "logits/chosen": -2.5281572341918945, "logits/rejected": -2.5886335372924805, "logps/chosen": -422.76580810546875, "logps/rejected": -466.95428466796875, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -7.533167839050293, "rewards/margins": 11.563520431518555, "rewards/rejected": -19.096689224243164, "step": 15572 }, { "epoch": 2.42, "learning_rate": 2.7254650137455262e-06, "logits/chosen": -2.1317920684814453, "logits/rejected": -2.821892499923706, "logps/chosen": -118.10277557373047, "logps/rejected": -242.8504638671875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.117197513580322, "rewards/margins": 8.70029067993164, "rewards/rejected": -15.817488670349121, "step": 15573 }, { "epoch": 2.42, "learning_rate": 2.724731573214378e-06, "logits/chosen": -2.203032970428467, "logits/rejected": -2.674248695373535, "logps/chosen": -339.2815856933594, "logps/rejected": -580.04443359375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -10.416061401367188, "rewards/margins": 8.790068626403809, "rewards/rejected": -19.206130981445312, "step": 15574 }, { "epoch": 2.42, "learning_rate": 2.7239981326832304e-06, "logits/chosen": -2.3316657543182373, "logits/rejected": -2.7018206119537354, "logps/chosen": -230.15870666503906, "logps/rejected": -440.65899658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.37512493133545, "rewards/margins": 11.698906898498535, "rewards/rejected": -20.074031829833984, "step": 15575 }, { "epoch": 2.42, "learning_rate": 2.7232646921520827e-06, "logits/chosen": -2.7825493812561035, "logits/rejected": -2.5904853343963623, "logps/chosen": -333.07562255859375, "logps/rejected": -400.8219909667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.508572578430176, "rewards/margins": 10.28995132446289, "rewards/rejected": -16.79852294921875, "step": 15576 }, { "epoch": 2.42, "learning_rate": 2.722531251620935e-06, "logits/chosen": -2.5814640522003174, "logits/rejected": -2.941223382949829, "logps/chosen": -226.4130859375, "logps/rejected": -404.2269592285156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.882754325866699, "rewards/margins": 12.074992179870605, "rewards/rejected": -17.957746505737305, "step": 15577 }, { "epoch": 2.42, "learning_rate": 2.721797811089787e-06, "logits/chosen": -2.066232204437256, "logits/rejected": -0.9949079155921936, "logps/chosen": -247.15121459960938, "logps/rejected": -178.33297729492188, "loss": 0.1014, "rewards/accuracies": 1.0, "rewards/chosen": -9.26314926147461, "rewards/margins": 2.62436842918396, "rewards/rejected": -11.887517929077148, "step": 15578 }, { "epoch": 2.42, "learning_rate": 2.7210643705586388e-06, "logits/chosen": -2.6097614765167236, "logits/rejected": -1.3017922639846802, "logps/chosen": -517.355224609375, "logps/rejected": -349.5401306152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.789057731628418, "rewards/margins": 13.120894432067871, "rewards/rejected": -21.90995216369629, "step": 15579 }, { "epoch": 2.42, "learning_rate": 2.720330930027491e-06, "logits/chosen": -2.871854543685913, "logits/rejected": -2.756098985671997, "logps/chosen": -388.0531005859375, "logps/rejected": -336.9265441894531, "loss": 0.6659, "rewards/accuracies": 0.5, "rewards/chosen": -6.64794921875, "rewards/margins": 6.012682914733887, "rewards/rejected": -12.660632133483887, "step": 15580 }, { "epoch": 2.42, "learning_rate": 2.719597489496343e-06, "logits/chosen": -0.8509390950202942, "logits/rejected": -2.547667980194092, "logps/chosen": -139.61666870117188, "logps/rejected": -406.7219543457031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.897523403167725, "rewards/margins": 9.000446319580078, "rewards/rejected": -14.897970199584961, "step": 15581 }, { "epoch": 2.42, "learning_rate": 2.7188640489651953e-06, "logits/chosen": -1.2837661504745483, "logits/rejected": -2.386803388595581, "logps/chosen": -163.52902221679688, "logps/rejected": -445.2316589355469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.01884651184082, "rewards/margins": 14.013717651367188, "rewards/rejected": -20.032564163208008, "step": 15582 }, { "epoch": 2.42, "learning_rate": 2.718130608434047e-06, "logits/chosen": -2.5101046562194824, "logits/rejected": -2.217459201812744, "logps/chosen": -597.9955444335938, "logps/rejected": -519.039794921875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.620397567749023, "rewards/margins": 11.632509231567383, "rewards/rejected": -19.252906799316406, "step": 15583 }, { "epoch": 2.42, "learning_rate": 2.7173971679029e-06, "logits/chosen": -2.791729688644409, "logits/rejected": -2.675557851791382, "logps/chosen": -285.53271484375, "logps/rejected": -466.6494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.696991920471191, "rewards/margins": 12.872289657592773, "rewards/rejected": -21.56928253173828, "step": 15584 }, { "epoch": 2.42, "learning_rate": 2.7166637273717517e-06, "logits/chosen": -1.5000452995300293, "logits/rejected": -2.5830116271972656, "logps/chosen": -413.7815856933594, "logps/rejected": -573.5457153320312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.868222236633301, "rewards/margins": 8.4396390914917, "rewards/rejected": -15.307861328125, "step": 15585 }, { "epoch": 2.42, "learning_rate": 2.715930286840604e-06, "logits/chosen": -2.8893797397613525, "logits/rejected": -1.982079029083252, "logps/chosen": -193.85752868652344, "logps/rejected": -192.71792602539062, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -6.557265281677246, "rewards/margins": 7.281460762023926, "rewards/rejected": -13.838726043701172, "step": 15586 }, { "epoch": 2.42, "learning_rate": 2.715196846309456e-06, "logits/chosen": -2.3393847942352295, "logits/rejected": -2.958529233932495, "logps/chosen": -74.67311096191406, "logps/rejected": -203.73171997070312, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -6.04985237121582, "rewards/margins": 6.081585884094238, "rewards/rejected": -12.131438255310059, "step": 15587 }, { "epoch": 2.42, "learning_rate": 2.7144634057783082e-06, "logits/chosen": -2.02301287651062, "logits/rejected": -2.745255947113037, "logps/chosen": -111.20388793945312, "logps/rejected": -561.27392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.274880409240723, "rewards/margins": 14.475826263427734, "rewards/rejected": -21.75070571899414, "step": 15588 }, { "epoch": 2.42, "learning_rate": 2.71372996524716e-06, "logits/chosen": -2.490163803100586, "logits/rejected": -2.627692461013794, "logps/chosen": -277.065185546875, "logps/rejected": -485.8150329589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.494903564453125, "rewards/margins": 12.223199844360352, "rewards/rejected": -16.718103408813477, "step": 15589 }, { "epoch": 2.42, "learning_rate": 2.712996524716012e-06, "logits/chosen": -1.6802321672439575, "logits/rejected": -2.2545154094696045, "logps/chosen": -263.7486267089844, "logps/rejected": -529.216064453125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -6.926270484924316, "rewards/margins": 12.30234146118164, "rewards/rejected": -19.22861099243164, "step": 15590 }, { "epoch": 2.42, "learning_rate": 2.7122630841848643e-06, "logits/chosen": -2.824005126953125, "logits/rejected": -2.564594030380249, "logps/chosen": -611.1809692382812, "logps/rejected": -412.5028076171875, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -3.9826009273529053, "rewards/margins": 6.335399627685547, "rewards/rejected": -10.318000793457031, "step": 15591 }, { "epoch": 2.42, "learning_rate": 2.7115296436537166e-06, "logits/chosen": -2.006084680557251, "logits/rejected": -2.559431314468384, "logps/chosen": -196.17431640625, "logps/rejected": -339.98846435546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.811278343200684, "rewards/margins": 9.970840454101562, "rewards/rejected": -18.782119750976562, "step": 15592 }, { "epoch": 2.43, "learning_rate": 2.710796203122569e-06, "logits/chosen": -2.351480722427368, "logits/rejected": -2.852182149887085, "logps/chosen": -176.70326232910156, "logps/rejected": -340.56353759765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.332106113433838, "rewards/margins": 7.631261825561523, "rewards/rejected": -12.963367462158203, "step": 15593 }, { "epoch": 2.43, "learning_rate": 2.7100627625914208e-06, "logits/chosen": -2.2671091556549072, "logits/rejected": -2.9507954120635986, "logps/chosen": -164.84075927734375, "logps/rejected": -392.3681335449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.623084545135498, "rewards/margins": 11.959088325500488, "rewards/rejected": -19.582172393798828, "step": 15594 }, { "epoch": 2.43, "learning_rate": 2.709329322060273e-06, "logits/chosen": -1.845444679260254, "logits/rejected": -2.582146406173706, "logps/chosen": -387.3616943359375, "logps/rejected": -628.6167602539062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.206806182861328, "rewards/margins": 11.059478759765625, "rewards/rejected": -19.266284942626953, "step": 15595 }, { "epoch": 2.43, "learning_rate": 2.708595881529125e-06, "logits/chosen": -2.440375328063965, "logits/rejected": -2.7939629554748535, "logps/chosen": -677.845458984375, "logps/rejected": -622.561767578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.918495178222656, "rewards/margins": 9.340631484985352, "rewards/rejected": -17.259126663208008, "step": 15596 }, { "epoch": 2.43, "learning_rate": 2.7078624409979772e-06, "logits/chosen": -2.746371030807495, "logits/rejected": -2.8303372859954834, "logps/chosen": -277.7934265136719, "logps/rejected": -440.046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -9.134105682373047, "rewards/margins": 7.885504722595215, "rewards/rejected": -17.019609451293945, "step": 15597 }, { "epoch": 2.43, "learning_rate": 2.707129000466829e-06, "logits/chosen": -2.1431760787963867, "logits/rejected": -2.8305623531341553, "logps/chosen": -336.3056945800781, "logps/rejected": -617.27587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.076581954956055, "rewards/margins": 12.965978622436523, "rewards/rejected": -20.042560577392578, "step": 15598 }, { "epoch": 2.43, "learning_rate": 2.706395559935681e-06, "logits/chosen": -1.0926308631896973, "logits/rejected": -2.177597761154175, "logps/chosen": -404.31597900390625, "logps/rejected": -429.8131103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.769840240478516, "rewards/margins": 12.001810073852539, "rewards/rejected": -19.771650314331055, "step": 15599 }, { "epoch": 2.43, "learning_rate": 2.7056621194045333e-06, "logits/chosen": -2.667433500289917, "logits/rejected": -1.7361044883728027, "logps/chosen": -382.5817565917969, "logps/rejected": -334.09271240234375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -5.581007480621338, "rewards/margins": 7.552282333374023, "rewards/rejected": -13.133289337158203, "step": 15600 }, { "epoch": 2.43, "learning_rate": 2.7049286788733856e-06, "logits/chosen": -2.565880298614502, "logits/rejected": -2.257718563079834, "logps/chosen": -514.6697998046875, "logps/rejected": -700.704833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.644532203674316, "rewards/margins": 9.510726928710938, "rewards/rejected": -17.155258178710938, "step": 15601 }, { "epoch": 2.43, "learning_rate": 2.704195238342238e-06, "logits/chosen": -1.7538731098175049, "logits/rejected": -2.6417086124420166, "logps/chosen": -231.4042510986328, "logps/rejected": -321.29388427734375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -9.100260734558105, "rewards/margins": 7.205300331115723, "rewards/rejected": -16.305561065673828, "step": 15602 }, { "epoch": 2.43, "learning_rate": 2.70346179781109e-06, "logits/chosen": -2.8754525184631348, "logits/rejected": -2.1427574157714844, "logps/chosen": -295.6883544921875, "logps/rejected": -218.86453247070312, "loss": 0.3086, "rewards/accuracies": 1.0, "rewards/chosen": -10.617979049682617, "rewards/margins": 1.733879566192627, "rewards/rejected": -12.351859092712402, "step": 15603 }, { "epoch": 2.43, "learning_rate": 2.702728357279942e-06, "logits/chosen": -0.7974675297737122, "logits/rejected": -2.6742465496063232, "logps/chosen": -167.69775390625, "logps/rejected": -661.686279296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.477748870849609, "rewards/margins": 12.950201988220215, "rewards/rejected": -18.42795181274414, "step": 15604 }, { "epoch": 2.43, "learning_rate": 2.701994916748794e-06, "logits/chosen": -2.746829032897949, "logits/rejected": -2.6676323413848877, "logps/chosen": -325.1900634765625, "logps/rejected": -360.1056823730469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.435487747192383, "rewards/margins": 9.574708938598633, "rewards/rejected": -18.010196685791016, "step": 15605 }, { "epoch": 2.43, "learning_rate": 2.7012614762176463e-06, "logits/chosen": -2.1881515979766846, "logits/rejected": -2.5523431301116943, "logps/chosen": -348.847900390625, "logps/rejected": -358.27227783203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.808605194091797, "rewards/margins": 9.484615325927734, "rewards/rejected": -15.293220520019531, "step": 15606 }, { "epoch": 2.43, "learning_rate": 2.700528035686498e-06, "logits/chosen": -2.4398887157440186, "logits/rejected": -2.5807242393493652, "logps/chosen": -224.2164306640625, "logps/rejected": -343.59722900390625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -8.174139022827148, "rewards/margins": 5.603606224060059, "rewards/rejected": -13.777745246887207, "step": 15607 }, { "epoch": 2.43, "learning_rate": 2.6997945951553505e-06, "logits/chosen": -2.676112413406372, "logits/rejected": -2.470585346221924, "logps/chosen": -181.5183563232422, "logps/rejected": -380.85589599609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.215204238891602, "rewards/margins": 10.622314453125, "rewards/rejected": -18.837520599365234, "step": 15608 }, { "epoch": 2.43, "learning_rate": 2.6990611546242023e-06, "logits/chosen": -2.4875805377960205, "logits/rejected": -2.046400308609009, "logps/chosen": -237.9122772216797, "logps/rejected": -234.06597900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.369187116622925, "rewards/margins": 13.067499160766602, "rewards/rejected": -16.436687469482422, "step": 15609 }, { "epoch": 2.43, "learning_rate": 2.6983277140930546e-06, "logits/chosen": -2.469475030899048, "logits/rejected": -1.6790130138397217, "logps/chosen": -403.264404296875, "logps/rejected": -426.101806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.975018501281738, "rewards/margins": 14.35244369506836, "rewards/rejected": -20.327463150024414, "step": 15610 }, { "epoch": 2.43, "learning_rate": 2.697594273561907e-06, "logits/chosen": -1.7414318323135376, "logits/rejected": -2.677107572555542, "logps/chosen": -195.10211181640625, "logps/rejected": -322.263916015625, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": -6.945223808288574, "rewards/margins": 4.690443992614746, "rewards/rejected": -11.63566780090332, "step": 15611 }, { "epoch": 2.43, "learning_rate": 2.696860833030759e-06, "logits/chosen": -2.3992624282836914, "logits/rejected": -2.484861135482788, "logps/chosen": -135.31634521484375, "logps/rejected": -400.07763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.455748081207275, "rewards/margins": 13.335488319396973, "rewards/rejected": -17.791236877441406, "step": 15612 }, { "epoch": 2.43, "learning_rate": 2.696127392499611e-06, "logits/chosen": -2.3566412925720215, "logits/rejected": -0.8656602501869202, "logps/chosen": -488.22442626953125, "logps/rejected": -332.9334716796875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -8.107707977294922, "rewards/margins": 7.374714374542236, "rewards/rejected": -15.482421875, "step": 15613 }, { "epoch": 2.43, "learning_rate": 2.695393951968463e-06, "logits/chosen": -2.5354833602905273, "logits/rejected": -1.9831746816635132, "logps/chosen": -419.5318603515625, "logps/rejected": -279.9471130371094, "loss": 3.9139, "rewards/accuracies": 0.5, "rewards/chosen": -12.698637008666992, "rewards/margins": 0.37322998046875, "rewards/rejected": -13.071866989135742, "step": 15614 }, { "epoch": 2.43, "learning_rate": 2.6946605114373153e-06, "logits/chosen": -2.6653554439544678, "logits/rejected": -2.3658461570739746, "logps/chosen": -472.3190612792969, "logps/rejected": -638.3455810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.94582462310791, "rewards/margins": 9.552984237670898, "rewards/rejected": -16.498809814453125, "step": 15615 }, { "epoch": 2.43, "learning_rate": 2.693927070906167e-06, "logits/chosen": -2.641435384750366, "logits/rejected": -2.7725982666015625, "logps/chosen": -132.6559600830078, "logps/rejected": -263.93414306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.2745041847229, "rewards/margins": 9.53725814819336, "rewards/rejected": -15.811762809753418, "step": 15616 }, { "epoch": 2.43, "learning_rate": 2.6931936303750195e-06, "logits/chosen": -1.0439581871032715, "logits/rejected": -2.0630640983581543, "logps/chosen": -260.7965087890625, "logps/rejected": -488.07421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.087923049926758, "rewards/margins": 8.782180786132812, "rewards/rejected": -15.87010383605957, "step": 15617 }, { "epoch": 2.43, "learning_rate": 2.6924601898438718e-06, "logits/chosen": -1.9566274881362915, "logits/rejected": -2.659302234649658, "logps/chosen": -147.66262817382812, "logps/rejected": -338.0423583984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.510473251342773, "rewards/margins": 9.60934066772461, "rewards/rejected": -15.119813919067383, "step": 15618 }, { "epoch": 2.43, "learning_rate": 2.6917267493127237e-06, "logits/chosen": -1.248286247253418, "logits/rejected": -2.777048110961914, "logps/chosen": -89.389404296875, "logps/rejected": -557.277099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.047192096710205, "rewards/margins": 14.064229011535645, "rewards/rejected": -20.111421585083008, "step": 15619 }, { "epoch": 2.43, "learning_rate": 2.690993308781576e-06, "logits/chosen": -1.7357177734375, "logits/rejected": -3.0039260387420654, "logps/chosen": -163.82608032226562, "logps/rejected": -376.1824951171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.234503269195557, "rewards/margins": 10.318937301635742, "rewards/rejected": -17.55344009399414, "step": 15620 }, { "epoch": 2.43, "learning_rate": 2.690259868250428e-06, "logits/chosen": -2.6634538173675537, "logits/rejected": -2.410672426223755, "logps/chosen": -290.83953857421875, "logps/rejected": -580.2054443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.232178688049316, "rewards/margins": 13.082443237304688, "rewards/rejected": -23.314620971679688, "step": 15621 }, { "epoch": 2.43, "learning_rate": 2.68952642771928e-06, "logits/chosen": -2.092099189758301, "logits/rejected": -2.4633448123931885, "logps/chosen": -265.570556640625, "logps/rejected": -348.6515808105469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3820720911026, "rewards/margins": 11.20207691192627, "rewards/rejected": -12.584148406982422, "step": 15622 }, { "epoch": 2.43, "learning_rate": 2.688792987188132e-06, "logits/chosen": -1.4239223003387451, "logits/rejected": -2.347325563430786, "logps/chosen": -98.1068115234375, "logps/rejected": -335.460693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.233134746551514, "rewards/margins": 13.88713264465332, "rewards/rejected": -21.120267868041992, "step": 15623 }, { "epoch": 2.43, "learning_rate": 2.6880595466569843e-06, "logits/chosen": -3.0609984397888184, "logits/rejected": -2.5664827823638916, "logps/chosen": -455.12176513671875, "logps/rejected": -364.4420166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.291563987731934, "rewards/margins": 12.148885726928711, "rewards/rejected": -22.44045066833496, "step": 15624 }, { "epoch": 2.43, "learning_rate": 2.687326106125836e-06, "logits/chosen": -1.6065200567245483, "logits/rejected": -2.4772307872772217, "logps/chosen": -192.64242553710938, "logps/rejected": -296.1069030761719, "loss": 0.0924, "rewards/accuracies": 1.0, "rewards/chosen": -8.835855484008789, "rewards/margins": 4.001169681549072, "rewards/rejected": -12.83702564239502, "step": 15625 }, { "epoch": 2.43, "learning_rate": 2.6865926655946885e-06, "logits/chosen": -2.1928045749664307, "logits/rejected": -2.842125654220581, "logps/chosen": -300.3174133300781, "logps/rejected": -491.23297119140625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.418241500854492, "rewards/margins": 10.727115631103516, "rewards/rejected": -19.145357131958008, "step": 15626 }, { "epoch": 2.43, "learning_rate": 2.685859225063541e-06, "logits/chosen": -2.2484610080718994, "logits/rejected": -2.4260141849517822, "logps/chosen": -171.45700073242188, "logps/rejected": -324.5538330078125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.293951511383057, "rewards/margins": 8.91006088256836, "rewards/rejected": -15.204012870788574, "step": 15627 }, { "epoch": 2.43, "learning_rate": 2.685125784532393e-06, "logits/chosen": -2.5815820693969727, "logits/rejected": -2.739957571029663, "logps/chosen": -193.56869506835938, "logps/rejected": -346.75537109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.28677225112915, "rewards/margins": 9.7260160446167, "rewards/rejected": -16.012788772583008, "step": 15628 }, { "epoch": 2.43, "learning_rate": 2.684392344001245e-06, "logits/chosen": -1.6022831201553345, "logits/rejected": -2.3176944255828857, "logps/chosen": -235.8596649169922, "logps/rejected": -668.179443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.929759979248047, "rewards/margins": 12.303674697875977, "rewards/rejected": -23.23343276977539, "step": 15629 }, { "epoch": 2.43, "learning_rate": 2.683658903470097e-06, "logits/chosen": -2.7530300617218018, "logits/rejected": -2.858630895614624, "logps/chosen": -404.7057800292969, "logps/rejected": -402.2226257324219, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.208317756652832, "rewards/margins": 7.406615257263184, "rewards/rejected": -12.614933013916016, "step": 15630 }, { "epoch": 2.43, "learning_rate": 2.682925462938949e-06, "logits/chosen": -1.5411709547042847, "logits/rejected": -2.7358179092407227, "logps/chosen": -127.44239044189453, "logps/rejected": -486.1591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.991077423095703, "rewards/margins": 15.23269271850586, "rewards/rejected": -20.223770141601562, "step": 15631 }, { "epoch": 2.43, "learning_rate": 2.682192022407801e-06, "logits/chosen": -2.7849254608154297, "logits/rejected": -2.857591152191162, "logps/chosen": -186.5059814453125, "logps/rejected": -220.48397827148438, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -6.111823081970215, "rewards/margins": 5.248213768005371, "rewards/rejected": -11.360036849975586, "step": 15632 }, { "epoch": 2.43, "learning_rate": 2.6814585818766533e-06, "logits/chosen": -1.2524127960205078, "logits/rejected": -2.297154426574707, "logps/chosen": -190.71038818359375, "logps/rejected": -536.9092407226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.892605781555176, "rewards/margins": 11.918152809143066, "rewards/rejected": -19.810758590698242, "step": 15633 }, { "epoch": 2.43, "learning_rate": 2.6807251413455052e-06, "logits/chosen": -2.6134257316589355, "logits/rejected": -1.943389654159546, "logps/chosen": -245.49440002441406, "logps/rejected": -152.38345336914062, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -7.501308441162109, "rewards/margins": 4.765704154968262, "rewards/rejected": -12.267013549804688, "step": 15634 }, { "epoch": 2.43, "learning_rate": 2.679991700814358e-06, "logits/chosen": -2.620461940765381, "logits/rejected": -2.664354085922241, "logps/chosen": -201.89263916015625, "logps/rejected": -249.19898986816406, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.632404804229736, "rewards/margins": 6.251528263092041, "rewards/rejected": -10.883933067321777, "step": 15635 }, { "epoch": 2.43, "learning_rate": 2.67925826028321e-06, "logits/chosen": -1.005496859550476, "logits/rejected": -1.8762967586517334, "logps/chosen": -296.765625, "logps/rejected": -544.7306518554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.22019100189209, "rewards/margins": 16.62754249572754, "rewards/rejected": -23.847732543945312, "step": 15636 }, { "epoch": 2.43, "learning_rate": 2.678524819752062e-06, "logits/chosen": -1.1072733402252197, "logits/rejected": -2.535078525543213, "logps/chosen": -117.49895477294922, "logps/rejected": -296.1869201660156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.01399564743042, "rewards/margins": 7.382474899291992, "rewards/rejected": -14.39647102355957, "step": 15637 }, { "epoch": 2.43, "learning_rate": 2.677791379220914e-06, "logits/chosen": -0.7864423394203186, "logits/rejected": -2.6108438968658447, "logps/chosen": -232.97390747070312, "logps/rejected": -725.97998046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.210544586181641, "rewards/margins": 8.375078201293945, "rewards/rejected": -15.585622787475586, "step": 15638 }, { "epoch": 2.43, "learning_rate": 2.677057938689766e-06, "logits/chosen": -2.5305705070495605, "logits/rejected": -2.5979933738708496, "logps/chosen": -443.9753723144531, "logps/rejected": -676.39306640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.938174247741699, "rewards/margins": 9.162341117858887, "rewards/rejected": -16.100515365600586, "step": 15639 }, { "epoch": 2.43, "learning_rate": 2.676324498158618e-06, "logits/chosen": -2.411186456680298, "logits/rejected": -2.7940282821655273, "logps/chosen": -475.3768310546875, "logps/rejected": -719.9822387695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.838936805725098, "rewards/margins": 10.975995063781738, "rewards/rejected": -19.814931869506836, "step": 15640 }, { "epoch": 2.43, "learning_rate": 2.67559105762747e-06, "logits/chosen": -2.9605462551116943, "logits/rejected": -2.694061517715454, "logps/chosen": -156.64431762695312, "logps/rejected": -205.23922729492188, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -4.526455402374268, "rewards/margins": 8.16478443145752, "rewards/rejected": -12.691240310668945, "step": 15641 }, { "epoch": 2.43, "learning_rate": 2.6748576170963224e-06, "logits/chosen": -2.037025213241577, "logits/rejected": -2.8039233684539795, "logps/chosen": -238.95053100585938, "logps/rejected": -577.0882568359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.478405475616455, "rewards/margins": 8.264036178588867, "rewards/rejected": -15.742441177368164, "step": 15642 }, { "epoch": 2.43, "learning_rate": 2.6741241765651747e-06, "logits/chosen": -2.0295979976654053, "logits/rejected": -2.6419224739074707, "logps/chosen": -590.0439453125, "logps/rejected": -376.0210876464844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.995322227478027, "rewards/margins": 8.28920841217041, "rewards/rejected": -14.284530639648438, "step": 15643 }, { "epoch": 2.43, "learning_rate": 2.673390736034027e-06, "logits/chosen": -2.7011523246765137, "logits/rejected": -2.5824315547943115, "logps/chosen": -826.6246337890625, "logps/rejected": -673.6119384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.325613021850586, "rewards/margins": 12.049976348876953, "rewards/rejected": -17.37558937072754, "step": 15644 }, { "epoch": 2.43, "learning_rate": 2.672657295502879e-06, "logits/chosen": -1.7029118537902832, "logits/rejected": -2.7332651615142822, "logps/chosen": -405.0136413574219, "logps/rejected": -557.41357421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.039798736572266, "rewards/margins": 8.653512954711914, "rewards/rejected": -13.69331169128418, "step": 15645 }, { "epoch": 2.43, "learning_rate": 2.671923854971731e-06, "logits/chosen": -1.7687591314315796, "logits/rejected": -2.523926258087158, "logps/chosen": -280.22222900390625, "logps/rejected": -429.0909423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.2393646240234375, "rewards/margins": 12.089605331420898, "rewards/rejected": -18.328969955444336, "step": 15646 }, { "epoch": 2.43, "learning_rate": 2.671190414440583e-06, "logits/chosen": -1.527213215827942, "logits/rejected": -2.566366672515869, "logps/chosen": -105.619140625, "logps/rejected": -426.5560302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.89219331741333, "rewards/margins": 13.382485389709473, "rewards/rejected": -16.27467918395996, "step": 15647 }, { "epoch": 2.43, "learning_rate": 2.670456973909435e-06, "logits/chosen": -2.291836738586426, "logits/rejected": -2.7528879642486572, "logps/chosen": -143.0452880859375, "logps/rejected": -354.0218505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.522821426391602, "rewards/margins": 11.364578247070312, "rewards/rejected": -17.887399673461914, "step": 15648 }, { "epoch": 2.43, "learning_rate": 2.669723533378287e-06, "logits/chosen": -3.0027852058410645, "logits/rejected": -2.908005714416504, "logps/chosen": -471.48974609375, "logps/rejected": -409.35711669921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.686779022216797, "rewards/margins": 8.052937507629395, "rewards/rejected": -12.739716529846191, "step": 15649 }, { "epoch": 2.43, "learning_rate": 2.668990092847139e-06, "logits/chosen": -1.997754454612732, "logits/rejected": -1.1348763704299927, "logps/chosen": -237.04173278808594, "logps/rejected": -221.59129333496094, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.350522518157959, "rewards/margins": 6.398675441741943, "rewards/rejected": -12.749197959899902, "step": 15650 }, { "epoch": 2.43, "learning_rate": 2.6682566523159914e-06, "logits/chosen": -0.7098926901817322, "logits/rejected": -1.839465618133545, "logps/chosen": -219.2389678955078, "logps/rejected": -430.73004150390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.96821403503418, "rewards/margins": 11.839588165283203, "rewards/rejected": -16.80780029296875, "step": 15651 }, { "epoch": 2.43, "learning_rate": 2.6675232117848437e-06, "logits/chosen": -0.8904545903205872, "logits/rejected": -1.3118361234664917, "logps/chosen": -141.98443603515625, "logps/rejected": -365.49737548828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.154526710510254, "rewards/margins": 10.021158218383789, "rewards/rejected": -15.17568588256836, "step": 15652 }, { "epoch": 2.43, "learning_rate": 2.666789771253696e-06, "logits/chosen": -1.7547272443771362, "logits/rejected": -2.7133753299713135, "logps/chosen": -166.46914672851562, "logps/rejected": -489.8670654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.362704277038574, "rewards/margins": 14.790376663208008, "rewards/rejected": -21.1530818939209, "step": 15653 }, { "epoch": 2.43, "learning_rate": 2.666056330722548e-06, "logits/chosen": -2.148622989654541, "logits/rejected": -2.8840222358703613, "logps/chosen": -251.72552490234375, "logps/rejected": -411.0970153808594, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.678623199462891, "rewards/margins": 9.24344253540039, "rewards/rejected": -13.922065734863281, "step": 15654 }, { "epoch": 2.43, "learning_rate": 2.6653228901914e-06, "logits/chosen": -2.0650691986083984, "logits/rejected": -2.449279308319092, "logps/chosen": -108.42925262451172, "logps/rejected": -240.89810180664062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.6211652755737305, "rewards/margins": 8.53781795501709, "rewards/rejected": -15.15898323059082, "step": 15655 }, { "epoch": 2.43, "learning_rate": 2.664589449660252e-06, "logits/chosen": -2.4148170948028564, "logits/rejected": -2.852670192718506, "logps/chosen": -205.77467346191406, "logps/rejected": -382.8150329589844, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -8.9827241897583, "rewards/margins": 3.7513582706451416, "rewards/rejected": -12.734082221984863, "step": 15656 }, { "epoch": 2.43, "learning_rate": 2.6638560091291043e-06, "logits/chosen": -2.163296937942505, "logits/rejected": -2.5713703632354736, "logps/chosen": -386.2784729003906, "logps/rejected": -372.3356018066406, "loss": 2.8696, "rewards/accuracies": 0.5, "rewards/chosen": -7.716416835784912, "rewards/margins": 4.942357063293457, "rewards/rejected": -12.658773422241211, "step": 15657 }, { "epoch": 2.44, "learning_rate": 2.6631225685979562e-06, "logits/chosen": -2.326610803604126, "logits/rejected": -1.8700357675552368, "logps/chosen": -269.9105224609375, "logps/rejected": -370.7839660644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.406764030456543, "rewards/margins": 10.866674423217773, "rewards/rejected": -19.2734375, "step": 15658 }, { "epoch": 2.44, "learning_rate": 2.662389128066808e-06, "logits/chosen": -2.289938449859619, "logits/rejected": -2.442425489425659, "logps/chosen": -592.020263671875, "logps/rejected": -589.0701293945312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.082089424133301, "rewards/margins": 12.719009399414062, "rewards/rejected": -18.801097869873047, "step": 15659 }, { "epoch": 2.44, "learning_rate": 2.661655687535661e-06, "logits/chosen": -2.671074867248535, "logits/rejected": -2.1474082469940186, "logps/chosen": -894.6983032226562, "logps/rejected": -766.421630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.744383811950684, "rewards/margins": 13.330129623413086, "rewards/rejected": -22.074514389038086, "step": 15660 }, { "epoch": 2.44, "learning_rate": 2.6609222470045127e-06, "logits/chosen": -3.0066027641296387, "logits/rejected": -3.034803628921509, "logps/chosen": -209.7247772216797, "logps/rejected": -255.61038208007812, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -9.863544464111328, "rewards/margins": 7.013297080993652, "rewards/rejected": -16.876840591430664, "step": 15661 }, { "epoch": 2.44, "learning_rate": 2.660188806473365e-06, "logits/chosen": -2.4652695655822754, "logits/rejected": -2.6049695014953613, "logps/chosen": -183.84246826171875, "logps/rejected": -431.4321594238281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.032236099243164, "rewards/margins": 8.972246170043945, "rewards/rejected": -15.00448226928711, "step": 15662 }, { "epoch": 2.44, "learning_rate": 2.659455365942217e-06, "logits/chosen": -1.673546314239502, "logits/rejected": -2.5351970195770264, "logps/chosen": -335.2130126953125, "logps/rejected": -609.3338012695312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.193728446960449, "rewards/margins": 11.247783660888672, "rewards/rejected": -17.441511154174805, "step": 15663 }, { "epoch": 2.44, "learning_rate": 2.658721925411069e-06, "logits/chosen": -1.9506455659866333, "logits/rejected": -2.7097315788269043, "logps/chosen": -106.24261474609375, "logps/rejected": -339.8990478515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.232926368713379, "rewards/margins": 9.770098686218262, "rewards/rejected": -16.00302505493164, "step": 15664 }, { "epoch": 2.44, "learning_rate": 2.657988484879921e-06, "logits/chosen": -1.940367579460144, "logits/rejected": -3.036548137664795, "logps/chosen": -232.20401000976562, "logps/rejected": -629.7864990234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.653348922729492, "rewards/margins": 11.255087852478027, "rewards/rejected": -15.90843677520752, "step": 15665 }, { "epoch": 2.44, "learning_rate": 2.6572550443487734e-06, "logits/chosen": -2.0727078914642334, "logits/rejected": -2.508500814437866, "logps/chosen": -146.26107788085938, "logps/rejected": -341.34259033203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.460012435913086, "rewards/margins": 9.486666679382324, "rewards/rejected": -15.94667911529541, "step": 15666 }, { "epoch": 2.44, "learning_rate": 2.6565216038176252e-06, "logits/chosen": -2.3075413703918457, "logits/rejected": -2.7575490474700928, "logps/chosen": -111.60682678222656, "logps/rejected": -287.77886962890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.364548683166504, "rewards/margins": 7.21612024307251, "rewards/rejected": -14.580669403076172, "step": 15667 }, { "epoch": 2.44, "learning_rate": 2.6557881632864775e-06, "logits/chosen": -1.316227912902832, "logits/rejected": -2.5618937015533447, "logps/chosen": -196.9309539794922, "logps/rejected": -390.2822570800781, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -9.157952308654785, "rewards/margins": 7.77033805847168, "rewards/rejected": -16.92829132080078, "step": 15668 }, { "epoch": 2.44, "learning_rate": 2.65505472275533e-06, "logits/chosen": -2.019000291824341, "logits/rejected": -2.6617653369903564, "logps/chosen": -161.98768615722656, "logps/rejected": -562.7952880859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.042519569396973, "rewards/margins": 13.670068740844727, "rewards/rejected": -20.712587356567383, "step": 15669 }, { "epoch": 2.44, "learning_rate": 2.6543212822241817e-06, "logits/chosen": -2.0488181114196777, "logits/rejected": -2.5307328701019287, "logps/chosen": -91.40031433105469, "logps/rejected": -282.9585266113281, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.854306221008301, "rewards/margins": 9.760273933410645, "rewards/rejected": -16.614580154418945, "step": 15670 }, { "epoch": 2.44, "learning_rate": 2.653587841693034e-06, "logits/chosen": -2.328449249267578, "logits/rejected": -2.618394136428833, "logps/chosen": -798.51708984375, "logps/rejected": -890.354736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.846685409545898, "rewards/margins": 11.77558708190918, "rewards/rejected": -22.622272491455078, "step": 15671 }, { "epoch": 2.44, "learning_rate": 2.652854401161886e-06, "logits/chosen": -2.5742218494415283, "logits/rejected": -2.6963632106781006, "logps/chosen": -142.8760986328125, "logps/rejected": -287.0462646484375, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -8.362096786499023, "rewards/margins": 6.430598258972168, "rewards/rejected": -14.792695045471191, "step": 15672 }, { "epoch": 2.44, "learning_rate": 2.652120960630738e-06, "logits/chosen": -2.6973748207092285, "logits/rejected": -2.731170654296875, "logps/chosen": -425.37451171875, "logps/rejected": -467.652099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.678112983703613, "rewards/margins": 13.758543968200684, "rewards/rejected": -23.436656951904297, "step": 15673 }, { "epoch": 2.44, "learning_rate": 2.65138752009959e-06, "logits/chosen": -1.9096283912658691, "logits/rejected": -2.6606698036193848, "logps/chosen": -279.0455322265625, "logps/rejected": -525.1602172851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.645727157592773, "rewards/margins": 11.738138198852539, "rewards/rejected": -20.383865356445312, "step": 15674 }, { "epoch": 2.44, "learning_rate": 2.6506540795684424e-06, "logits/chosen": -1.5013256072998047, "logits/rejected": -2.498204231262207, "logps/chosen": -128.70449829101562, "logps/rejected": -705.3949584960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.581467151641846, "rewards/margins": 15.470564842224121, "rewards/rejected": -21.052032470703125, "step": 15675 }, { "epoch": 2.44, "learning_rate": 2.6499206390372943e-06, "logits/chosen": -1.2536637783050537, "logits/rejected": -2.6185646057128906, "logps/chosen": -128.96121215820312, "logps/rejected": -450.4788818359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.497486114501953, "rewards/margins": 9.320262908935547, "rewards/rejected": -16.8177490234375, "step": 15676 }, { "epoch": 2.44, "learning_rate": 2.649187198506147e-06, "logits/chosen": -2.5727598667144775, "logits/rejected": -2.8755648136138916, "logps/chosen": -161.55795288085938, "logps/rejected": -242.02767944335938, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/chosen": -8.683357238769531, "rewards/margins": 5.173660755157471, "rewards/rejected": -13.857017517089844, "step": 15677 }, { "epoch": 2.44, "learning_rate": 2.648453757974999e-06, "logits/chosen": -2.74296498298645, "logits/rejected": -2.1387221813201904, "logps/chosen": -812.07568359375, "logps/rejected": -545.98193359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2245025634765625, "rewards/margins": 12.153757095336914, "rewards/rejected": -19.378259658813477, "step": 15678 }, { "epoch": 2.44, "learning_rate": 2.6477203174438507e-06, "logits/chosen": -2.327469825744629, "logits/rejected": -1.456419825553894, "logps/chosen": -292.01519775390625, "logps/rejected": -237.76431274414062, "loss": 0.3778, "rewards/accuracies": 0.5, "rewards/chosen": -6.616060733795166, "rewards/margins": 3.799917221069336, "rewards/rejected": -10.415977478027344, "step": 15679 }, { "epoch": 2.44, "learning_rate": 2.646986876912703e-06, "logits/chosen": -1.4853949546813965, "logits/rejected": -1.9700289964675903, "logps/chosen": -138.72042846679688, "logps/rejected": -366.6953430175781, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.1613850593566895, "rewards/margins": 9.575265884399414, "rewards/rejected": -15.736650466918945, "step": 15680 }, { "epoch": 2.44, "learning_rate": 2.646253436381555e-06, "logits/chosen": -1.7389602661132812, "logits/rejected": -2.7228305339813232, "logps/chosen": -240.5107421875, "logps/rejected": -478.80389404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.18471622467041, "rewards/margins": 13.389451026916504, "rewards/rejected": -22.574167251586914, "step": 15681 }, { "epoch": 2.44, "learning_rate": 2.6455199958504072e-06, "logits/chosen": -1.8574291467666626, "logits/rejected": -2.574141263961792, "logps/chosen": -206.2812042236328, "logps/rejected": -416.2116394042969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.280448913574219, "rewards/margins": 8.662519454956055, "rewards/rejected": -16.942968368530273, "step": 15682 }, { "epoch": 2.44, "learning_rate": 2.644786555319259e-06, "logits/chosen": -1.826154112815857, "logits/rejected": -2.3486850261688232, "logps/chosen": -138.90878295898438, "logps/rejected": -362.0638427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.060950756072998, "rewards/margins": 12.972772598266602, "rewards/rejected": -18.033723831176758, "step": 15683 }, { "epoch": 2.44, "learning_rate": 2.6440531147881114e-06, "logits/chosen": -1.473118543624878, "logits/rejected": -2.470745086669922, "logps/chosen": -204.62249755859375, "logps/rejected": -433.96405029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.491825103759766, "rewards/margins": 11.214364051818848, "rewards/rejected": -17.706188201904297, "step": 15684 }, { "epoch": 2.44, "learning_rate": 2.6433196742569637e-06, "logits/chosen": -2.5715649127960205, "logits/rejected": -2.6622931957244873, "logps/chosen": -139.37144470214844, "logps/rejected": -212.13299560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0356903076171875, "rewards/margins": 10.090530395507812, "rewards/rejected": -14.126220703125, "step": 15685 }, { "epoch": 2.44, "learning_rate": 2.642586233725816e-06, "logits/chosen": -2.4662108421325684, "logits/rejected": -2.8419885635375977, "logps/chosen": -488.4304504394531, "logps/rejected": -478.32794189453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.341154098510742, "rewards/margins": 6.8645477294921875, "rewards/rejected": -15.20570182800293, "step": 15686 }, { "epoch": 2.44, "learning_rate": 2.641852793194668e-06, "logits/chosen": -2.938143253326416, "logits/rejected": -3.003692865371704, "logps/chosen": -132.99786376953125, "logps/rejected": -332.7833251953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.544422149658203, "rewards/margins": 9.869565963745117, "rewards/rejected": -16.413986206054688, "step": 15687 }, { "epoch": 2.44, "learning_rate": 2.6411193526635198e-06, "logits/chosen": -2.5484206676483154, "logits/rejected": -2.604597806930542, "logps/chosen": -237.33175659179688, "logps/rejected": -287.8870849609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.117289543151855, "rewards/margins": 8.768533706665039, "rewards/rejected": -16.885822296142578, "step": 15688 }, { "epoch": 2.44, "learning_rate": 2.640385912132372e-06, "logits/chosen": -2.670219659805298, "logits/rejected": -2.6480319499969482, "logps/chosen": -267.14483642578125, "logps/rejected": -210.99244689941406, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.036379337310791, "rewards/margins": 7.340514659881592, "rewards/rejected": -12.376893997192383, "step": 15689 }, { "epoch": 2.44, "learning_rate": 2.639652471601224e-06, "logits/chosen": -1.0519001483917236, "logits/rejected": -1.4778012037277222, "logps/chosen": -267.7526550292969, "logps/rejected": -371.53173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.940678596496582, "rewards/margins": 13.795778274536133, "rewards/rejected": -18.73645782470703, "step": 15690 }, { "epoch": 2.44, "learning_rate": 2.6389190310700763e-06, "logits/chosen": -1.888705849647522, "logits/rejected": -2.5652220249176025, "logps/chosen": -119.2083740234375, "logps/rejected": -444.32415771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.929659843444824, "rewards/margins": 15.426904678344727, "rewards/rejected": -21.356565475463867, "step": 15691 }, { "epoch": 2.44, "learning_rate": 2.638185590538928e-06, "logits/chosen": -1.5928268432617188, "logits/rejected": -2.50128436088562, "logps/chosen": -388.8808288574219, "logps/rejected": -557.4735717773438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.111562252044678, "rewards/margins": 11.809032440185547, "rewards/rejected": -17.92059326171875, "step": 15692 }, { "epoch": 2.44, "learning_rate": 2.6374521500077804e-06, "logits/chosen": -2.596376657485962, "logits/rejected": -1.0024415254592896, "logps/chosen": -366.12689208984375, "logps/rejected": -342.6943664550781, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -7.010939121246338, "rewards/margins": 8.424407958984375, "rewards/rejected": -15.435346603393555, "step": 15693 }, { "epoch": 2.44, "learning_rate": 2.6367187094766327e-06, "logits/chosen": -2.397148370742798, "logits/rejected": -2.6213483810424805, "logps/chosen": -139.1676025390625, "logps/rejected": -394.56976318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.281808853149414, "rewards/margins": 13.321656227111816, "rewards/rejected": -22.603466033935547, "step": 15694 }, { "epoch": 2.44, "learning_rate": 2.635985268945485e-06, "logits/chosen": -1.8229166269302368, "logits/rejected": -2.6964468955993652, "logps/chosen": -67.60777282714844, "logps/rejected": -408.5647888183594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.442249298095703, "rewards/margins": 14.362842559814453, "rewards/rejected": -19.805091857910156, "step": 15695 }, { "epoch": 2.44, "learning_rate": 2.635251828414337e-06, "logits/chosen": -2.7083959579467773, "logits/rejected": -3.014882802963257, "logps/chosen": -212.66140747070312, "logps/rejected": -296.7434387207031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.741309404373169, "rewards/margins": 11.667698860168457, "rewards/rejected": -15.409008026123047, "step": 15696 }, { "epoch": 2.44, "learning_rate": 2.634518387883189e-06, "logits/chosen": -2.634673833847046, "logits/rejected": -2.720198631286621, "logps/chosen": -100.27125549316406, "logps/rejected": -314.21856689453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.689772605895996, "rewards/margins": 10.063675880432129, "rewards/rejected": -15.753448486328125, "step": 15697 }, { "epoch": 2.44, "learning_rate": 2.633784947352041e-06, "logits/chosen": -2.91706919670105, "logits/rejected": -3.0637946128845215, "logps/chosen": -124.75094604492188, "logps/rejected": -208.6629638671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.211172103881836, "rewards/margins": 9.794041633605957, "rewards/rejected": -14.00521469116211, "step": 15698 }, { "epoch": 2.44, "learning_rate": 2.633051506820893e-06, "logits/chosen": -2.838156223297119, "logits/rejected": -2.3515517711639404, "logps/chosen": -894.4970703125, "logps/rejected": -620.95849609375, "loss": 0.0684, "rewards/accuracies": 1.0, "rewards/chosen": -4.1805524826049805, "rewards/margins": 12.894771575927734, "rewards/rejected": -17.07532501220703, "step": 15699 }, { "epoch": 2.44, "learning_rate": 2.6323180662897453e-06, "logits/chosen": -2.489063262939453, "logits/rejected": -2.8418495655059814, "logps/chosen": -192.79296875, "logps/rejected": -405.53289794921875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -6.1620330810546875, "rewards/margins": 10.19672966003418, "rewards/rejected": -16.358762741088867, "step": 15700 }, { "epoch": 2.44, "learning_rate": 2.631584625758597e-06, "logits/chosen": -1.034762978553772, "logits/rejected": -2.3590989112854004, "logps/chosen": -96.77973175048828, "logps/rejected": -330.6999206542969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.64786958694458, "rewards/margins": 11.334985733032227, "rewards/rejected": -16.98285675048828, "step": 15701 }, { "epoch": 2.44, "learning_rate": 2.63085118522745e-06, "logits/chosen": -2.7115087509155273, "logits/rejected": -2.709136962890625, "logps/chosen": -244.7218780517578, "logps/rejected": -393.18109130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7074480056762695, "rewards/margins": 10.789684295654297, "rewards/rejected": -13.49713134765625, "step": 15702 }, { "epoch": 2.44, "learning_rate": 2.6301177446963018e-06, "logits/chosen": -1.091282844543457, "logits/rejected": -2.3266358375549316, "logps/chosen": -96.67193603515625, "logps/rejected": -334.8207092285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.902843475341797, "rewards/margins": 11.597491264343262, "rewards/rejected": -19.500335693359375, "step": 15703 }, { "epoch": 2.44, "learning_rate": 2.629384304165154e-06, "logits/chosen": -1.3179017305374146, "logits/rejected": -2.0200233459472656, "logps/chosen": -357.69464111328125, "logps/rejected": -537.886474609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.749439239501953, "rewards/margins": 9.096769332885742, "rewards/rejected": -20.846208572387695, "step": 15704 }, { "epoch": 2.44, "learning_rate": 2.628650863634006e-06, "logits/chosen": -2.7296810150146484, "logits/rejected": -2.940509080886841, "logps/chosen": -172.46299743652344, "logps/rejected": -433.388916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.75486946105957, "rewards/margins": 13.211845397949219, "rewards/rejected": -17.96671485900879, "step": 15705 }, { "epoch": 2.44, "learning_rate": 2.6279174231028582e-06, "logits/chosen": -2.845829486846924, "logits/rejected": -2.924494743347168, "logps/chosen": -434.2982177734375, "logps/rejected": -398.378173828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.825026512145996, "rewards/margins": 9.381790161132812, "rewards/rejected": -17.206817626953125, "step": 15706 }, { "epoch": 2.44, "learning_rate": 2.62718398257171e-06, "logits/chosen": -2.9613964557647705, "logits/rejected": -2.775047779083252, "logps/chosen": -620.825927734375, "logps/rejected": -433.783447265625, "loss": 0.1132, "rewards/accuracies": 1.0, "rewards/chosen": -9.456645965576172, "rewards/margins": 5.223196506500244, "rewards/rejected": -14.679841995239258, "step": 15707 }, { "epoch": 2.44, "learning_rate": 2.626450542040562e-06, "logits/chosen": -2.4809646606445312, "logits/rejected": -2.583004951477051, "logps/chosen": -297.5533752441406, "logps/rejected": -378.0062255859375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -9.03770923614502, "rewards/margins": 6.640949249267578, "rewards/rejected": -15.678658485412598, "step": 15708 }, { "epoch": 2.44, "learning_rate": 2.6257171015094143e-06, "logits/chosen": -1.3292455673217773, "logits/rejected": -2.6792829036712646, "logps/chosen": -136.42051696777344, "logps/rejected": -575.05712890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.5373358726501465, "rewards/margins": 17.26479721069336, "rewards/rejected": -21.80213165283203, "step": 15709 }, { "epoch": 2.44, "learning_rate": 2.6249836609782666e-06, "logits/chosen": -1.7362322807312012, "logits/rejected": -2.338223934173584, "logps/chosen": -156.77662658691406, "logps/rejected": -484.90301513671875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.590864181518555, "rewards/margins": 14.702783584594727, "rewards/rejected": -21.29364776611328, "step": 15710 }, { "epoch": 2.44, "learning_rate": 2.624250220447119e-06, "logits/chosen": -2.5435760021209717, "logits/rejected": -2.9062397480010986, "logps/chosen": -89.70863342285156, "logps/rejected": -442.35107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.889426231384277, "rewards/margins": 14.84138298034668, "rewards/rejected": -19.73080825805664, "step": 15711 }, { "epoch": 2.44, "learning_rate": 2.6235167799159708e-06, "logits/chosen": -1.4679155349731445, "logits/rejected": -2.180058240890503, "logps/chosen": -154.83111572265625, "logps/rejected": -451.49530029296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.403074264526367, "rewards/margins": 15.338485717773438, "rewards/rejected": -22.741559982299805, "step": 15712 }, { "epoch": 2.44, "learning_rate": 2.622783339384823e-06, "logits/chosen": -2.778611421585083, "logits/rejected": -2.2600674629211426, "logps/chosen": -385.5851135253906, "logps/rejected": -559.14208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.761038780212402, "rewards/margins": 10.846291542053223, "rewards/rejected": -17.607330322265625, "step": 15713 }, { "epoch": 2.44, "learning_rate": 2.622049898853675e-06, "logits/chosen": -1.5326428413391113, "logits/rejected": -2.2227864265441895, "logps/chosen": -139.40298461914062, "logps/rejected": -399.2057189941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.523638725280762, "rewards/margins": 10.421167373657227, "rewards/rejected": -15.944807052612305, "step": 15714 }, { "epoch": 2.44, "learning_rate": 2.6213164583225273e-06, "logits/chosen": -0.8751150965690613, "logits/rejected": -2.589808940887451, "logps/chosen": -237.36947631835938, "logps/rejected": -530.3171997070312, "loss": 0.5709, "rewards/accuracies": 0.5, "rewards/chosen": -9.548691749572754, "rewards/margins": 8.904285430908203, "rewards/rejected": -18.452978134155273, "step": 15715 }, { "epoch": 2.44, "learning_rate": 2.620583017791379e-06, "logits/chosen": -2.6318020820617676, "logits/rejected": -2.477966547012329, "logps/chosen": -128.0448455810547, "logps/rejected": -386.951904296875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.669394493103027, "rewards/margins": 8.374258041381836, "rewards/rejected": -16.043651580810547, "step": 15716 }, { "epoch": 2.44, "learning_rate": 2.619849577260231e-06, "logits/chosen": -2.847041130065918, "logits/rejected": -2.7358386516571045, "logps/chosen": -269.25238037109375, "logps/rejected": -340.03118896484375, "loss": 0.6472, "rewards/accuracies": 0.5, "rewards/chosen": -10.152155876159668, "rewards/margins": 4.918330192565918, "rewards/rejected": -15.070486068725586, "step": 15717 }, { "epoch": 2.44, "learning_rate": 2.6191161367290833e-06, "logits/chosen": -2.64699387550354, "logits/rejected": -1.2745225429534912, "logps/chosen": -219.7899169921875, "logps/rejected": -172.55499267578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -3.679985523223877, "rewards/margins": 7.580981254577637, "rewards/rejected": -11.260967254638672, "step": 15718 }, { "epoch": 2.44, "learning_rate": 2.6183826961979356e-06, "logits/chosen": -2.0793817043304443, "logits/rejected": -3.0845584869384766, "logps/chosen": -181.81333923339844, "logps/rejected": -446.04510498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.105401992797852, "rewards/margins": 12.308982849121094, "rewards/rejected": -18.414386749267578, "step": 15719 }, { "epoch": 2.44, "learning_rate": 2.617649255666788e-06, "logits/chosen": -2.083712339401245, "logits/rejected": -2.970831871032715, "logps/chosen": -83.8888931274414, "logps/rejected": -582.0365600585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.449023723602295, "rewards/margins": 16.555992126464844, "rewards/rejected": -22.005016326904297, "step": 15720 }, { "epoch": 2.44, "learning_rate": 2.61691581513564e-06, "logits/chosen": -2.7311925888061523, "logits/rejected": -2.947746515274048, "logps/chosen": -188.0096435546875, "logps/rejected": -424.7711486816406, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -11.034544944763184, "rewards/margins": 6.028298377990723, "rewards/rejected": -17.062843322753906, "step": 15721 }, { "epoch": 2.45, "learning_rate": 2.616182374604492e-06, "logits/chosen": -2.6902899742126465, "logits/rejected": -2.919360876083374, "logps/chosen": -588.278076171875, "logps/rejected": -717.150146484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.474382400512695, "rewards/margins": 9.30794906616211, "rewards/rejected": -15.782331466674805, "step": 15722 }, { "epoch": 2.45, "learning_rate": 2.615448934073344e-06, "logits/chosen": -1.7687489986419678, "logits/rejected": -2.835232973098755, "logps/chosen": -229.6593780517578, "logps/rejected": -462.7841796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.754208087921143, "rewards/margins": 8.74499225616455, "rewards/rejected": -13.499200820922852, "step": 15723 }, { "epoch": 2.45, "learning_rate": 2.6147154935421963e-06, "logits/chosen": -1.8773601055145264, "logits/rejected": -2.9080569744110107, "logps/chosen": -247.20660400390625, "logps/rejected": -539.79736328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.843982696533203, "rewards/margins": 8.944589614868164, "rewards/rejected": -17.788570404052734, "step": 15724 }, { "epoch": 2.45, "learning_rate": 2.613982053011048e-06, "logits/chosen": -2.8943428993225098, "logits/rejected": -3.1382052898406982, "logps/chosen": -282.313232421875, "logps/rejected": -442.8482971191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.61265754699707, "rewards/margins": 11.835136413574219, "rewards/rejected": -20.44779396057129, "step": 15725 }, { "epoch": 2.45, "learning_rate": 2.6132486124799005e-06, "logits/chosen": -2.352527141571045, "logits/rejected": -2.6603822708129883, "logps/chosen": -194.13670349121094, "logps/rejected": -427.87786865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.724242210388184, "rewards/margins": 12.17613410949707, "rewards/rejected": -17.90037727355957, "step": 15726 }, { "epoch": 2.45, "learning_rate": 2.6125151719487528e-06, "logits/chosen": -1.9239516258239746, "logits/rejected": -2.771111011505127, "logps/chosen": -132.46432495117188, "logps/rejected": -538.262939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.209380149841309, "rewards/margins": 18.341392517089844, "rewards/rejected": -23.550771713256836, "step": 15727 }, { "epoch": 2.45, "learning_rate": 2.6117817314176046e-06, "logits/chosen": -2.4263858795166016, "logits/rejected": -2.849883794784546, "logps/chosen": -615.9598388671875, "logps/rejected": -826.236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0680623054504395, "rewards/margins": 14.464548110961914, "rewards/rejected": -19.532609939575195, "step": 15728 }, { "epoch": 2.45, "learning_rate": 2.611048290886457e-06, "logits/chosen": -2.3540163040161133, "logits/rejected": -2.847195863723755, "logps/chosen": -133.55889892578125, "logps/rejected": -262.93255615234375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.802399635314941, "rewards/margins": 7.621335029602051, "rewards/rejected": -13.423734664916992, "step": 15729 }, { "epoch": 2.45, "learning_rate": 2.610314850355309e-06, "logits/chosen": -2.661569118499756, "logits/rejected": -2.7982022762298584, "logps/chosen": -200.9059600830078, "logps/rejected": -379.25762939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.675379276275635, "rewards/margins": 9.084244728088379, "rewards/rejected": -13.759624481201172, "step": 15730 }, { "epoch": 2.45, "learning_rate": 2.609581409824161e-06, "logits/chosen": -1.0447486639022827, "logits/rejected": -2.45318603515625, "logps/chosen": -212.23574829101562, "logps/rejected": -569.602294921875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.8510894775390625, "rewards/margins": 11.577302932739258, "rewards/rejected": -18.42839241027832, "step": 15731 }, { "epoch": 2.45, "learning_rate": 2.608847969293013e-06, "logits/chosen": -2.175395965576172, "logits/rejected": -2.8366549015045166, "logps/chosen": -267.4883728027344, "logps/rejected": -460.39031982421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.927165508270264, "rewards/margins": 11.199466705322266, "rewards/rejected": -19.126632690429688, "step": 15732 }, { "epoch": 2.45, "learning_rate": 2.6081145287618653e-06, "logits/chosen": -1.8106403350830078, "logits/rejected": -2.306468963623047, "logps/chosen": -246.62643432617188, "logps/rejected": -521.1430053710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.183137893676758, "rewards/margins": 11.330404281616211, "rewards/rejected": -19.51354217529297, "step": 15733 }, { "epoch": 2.45, "learning_rate": 2.607381088230717e-06, "logits/chosen": -1.6875849962234497, "logits/rejected": -2.876959800720215, "logps/chosen": -115.04121398925781, "logps/rejected": -457.03131103515625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.873087406158447, "rewards/margins": 10.81074047088623, "rewards/rejected": -18.683828353881836, "step": 15734 }, { "epoch": 2.45, "learning_rate": 2.6066476476995695e-06, "logits/chosen": -2.3498072624206543, "logits/rejected": -2.7244040966033936, "logps/chosen": -114.16288757324219, "logps/rejected": -300.55914306640625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -8.306815147399902, "rewards/margins": 7.302921295166016, "rewards/rejected": -15.609737396240234, "step": 15735 }, { "epoch": 2.45, "learning_rate": 2.605914207168422e-06, "logits/chosen": -2.364434242248535, "logits/rejected": -2.5303633213043213, "logps/chosen": -174.49942016601562, "logps/rejected": -316.61285400390625, "loss": 1.3342, "rewards/accuracies": 0.5, "rewards/chosen": -6.521151542663574, "rewards/margins": 6.740884304046631, "rewards/rejected": -13.262035369873047, "step": 15736 }, { "epoch": 2.45, "learning_rate": 2.6051807666372737e-06, "logits/chosen": -1.693832278251648, "logits/rejected": -1.7656052112579346, "logps/chosen": -252.44459533691406, "logps/rejected": -408.3291320800781, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -6.948981285095215, "rewards/margins": 5.200871467590332, "rewards/rejected": -12.149852752685547, "step": 15737 }, { "epoch": 2.45, "learning_rate": 2.604447326106126e-06, "logits/chosen": -2.8819198608398438, "logits/rejected": -2.3807053565979004, "logps/chosen": -274.40447998046875, "logps/rejected": -172.37631225585938, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": -5.901001930236816, "rewards/margins": 3.3981404304504395, "rewards/rejected": -9.299142837524414, "step": 15738 }, { "epoch": 2.45, "learning_rate": 2.603713885574978e-06, "logits/chosen": -2.931729555130005, "logits/rejected": -2.5809099674224854, "logps/chosen": -670.1581420898438, "logps/rejected": -505.1220397949219, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -8.632610321044922, "rewards/margins": 8.107931137084961, "rewards/rejected": -16.740541458129883, "step": 15739 }, { "epoch": 2.45, "learning_rate": 2.60298044504383e-06, "logits/chosen": -1.9390325546264648, "logits/rejected": -2.7158501148223877, "logps/chosen": -189.19192504882812, "logps/rejected": -536.7107543945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.7390241622924805, "rewards/margins": 10.826276779174805, "rewards/rejected": -17.5653018951416, "step": 15740 }, { "epoch": 2.45, "learning_rate": 2.602247004512682e-06, "logits/chosen": -2.1234912872314453, "logits/rejected": -2.4939610958099365, "logps/chosen": -188.93260192871094, "logps/rejected": -272.09918212890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.095635414123535, "rewards/margins": 6.237921714782715, "rewards/rejected": -14.33355712890625, "step": 15741 }, { "epoch": 2.45, "learning_rate": 2.6015135639815343e-06, "logits/chosen": -1.7731684446334839, "logits/rejected": -2.5041589736938477, "logps/chosen": -173.5255126953125, "logps/rejected": -448.1308288574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.690155029296875, "rewards/margins": 12.834463119506836, "rewards/rejected": -21.524620056152344, "step": 15742 }, { "epoch": 2.45, "learning_rate": 2.600780123450386e-06, "logits/chosen": -2.610276699066162, "logits/rejected": -3.039616107940674, "logps/chosen": -88.52738952636719, "logps/rejected": -395.4405517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.989706993103027, "rewards/margins": 14.574456214904785, "rewards/rejected": -20.564163208007812, "step": 15743 }, { "epoch": 2.45, "learning_rate": 2.600046682919239e-06, "logits/chosen": -1.8491175174713135, "logits/rejected": -2.650909423828125, "logps/chosen": -288.32281494140625, "logps/rejected": -559.5783081054688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.547870635986328, "rewards/margins": 10.396838188171387, "rewards/rejected": -17.94470977783203, "step": 15744 }, { "epoch": 2.45, "learning_rate": 2.599313242388091e-06, "logits/chosen": -2.9483466148376465, "logits/rejected": -2.807806968688965, "logps/chosen": -1164.8543701171875, "logps/rejected": -950.1348876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.230200290679932, "rewards/margins": 11.103203773498535, "rewards/rejected": -17.333404541015625, "step": 15745 }, { "epoch": 2.45, "learning_rate": 2.598579801856943e-06, "logits/chosen": -2.7920992374420166, "logits/rejected": -2.6287057399749756, "logps/chosen": -231.2781982421875, "logps/rejected": -195.00021362304688, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -5.162877559661865, "rewards/margins": 5.402193069458008, "rewards/rejected": -10.565070152282715, "step": 15746 }, { "epoch": 2.45, "learning_rate": 2.597846361325795e-06, "logits/chosen": -0.756134569644928, "logits/rejected": -2.226850748062134, "logps/chosen": -118.60966491699219, "logps/rejected": -389.947509765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.425800323486328, "rewards/margins": 9.322576522827148, "rewards/rejected": -17.748376846313477, "step": 15747 }, { "epoch": 2.45, "learning_rate": 2.597112920794647e-06, "logits/chosen": -1.9533072710037231, "logits/rejected": -2.6529526710510254, "logps/chosen": -137.14065551757812, "logps/rejected": -507.38165283203125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.735749244689941, "rewards/margins": 11.82325553894043, "rewards/rejected": -17.559005737304688, "step": 15748 }, { "epoch": 2.45, "learning_rate": 2.596379480263499e-06, "logits/chosen": -2.7013933658599854, "logits/rejected": -3.1285526752471924, "logps/chosen": -360.122314453125, "logps/rejected": -551.669677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.955677032470703, "rewards/margins": 13.309392929077148, "rewards/rejected": -19.26506996154785, "step": 15749 }, { "epoch": 2.45, "learning_rate": 2.595646039732351e-06, "logits/chosen": -1.6563897132873535, "logits/rejected": -2.5109570026397705, "logps/chosen": -201.4588623046875, "logps/rejected": -533.7786865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.733316421508789, "rewards/margins": 15.653244018554688, "rewards/rejected": -25.386558532714844, "step": 15750 }, { "epoch": 2.45, "learning_rate": 2.5949125992012034e-06, "logits/chosen": -2.2631213665008545, "logits/rejected": -2.9376988410949707, "logps/chosen": -342.239501953125, "logps/rejected": -832.3721313476562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.09882926940918, "rewards/margins": 11.963945388793945, "rewards/rejected": -20.062774658203125, "step": 15751 }, { "epoch": 2.45, "learning_rate": 2.5941791586700557e-06, "logits/chosen": -1.5795490741729736, "logits/rejected": -2.455610752105713, "logps/chosen": -132.1373291015625, "logps/rejected": -255.62010192871094, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -6.12569522857666, "rewards/margins": 5.792435169219971, "rewards/rejected": -11.918130874633789, "step": 15752 }, { "epoch": 2.45, "learning_rate": 2.593445718138908e-06, "logits/chosen": -2.407496452331543, "logits/rejected": -2.038320779800415, "logps/chosen": -85.18321990966797, "logps/rejected": -210.419921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4045286178588867, "rewards/margins": 10.0814208984375, "rewards/rejected": -13.48594856262207, "step": 15753 }, { "epoch": 2.45, "learning_rate": 2.59271227760776e-06, "logits/chosen": -1.200858473777771, "logits/rejected": -2.42215633392334, "logps/chosen": -219.016845703125, "logps/rejected": -369.880615234375, "loss": 1.039, "rewards/accuracies": 0.5, "rewards/chosen": -8.845125198364258, "rewards/margins": 4.386202812194824, "rewards/rejected": -13.231328964233398, "step": 15754 }, { "epoch": 2.45, "learning_rate": 2.591978837076612e-06, "logits/chosen": -2.6141934394836426, "logits/rejected": -2.3754000663757324, "logps/chosen": -625.007568359375, "logps/rejected": -469.4410400390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.018003463745117, "rewards/margins": 8.915850639343262, "rewards/rejected": -19.933853149414062, "step": 15755 }, { "epoch": 2.45, "learning_rate": 2.591245396545464e-06, "logits/chosen": -1.976000428199768, "logits/rejected": -2.5964884757995605, "logps/chosen": -110.92049407958984, "logps/rejected": -411.6732177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.436285018920898, "rewards/margins": 10.942776679992676, "rewards/rejected": -17.37906265258789, "step": 15756 }, { "epoch": 2.45, "learning_rate": 2.590511956014316e-06, "logits/chosen": -1.2832642793655396, "logits/rejected": -2.5654523372650146, "logps/chosen": -104.34416961669922, "logps/rejected": -351.6664733886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.239585876464844, "rewards/margins": 10.905097961425781, "rewards/rejected": -18.144683837890625, "step": 15757 }, { "epoch": 2.45, "learning_rate": 2.589778515483168e-06, "logits/chosen": -1.7995456457138062, "logits/rejected": -2.7999727725982666, "logps/chosen": -274.5632629394531, "logps/rejected": -470.3379211425781, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -12.249260902404785, "rewards/margins": 5.8686628341674805, "rewards/rejected": -18.117923736572266, "step": 15758 }, { "epoch": 2.45, "learning_rate": 2.58904507495202e-06, "logits/chosen": -1.2635631561279297, "logits/rejected": -2.7038397789001465, "logps/chosen": -309.01373291015625, "logps/rejected": -793.8591918945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.244593620300293, "rewards/margins": 15.485355377197266, "rewards/rejected": -21.729948043823242, "step": 15759 }, { "epoch": 2.45, "learning_rate": 2.5883116344208724e-06, "logits/chosen": -2.5594265460968018, "logits/rejected": -2.5764923095703125, "logps/chosen": -298.69720458984375, "logps/rejected": -432.92877197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.023200035095215, "rewards/margins": 14.027318954467773, "rewards/rejected": -23.050518035888672, "step": 15760 }, { "epoch": 2.45, "learning_rate": 2.5875781938897247e-06, "logits/chosen": -1.8537523746490479, "logits/rejected": -2.7577555179595947, "logps/chosen": -195.03143310546875, "logps/rejected": -578.64306640625, "loss": 0.0391, "rewards/accuracies": 1.0, "rewards/chosen": -7.434218406677246, "rewards/margins": 5.4030680656433105, "rewards/rejected": -12.837286949157715, "step": 15761 }, { "epoch": 2.45, "learning_rate": 2.586844753358577e-06, "logits/chosen": -1.3554036617279053, "logits/rejected": -2.757460594177246, "logps/chosen": -156.132568359375, "logps/rejected": -460.19091796875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -7.753520965576172, "rewards/margins": 7.384519577026367, "rewards/rejected": -15.138040542602539, "step": 15762 }, { "epoch": 2.45, "learning_rate": 2.586111312827429e-06, "logits/chosen": -1.5687094926834106, "logits/rejected": -2.4329617023468018, "logps/chosen": -270.71759033203125, "logps/rejected": -549.6527099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1321587562561035, "rewards/margins": 12.404862403869629, "rewards/rejected": -17.53702163696289, "step": 15763 }, { "epoch": 2.45, "learning_rate": 2.585377872296281e-06, "logits/chosen": -1.0698868036270142, "logits/rejected": -2.646383762359619, "logps/chosen": -92.3672103881836, "logps/rejected": -359.93218994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.959991931915283, "rewards/margins": 13.5686674118042, "rewards/rejected": -19.52865982055664, "step": 15764 }, { "epoch": 2.45, "learning_rate": 2.584644431765133e-06, "logits/chosen": -2.6567940711975098, "logits/rejected": -1.9238901138305664, "logps/chosen": -491.8664245605469, "logps/rejected": -357.21343994140625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -6.074259281158447, "rewards/margins": 8.18680191040039, "rewards/rejected": -14.261061668395996, "step": 15765 }, { "epoch": 2.45, "learning_rate": 2.583910991233985e-06, "logits/chosen": -2.1888632774353027, "logits/rejected": -2.669477939605713, "logps/chosen": -156.2393798828125, "logps/rejected": -361.23931884765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.672793388366699, "rewards/margins": 9.046699523925781, "rewards/rejected": -15.71949291229248, "step": 15766 }, { "epoch": 2.45, "learning_rate": 2.5831775507028372e-06, "logits/chosen": -1.934881567955017, "logits/rejected": -2.7844653129577637, "logps/chosen": -118.59144592285156, "logps/rejected": -483.65618896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9672632217407227, "rewards/margins": 16.822078704833984, "rewards/rejected": -20.78934097290039, "step": 15767 }, { "epoch": 2.45, "learning_rate": 2.582444110171689e-06, "logits/chosen": -1.9430598020553589, "logits/rejected": -2.501070261001587, "logps/chosen": -529.1022338867188, "logps/rejected": -675.6754150390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.94085168838501, "rewards/margins": 11.694598197937012, "rewards/rejected": -18.635448455810547, "step": 15768 }, { "epoch": 2.45, "learning_rate": 2.581710669640542e-06, "logits/chosen": -0.9885514378547668, "logits/rejected": -2.0769760608673096, "logps/chosen": -378.6011962890625, "logps/rejected": -584.2254638671875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.887885093688965, "rewards/margins": 8.65074634552002, "rewards/rejected": -17.538631439208984, "step": 15769 }, { "epoch": 2.45, "learning_rate": 2.5809772291093937e-06, "logits/chosen": -2.5089707374572754, "logits/rejected": -2.6829659938812256, "logps/chosen": -353.8167724609375, "logps/rejected": -221.347900390625, "loss": 2.3367, "rewards/accuracies": 0.0, "rewards/chosen": -8.5767822265625, "rewards/margins": -2.0724828243255615, "rewards/rejected": -6.504299163818359, "step": 15770 }, { "epoch": 2.45, "learning_rate": 2.580243788578246e-06, "logits/chosen": -2.310966730117798, "logits/rejected": -2.7496514320373535, "logps/chosen": -284.4654235839844, "logps/rejected": -461.810791015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.535337924957275, "rewards/margins": 8.327472686767578, "rewards/rejected": -14.862810134887695, "step": 15771 }, { "epoch": 2.45, "learning_rate": 2.579510348047098e-06, "logits/chosen": -2.4957239627838135, "logits/rejected": -2.7571940422058105, "logps/chosen": -105.34323120117188, "logps/rejected": -208.00518798828125, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -8.055310249328613, "rewards/margins": 5.886593818664551, "rewards/rejected": -13.941904067993164, "step": 15772 }, { "epoch": 2.45, "learning_rate": 2.57877690751595e-06, "logits/chosen": -1.3979047536849976, "logits/rejected": -2.7339088916778564, "logps/chosen": -142.80979919433594, "logps/rejected": -555.4160766601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.135213375091553, "rewards/margins": 11.379050254821777, "rewards/rejected": -16.514263153076172, "step": 15773 }, { "epoch": 2.45, "learning_rate": 2.578043466984802e-06, "logits/chosen": -1.708390474319458, "logits/rejected": -2.5016753673553467, "logps/chosen": -166.2400665283203, "logps/rejected": -349.3541259765625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -5.888714790344238, "rewards/margins": 9.502906799316406, "rewards/rejected": -15.391621589660645, "step": 15774 }, { "epoch": 2.45, "learning_rate": 2.5773100264536544e-06, "logits/chosen": -2.646017551422119, "logits/rejected": -2.8443148136138916, "logps/chosen": -171.9197998046875, "logps/rejected": -384.4942626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.028209209442139, "rewards/margins": 13.79670524597168, "rewards/rejected": -19.824913024902344, "step": 15775 }, { "epoch": 2.45, "learning_rate": 2.5765765859225062e-06, "logits/chosen": -2.5911686420440674, "logits/rejected": -2.34012508392334, "logps/chosen": -273.2048034667969, "logps/rejected": -508.3275146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.774374485015869, "rewards/margins": 10.413458824157715, "rewards/rejected": -16.187833786010742, "step": 15776 }, { "epoch": 2.45, "learning_rate": 2.5758431453913585e-06, "logits/chosen": -2.1811578273773193, "logits/rejected": -2.5177974700927734, "logps/chosen": -220.18048095703125, "logps/rejected": -316.285888671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.659557342529297, "rewards/margins": 8.3825044631958, "rewards/rejected": -14.042060852050781, "step": 15777 }, { "epoch": 2.45, "learning_rate": 2.575109704860211e-06, "logits/chosen": -1.955877661705017, "logits/rejected": -2.7863035202026367, "logps/chosen": -447.1775207519531, "logps/rejected": -654.8935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.393957614898682, "rewards/margins": 12.140861511230469, "rewards/rejected": -19.534818649291992, "step": 15778 }, { "epoch": 2.45, "learning_rate": 2.5743762643290627e-06, "logits/chosen": -1.9761165380477905, "logits/rejected": -2.6980581283569336, "logps/chosen": -183.84075927734375, "logps/rejected": -269.6689453125, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/chosen": -6.896076679229736, "rewards/margins": 6.7942962646484375, "rewards/rejected": -13.690372467041016, "step": 15779 }, { "epoch": 2.45, "learning_rate": 2.573642823797915e-06, "logits/chosen": -2.5247063636779785, "logits/rejected": -2.554504632949829, "logps/chosen": -430.214111328125, "logps/rejected": -457.4989318847656, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.753496170043945, "rewards/margins": 8.131939888000488, "rewards/rejected": -16.88543701171875, "step": 15780 }, { "epoch": 2.45, "learning_rate": 2.572909383266767e-06, "logits/chosen": -1.7810801267623901, "logits/rejected": -2.3890132904052734, "logps/chosen": -140.80670166015625, "logps/rejected": -312.3703918457031, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.87797212600708, "rewards/margins": 7.050758361816406, "rewards/rejected": -13.928730964660645, "step": 15781 }, { "epoch": 2.45, "learning_rate": 2.572175942735619e-06, "logits/chosen": -2.9066131114959717, "logits/rejected": -2.802401542663574, "logps/chosen": -83.58951568603516, "logps/rejected": -210.2611083984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.016356945037842, "rewards/margins": 8.284687995910645, "rewards/rejected": -12.301044464111328, "step": 15782 }, { "epoch": 2.45, "learning_rate": 2.571442502204471e-06, "logits/chosen": -2.7090036869049072, "logits/rejected": -1.7498114109039307, "logps/chosen": -302.0007629394531, "logps/rejected": -200.6832275390625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -6.9629998207092285, "rewards/margins": 6.609048366546631, "rewards/rejected": -13.57204818725586, "step": 15783 }, { "epoch": 2.45, "learning_rate": 2.5707090616733234e-06, "logits/chosen": -2.691852331161499, "logits/rejected": -2.704922914505005, "logps/chosen": -144.61451721191406, "logps/rejected": -200.18605041503906, "loss": 0.0477, "rewards/accuracies": 1.0, "rewards/chosen": -5.129751205444336, "rewards/margins": 6.186175346374512, "rewards/rejected": -11.315925598144531, "step": 15784 }, { "epoch": 2.45, "learning_rate": 2.5699756211421753e-06, "logits/chosen": -2.589390516281128, "logits/rejected": -2.4546780586242676, "logps/chosen": -216.04818725585938, "logps/rejected": -368.0614013671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.858541011810303, "rewards/margins": 8.101031303405762, "rewards/rejected": -14.959571838378906, "step": 15785 }, { "epoch": 2.46, "learning_rate": 2.5692421806110276e-06, "logits/chosen": -2.3882951736450195, "logits/rejected": -2.706601858139038, "logps/chosen": -111.67643737792969, "logps/rejected": -247.67185974121094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.408388137817383, "rewards/margins": 10.191739082336426, "rewards/rejected": -16.600128173828125, "step": 15786 }, { "epoch": 2.46, "learning_rate": 2.56850874007988e-06, "logits/chosen": -1.9441832304000854, "logits/rejected": -2.596796989440918, "logps/chosen": -563.619384765625, "logps/rejected": -706.395751953125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.861314296722412, "rewards/margins": 9.017372131347656, "rewards/rejected": -14.878686904907227, "step": 15787 }, { "epoch": 2.46, "learning_rate": 2.5677752995487317e-06, "logits/chosen": -2.4219000339508057, "logits/rejected": -1.770704984664917, "logps/chosen": -293.2822265625, "logps/rejected": -417.3392028808594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.972160339355469, "rewards/margins": 9.108989715576172, "rewards/rejected": -14.08115005493164, "step": 15788 }, { "epoch": 2.46, "learning_rate": 2.567041859017584e-06, "logits/chosen": -0.9888486862182617, "logits/rejected": -2.4083821773529053, "logps/chosen": -161.27635192871094, "logps/rejected": -542.0970458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.763640880584717, "rewards/margins": 15.53474235534668, "rewards/rejected": -23.298383712768555, "step": 15789 }, { "epoch": 2.46, "learning_rate": 2.566308418486436e-06, "logits/chosen": -1.2517130374908447, "logits/rejected": -2.6725337505340576, "logps/chosen": -129.63421630859375, "logps/rejected": -428.2307434082031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.527014255523682, "rewards/margins": 9.993170738220215, "rewards/rejected": -17.520185470581055, "step": 15790 }, { "epoch": 2.46, "learning_rate": 2.5655749779552882e-06, "logits/chosen": -2.7460196018218994, "logits/rejected": -2.4245071411132812, "logps/chosen": -478.07269287109375, "logps/rejected": -701.2500610351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.896676063537598, "rewards/margins": 12.628105163574219, "rewards/rejected": -17.524782180786133, "step": 15791 }, { "epoch": 2.46, "learning_rate": 2.56484153742414e-06, "logits/chosen": -2.5805206298828125, "logits/rejected": -1.8764913082122803, "logps/chosen": -554.9114990234375, "logps/rejected": -400.02978515625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -8.60826587677002, "rewards/margins": 6.684044361114502, "rewards/rejected": -15.29231071472168, "step": 15792 }, { "epoch": 2.46, "learning_rate": 2.5641080968929924e-06, "logits/chosen": -2.7104482650756836, "logits/rejected": -2.5531246662139893, "logps/chosen": -264.9669494628906, "logps/rejected": -330.8875732421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.071406841278076, "rewards/margins": 9.400205612182617, "rewards/rejected": -15.471612930297852, "step": 15793 }, { "epoch": 2.46, "learning_rate": 2.5633746563618443e-06, "logits/chosen": -2.4354469776153564, "logits/rejected": -1.9010590314865112, "logps/chosen": -676.9717407226562, "logps/rejected": -503.34796142578125, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -10.939130783081055, "rewards/margins": 4.2626237869262695, "rewards/rejected": -15.20175552368164, "step": 15794 }, { "epoch": 2.46, "learning_rate": 2.562641215830697e-06, "logits/chosen": -1.71906316280365, "logits/rejected": -2.5041346549987793, "logps/chosen": -559.5299682617188, "logps/rejected": -692.6082153320312, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -8.446426391601562, "rewards/margins": 11.583731651306152, "rewards/rejected": -20.03015899658203, "step": 15795 }, { "epoch": 2.46, "learning_rate": 2.561907775299549e-06, "logits/chosen": -1.0111212730407715, "logits/rejected": -2.6934335231781006, "logps/chosen": -156.5386962890625, "logps/rejected": -813.7271728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3987836837768555, "rewards/margins": 11.148149490356445, "rewards/rejected": -17.546932220458984, "step": 15796 }, { "epoch": 2.46, "learning_rate": 2.5611743347684008e-06, "logits/chosen": -1.3008308410644531, "logits/rejected": -2.566516160964966, "logps/chosen": -307.09747314453125, "logps/rejected": -499.4301452636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.309900283813477, "rewards/margins": 11.109245300292969, "rewards/rejected": -15.419145584106445, "step": 15797 }, { "epoch": 2.46, "learning_rate": 2.560440894237253e-06, "logits/chosen": -2.608943223953247, "logits/rejected": -1.8185819387435913, "logps/chosen": -655.1661376953125, "logps/rejected": -463.714111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.514432907104492, "rewards/margins": 14.16402816772461, "rewards/rejected": -22.678462982177734, "step": 15798 }, { "epoch": 2.46, "learning_rate": 2.559707453706105e-06, "logits/chosen": -1.7146714925765991, "logits/rejected": -2.48422908782959, "logps/chosen": -423.2862548828125, "logps/rejected": -686.4573974609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -12.371175765991211, "rewards/margins": 9.388164520263672, "rewards/rejected": -21.75933837890625, "step": 15799 }, { "epoch": 2.46, "learning_rate": 2.5589740131749572e-06, "logits/chosen": -2.7257866859436035, "logits/rejected": -1.8073712587356567, "logps/chosen": -872.1630249023438, "logps/rejected": -655.4803466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.39770221710205, "rewards/margins": 11.097890853881836, "rewards/rejected": -19.495594024658203, "step": 15800 }, { "epoch": 2.46, "learning_rate": 2.558240572643809e-06, "logits/chosen": -2.178130626678467, "logits/rejected": -1.7731550931930542, "logps/chosen": -288.1010437011719, "logps/rejected": -355.432373046875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.72629451751709, "rewards/margins": 9.46734619140625, "rewards/rejected": -16.193641662597656, "step": 15801 }, { "epoch": 2.46, "learning_rate": 2.5575071321126614e-06, "logits/chosen": -1.444016695022583, "logits/rejected": -2.507054567337036, "logps/chosen": -139.36572265625, "logps/rejected": -248.8104248046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.321059226989746, "rewards/margins": 9.311175346374512, "rewards/rejected": -14.632234573364258, "step": 15802 }, { "epoch": 2.46, "learning_rate": 2.5567736915815137e-06, "logits/chosen": -2.7060835361480713, "logits/rejected": -2.929230213165283, "logps/chosen": -100.93185424804688, "logps/rejected": -253.4928436279297, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.963196754455566, "rewards/margins": 9.13538646697998, "rewards/rejected": -15.098583221435547, "step": 15803 }, { "epoch": 2.46, "learning_rate": 2.556040251050366e-06, "logits/chosen": -2.8043875694274902, "logits/rejected": -2.5867502689361572, "logps/chosen": -205.49691772460938, "logps/rejected": -273.0509033203125, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -5.77542781829834, "rewards/margins": 6.80879020690918, "rewards/rejected": -12.58421802520752, "step": 15804 }, { "epoch": 2.46, "learning_rate": 2.555306810519218e-06, "logits/chosen": -2.263763427734375, "logits/rejected": -2.79374098777771, "logps/chosen": -162.39981079101562, "logps/rejected": -412.71246337890625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.330939292907715, "rewards/margins": 10.861814498901367, "rewards/rejected": -15.192754745483398, "step": 15805 }, { "epoch": 2.46, "learning_rate": 2.5545733699880698e-06, "logits/chosen": -1.9523303508758545, "logits/rejected": -2.7913284301757812, "logps/chosen": -124.24542999267578, "logps/rejected": -368.36199951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.444106101989746, "rewards/margins": 12.702381134033203, "rewards/rejected": -19.146488189697266, "step": 15806 }, { "epoch": 2.46, "learning_rate": 2.553839929456922e-06, "logits/chosen": -2.2102742195129395, "logits/rejected": -2.678511142730713, "logps/chosen": -91.05703735351562, "logps/rejected": -193.3572540283203, "loss": 0.5503, "rewards/accuracies": 0.5, "rewards/chosen": -5.465836524963379, "rewards/margins": 4.951616287231445, "rewards/rejected": -10.417452812194824, "step": 15807 }, { "epoch": 2.46, "learning_rate": 2.553106488925774e-06, "logits/chosen": -2.5581929683685303, "logits/rejected": -2.1805949211120605, "logps/chosen": -143.7241973876953, "logps/rejected": -228.6197967529297, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.346748113632202, "rewards/margins": 8.966081619262695, "rewards/rejected": -12.312829971313477, "step": 15808 }, { "epoch": 2.46, "learning_rate": 2.5523730483946263e-06, "logits/chosen": -2.1219029426574707, "logits/rejected": -2.463479995727539, "logps/chosen": -474.82666015625, "logps/rejected": -842.1744384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.833100318908691, "rewards/margins": 12.155241966247559, "rewards/rejected": -21.98834228515625, "step": 15809 }, { "epoch": 2.46, "learning_rate": 2.551639607863478e-06, "logits/chosen": -2.7102255821228027, "logits/rejected": -3.126997947692871, "logps/chosen": -225.23854064941406, "logps/rejected": -407.983154296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.251176834106445, "rewards/margins": 8.521713256835938, "rewards/rejected": -13.772890090942383, "step": 15810 }, { "epoch": 2.46, "learning_rate": 2.5509061673323304e-06, "logits/chosen": -2.4610369205474854, "logits/rejected": -2.726999521255493, "logps/chosen": -248.4622802734375, "logps/rejected": -287.48345947265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3142223358154297, "rewards/margins": 9.935209274291992, "rewards/rejected": -13.249431610107422, "step": 15811 }, { "epoch": 2.46, "learning_rate": 2.5501727268011827e-06, "logits/chosen": -2.863248348236084, "logits/rejected": -1.7856792211532593, "logps/chosen": -324.2999572753906, "logps/rejected": -280.58612060546875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -7.492761611938477, "rewards/margins": 8.671407699584961, "rewards/rejected": -16.164169311523438, "step": 15812 }, { "epoch": 2.46, "learning_rate": 2.549439286270035e-06, "logits/chosen": -2.2325875759124756, "logits/rejected": -2.632096290588379, "logps/chosen": -280.0856628417969, "logps/rejected": -378.345947265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.570060729980469, "rewards/margins": 7.596444129943848, "rewards/rejected": -15.166505813598633, "step": 15813 }, { "epoch": 2.46, "learning_rate": 2.548705845738887e-06, "logits/chosen": -2.751584768295288, "logits/rejected": -2.842186212539673, "logps/chosen": -236.30734252929688, "logps/rejected": -386.2427062988281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.763498306274414, "rewards/margins": 8.687078475952148, "rewards/rejected": -15.450576782226562, "step": 15814 }, { "epoch": 2.46, "learning_rate": 2.5479724052077392e-06, "logits/chosen": -1.8223997354507446, "logits/rejected": -2.519402027130127, "logps/chosen": -244.44639587402344, "logps/rejected": -428.0984802246094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.333417892456055, "rewards/margins": 10.836593627929688, "rewards/rejected": -19.17000961303711, "step": 15815 }, { "epoch": 2.46, "learning_rate": 2.547238964676591e-06, "logits/chosen": -2.0048911571502686, "logits/rejected": -2.6610207557678223, "logps/chosen": -136.6224365234375, "logps/rejected": -507.99859619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8949179649353027, "rewards/margins": 13.899120330810547, "rewards/rejected": -17.794038772583008, "step": 15816 }, { "epoch": 2.46, "learning_rate": 2.546505524145443e-06, "logits/chosen": -2.3231825828552246, "logits/rejected": -2.9277491569519043, "logps/chosen": -189.1232147216797, "logps/rejected": -469.8367614746094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.277531623840332, "rewards/margins": 9.372428894042969, "rewards/rejected": -13.649959564208984, "step": 15817 }, { "epoch": 2.46, "learning_rate": 2.5457720836142953e-06, "logits/chosen": -1.9167989492416382, "logits/rejected": -3.070751905441284, "logps/chosen": -910.6202392578125, "logps/rejected": -1130.774658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.833185195922852, "rewards/margins": 13.184564590454102, "rewards/rejected": -19.017749786376953, "step": 15818 }, { "epoch": 2.46, "learning_rate": 2.545038643083147e-06, "logits/chosen": -1.3458218574523926, "logits/rejected": -2.534010410308838, "logps/chosen": -363.9810791015625, "logps/rejected": -477.2199401855469, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -9.708184242248535, "rewards/margins": 7.0686726570129395, "rewards/rejected": -16.776857376098633, "step": 15819 }, { "epoch": 2.46, "learning_rate": 2.544305202552e-06, "logits/chosen": -2.775006055831909, "logits/rejected": -2.414451837539673, "logps/chosen": -473.0721435546875, "logps/rejected": -381.633056640625, "loss": 0.1269, "rewards/accuracies": 1.0, "rewards/chosen": -6.51592493057251, "rewards/margins": 6.719308853149414, "rewards/rejected": -13.235233306884766, "step": 15820 }, { "epoch": 2.46, "learning_rate": 2.5435717620208518e-06, "logits/chosen": -2.7906453609466553, "logits/rejected": -1.8737733364105225, "logps/chosen": -295.657958984375, "logps/rejected": -218.6717987060547, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.3255308866500854, "rewards/margins": 10.810745239257812, "rewards/rejected": -12.136275291442871, "step": 15821 }, { "epoch": 2.46, "learning_rate": 2.542838321489704e-06, "logits/chosen": -1.0420013666152954, "logits/rejected": -2.4153637886047363, "logps/chosen": -177.976318359375, "logps/rejected": -492.6498718261719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.844945907592773, "rewards/margins": 9.510883331298828, "rewards/rejected": -16.3558292388916, "step": 15822 }, { "epoch": 2.46, "learning_rate": 2.542104880958556e-06, "logits/chosen": -2.716785430908203, "logits/rejected": -2.1596007347106934, "logps/chosen": -342.0712890625, "logps/rejected": -322.0667419433594, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.748260021209717, "rewards/margins": 9.429801940917969, "rewards/rejected": -16.178062438964844, "step": 15823 }, { "epoch": 2.46, "learning_rate": 2.5413714404274083e-06, "logits/chosen": -2.59039568901062, "logits/rejected": -2.5447404384613037, "logps/chosen": -180.36184692382812, "logps/rejected": -221.16720581054688, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.748166084289551, "rewards/margins": 6.262274742126465, "rewards/rejected": -13.010440826416016, "step": 15824 }, { "epoch": 2.46, "learning_rate": 2.54063799989626e-06, "logits/chosen": -2.419494390487671, "logits/rejected": -2.9296677112579346, "logps/chosen": -180.010498046875, "logps/rejected": -464.1361083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.47250509262085, "rewards/margins": 9.588570594787598, "rewards/rejected": -17.061077117919922, "step": 15825 }, { "epoch": 2.46, "learning_rate": 2.539904559365112e-06, "logits/chosen": -2.1301019191741943, "logits/rejected": -2.6640994548797607, "logps/chosen": -102.04002380371094, "logps/rejected": -265.8254089355469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.6420674324035645, "rewards/margins": 8.206413269042969, "rewards/rejected": -13.848480224609375, "step": 15826 }, { "epoch": 2.46, "learning_rate": 2.5391711188339643e-06, "logits/chosen": -2.466956853866577, "logits/rejected": -2.5843214988708496, "logps/chosen": -233.80667114257812, "logps/rejected": -331.7220458984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.362188339233398, "rewards/margins": 8.001312255859375, "rewards/rejected": -16.363500595092773, "step": 15827 }, { "epoch": 2.46, "learning_rate": 2.5384376783028166e-06, "logits/chosen": -1.3830333948135376, "logits/rejected": -2.6518115997314453, "logps/chosen": -95.31639862060547, "logps/rejected": -519.3792724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.298968315124512, "rewards/margins": 15.859855651855469, "rewards/rejected": -22.158824920654297, "step": 15828 }, { "epoch": 2.46, "learning_rate": 2.537704237771669e-06, "logits/chosen": -2.7059531211853027, "logits/rejected": -2.519646644592285, "logps/chosen": -270.3047790527344, "logps/rejected": -336.24822998046875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.859012603759766, "rewards/margins": 7.01800012588501, "rewards/rejected": -11.877012252807617, "step": 15829 }, { "epoch": 2.46, "learning_rate": 2.536970797240521e-06, "logits/chosen": -1.645589828491211, "logits/rejected": -2.6716434955596924, "logps/chosen": -166.8807830810547, "logps/rejected": -424.92816162109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.298081398010254, "rewards/margins": 13.594706535339355, "rewards/rejected": -18.89278793334961, "step": 15830 }, { "epoch": 2.46, "learning_rate": 2.536237356709373e-06, "logits/chosen": -2.0164945125579834, "logits/rejected": -2.4194979667663574, "logps/chosen": -203.91409301757812, "logps/rejected": -404.88079833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.8792009353637695, "rewards/margins": 10.967155456542969, "rewards/rejected": -16.846355438232422, "step": 15831 }, { "epoch": 2.46, "learning_rate": 2.535503916178225e-06, "logits/chosen": -1.6170233488082886, "logits/rejected": -2.5975594520568848, "logps/chosen": -244.6924591064453, "logps/rejected": -405.6217346191406, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -7.4583306312561035, "rewards/margins": 6.927072048187256, "rewards/rejected": -14.38540267944336, "step": 15832 }, { "epoch": 2.46, "learning_rate": 2.5347704756470773e-06, "logits/chosen": -1.8077226877212524, "logits/rejected": -2.3356688022613525, "logps/chosen": -242.10440063476562, "logps/rejected": -414.9150695800781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.862602233886719, "rewards/margins": 10.037965774536133, "rewards/rejected": -18.90056800842285, "step": 15833 }, { "epoch": 2.46, "learning_rate": 2.534037035115929e-06, "logits/chosen": -2.537292003631592, "logits/rejected": -2.513385057449341, "logps/chosen": -219.7042999267578, "logps/rejected": -334.8365478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.394805431365967, "rewards/margins": 13.288607597351074, "rewards/rejected": -18.683414459228516, "step": 15834 }, { "epoch": 2.46, "learning_rate": 2.533303594584781e-06, "logits/chosen": -2.2348310947418213, "logits/rejected": -2.7190864086151123, "logps/chosen": -147.1943359375, "logps/rejected": -461.9491271972656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.424285888671875, "rewards/margins": 9.608498573303223, "rewards/rejected": -13.032784461975098, "step": 15835 }, { "epoch": 2.46, "learning_rate": 2.5325701540536333e-06, "logits/chosen": -2.4018115997314453, "logits/rejected": -2.623286485671997, "logps/chosen": -137.9407196044922, "logps/rejected": -356.8546447753906, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -8.771249771118164, "rewards/margins": 9.537361145019531, "rewards/rejected": -18.308612823486328, "step": 15836 }, { "epoch": 2.46, "learning_rate": 2.5318367135224856e-06, "logits/chosen": -2.962080478668213, "logits/rejected": -2.180436134338379, "logps/chosen": -686.648193359375, "logps/rejected": -567.341064453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.420468330383301, "rewards/margins": 9.442559242248535, "rewards/rejected": -14.863027572631836, "step": 15837 }, { "epoch": 2.46, "learning_rate": 2.531103272991338e-06, "logits/chosen": -2.45456600189209, "logits/rejected": -2.5319719314575195, "logps/chosen": -161.9449462890625, "logps/rejected": -206.93386840820312, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -5.022064208984375, "rewards/margins": 5.376564025878906, "rewards/rejected": -10.398628234863281, "step": 15838 }, { "epoch": 2.46, "learning_rate": 2.53036983246019e-06, "logits/chosen": -2.0296401977539062, "logits/rejected": -2.3838307857513428, "logps/chosen": -113.74441528320312, "logps/rejected": -178.69195556640625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -2.7271718978881836, "rewards/margins": 7.583611488342285, "rewards/rejected": -10.310783386230469, "step": 15839 }, { "epoch": 2.46, "learning_rate": 2.529636391929042e-06, "logits/chosen": -2.713700532913208, "logits/rejected": -1.624921202659607, "logps/chosen": -438.60516357421875, "logps/rejected": -278.90155029296875, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/chosen": -7.412038803100586, "rewards/margins": 6.312230587005615, "rewards/rejected": -13.72426986694336, "step": 15840 }, { "epoch": 2.46, "learning_rate": 2.528902951397894e-06, "logits/chosen": -2.8061864376068115, "logits/rejected": -2.5331006050109863, "logps/chosen": -623.88916015625, "logps/rejected": -571.64599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.502774238586426, "rewards/margins": 11.581029891967773, "rewards/rejected": -18.083805084228516, "step": 15841 }, { "epoch": 2.46, "learning_rate": 2.5281695108667463e-06, "logits/chosen": -1.558777093887329, "logits/rejected": -2.6146678924560547, "logps/chosen": -125.96371459960938, "logps/rejected": -347.2429504394531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.5915422439575195, "rewards/margins": 9.183979988098145, "rewards/rejected": -16.775522232055664, "step": 15842 }, { "epoch": 2.46, "learning_rate": 2.527436070335598e-06, "logits/chosen": -2.788515090942383, "logits/rejected": -1.3992482423782349, "logps/chosen": -311.11004638671875, "logps/rejected": -194.7028045654297, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -3.1116561889648438, "rewards/margins": 6.856805801391602, "rewards/rejected": -9.968461990356445, "step": 15843 }, { "epoch": 2.46, "learning_rate": 2.5267026298044505e-06, "logits/chosen": -1.7804843187332153, "logits/rejected": -2.588595151901245, "logps/chosen": -347.3781433105469, "logps/rejected": -497.55255126953125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.531615257263184, "rewards/margins": 8.27879524230957, "rewards/rejected": -14.810409545898438, "step": 15844 }, { "epoch": 2.46, "learning_rate": 2.5259691892733028e-06, "logits/chosen": -3.220336437225342, "logits/rejected": -3.2111706733703613, "logps/chosen": -224.47445678710938, "logps/rejected": -222.7626953125, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": -5.9047136306762695, "rewards/margins": 4.739288330078125, "rewards/rejected": -10.644001960754395, "step": 15845 }, { "epoch": 2.46, "learning_rate": 2.5252357487421547e-06, "logits/chosen": -1.9037690162658691, "logits/rejected": -3.062638759613037, "logps/chosen": -220.04425048828125, "logps/rejected": -449.0469970703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.896487236022949, "rewards/margins": 9.260110855102539, "rewards/rejected": -15.156599044799805, "step": 15846 }, { "epoch": 2.46, "learning_rate": 2.524502308211007e-06, "logits/chosen": -2.5288569927215576, "logits/rejected": -3.0737223625183105, "logps/chosen": -399.23077392578125, "logps/rejected": -577.5807495117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0538201332092285, "rewards/margins": 12.736688613891602, "rewards/rejected": -18.790508270263672, "step": 15847 }, { "epoch": 2.46, "learning_rate": 2.523768867679859e-06, "logits/chosen": -2.8266079425811768, "logits/rejected": -2.6133406162261963, "logps/chosen": -277.4039306640625, "logps/rejected": -338.53497314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.751523017883301, "rewards/margins": 10.206998825073242, "rewards/rejected": -16.95852279663086, "step": 15848 }, { "epoch": 2.46, "learning_rate": 2.523035427148711e-06, "logits/chosen": -2.265120267868042, "logits/rejected": -2.817892551422119, "logps/chosen": -117.10506439208984, "logps/rejected": -394.117431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.472171783447266, "rewards/margins": 9.794612884521484, "rewards/rejected": -16.26678466796875, "step": 15849 }, { "epoch": 2.47, "learning_rate": 2.522301986617563e-06, "logits/chosen": -2.847813367843628, "logits/rejected": -2.6419694423675537, "logps/chosen": -357.77392578125, "logps/rejected": -325.15911865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.744296073913574, "rewards/margins": 10.300108909606934, "rewards/rejected": -16.044404983520508, "step": 15850 }, { "epoch": 2.47, "learning_rate": 2.5215685460864153e-06, "logits/chosen": -2.8332464694976807, "logits/rejected": -2.5412447452545166, "logps/chosen": -495.0471496582031, "logps/rejected": -469.6424255371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.858194828033447, "rewards/margins": 11.272798538208008, "rewards/rejected": -18.130992889404297, "step": 15851 }, { "epoch": 2.47, "learning_rate": 2.520835105555267e-06, "logits/chosen": -1.932838797569275, "logits/rejected": -1.99858820438385, "logps/chosen": -538.545166015625, "logps/rejected": -564.1441650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.501916885375977, "rewards/margins": 12.124789237976074, "rewards/rejected": -18.626705169677734, "step": 15852 }, { "epoch": 2.47, "learning_rate": 2.5201016650241195e-06, "logits/chosen": -1.7754250764846802, "logits/rejected": -2.6910598278045654, "logps/chosen": -500.5836486816406, "logps/rejected": -669.624267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.009056091308594, "rewards/margins": 11.907428741455078, "rewards/rejected": -22.916484832763672, "step": 15853 }, { "epoch": 2.47, "learning_rate": 2.519368224492972e-06, "logits/chosen": -1.9717451333999634, "logits/rejected": -2.737807273864746, "logps/chosen": -203.5917510986328, "logps/rejected": -259.93548583984375, "loss": 0.076, "rewards/accuracies": 1.0, "rewards/chosen": -7.995712757110596, "rewards/margins": 5.0921406745910645, "rewards/rejected": -13.08785343170166, "step": 15854 }, { "epoch": 2.47, "learning_rate": 2.5186347839618237e-06, "logits/chosen": -1.8425196409225464, "logits/rejected": -2.470824718475342, "logps/chosen": -410.1494140625, "logps/rejected": -520.674560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.564868450164795, "rewards/margins": 10.544027328491211, "rewards/rejected": -16.108896255493164, "step": 15855 }, { "epoch": 2.47, "learning_rate": 2.517901343430676e-06, "logits/chosen": -2.6827168464660645, "logits/rejected": -2.8735203742980957, "logps/chosen": -131.97396850585938, "logps/rejected": -344.0032958984375, "loss": 0.4258, "rewards/accuracies": 0.5, "rewards/chosen": -10.519765853881836, "rewards/margins": 2.7611632347106934, "rewards/rejected": -13.280928611755371, "step": 15856 }, { "epoch": 2.47, "learning_rate": 2.517167902899528e-06, "logits/chosen": -2.847545623779297, "logits/rejected": -1.345291256904602, "logps/chosen": -358.41497802734375, "logps/rejected": -146.7168731689453, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.0626091957092285, "rewards/margins": 8.653024673461914, "rewards/rejected": -10.715633392333984, "step": 15857 }, { "epoch": 2.47, "learning_rate": 2.51643446236838e-06, "logits/chosen": -1.9532231092453003, "logits/rejected": -2.5571160316467285, "logps/chosen": -225.41131591796875, "logps/rejected": -399.71014404296875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.123470306396484, "rewards/margins": 9.571390151977539, "rewards/rejected": -17.69485855102539, "step": 15858 }, { "epoch": 2.47, "learning_rate": 2.515701021837232e-06, "logits/chosen": -2.531583070755005, "logits/rejected": -2.3369410037994385, "logps/chosen": -207.28265380859375, "logps/rejected": -251.39697265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.5879006385803223, "rewards/margins": 9.244465827941895, "rewards/rejected": -11.832366943359375, "step": 15859 }, { "epoch": 2.47, "learning_rate": 2.5149675813060843e-06, "logits/chosen": -0.5091460347175598, "logits/rejected": -1.9346961975097656, "logps/chosen": -190.11758422851562, "logps/rejected": -653.0316162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.492042541503906, "rewards/margins": 12.095870971679688, "rewards/rejected": -18.587913513183594, "step": 15860 }, { "epoch": 2.47, "learning_rate": 2.5142341407749362e-06, "logits/chosen": -1.6888395547866821, "logits/rejected": -3.039097309112549, "logps/chosen": -348.1927795410156, "logps/rejected": -696.6829833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.597916603088379, "rewards/margins": 11.752199172973633, "rewards/rejected": -17.350116729736328, "step": 15861 }, { "epoch": 2.47, "learning_rate": 2.513500700243789e-06, "logits/chosen": -1.6971213817596436, "logits/rejected": -2.150998830795288, "logps/chosen": -160.26382446289062, "logps/rejected": -350.774169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.372584819793701, "rewards/margins": 10.247098922729492, "rewards/rejected": -17.61968231201172, "step": 15862 }, { "epoch": 2.47, "learning_rate": 2.512767259712641e-06, "logits/chosen": -2.780080556869507, "logits/rejected": -2.0448830127716064, "logps/chosen": -490.2225036621094, "logps/rejected": -456.7945251464844, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.911694526672363, "rewards/margins": 7.304513454437256, "rewards/rejected": -14.216207504272461, "step": 15863 }, { "epoch": 2.47, "learning_rate": 2.512033819181493e-06, "logits/chosen": -2.0711939334869385, "logits/rejected": -2.397836923599243, "logps/chosen": -268.851806640625, "logps/rejected": -479.4903869628906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.81538200378418, "rewards/margins": 12.432659149169922, "rewards/rejected": -22.2480411529541, "step": 15864 }, { "epoch": 2.47, "learning_rate": 2.511300378650345e-06, "logits/chosen": -0.8456557393074036, "logits/rejected": -1.5224194526672363, "logps/chosen": -228.536865234375, "logps/rejected": -523.5174560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.957884788513184, "rewards/margins": 14.176313400268555, "rewards/rejected": -19.134197235107422, "step": 15865 }, { "epoch": 2.47, "learning_rate": 2.510566938119197e-06, "logits/chosen": -2.280031442642212, "logits/rejected": -2.7744152545928955, "logps/chosen": -92.62995910644531, "logps/rejected": -244.98968505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.344356536865234, "rewards/margins": 10.433906555175781, "rewards/rejected": -15.778263092041016, "step": 15866 }, { "epoch": 2.47, "learning_rate": 2.509833497588049e-06, "logits/chosen": -2.6671361923217773, "logits/rejected": -2.4213969707489014, "logps/chosen": -183.57931518554688, "logps/rejected": -317.9179992675781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.965230703353882, "rewards/margins": 13.231273651123047, "rewards/rejected": -17.196504592895508, "step": 15867 }, { "epoch": 2.47, "learning_rate": 2.509100057056901e-06, "logits/chosen": -1.5867732763290405, "logits/rejected": -2.4854273796081543, "logps/chosen": -128.69100952148438, "logps/rejected": -354.69598388671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.002777099609375, "rewards/margins": 8.986857414245605, "rewards/rejected": -16.989633560180664, "step": 15868 }, { "epoch": 2.47, "learning_rate": 2.5083666165257534e-06, "logits/chosen": -1.390241026878357, "logits/rejected": -2.601834535598755, "logps/chosen": -205.7311553955078, "logps/rejected": -326.3406677246094, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.130537033081055, "rewards/margins": 9.265972137451172, "rewards/rejected": -14.396509170532227, "step": 15869 }, { "epoch": 2.47, "learning_rate": 2.5076331759946057e-06, "logits/chosen": -2.767421007156372, "logits/rejected": -2.8501176834106445, "logps/chosen": -375.30609130859375, "logps/rejected": -522.3556518554688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.879660606384277, "rewards/margins": 9.959335327148438, "rewards/rejected": -17.83899688720703, "step": 15870 }, { "epoch": 2.47, "learning_rate": 2.506899735463458e-06, "logits/chosen": -2.5826406478881836, "logits/rejected": -2.1706759929656982, "logps/chosen": -326.03277587890625, "logps/rejected": -676.44873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.259575843811035, "rewards/margins": 14.70322036743164, "rewards/rejected": -19.96279525756836, "step": 15871 }, { "epoch": 2.47, "learning_rate": 2.50616629493231e-06, "logits/chosen": -2.2082102298736572, "logits/rejected": -2.9494481086730957, "logps/chosen": -267.9847412109375, "logps/rejected": -430.88232421875, "loss": 0.2027, "rewards/accuracies": 1.0, "rewards/chosen": -4.983005046844482, "rewards/margins": 5.122971534729004, "rewards/rejected": -10.105976104736328, "step": 15872 }, { "epoch": 2.47, "learning_rate": 2.505432854401162e-06, "logits/chosen": -2.835101842880249, "logits/rejected": -2.562681198120117, "logps/chosen": -546.390625, "logps/rejected": -621.5743408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.093124389648438, "rewards/margins": 16.551326751708984, "rewards/rejected": -24.644451141357422, "step": 15873 }, { "epoch": 2.47, "learning_rate": 2.504699413870014e-06, "logits/chosen": -2.8988561630249023, "logits/rejected": -2.69478440284729, "logps/chosen": -169.99986267089844, "logps/rejected": -217.01011657714844, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.280277252197266, "rewards/margins": 8.11742877960205, "rewards/rejected": -12.397706031799316, "step": 15874 }, { "epoch": 2.47, "learning_rate": 2.503965973338866e-06, "logits/chosen": -0.5178089737892151, "logits/rejected": -2.606135606765747, "logps/chosen": -116.56300354003906, "logps/rejected": -548.37841796875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -7.5771074295043945, "rewards/margins": 6.99061393737793, "rewards/rejected": -14.567721366882324, "step": 15875 }, { "epoch": 2.47, "learning_rate": 2.503232532807718e-06, "logits/chosen": -2.350457191467285, "logits/rejected": -2.3448455333709717, "logps/chosen": -146.96432495117188, "logps/rejected": -278.0717468261719, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.966277599334717, "rewards/margins": 7.432609558105469, "rewards/rejected": -13.398887634277344, "step": 15876 }, { "epoch": 2.47, "learning_rate": 2.50249909227657e-06, "logits/chosen": -2.396960496902466, "logits/rejected": -3.053396701812744, "logps/chosen": -278.1410827636719, "logps/rejected": -438.23370361328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.854454040527344, "rewards/margins": 10.297172546386719, "rewards/rejected": -16.151626586914062, "step": 15877 }, { "epoch": 2.47, "learning_rate": 2.5017656517454224e-06, "logits/chosen": -2.7934648990631104, "logits/rejected": -2.9607744216918945, "logps/chosen": -354.0933837890625, "logps/rejected": -337.06072998046875, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/chosen": -7.938121795654297, "rewards/margins": 4.573365688323975, "rewards/rejected": -12.51148796081543, "step": 15878 }, { "epoch": 2.47, "learning_rate": 2.5010322112142747e-06, "logits/chosen": -2.5980422496795654, "logits/rejected": -2.922969102859497, "logps/chosen": -353.3458251953125, "logps/rejected": -451.73828125, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -6.1339311599731445, "rewards/margins": 7.177643775939941, "rewards/rejected": -13.311574935913086, "step": 15879 }, { "epoch": 2.47, "learning_rate": 2.500298770683127e-06, "logits/chosen": -2.657878875732422, "logits/rejected": -3.1754751205444336, "logps/chosen": -92.98067474365234, "logps/rejected": -411.27191162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.197478294372559, "rewards/margins": 12.416240692138672, "rewards/rejected": -18.613719940185547, "step": 15880 }, { "epoch": 2.47, "learning_rate": 2.499565330151979e-06, "logits/chosen": -1.2125016450881958, "logits/rejected": -2.6134679317474365, "logps/chosen": -237.74221801757812, "logps/rejected": -358.3099365234375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -4.226991176605225, "rewards/margins": 6.696824073791504, "rewards/rejected": -10.92381477355957, "step": 15881 }, { "epoch": 2.47, "learning_rate": 2.498831889620831e-06, "logits/chosen": -2.616100311279297, "logits/rejected": -2.814749002456665, "logps/chosen": -290.8782043457031, "logps/rejected": -411.1876220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.882160186767578, "rewards/margins": 11.396632194519043, "rewards/rejected": -18.278793334960938, "step": 15882 }, { "epoch": 2.47, "learning_rate": 2.498098449089683e-06, "logits/chosen": -2.628847599029541, "logits/rejected": -2.8439345359802246, "logps/chosen": -108.6005630493164, "logps/rejected": -273.8681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.546608924865723, "rewards/margins": 11.242690086364746, "rewards/rejected": -20.78929901123047, "step": 15883 }, { "epoch": 2.47, "learning_rate": 2.497365008558535e-06, "logits/chosen": -2.7492380142211914, "logits/rejected": -2.803251028060913, "logps/chosen": -124.53015899658203, "logps/rejected": -269.95867919921875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.341550827026367, "rewards/margins": 7.239655494689941, "rewards/rejected": -15.581206321716309, "step": 15884 }, { "epoch": 2.47, "learning_rate": 2.4966315680273872e-06, "logits/chosen": -1.5372689962387085, "logits/rejected": -2.5022265911102295, "logps/chosen": -197.2584686279297, "logps/rejected": -416.3077392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.488219261169434, "rewards/margins": 9.714595794677734, "rewards/rejected": -16.202816009521484, "step": 15885 }, { "epoch": 2.47, "learning_rate": 2.495898127496239e-06, "logits/chosen": -1.7977735996246338, "logits/rejected": -2.821061849594116, "logps/chosen": -109.29427337646484, "logps/rejected": -297.23797607421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.896245956420898, "rewards/margins": 7.078587532043457, "rewards/rejected": -13.974833488464355, "step": 15886 }, { "epoch": 2.47, "learning_rate": 2.495164686965092e-06, "logits/chosen": -1.6862611770629883, "logits/rejected": -2.812757968902588, "logps/chosen": -297.9324035644531, "logps/rejected": -723.22314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.324384927749634, "rewards/margins": 14.035767555236816, "rewards/rejected": -17.360153198242188, "step": 15887 }, { "epoch": 2.47, "learning_rate": 2.4944312464339437e-06, "logits/chosen": -2.4790403842926025, "logits/rejected": -2.7357726097106934, "logps/chosen": -201.63772583007812, "logps/rejected": -378.5802001953125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -5.8358659744262695, "rewards/margins": 8.34145736694336, "rewards/rejected": -14.177323341369629, "step": 15888 }, { "epoch": 2.47, "learning_rate": 2.493697805902796e-06, "logits/chosen": -2.795708656311035, "logits/rejected": -3.331634283065796, "logps/chosen": -98.61540222167969, "logps/rejected": -356.55767822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.554502964019775, "rewards/margins": 11.279062271118164, "rewards/rejected": -18.83356475830078, "step": 15889 }, { "epoch": 2.47, "learning_rate": 2.492964365371648e-06, "logits/chosen": -2.345428228378296, "logits/rejected": -2.7622733116149902, "logps/chosen": -437.6565246582031, "logps/rejected": -591.6829833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.97000503540039, "rewards/margins": 10.827219009399414, "rewards/rejected": -20.797224044799805, "step": 15890 }, { "epoch": 2.47, "learning_rate": 2.4922309248405e-06, "logits/chosen": -2.666196346282959, "logits/rejected": -2.8076210021972656, "logps/chosen": -152.12130737304688, "logps/rejected": -291.6112060546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.7699480056762695, "rewards/margins": 9.00015926361084, "rewards/rejected": -13.77010726928711, "step": 15891 }, { "epoch": 2.47, "learning_rate": 2.491497484309352e-06, "logits/chosen": -1.9353861808776855, "logits/rejected": -2.582819700241089, "logps/chosen": -214.70697021484375, "logps/rejected": -555.1419677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.552639961242676, "rewards/margins": 13.774248123168945, "rewards/rejected": -19.326889038085938, "step": 15892 }, { "epoch": 2.47, "learning_rate": 2.4907640437782044e-06, "logits/chosen": -0.8420404195785522, "logits/rejected": -2.0822346210479736, "logps/chosen": -243.138916015625, "logps/rejected": -593.526611328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.3643059730529785, "rewards/margins": 14.027233123779297, "rewards/rejected": -20.39154052734375, "step": 15893 }, { "epoch": 2.47, "learning_rate": 2.4900306032470563e-06, "logits/chosen": -2.619605302810669, "logits/rejected": -2.555830717086792, "logps/chosen": -275.0494079589844, "logps/rejected": -385.3013916015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.864531517028809, "rewards/margins": 8.347738265991211, "rewards/rejected": -15.21226978302002, "step": 15894 }, { "epoch": 2.47, "learning_rate": 2.4892971627159086e-06, "logits/chosen": -3.0357747077941895, "logits/rejected": -3.189275026321411, "logps/chosen": -188.84219360351562, "logps/rejected": -364.20550537109375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -6.540947914123535, "rewards/margins": 6.443353652954102, "rewards/rejected": -12.984302520751953, "step": 15895 }, { "epoch": 2.47, "learning_rate": 2.488563722184761e-06, "logits/chosen": -2.3673946857452393, "logits/rejected": -2.8918721675872803, "logps/chosen": -447.1678466796875, "logps/rejected": -515.322021484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.358099460601807, "rewards/margins": 9.25930404663086, "rewards/rejected": -13.617403030395508, "step": 15896 }, { "epoch": 2.47, "learning_rate": 2.4878302816536127e-06, "logits/chosen": -2.441471576690674, "logits/rejected": -0.5116394758224487, "logps/chosen": -337.72174072265625, "logps/rejected": -208.31605529785156, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -6.568633556365967, "rewards/margins": 6.23995304107666, "rewards/rejected": -12.808587074279785, "step": 15897 }, { "epoch": 2.47, "learning_rate": 2.487096841122465e-06, "logits/chosen": -2.4503281116485596, "logits/rejected": -2.7183706760406494, "logps/chosen": -363.2025146484375, "logps/rejected": -493.43115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.824776649475098, "rewards/margins": 13.556039810180664, "rewards/rejected": -19.380815505981445, "step": 15898 }, { "epoch": 2.47, "learning_rate": 2.486363400591317e-06, "logits/chosen": -0.8616707921028137, "logits/rejected": -1.1311075687408447, "logps/chosen": -154.03369140625, "logps/rejected": -331.8109130859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.1267900466918945, "rewards/margins": 9.607669830322266, "rewards/rejected": -15.73445987701416, "step": 15899 }, { "epoch": 2.47, "learning_rate": 2.4856299600601692e-06, "logits/chosen": -2.6366727352142334, "logits/rejected": -2.729794979095459, "logps/chosen": -334.13909912109375, "logps/rejected": -528.4219970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.723422050476074, "rewards/margins": 14.311625480651855, "rewards/rejected": -18.03504753112793, "step": 15900 }, { "epoch": 2.47, "learning_rate": 2.484896519529021e-06, "logits/chosen": -1.917407751083374, "logits/rejected": -2.818859100341797, "logps/chosen": -266.2751159667969, "logps/rejected": -532.9340209960938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.529492378234863, "rewards/margins": 10.97313117980957, "rewards/rejected": -18.50262451171875, "step": 15901 }, { "epoch": 2.47, "learning_rate": 2.4841630789978734e-06, "logits/chosen": -2.506291389465332, "logits/rejected": -2.588921546936035, "logps/chosen": -119.81828308105469, "logps/rejected": -149.5252227783203, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -4.206689357757568, "rewards/margins": 4.280766487121582, "rewards/rejected": -8.487455368041992, "step": 15902 }, { "epoch": 2.47, "learning_rate": 2.4834296384667253e-06, "logits/chosen": -2.5191781520843506, "logits/rejected": -2.733642578125, "logps/chosen": -142.81455993652344, "logps/rejected": -404.2571105957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.213784694671631, "rewards/margins": 11.223332405090332, "rewards/rejected": -18.437116622924805, "step": 15903 }, { "epoch": 2.47, "learning_rate": 2.4826961979355776e-06, "logits/chosen": -1.9206557273864746, "logits/rejected": -2.4590954780578613, "logps/chosen": -211.65005493164062, "logps/rejected": -405.9411926269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.247748374938965, "rewards/margins": 11.641134262084961, "rewards/rejected": -19.888883590698242, "step": 15904 }, { "epoch": 2.47, "learning_rate": 2.48196275740443e-06, "logits/chosen": -2.4692161083221436, "logits/rejected": -3.0484654903411865, "logps/chosen": -327.9759521484375, "logps/rejected": -478.53857421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.008550643920898, "rewards/margins": 11.240620613098145, "rewards/rejected": -16.24917221069336, "step": 15905 }, { "epoch": 2.47, "learning_rate": 2.4812293168732818e-06, "logits/chosen": -1.7935436964035034, "logits/rejected": -2.380476951599121, "logps/chosen": -221.1834716796875, "logps/rejected": -327.46539306640625, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": -9.168549537658691, "rewards/margins": 4.27924919128418, "rewards/rejected": -13.447797775268555, "step": 15906 }, { "epoch": 2.47, "learning_rate": 2.480495876342134e-06, "logits/chosen": -2.532552719116211, "logits/rejected": -2.7734808921813965, "logps/chosen": -220.7327880859375, "logps/rejected": -386.2301025390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.031885862350464, "rewards/margins": 12.547468185424805, "rewards/rejected": -15.579354286193848, "step": 15907 }, { "epoch": 2.47, "learning_rate": 2.479762435810986e-06, "logits/chosen": -1.8242084980010986, "logits/rejected": -2.54085111618042, "logps/chosen": -149.71739196777344, "logps/rejected": -310.0519714355469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.920378684997559, "rewards/margins": 8.630290985107422, "rewards/rejected": -15.550668716430664, "step": 15908 }, { "epoch": 2.47, "learning_rate": 2.4790289952798382e-06, "logits/chosen": -1.371786117553711, "logits/rejected": -2.688192844390869, "logps/chosen": -162.64089965820312, "logps/rejected": -715.1444702148438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.843697547912598, "rewards/margins": 11.700285911560059, "rewards/rejected": -19.543983459472656, "step": 15909 }, { "epoch": 2.47, "learning_rate": 2.47829555474869e-06, "logits/chosen": -1.6019175052642822, "logits/rejected": -2.5759129524230957, "logps/chosen": -205.90086364746094, "logps/rejected": -420.16558837890625, "loss": 0.1594, "rewards/accuracies": 1.0, "rewards/chosen": -4.409661293029785, "rewards/margins": 8.063224792480469, "rewards/rejected": -12.472885131835938, "step": 15910 }, { "epoch": 2.47, "learning_rate": 2.4775621142175424e-06, "logits/chosen": -1.6796303987503052, "logits/rejected": -2.611100196838379, "logps/chosen": -149.26329040527344, "logps/rejected": -363.6245422363281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.559107780456543, "rewards/margins": 9.087577819824219, "rewards/rejected": -15.646686553955078, "step": 15911 }, { "epoch": 2.47, "learning_rate": 2.4768286736863947e-06, "logits/chosen": -2.252248764038086, "logits/rejected": -2.829310417175293, "logps/chosen": -221.51614379882812, "logps/rejected": -366.50494384765625, "loss": 1.9236, "rewards/accuracies": 0.5, "rewards/chosen": -7.147729873657227, "rewards/margins": 3.6674249172210693, "rewards/rejected": -10.815154075622559, "step": 15912 }, { "epoch": 2.47, "learning_rate": 2.476095233155247e-06, "logits/chosen": -2.5671706199645996, "logits/rejected": -2.986025810241699, "logps/chosen": -143.13194274902344, "logps/rejected": -269.09149169921875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -8.552164077758789, "rewards/margins": 7.608011245727539, "rewards/rejected": -16.160175323486328, "step": 15913 }, { "epoch": 2.47, "learning_rate": 2.475361792624099e-06, "logits/chosen": -1.9728492498397827, "logits/rejected": -2.7878689765930176, "logps/chosen": -118.95260620117188, "logps/rejected": -307.52728271484375, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -3.6313018798828125, "rewards/margins": 7.98530912399292, "rewards/rejected": -11.61661148071289, "step": 15914 }, { "epoch": 2.48, "learning_rate": 2.4746283520929508e-06, "logits/chosen": -1.62504243850708, "logits/rejected": -2.6318390369415283, "logps/chosen": -190.67678833007812, "logps/rejected": -520.025634765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.971706390380859, "rewards/margins": 10.3267822265625, "rewards/rejected": -17.29848861694336, "step": 15915 }, { "epoch": 2.48, "learning_rate": 2.473894911561803e-06, "logits/chosen": -2.740919351577759, "logits/rejected": -2.9307668209075928, "logps/chosen": -170.28868103027344, "logps/rejected": -351.75054931640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.011598587036133, "rewards/margins": 11.39775562286377, "rewards/rejected": -14.409354209899902, "step": 15916 }, { "epoch": 2.48, "learning_rate": 2.473161471030655e-06, "logits/chosen": -2.6240177154541016, "logits/rejected": -2.053832769393921, "logps/chosen": -140.17420959472656, "logps/rejected": -139.3786163330078, "loss": 0.0223, "rewards/accuracies": 1.0, "rewards/chosen": -6.485116004943848, "rewards/margins": 3.879833221435547, "rewards/rejected": -10.364949226379395, "step": 15917 }, { "epoch": 2.48, "learning_rate": 2.4724280304995073e-06, "logits/chosen": -2.1676111221313477, "logits/rejected": -2.80755352973938, "logps/chosen": -110.80479431152344, "logps/rejected": -348.49273681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.843570709228516, "rewards/margins": 9.451581954956055, "rewards/rejected": -15.29515266418457, "step": 15918 }, { "epoch": 2.48, "learning_rate": 2.471694589968359e-06, "logits/chosen": -2.6100234985351562, "logits/rejected": -2.9654312133789062, "logps/chosen": -267.5915832519531, "logps/rejected": -473.5232849121094, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.62230110168457, "rewards/margins": 8.602856636047363, "rewards/rejected": -16.22515869140625, "step": 15919 }, { "epoch": 2.48, "learning_rate": 2.4709611494372114e-06, "logits/chosen": -2.087880849838257, "logits/rejected": -2.7557640075683594, "logps/chosen": -380.45751953125, "logps/rejected": -642.6661987304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.9800615310668945, "rewards/margins": 17.615184783935547, "rewards/rejected": -25.595245361328125, "step": 15920 }, { "epoch": 2.48, "learning_rate": 2.4702277089060637e-06, "logits/chosen": -2.243378162384033, "logits/rejected": -2.901071548461914, "logps/chosen": -288.58160400390625, "logps/rejected": -502.46942138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.936051368713379, "rewards/margins": 10.432952880859375, "rewards/rejected": -15.36900520324707, "step": 15921 }, { "epoch": 2.48, "learning_rate": 2.469494268374916e-06, "logits/chosen": -2.6393165588378906, "logits/rejected": -2.660166025161743, "logps/chosen": -476.2135925292969, "logps/rejected": -469.2802734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.9979119300842285, "rewards/margins": 7.647373199462891, "rewards/rejected": -13.645284652709961, "step": 15922 }, { "epoch": 2.48, "learning_rate": 2.468760827843768e-06, "logits/chosen": -2.654127359390259, "logits/rejected": -2.9495315551757812, "logps/chosen": -175.64459228515625, "logps/rejected": -279.5189208984375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.478630065917969, "rewards/margins": 7.293267250061035, "rewards/rejected": -13.771897315979004, "step": 15923 }, { "epoch": 2.48, "learning_rate": 2.46802738731262e-06, "logits/chosen": -1.9466800689697266, "logits/rejected": -2.399394989013672, "logps/chosen": -194.04684448242188, "logps/rejected": -261.1448669433594, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.093608856201172, "rewards/margins": 7.26474666595459, "rewards/rejected": -11.358356475830078, "step": 15924 }, { "epoch": 2.48, "learning_rate": 2.467293946781472e-06, "logits/chosen": -2.0358855724334717, "logits/rejected": -2.861607313156128, "logps/chosen": -111.40655517578125, "logps/rejected": -358.5361328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.368819713592529, "rewards/margins": 9.667318344116211, "rewards/rejected": -15.036138534545898, "step": 15925 }, { "epoch": 2.48, "learning_rate": 2.466560506250324e-06, "logits/chosen": -1.8481758832931519, "logits/rejected": -2.554863214492798, "logps/chosen": -142.7657470703125, "logps/rejected": -281.96087646484375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -9.863079071044922, "rewards/margins": 5.157417297363281, "rewards/rejected": -15.020496368408203, "step": 15926 }, { "epoch": 2.48, "learning_rate": 2.4658270657191763e-06, "logits/chosen": -2.253936767578125, "logits/rejected": -2.887392044067383, "logps/chosen": -150.62350463867188, "logps/rejected": -279.9666442871094, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.695860862731934, "rewards/margins": 7.109989166259766, "rewards/rejected": -12.805850982666016, "step": 15927 }, { "epoch": 2.48, "learning_rate": 2.465093625188028e-06, "logits/chosen": -2.166245698928833, "logits/rejected": -2.422346353530884, "logps/chosen": -163.67018127441406, "logps/rejected": -422.8282470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.817507028579712, "rewards/margins": 13.229935646057129, "rewards/rejected": -17.047443389892578, "step": 15928 }, { "epoch": 2.48, "learning_rate": 2.464360184656881e-06, "logits/chosen": -1.2846451997756958, "logits/rejected": -2.6861989498138428, "logps/chosen": -205.8234405517578, "logps/rejected": -517.499267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.731077194213867, "rewards/margins": 9.29073715209961, "rewards/rejected": -20.021814346313477, "step": 15929 }, { "epoch": 2.48, "learning_rate": 2.4636267441257328e-06, "logits/chosen": -2.852025270462036, "logits/rejected": -2.8935210704803467, "logps/chosen": -911.9923095703125, "logps/rejected": -821.798095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.988590955734253, "rewards/margins": 13.924297332763672, "rewards/rejected": -17.912887573242188, "step": 15930 }, { "epoch": 2.48, "learning_rate": 2.462893303594585e-06, "logits/chosen": -2.5761005878448486, "logits/rejected": -2.7016265392303467, "logps/chosen": -97.03135681152344, "logps/rejected": -242.95350646972656, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -7.651951789855957, "rewards/margins": 4.675558090209961, "rewards/rejected": -12.327510833740234, "step": 15931 }, { "epoch": 2.48, "learning_rate": 2.462159863063437e-06, "logits/chosen": -2.606829881668091, "logits/rejected": -2.3676233291625977, "logps/chosen": -291.55963134765625, "logps/rejected": -467.650634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.070425510406494, "rewards/margins": 13.439929962158203, "rewards/rejected": -17.510356903076172, "step": 15932 }, { "epoch": 2.48, "learning_rate": 2.4614264225322892e-06, "logits/chosen": -2.352515459060669, "logits/rejected": -2.7222344875335693, "logps/chosen": -200.11929321289062, "logps/rejected": -484.3674621582031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.858959197998047, "rewards/margins": 9.861544609069824, "rewards/rejected": -17.720504760742188, "step": 15933 }, { "epoch": 2.48, "learning_rate": 2.460692982001141e-06, "logits/chosen": -2.7329821586608887, "logits/rejected": -1.9985867738723755, "logps/chosen": -289.7107849121094, "logps/rejected": -323.0848388671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.909822940826416, "rewards/margins": 8.588082313537598, "rewards/rejected": -15.497905731201172, "step": 15934 }, { "epoch": 2.48, "learning_rate": 2.459959541469993e-06, "logits/chosen": -1.920183777809143, "logits/rejected": -2.7536306381225586, "logps/chosen": -263.45770263671875, "logps/rejected": -626.8012084960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.25316047668457, "rewards/margins": 12.394912719726562, "rewards/rejected": -20.648073196411133, "step": 15935 }, { "epoch": 2.48, "learning_rate": 2.4592261009388453e-06, "logits/chosen": -2.6364715099334717, "logits/rejected": -2.7966368198394775, "logps/chosen": -77.45771789550781, "logps/rejected": -219.2161865234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.674121379852295, "rewards/margins": 7.832172393798828, "rewards/rejected": -12.506294250488281, "step": 15936 }, { "epoch": 2.48, "learning_rate": 2.4584926604076976e-06, "logits/chosen": -2.8662915229797363, "logits/rejected": -2.7107737064361572, "logps/chosen": -223.7827911376953, "logps/rejected": -148.32388305664062, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -6.7453413009643555, "rewards/margins": 4.398686408996582, "rewards/rejected": -11.144027709960938, "step": 15937 }, { "epoch": 2.48, "learning_rate": 2.45775921987655e-06, "logits/chosen": -1.4215385913848877, "logits/rejected": -2.8831539154052734, "logps/chosen": -121.48558807373047, "logps/rejected": -398.6783447265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.037009239196777, "rewards/margins": 7.6214070320129395, "rewards/rejected": -13.658416748046875, "step": 15938 }, { "epoch": 2.48, "learning_rate": 2.4570257793454018e-06, "logits/chosen": -1.711125135421753, "logits/rejected": -2.753464937210083, "logps/chosen": -137.39122009277344, "logps/rejected": -511.462890625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.201585292816162, "rewards/margins": 11.015792846679688, "rewards/rejected": -16.217378616333008, "step": 15939 }, { "epoch": 2.48, "learning_rate": 2.456292338814254e-06, "logits/chosen": -2.351804733276367, "logits/rejected": -2.529189109802246, "logps/chosen": -330.4544982910156, "logps/rejected": -333.25946044921875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.340264320373535, "rewards/margins": 6.585024356842041, "rewards/rejected": -12.925289154052734, "step": 15940 }, { "epoch": 2.48, "learning_rate": 2.455558898283106e-06, "logits/chosen": -2.758161783218384, "logits/rejected": -2.160090684890747, "logps/chosen": -216.53587341308594, "logps/rejected": -201.16851806640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8824949264526367, "rewards/margins": 9.225051879882812, "rewards/rejected": -12.107547760009766, "step": 15941 }, { "epoch": 2.48, "learning_rate": 2.4548254577519583e-06, "logits/chosen": -1.6006696224212646, "logits/rejected": -2.7583048343658447, "logps/chosen": -242.62460327148438, "logps/rejected": -460.057861328125, "loss": 0.8905, "rewards/accuracies": 0.5, "rewards/chosen": -6.720125198364258, "rewards/margins": 10.685293197631836, "rewards/rejected": -17.405418395996094, "step": 15942 }, { "epoch": 2.48, "learning_rate": 2.45409201722081e-06, "logits/chosen": -2.7261388301849365, "logits/rejected": -1.6639457941055298, "logps/chosen": -613.4136352539062, "logps/rejected": -636.54345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.848873138427734, "rewards/margins": 11.85743236541748, "rewards/rejected": -16.70630645751953, "step": 15943 }, { "epoch": 2.48, "learning_rate": 2.453358576689662e-06, "logits/chosen": -2.0560169219970703, "logits/rejected": -2.2958920001983643, "logps/chosen": -134.375244140625, "logps/rejected": -323.1919860839844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.280080795288086, "rewards/margins": 8.991586685180664, "rewards/rejected": -15.27166748046875, "step": 15944 }, { "epoch": 2.48, "learning_rate": 2.4526251361585143e-06, "logits/chosen": -2.721717596054077, "logits/rejected": -2.743650436401367, "logps/chosen": -268.10968017578125, "logps/rejected": -286.4970703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.019654273986816, "rewards/margins": 7.99346923828125, "rewards/rejected": -14.01312255859375, "step": 15945 }, { "epoch": 2.48, "learning_rate": 2.4518916956273666e-06, "logits/chosen": -2.3465895652770996, "logits/rejected": -2.751495361328125, "logps/chosen": -216.9175262451172, "logps/rejected": -522.6513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.122402191162109, "rewards/margins": 13.123489379882812, "rewards/rejected": -18.245891571044922, "step": 15946 }, { "epoch": 2.48, "learning_rate": 2.451158255096219e-06, "logits/chosen": -2.313321828842163, "logits/rejected": -2.770188808441162, "logps/chosen": -599.8157348632812, "logps/rejected": -459.06072998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.662519454956055, "rewards/margins": 10.607283592224121, "rewards/rejected": -18.26980209350586, "step": 15947 }, { "epoch": 2.48, "learning_rate": 2.450424814565071e-06, "logits/chosen": -2.359917402267456, "logits/rejected": -2.9139859676361084, "logps/chosen": -134.86419677734375, "logps/rejected": -405.85321044921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.684408187866211, "rewards/margins": 10.842294692993164, "rewards/rejected": -16.526702880859375, "step": 15948 }, { "epoch": 2.48, "learning_rate": 2.449691374033923e-06, "logits/chosen": -2.643141746520996, "logits/rejected": -2.774686336517334, "logps/chosen": -122.33309173583984, "logps/rejected": -347.35748291015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.431006908416748, "rewards/margins": 7.123770236968994, "rewards/rejected": -13.554777145385742, "step": 15949 }, { "epoch": 2.48, "learning_rate": 2.448957933502775e-06, "logits/chosen": -2.9759767055511475, "logits/rejected": -2.9651706218719482, "logps/chosen": -133.04425048828125, "logps/rejected": -242.75442504882812, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.9464967250823975, "rewards/margins": 9.835824966430664, "rewards/rejected": -13.78232192993164, "step": 15950 }, { "epoch": 2.48, "learning_rate": 2.4482244929716273e-06, "logits/chosen": -2.355919122695923, "logits/rejected": -2.9024112224578857, "logps/chosen": -229.1187744140625, "logps/rejected": -371.8028869628906, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -3.949540615081787, "rewards/margins": 6.635739326477051, "rewards/rejected": -10.585280418395996, "step": 15951 }, { "epoch": 2.48, "learning_rate": 2.447491052440479e-06, "logits/chosen": -2.362238645553589, "logits/rejected": -2.753338575363159, "logps/chosen": -182.80841064453125, "logps/rejected": -297.593994140625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -7.859743118286133, "rewards/margins": 6.886947154998779, "rewards/rejected": -14.74669075012207, "step": 15952 }, { "epoch": 2.48, "learning_rate": 2.446757611909331e-06, "logits/chosen": -1.570208191871643, "logits/rejected": -2.625185966491699, "logps/chosen": -201.50389099121094, "logps/rejected": -396.8294677734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.224156379699707, "rewards/margins": 9.294661521911621, "rewards/rejected": -18.518817901611328, "step": 15953 }, { "epoch": 2.48, "learning_rate": 2.4460241713781838e-06, "logits/chosen": -3.0097975730895996, "logits/rejected": -2.9098074436187744, "logps/chosen": -372.297607421875, "logps/rejected": -421.7384033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.746471405029297, "rewards/margins": 9.682641983032227, "rewards/rejected": -18.429113388061523, "step": 15954 }, { "epoch": 2.48, "learning_rate": 2.4452907308470356e-06, "logits/chosen": -1.407104253768921, "logits/rejected": -2.6773927211761475, "logps/chosen": -131.7088623046875, "logps/rejected": -392.0303039550781, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -6.949292182922363, "rewards/margins": 7.237689018249512, "rewards/rejected": -14.186981201171875, "step": 15955 }, { "epoch": 2.48, "learning_rate": 2.444557290315888e-06, "logits/chosen": -2.0098578929901123, "logits/rejected": -1.7651907205581665, "logps/chosen": -757.0460815429688, "logps/rejected": -702.2310791015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.789277076721191, "rewards/margins": 14.36613655090332, "rewards/rejected": -19.155414581298828, "step": 15956 }, { "epoch": 2.48, "learning_rate": 2.44382384978474e-06, "logits/chosen": -2.8351950645446777, "logits/rejected": -2.3559160232543945, "logps/chosen": -688.4788818359375, "logps/rejected": -509.6305847167969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.81987190246582, "rewards/margins": 9.092208862304688, "rewards/rejected": -15.912080764770508, "step": 15957 }, { "epoch": 2.48, "learning_rate": 2.443090409253592e-06, "logits/chosen": -2.43841814994812, "logits/rejected": -1.880420446395874, "logps/chosen": -356.9281005859375, "logps/rejected": -409.1374816894531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.552260398864746, "rewards/margins": 9.707852363586426, "rewards/rejected": -16.260112762451172, "step": 15958 }, { "epoch": 2.48, "learning_rate": 2.442356968722444e-06, "logits/chosen": -0.8753297328948975, "logits/rejected": -2.4932186603546143, "logps/chosen": -143.1802215576172, "logps/rejected": -540.6729736328125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.386349201202393, "rewards/margins": 8.194256782531738, "rewards/rejected": -13.580605506896973, "step": 15959 }, { "epoch": 2.48, "learning_rate": 2.4416235281912963e-06, "logits/chosen": -2.1750948429107666, "logits/rejected": -2.908156394958496, "logps/chosen": -158.62416076660156, "logps/rejected": -406.5063171386719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.547551155090332, "rewards/margins": 8.412934303283691, "rewards/rejected": -14.960485458374023, "step": 15960 }, { "epoch": 2.48, "learning_rate": 2.440890087660148e-06, "logits/chosen": -3.0474350452423096, "logits/rejected": -2.816866397857666, "logps/chosen": -148.76904296875, "logps/rejected": -236.42669677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.949790000915527, "rewards/margins": 10.078350067138672, "rewards/rejected": -19.028141021728516, "step": 15961 }, { "epoch": 2.48, "learning_rate": 2.4401566471290005e-06, "logits/chosen": -2.7990455627441406, "logits/rejected": -2.8683435916900635, "logps/chosen": -93.66200256347656, "logps/rejected": -274.6772155761719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.070539951324463, "rewards/margins": 10.082834243774414, "rewards/rejected": -13.153373718261719, "step": 15962 }, { "epoch": 2.48, "learning_rate": 2.439423206597853e-06, "logits/chosen": -1.8614721298217773, "logits/rejected": -2.596491813659668, "logps/chosen": -357.5562744140625, "logps/rejected": -563.226318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4561567306518555, "rewards/margins": 11.3760404586792, "rewards/rejected": -16.832197189331055, "step": 15963 }, { "epoch": 2.48, "learning_rate": 2.4386897660667047e-06, "logits/chosen": -2.7611582279205322, "logits/rejected": -2.7237579822540283, "logps/chosen": -246.88743591308594, "logps/rejected": -339.828857421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.5121307373046875, "rewards/margins": 8.7244291305542, "rewards/rejected": -14.236560821533203, "step": 15964 }, { "epoch": 2.48, "learning_rate": 2.437956325535557e-06, "logits/chosen": -2.8957998752593994, "logits/rejected": -2.3174314498901367, "logps/chosen": -445.2790832519531, "logps/rejected": -442.20947265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.783418655395508, "rewards/margins": 9.855100631713867, "rewards/rejected": -16.638519287109375, "step": 15965 }, { "epoch": 2.48, "learning_rate": 2.437222885004409e-06, "logits/chosen": -2.029960870742798, "logits/rejected": -2.6823384761810303, "logps/chosen": -215.36367797851562, "logps/rejected": -412.2733154296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.563996315002441, "rewards/margins": 8.7070951461792, "rewards/rejected": -16.27109146118164, "step": 15966 }, { "epoch": 2.48, "learning_rate": 2.436489444473261e-06, "logits/chosen": -2.3501734733581543, "logits/rejected": -2.5392684936523438, "logps/chosen": -375.9532470703125, "logps/rejected": -433.38873291015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.101693153381348, "rewards/margins": 12.547736167907715, "rewards/rejected": -17.649429321289062, "step": 15967 }, { "epoch": 2.48, "learning_rate": 2.435756003942113e-06, "logits/chosen": -2.7606451511383057, "logits/rejected": -2.5503792762756348, "logps/chosen": -863.9352416992188, "logps/rejected": -651.3582153320312, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -9.159806251525879, "rewards/margins": 7.245128631591797, "rewards/rejected": -16.40493392944336, "step": 15968 }, { "epoch": 2.48, "learning_rate": 2.4350225634109653e-06, "logits/chosen": -2.0725607872009277, "logits/rejected": -2.5032613277435303, "logps/chosen": -234.84561157226562, "logps/rejected": -434.6763916015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.28713321685791, "rewards/margins": 12.215508460998535, "rewards/rejected": -16.502641677856445, "step": 15969 }, { "epoch": 2.48, "learning_rate": 2.4342891228798172e-06, "logits/chosen": -2.720216751098633, "logits/rejected": -2.2469513416290283, "logps/chosen": -246.55770874023438, "logps/rejected": -366.3140869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.636547088623047, "rewards/margins": 13.72542953491211, "rewards/rejected": -17.361976623535156, "step": 15970 }, { "epoch": 2.48, "learning_rate": 2.43355568234867e-06, "logits/chosen": -2.4473788738250732, "logits/rejected": -2.9231348037719727, "logps/chosen": -100.37833404541016, "logps/rejected": -276.44927978515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.223543643951416, "rewards/margins": 7.542694091796875, "rewards/rejected": -14.766237258911133, "step": 15971 }, { "epoch": 2.48, "learning_rate": 2.432822241817522e-06, "logits/chosen": -2.517063617706299, "logits/rejected": -2.299262285232544, "logps/chosen": -436.9268798828125, "logps/rejected": -426.7830810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.180318832397461, "rewards/margins": 11.45095157623291, "rewards/rejected": -15.631270408630371, "step": 15972 }, { "epoch": 2.48, "learning_rate": 2.4320888012863737e-06, "logits/chosen": -2.8026325702667236, "logits/rejected": -2.9268171787261963, "logps/chosen": -166.68463134765625, "logps/rejected": -370.7398681640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.137773513793945, "rewards/margins": 14.51948070526123, "rewards/rejected": -21.65725326538086, "step": 15973 }, { "epoch": 2.48, "learning_rate": 2.431355360755226e-06, "logits/chosen": -1.6397604942321777, "logits/rejected": -2.376507520675659, "logps/chosen": -191.32037353515625, "logps/rejected": -446.46002197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.591638565063477, "rewards/margins": 12.435770034790039, "rewards/rejected": -18.027408599853516, "step": 15974 }, { "epoch": 2.48, "learning_rate": 2.430621920224078e-06, "logits/chosen": -2.8908472061157227, "logits/rejected": -2.223766326904297, "logps/chosen": -661.244873046875, "logps/rejected": -557.5729370117188, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.953310966491699, "rewards/margins": 8.286312103271484, "rewards/rejected": -15.239622116088867, "step": 15975 }, { "epoch": 2.48, "learning_rate": 2.42988847969293e-06, "logits/chosen": -2.370943069458008, "logits/rejected": -2.5710439682006836, "logps/chosen": -284.4114074707031, "logps/rejected": -253.98162841796875, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -9.164721488952637, "rewards/margins": 4.654533386230469, "rewards/rejected": -13.819255828857422, "step": 15976 }, { "epoch": 2.48, "learning_rate": 2.429155039161782e-06, "logits/chosen": -1.481542944908142, "logits/rejected": -2.5281028747558594, "logps/chosen": -112.22286987304688, "logps/rejected": -500.3518371582031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.633976936340332, "rewards/margins": 13.768804550170898, "rewards/rejected": -20.402782440185547, "step": 15977 }, { "epoch": 2.48, "learning_rate": 2.4284215986306344e-06, "logits/chosen": -1.8829913139343262, "logits/rejected": -2.704097032546997, "logps/chosen": -425.13824462890625, "logps/rejected": -1226.3326416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.031560897827148, "rewards/margins": 15.301465034484863, "rewards/rejected": -21.333026885986328, "step": 15978 }, { "epoch": 2.49, "learning_rate": 2.4276881580994867e-06, "logits/chosen": -2.027958869934082, "logits/rejected": -1.9020037651062012, "logps/chosen": -385.443359375, "logps/rejected": -504.6727294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.588980674743652, "rewards/margins": 13.168171882629395, "rewards/rejected": -17.757152557373047, "step": 15979 }, { "epoch": 2.49, "learning_rate": 2.426954717568339e-06, "logits/chosen": -2.625884532928467, "logits/rejected": -1.4193482398986816, "logps/chosen": -261.38897705078125, "logps/rejected": -277.5435791015625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.212666988372803, "rewards/margins": 7.494320869445801, "rewards/rejected": -13.706987380981445, "step": 15980 }, { "epoch": 2.49, "learning_rate": 2.426221277037191e-06, "logits/chosen": -2.6038973331451416, "logits/rejected": -2.204688787460327, "logps/chosen": -275.4644775390625, "logps/rejected": -415.23663330078125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -8.458478927612305, "rewards/margins": 14.22241497039795, "rewards/rejected": -22.68089485168457, "step": 15981 }, { "epoch": 2.49, "learning_rate": 2.425487836506043e-06, "logits/chosen": -1.7573429346084595, "logits/rejected": -2.954878568649292, "logps/chosen": -180.36447143554688, "logps/rejected": -618.0195922851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5621705055236816, "rewards/margins": 13.99995231628418, "rewards/rejected": -17.562122344970703, "step": 15982 }, { "epoch": 2.49, "learning_rate": 2.424754395974895e-06, "logits/chosen": -2.131071090698242, "logits/rejected": -2.9367520809173584, "logps/chosen": -485.42608642578125, "logps/rejected": -1030.875732421875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -8.924016952514648, "rewards/margins": 7.166213035583496, "rewards/rejected": -16.09023094177246, "step": 15983 }, { "epoch": 2.49, "learning_rate": 2.424020955443747e-06, "logits/chosen": -2.6039843559265137, "logits/rejected": -2.898334503173828, "logps/chosen": -367.6061096191406, "logps/rejected": -570.185302734375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -10.319433212280273, "rewards/margins": 7.684351444244385, "rewards/rejected": -18.0037841796875, "step": 15984 }, { "epoch": 2.49, "learning_rate": 2.423287514912599e-06, "logits/chosen": -2.5308172702789307, "logits/rejected": -3.0787410736083984, "logps/chosen": -450.2707824707031, "logps/rejected": -538.578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.127312660217285, "rewards/margins": 9.628316879272461, "rewards/rejected": -16.755630493164062, "step": 15985 }, { "epoch": 2.49, "learning_rate": 2.422554074381451e-06, "logits/chosen": -2.3094699382781982, "logits/rejected": -2.434089422225952, "logps/chosen": -168.516357421875, "logps/rejected": -466.1025695800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9156970977783203, "rewards/margins": 12.582157135009766, "rewards/rejected": -16.497854232788086, "step": 15986 }, { "epoch": 2.49, "learning_rate": 2.4218206338503034e-06, "logits/chosen": -2.6446852684020996, "logits/rejected": -2.508941411972046, "logps/chosen": -394.0119934082031, "logps/rejected": -503.000244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.763449192047119, "rewards/margins": 13.731813430786133, "rewards/rejected": -17.495262145996094, "step": 15987 }, { "epoch": 2.49, "learning_rate": 2.4210871933191557e-06, "logits/chosen": -2.9199564456939697, "logits/rejected": -2.054551839828491, "logps/chosen": -234.1769256591797, "logps/rejected": -120.97065734863281, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -3.690920352935791, "rewards/margins": 6.274615287780762, "rewards/rejected": -9.965536117553711, "step": 15988 }, { "epoch": 2.49, "learning_rate": 2.420353752788008e-06, "logits/chosen": -2.76216459274292, "logits/rejected": -2.651421308517456, "logps/chosen": -547.24560546875, "logps/rejected": -721.740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.19838285446167, "rewards/margins": 11.94149398803711, "rewards/rejected": -19.139877319335938, "step": 15989 }, { "epoch": 2.49, "learning_rate": 2.41962031225686e-06, "logits/chosen": -2.5266807079315186, "logits/rejected": -2.455678939819336, "logps/chosen": -478.7529296875, "logps/rejected": -556.6395263671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.90703010559082, "rewards/margins": 11.479881286621094, "rewards/rejected": -20.386911392211914, "step": 15990 }, { "epoch": 2.49, "learning_rate": 2.418886871725712e-06, "logits/chosen": -2.4415948390960693, "logits/rejected": -2.7903127670288086, "logps/chosen": -116.09805297851562, "logps/rejected": -487.604736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.604474067687988, "rewards/margins": 14.610611915588379, "rewards/rejected": -21.215085983276367, "step": 15991 }, { "epoch": 2.49, "learning_rate": 2.418153431194564e-06, "logits/chosen": -3.003955125808716, "logits/rejected": -3.012415647506714, "logps/chosen": -277.72064208984375, "logps/rejected": -454.8420715332031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.786924362182617, "rewards/margins": 12.871026039123535, "rewards/rejected": -21.65795135498047, "step": 15992 }, { "epoch": 2.49, "learning_rate": 2.417419990663416e-06, "logits/chosen": -3.078071355819702, "logits/rejected": -2.8410584926605225, "logps/chosen": -185.87965393066406, "logps/rejected": -204.77182006835938, "loss": 0.1445, "rewards/accuracies": 1.0, "rewards/chosen": -6.297906875610352, "rewards/margins": 6.349301338195801, "rewards/rejected": -12.647209167480469, "step": 15993 }, { "epoch": 2.49, "learning_rate": 2.4166865501322682e-06, "logits/chosen": -2.3713314533233643, "logits/rejected": -2.7854361534118652, "logps/chosen": -94.23141479492188, "logps/rejected": -256.557861328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.457409381866455, "rewards/margins": 8.909913063049316, "rewards/rejected": -15.36732292175293, "step": 15994 }, { "epoch": 2.49, "learning_rate": 2.41595310960112e-06, "logits/chosen": -1.9440348148345947, "logits/rejected": -2.6746602058410645, "logps/chosen": -156.29066467285156, "logps/rejected": -404.9786682128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.877358436584473, "rewards/margins": 11.0068941116333, "rewards/rejected": -15.884252548217773, "step": 15995 }, { "epoch": 2.49, "learning_rate": 2.4152196690699724e-06, "logits/chosen": -2.52036452293396, "logits/rejected": -2.0631000995635986, "logps/chosen": -526.5421752929688, "logps/rejected": -560.1882934570312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.877630233764648, "rewards/margins": 10.736251831054688, "rewards/rejected": -20.613882064819336, "step": 15996 }, { "epoch": 2.49, "learning_rate": 2.4144862285388247e-06, "logits/chosen": -2.660369634628296, "logits/rejected": -2.7782180309295654, "logps/chosen": -141.18223571777344, "logps/rejected": -308.885498046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.599635124206543, "rewards/margins": 9.482545852661133, "rewards/rejected": -14.082180976867676, "step": 15997 }, { "epoch": 2.49, "learning_rate": 2.413752788007677e-06, "logits/chosen": -2.0810649394989014, "logits/rejected": -2.9808762073516846, "logps/chosen": -170.7178955078125, "logps/rejected": -410.240478515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.384615421295166, "rewards/margins": 9.836684226989746, "rewards/rejected": -15.22130012512207, "step": 15998 }, { "epoch": 2.49, "learning_rate": 2.413019347476529e-06, "logits/chosen": -2.6149444580078125, "logits/rejected": -2.7327687740325928, "logps/chosen": -331.0235290527344, "logps/rejected": -505.58270263671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.639634132385254, "rewards/margins": 12.338252067565918, "rewards/rejected": -18.977886199951172, "step": 15999 }, { "epoch": 2.49, "learning_rate": 2.412285906945381e-06, "logits/chosen": -2.5573363304138184, "logits/rejected": -2.8388373851776123, "logps/chosen": -101.60417938232422, "logps/rejected": -317.83544921875, "loss": 0.0146, "rewards/accuracies": 1.0, "rewards/chosen": -6.071666717529297, "rewards/margins": 7.64329195022583, "rewards/rejected": -13.714958190917969, "step": 16000 }, { "epoch": 2.49, "learning_rate": 2.411552466414233e-06, "logits/chosen": -2.626082420349121, "logits/rejected": -2.7134697437286377, "logps/chosen": -472.4629211425781, "logps/rejected": -515.8160400390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.519752502441406, "rewards/margins": 8.959711074829102, "rewards/rejected": -16.479463577270508, "step": 16001 }, { "epoch": 2.49, "learning_rate": 2.4108190258830854e-06, "logits/chosen": -2.496277332305908, "logits/rejected": -2.850238084793091, "logps/chosen": -121.60794067382812, "logps/rejected": -388.9216613769531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.600165367126465, "rewards/margins": 12.252985000610352, "rewards/rejected": -16.8531494140625, "step": 16002 }, { "epoch": 2.49, "learning_rate": 2.4100855853519372e-06, "logits/chosen": -2.658940076828003, "logits/rejected": -2.0435421466827393, "logps/chosen": -541.3970947265625, "logps/rejected": -512.679443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.834405422210693, "rewards/margins": 11.464983940124512, "rewards/rejected": -17.299388885498047, "step": 16003 }, { "epoch": 2.49, "learning_rate": 2.409352144820789e-06, "logits/chosen": -1.1090844869613647, "logits/rejected": -2.6755177974700928, "logps/chosen": -194.6221923828125, "logps/rejected": -713.6654663085938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.870280742645264, "rewards/margins": 15.788788795471191, "rewards/rejected": -20.659069061279297, "step": 16004 }, { "epoch": 2.49, "learning_rate": 2.408618704289642e-06, "logits/chosen": -3.0985419750213623, "logits/rejected": -2.3025736808776855, "logps/chosen": -339.1596984863281, "logps/rejected": -273.97296142578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.239526271820068, "rewards/margins": 8.766661643981934, "rewards/rejected": -14.006187438964844, "step": 16005 }, { "epoch": 2.49, "learning_rate": 2.4078852637584937e-06, "logits/chosen": -3.0464842319488525, "logits/rejected": -2.513378858566284, "logps/chosen": -504.51422119140625, "logps/rejected": -515.3142700195312, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -5.758955478668213, "rewards/margins": 8.859406471252441, "rewards/rejected": -14.618362426757812, "step": 16006 }, { "epoch": 2.49, "learning_rate": 2.407151823227346e-06, "logits/chosen": -2.766596794128418, "logits/rejected": -2.896571397781372, "logps/chosen": -126.1426773071289, "logps/rejected": -288.7738342285156, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.299562454223633, "rewards/margins": 7.939133167266846, "rewards/rejected": -15.23869514465332, "step": 16007 }, { "epoch": 2.49, "learning_rate": 2.406418382696198e-06, "logits/chosen": -1.647923231124878, "logits/rejected": -2.7808685302734375, "logps/chosen": -222.40122985839844, "logps/rejected": -508.4122314453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.4978578090667725, "rewards/margins": 13.18997573852539, "rewards/rejected": -15.687833786010742, "step": 16008 }, { "epoch": 2.49, "learning_rate": 2.40568494216505e-06, "logits/chosen": -2.5680031776428223, "logits/rejected": -1.5951789617538452, "logps/chosen": -259.8477783203125, "logps/rejected": -379.9125061035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.1951470375061035, "rewards/margins": 13.231358528137207, "rewards/rejected": -17.42650604248047, "step": 16009 }, { "epoch": 2.49, "learning_rate": 2.404951501633902e-06, "logits/chosen": -2.5684468746185303, "logits/rejected": -2.2420597076416016, "logps/chosen": -208.33709716796875, "logps/rejected": -337.5365295410156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.401749610900879, "rewards/margins": 9.698969841003418, "rewards/rejected": -15.100719451904297, "step": 16010 }, { "epoch": 2.49, "learning_rate": 2.4042180611027544e-06, "logits/chosen": -2.9527134895324707, "logits/rejected": -2.5741705894470215, "logps/chosen": -356.74957275390625, "logps/rejected": -347.972900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.712316513061523, "rewards/margins": 12.013795852661133, "rewards/rejected": -20.726112365722656, "step": 16011 }, { "epoch": 2.49, "learning_rate": 2.4034846205716063e-06, "logits/chosen": -1.5417509078979492, "logits/rejected": -2.697648525238037, "logps/chosen": -228.00466918945312, "logps/rejected": -387.94683837890625, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -6.188238620758057, "rewards/margins": 7.386877536773682, "rewards/rejected": -13.575116157531738, "step": 16012 }, { "epoch": 2.49, "learning_rate": 2.4027511800404586e-06, "logits/chosen": -2.756542682647705, "logits/rejected": -2.281450033187866, "logps/chosen": -585.9185791015625, "logps/rejected": -616.489990234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.967218399047852, "rewards/margins": 9.104222297668457, "rewards/rejected": -18.071441650390625, "step": 16013 }, { "epoch": 2.49, "learning_rate": 2.402017739509311e-06, "logits/chosen": -2.6081385612487793, "logits/rejected": -3.0409939289093018, "logps/chosen": -402.1255798339844, "logps/rejected": -521.610595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.728857040405273, "rewards/margins": 11.865829467773438, "rewards/rejected": -18.59468650817871, "step": 16014 }, { "epoch": 2.49, "learning_rate": 2.4012842989781627e-06, "logits/chosen": -1.9237648248672485, "logits/rejected": -2.8831498622894287, "logps/chosen": -299.38525390625, "logps/rejected": -723.7195434570312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.774892330169678, "rewards/margins": 12.07480239868164, "rewards/rejected": -19.849695205688477, "step": 16015 }, { "epoch": 2.49, "learning_rate": 2.400550858447015e-06, "logits/chosen": -1.8607310056686401, "logits/rejected": -2.76965594291687, "logps/chosen": -173.50656127929688, "logps/rejected": -528.7020874023438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.161949157714844, "rewards/margins": 9.083412170410156, "rewards/rejected": -17.245361328125, "step": 16016 }, { "epoch": 2.49, "learning_rate": 2.399817417915867e-06, "logits/chosen": -1.4854291677474976, "logits/rejected": -2.9606552124023438, "logps/chosen": -236.2664337158203, "logps/rejected": -713.4420776367188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.683197975158691, "rewards/margins": 9.053977966308594, "rewards/rejected": -15.737176895141602, "step": 16017 }, { "epoch": 2.49, "learning_rate": 2.3990839773847192e-06, "logits/chosen": -1.6961493492126465, "logits/rejected": -2.8129565715789795, "logps/chosen": -165.0892791748047, "logps/rejected": -480.76080322265625, "loss": 0.031, "rewards/accuracies": 1.0, "rewards/chosen": -8.03183364868164, "rewards/margins": 8.755417823791504, "rewards/rejected": -16.787250518798828, "step": 16018 }, { "epoch": 2.49, "learning_rate": 2.398350536853571e-06, "logits/chosen": -2.0558364391326904, "logits/rejected": -2.8425636291503906, "logps/chosen": -108.6700668334961, "logps/rejected": -341.10174560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.985922336578369, "rewards/margins": 11.968171119689941, "rewards/rejected": -16.95409393310547, "step": 16019 }, { "epoch": 2.49, "learning_rate": 2.3976170963224234e-06, "logits/chosen": -2.231086015701294, "logits/rejected": -2.5711653232574463, "logps/chosen": -279.23345947265625, "logps/rejected": -553.185791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.378678321838379, "rewards/margins": 12.54488754272461, "rewards/rejected": -17.923564910888672, "step": 16020 }, { "epoch": 2.49, "learning_rate": 2.3968836557912753e-06, "logits/chosen": -2.662841558456421, "logits/rejected": -2.6881704330444336, "logps/chosen": -266.7198486328125, "logps/rejected": -320.66021728515625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.218709945678711, "rewards/margins": 6.750978469848633, "rewards/rejected": -12.969688415527344, "step": 16021 }, { "epoch": 2.49, "learning_rate": 2.3961502152601276e-06, "logits/chosen": -2.0980331897735596, "logits/rejected": -2.7059266567230225, "logps/chosen": -292.62957763671875, "logps/rejected": -434.5065612792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.700818061828613, "rewards/margins": 11.91806411743164, "rewards/rejected": -19.618881225585938, "step": 16022 }, { "epoch": 2.49, "learning_rate": 2.39541677472898e-06, "logits/chosen": -2.9281415939331055, "logits/rejected": -3.103173017501831, "logps/chosen": -73.81021118164062, "logps/rejected": -393.19927978515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.076871395111084, "rewards/margins": 9.438255310058594, "rewards/rejected": -14.51512622833252, "step": 16023 }, { "epoch": 2.49, "learning_rate": 2.3946833341978318e-06, "logits/chosen": -1.8476262092590332, "logits/rejected": -2.739750385284424, "logps/chosen": -157.73977661132812, "logps/rejected": -360.10577392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8382248878479004, "rewards/margins": 9.61661148071289, "rewards/rejected": -13.454835891723633, "step": 16024 }, { "epoch": 2.49, "learning_rate": 2.393949893666684e-06, "logits/chosen": -2.413902997970581, "logits/rejected": -1.6860337257385254, "logps/chosen": -242.21240234375, "logps/rejected": -407.4482421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -10.661683082580566, "rewards/margins": 11.078876495361328, "rewards/rejected": -21.740558624267578, "step": 16025 }, { "epoch": 2.49, "learning_rate": 2.393216453135536e-06, "logits/chosen": -2.18353533744812, "logits/rejected": -2.7338788509368896, "logps/chosen": -811.8828125, "logps/rejected": -808.324462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.139431953430176, "rewards/margins": 12.66202449798584, "rewards/rejected": -21.801456451416016, "step": 16026 }, { "epoch": 2.49, "learning_rate": 2.3924830126043883e-06, "logits/chosen": -2.294520378112793, "logits/rejected": -2.8421080112457275, "logps/chosen": -209.76194763183594, "logps/rejected": -308.117919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.945475101470947, "rewards/margins": 9.33894157409668, "rewards/rejected": -14.284416198730469, "step": 16027 }, { "epoch": 2.49, "learning_rate": 2.39174957207324e-06, "logits/chosen": -1.3603925704956055, "logits/rejected": -2.4224765300750732, "logps/chosen": -265.77813720703125, "logps/rejected": -571.89794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.843644618988037, "rewards/margins": 13.007853507995605, "rewards/rejected": -18.851497650146484, "step": 16028 }, { "epoch": 2.49, "learning_rate": 2.3910161315420924e-06, "logits/chosen": -2.7738139629364014, "logits/rejected": -2.277437686920166, "logps/chosen": -395.28179931640625, "logps/rejected": -384.6592712402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.828706741333008, "rewards/margins": 12.336153030395508, "rewards/rejected": -18.164859771728516, "step": 16029 }, { "epoch": 2.49, "learning_rate": 2.3902826910109447e-06, "logits/chosen": -1.779355764389038, "logits/rejected": -2.5959038734436035, "logps/chosen": -307.2717590332031, "logps/rejected": -438.6244201660156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.00228214263916, "rewards/margins": 9.539572715759277, "rewards/rejected": -14.541854858398438, "step": 16030 }, { "epoch": 2.49, "learning_rate": 2.389549250479797e-06, "logits/chosen": -1.0132118463516235, "logits/rejected": -2.083592414855957, "logps/chosen": -225.37477111816406, "logps/rejected": -554.62158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3121232986450195, "rewards/margins": 15.01138687133789, "rewards/rejected": -21.323509216308594, "step": 16031 }, { "epoch": 2.49, "learning_rate": 2.388815809948649e-06, "logits/chosen": -0.9743466973304749, "logits/rejected": -1.5877493619918823, "logps/chosen": -227.64749145507812, "logps/rejected": -735.0964965820312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.345117568969727, "rewards/margins": 15.428256034851074, "rewards/rejected": -23.773374557495117, "step": 16032 }, { "epoch": 2.49, "learning_rate": 2.388082369417501e-06, "logits/chosen": -2.5709006786346436, "logits/rejected": -1.8928972482681274, "logps/chosen": -429.69970703125, "logps/rejected": -472.30120849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.151061058044434, "rewards/margins": 11.77983570098877, "rewards/rejected": -19.930896759033203, "step": 16033 }, { "epoch": 2.49, "learning_rate": 2.387348928886353e-06, "logits/chosen": -1.4686172008514404, "logits/rejected": -2.245555877685547, "logps/chosen": -190.04025268554688, "logps/rejected": -386.66961669921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.333494186401367, "rewards/margins": 10.49209213256836, "rewards/rejected": -14.825586318969727, "step": 16034 }, { "epoch": 2.49, "learning_rate": 2.386615488355205e-06, "logits/chosen": -1.8941359519958496, "logits/rejected": -2.5908825397491455, "logps/chosen": -160.30177307128906, "logps/rejected": -363.25732421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.840463638305664, "rewards/margins": 8.259828567504883, "rewards/rejected": -16.100292205810547, "step": 16035 }, { "epoch": 2.49, "learning_rate": 2.3858820478240573e-06, "logits/chosen": -2.9537570476531982, "logits/rejected": -2.8751885890960693, "logps/chosen": -338.83392333984375, "logps/rejected": -302.20416259765625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -7.874886512756348, "rewards/margins": 6.084412574768066, "rewards/rejected": -13.959299087524414, "step": 16036 }, { "epoch": 2.49, "learning_rate": 2.385148607292909e-06, "logits/chosen": -2.539639472961426, "logits/rejected": -2.7459216117858887, "logps/chosen": -85.32164001464844, "logps/rejected": -371.4521179199219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.192396640777588, "rewards/margins": 8.590030670166016, "rewards/rejected": -14.782427787780762, "step": 16037 }, { "epoch": 2.49, "learning_rate": 2.3844151667617615e-06, "logits/chosen": -2.1018223762512207, "logits/rejected": -2.6155216693878174, "logps/chosen": -230.59695434570312, "logps/rejected": -336.760498046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.109947204589844, "rewards/margins": 10.535872459411621, "rewards/rejected": -17.64582061767578, "step": 16038 }, { "epoch": 2.49, "learning_rate": 2.3836817262306138e-06, "logits/chosen": -1.8694614171981812, "logits/rejected": -2.706697463989258, "logps/chosen": -227.9321746826172, "logps/rejected": -403.7203063964844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.6223621368408203, "rewards/margins": 10.386518478393555, "rewards/rejected": -14.008880615234375, "step": 16039 }, { "epoch": 2.49, "learning_rate": 2.382948285699466e-06, "logits/chosen": -2.0420796871185303, "logits/rejected": -2.6477794647216797, "logps/chosen": -709.5963134765625, "logps/rejected": -690.0115966796875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.468486785888672, "rewards/margins": 11.582210540771484, "rewards/rejected": -18.050697326660156, "step": 16040 }, { "epoch": 2.49, "learning_rate": 2.382214845168318e-06, "logits/chosen": -2.936091661453247, "logits/rejected": -3.2461137771606445, "logps/chosen": -192.82876586914062, "logps/rejected": -317.93743896484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.2297163009643555, "rewards/margins": 8.101828575134277, "rewards/rejected": -15.331544876098633, "step": 16041 }, { "epoch": 2.49, "learning_rate": 2.38148140463717e-06, "logits/chosen": -2.145348310470581, "logits/rejected": -2.777071237564087, "logps/chosen": -228.22866821289062, "logps/rejected": -493.2306213378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.718696594238281, "rewards/margins": 13.293127059936523, "rewards/rejected": -23.011821746826172, "step": 16042 }, { "epoch": 2.5, "learning_rate": 2.380747964106022e-06, "logits/chosen": -2.385915994644165, "logits/rejected": -3.0240962505340576, "logps/chosen": -218.95565795898438, "logps/rejected": -549.8232421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.913914680480957, "rewards/margins": 8.7977294921875, "rewards/rejected": -14.711644172668457, "step": 16043 }, { "epoch": 2.5, "learning_rate": 2.380014523574874e-06, "logits/chosen": -2.5817906856536865, "logits/rejected": -2.2996504306793213, "logps/chosen": -209.1966552734375, "logps/rejected": -283.37188720703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.456058502197266, "rewards/margins": 8.471881866455078, "rewards/rejected": -16.927940368652344, "step": 16044 }, { "epoch": 2.5, "learning_rate": 2.3792810830437263e-06, "logits/chosen": -1.6450577974319458, "logits/rejected": -2.5415549278259277, "logps/chosen": -220.9734344482422, "logps/rejected": -447.1409912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.136282920837402, "rewards/margins": 10.953659057617188, "rewards/rejected": -18.089942932128906, "step": 16045 }, { "epoch": 2.5, "learning_rate": 2.378547642512578e-06, "logits/chosen": -1.7543920278549194, "logits/rejected": -2.089398145675659, "logps/chosen": -1048.8662109375, "logps/rejected": -671.76513671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.826187610626221, "rewards/margins": 16.216711044311523, "rewards/rejected": -22.04290008544922, "step": 16046 }, { "epoch": 2.5, "learning_rate": 2.377814201981431e-06, "logits/chosen": -2.391616106033325, "logits/rejected": -2.827300548553467, "logps/chosen": -140.753173828125, "logps/rejected": -316.4173583984375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -6.3556694984436035, "rewards/margins": 9.244525909423828, "rewards/rejected": -15.600194931030273, "step": 16047 }, { "epoch": 2.5, "learning_rate": 2.3770807614502828e-06, "logits/chosen": -2.697173833847046, "logits/rejected": -1.966670274734497, "logps/chosen": -268.77777099609375, "logps/rejected": -376.2556457519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0807037353515625, "rewards/margins": 12.41191291809082, "rewards/rejected": -16.492616653442383, "step": 16048 }, { "epoch": 2.5, "learning_rate": 2.376347320919135e-06, "logits/chosen": -2.2795188426971436, "logits/rejected": -2.8641841411590576, "logps/chosen": -148.92283630371094, "logps/rejected": -248.63546752929688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.400993347167969, "rewards/margins": 7.2762370109558105, "rewards/rejected": -11.677230834960938, "step": 16049 }, { "epoch": 2.5, "learning_rate": 2.375613880387987e-06, "logits/chosen": -2.6431424617767334, "logits/rejected": -3.0353400707244873, "logps/chosen": -285.5164794921875, "logps/rejected": -444.8579406738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.593635559082031, "rewards/margins": 13.25117301940918, "rewards/rejected": -17.84480857849121, "step": 16050 }, { "epoch": 2.5, "learning_rate": 2.3748804398568393e-06, "logits/chosen": -2.0804803371429443, "logits/rejected": -2.3988211154937744, "logps/chosen": -206.3600311279297, "logps/rejected": -278.5065002441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.242969989776611, "rewards/margins": 10.872777938842773, "rewards/rejected": -16.115747451782227, "step": 16051 }, { "epoch": 2.5, "learning_rate": 2.374146999325691e-06, "logits/chosen": -1.97950279712677, "logits/rejected": -2.723181962966919, "logps/chosen": -174.12966918945312, "logps/rejected": -444.7990417480469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.645543575286865, "rewards/margins": 9.29928207397461, "rewards/rejected": -14.944826126098633, "step": 16052 }, { "epoch": 2.5, "learning_rate": 2.373413558794543e-06, "logits/chosen": -0.7018293738365173, "logits/rejected": -1.7699267864227295, "logps/chosen": -207.8938446044922, "logps/rejected": -460.7904968261719, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/chosen": -6.3520050048828125, "rewards/margins": 9.004864692687988, "rewards/rejected": -15.356870651245117, "step": 16053 }, { "epoch": 2.5, "learning_rate": 2.3726801182633953e-06, "logits/chosen": -2.33432674407959, "logits/rejected": -2.6005091667175293, "logps/chosen": -729.0713500976562, "logps/rejected": -854.7720947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.315402507781982, "rewards/margins": 14.293081283569336, "rewards/rejected": -21.608484268188477, "step": 16054 }, { "epoch": 2.5, "learning_rate": 2.3719466777322476e-06, "logits/chosen": -2.77211332321167, "logits/rejected": -2.441495418548584, "logps/chosen": -437.828857421875, "logps/rejected": -468.48822021484375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -3.648966073989868, "rewards/margins": 11.102165222167969, "rewards/rejected": -14.751131057739258, "step": 16055 }, { "epoch": 2.5, "learning_rate": 2.3712132372011e-06, "logits/chosen": -2.4956507682800293, "logits/rejected": -2.8598217964172363, "logps/chosen": -159.2099151611328, "logps/rejected": -404.4013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.873119354248047, "rewards/margins": 13.177480697631836, "rewards/rejected": -20.050600051879883, "step": 16056 }, { "epoch": 2.5, "learning_rate": 2.370479796669952e-06, "logits/chosen": -2.82889986038208, "logits/rejected": -2.7357020378112793, "logps/chosen": -436.8194580078125, "logps/rejected": -402.58038330078125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.654755592346191, "rewards/margins": 7.9924163818359375, "rewards/rejected": -12.647171020507812, "step": 16057 }, { "epoch": 2.5, "learning_rate": 2.369746356138804e-06, "logits/chosen": -2.3367412090301514, "logits/rejected": -2.663503408432007, "logps/chosen": -109.95722961425781, "logps/rejected": -298.604736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.078622817993164, "rewards/margins": 11.286150932312012, "rewards/rejected": -16.36477279663086, "step": 16058 }, { "epoch": 2.5, "learning_rate": 2.369012915607656e-06, "logits/chosen": -2.7534189224243164, "logits/rejected": -2.802293539047241, "logps/chosen": -460.4090576171875, "logps/rejected": -420.96942138671875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -9.60521411895752, "rewards/margins": 7.875988483428955, "rewards/rejected": -17.481203079223633, "step": 16059 }, { "epoch": 2.5, "learning_rate": 2.3682794750765083e-06, "logits/chosen": -1.2824172973632812, "logits/rejected": -2.3912389278411865, "logps/chosen": -643.7005615234375, "logps/rejected": -804.0386962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.978649139404297, "rewards/margins": 11.695281982421875, "rewards/rejected": -18.673931121826172, "step": 16060 }, { "epoch": 2.5, "learning_rate": 2.36754603454536e-06, "logits/chosen": -2.6933252811431885, "logits/rejected": -2.356846570968628, "logps/chosen": -483.3417663574219, "logps/rejected": -410.51373291015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.082819938659668, "rewards/margins": 7.979790687561035, "rewards/rejected": -14.062610626220703, "step": 16061 }, { "epoch": 2.5, "learning_rate": 2.366812594014212e-06, "logits/chosen": -2.4563236236572266, "logits/rejected": -2.3250436782836914, "logps/chosen": -243.40380859375, "logps/rejected": -472.7847900390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -10.296005249023438, "rewards/margins": 7.993238925933838, "rewards/rejected": -18.289243698120117, "step": 16062 }, { "epoch": 2.5, "learning_rate": 2.3660791534830643e-06, "logits/chosen": -2.9696927070617676, "logits/rejected": -1.7567386627197266, "logps/chosen": -752.509033203125, "logps/rejected": -515.433837890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.882593154907227, "rewards/margins": 9.751593589782715, "rewards/rejected": -15.634186744689941, "step": 16063 }, { "epoch": 2.5, "learning_rate": 2.3653457129519166e-06, "logits/chosen": -2.6037094593048096, "logits/rejected": -2.327420949935913, "logps/chosen": -349.8072814941406, "logps/rejected": -494.5194091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.179557800292969, "rewards/margins": 12.01279067993164, "rewards/rejected": -16.19234848022461, "step": 16064 }, { "epoch": 2.5, "learning_rate": 2.364612272420769e-06, "logits/chosen": -2.6726460456848145, "logits/rejected": -2.5305371284484863, "logps/chosen": -124.27764892578125, "logps/rejected": -289.9508056640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.876580238342285, "rewards/margins": 10.777149200439453, "rewards/rejected": -16.653730392456055, "step": 16065 }, { "epoch": 2.5, "learning_rate": 2.363878831889621e-06, "logits/chosen": -2.6612708568573, "logits/rejected": -2.653061628341675, "logps/chosen": -447.6262512207031, "logps/rejected": -426.20819091796875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.473970890045166, "rewards/margins": 11.015960693359375, "rewards/rejected": -18.489931106567383, "step": 16066 }, { "epoch": 2.5, "learning_rate": 2.363145391358473e-06, "logits/chosen": -2.648027181625366, "logits/rejected": -1.712652564048767, "logps/chosen": -412.4954528808594, "logps/rejected": -354.52679443359375, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -6.022559642791748, "rewards/margins": 8.821922302246094, "rewards/rejected": -14.844482421875, "step": 16067 }, { "epoch": 2.5, "learning_rate": 2.362411950827325e-06, "logits/chosen": -2.8598783016204834, "logits/rejected": -2.9644501209259033, "logps/chosen": -67.598388671875, "logps/rejected": -230.98468017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.966787338256836, "rewards/margins": 11.758935928344727, "rewards/rejected": -16.725723266601562, "step": 16068 }, { "epoch": 2.5, "learning_rate": 2.3616785102961773e-06, "logits/chosen": -2.884685516357422, "logits/rejected": -2.8569018840789795, "logps/chosen": -133.84170532226562, "logps/rejected": -220.08152770996094, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.50932502746582, "rewards/margins": 6.664623260498047, "rewards/rejected": -12.173948287963867, "step": 16069 }, { "epoch": 2.5, "learning_rate": 2.360945069765029e-06, "logits/chosen": -2.857492446899414, "logits/rejected": -2.45515775680542, "logps/chosen": -213.87783813476562, "logps/rejected": -355.5857238769531, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/chosen": -7.303767681121826, "rewards/margins": 12.111690521240234, "rewards/rejected": -19.415456771850586, "step": 16070 }, { "epoch": 2.5, "learning_rate": 2.360211629233881e-06, "logits/chosen": -2.681857109069824, "logits/rejected": -3.205763816833496, "logps/chosen": -109.80529022216797, "logps/rejected": -411.6222229003906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.435674667358398, "rewards/margins": 8.631730079650879, "rewards/rejected": -16.067405700683594, "step": 16071 }, { "epoch": 2.5, "learning_rate": 2.3594781887027338e-06, "logits/chosen": -2.7050745487213135, "logits/rejected": -2.7010722160339355, "logps/chosen": -210.36602783203125, "logps/rejected": -262.6875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.144270420074463, "rewards/margins": 7.887298583984375, "rewards/rejected": -13.03156852722168, "step": 16072 }, { "epoch": 2.5, "learning_rate": 2.3587447481715857e-06, "logits/chosen": -2.069582939147949, "logits/rejected": -2.6899380683898926, "logps/chosen": -158.52230834960938, "logps/rejected": -400.941650390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.304973125457764, "rewards/margins": 12.898123741149902, "rewards/rejected": -17.20309829711914, "step": 16073 }, { "epoch": 2.5, "learning_rate": 2.358011307640438e-06, "logits/chosen": -1.0551642179489136, "logits/rejected": -2.540588617324829, "logps/chosen": -129.9324493408203, "logps/rejected": -527.6448974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.610078811645508, "rewards/margins": 15.359278678894043, "rewards/rejected": -21.969356536865234, "step": 16074 }, { "epoch": 2.5, "learning_rate": 2.35727786710929e-06, "logits/chosen": -1.7626179456710815, "logits/rejected": -2.739448070526123, "logps/chosen": -196.40777587890625, "logps/rejected": -449.2994384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.352618217468262, "rewards/margins": 15.400554656982422, "rewards/rejected": -20.753173828125, "step": 16075 }, { "epoch": 2.5, "learning_rate": 2.356544426578142e-06, "logits/chosen": -2.1447854042053223, "logits/rejected": -2.7697832584381104, "logps/chosen": -137.76124572753906, "logps/rejected": -337.69415283203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.228375434875488, "rewards/margins": 8.301122665405273, "rewards/rejected": -16.529499053955078, "step": 16076 }, { "epoch": 2.5, "learning_rate": 2.355810986046994e-06, "logits/chosen": -2.8252339363098145, "logits/rejected": -2.1745715141296387, "logps/chosen": -208.99998474121094, "logps/rejected": -305.8025817871094, "loss": 0.0237, "rewards/accuracies": 1.0, "rewards/chosen": -7.101206302642822, "rewards/margins": 8.334508895874023, "rewards/rejected": -15.435714721679688, "step": 16077 }, { "epoch": 2.5, "learning_rate": 2.3550775455158463e-06, "logits/chosen": -2.6876909732818604, "logits/rejected": -3.126464366912842, "logps/chosen": -145.33856201171875, "logps/rejected": -463.88623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.463777542114258, "rewards/margins": 11.677936553955078, "rewards/rejected": -18.141714096069336, "step": 16078 }, { "epoch": 2.5, "learning_rate": 2.354344104984698e-06, "logits/chosen": -2.6159539222717285, "logits/rejected": -1.4985462427139282, "logps/chosen": -276.563720703125, "logps/rejected": -208.69070434570312, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -4.18996524810791, "rewards/margins": 9.179574012756348, "rewards/rejected": -13.369539260864258, "step": 16079 }, { "epoch": 2.5, "learning_rate": 2.3536106644535505e-06, "logits/chosen": -2.054966688156128, "logits/rejected": -2.8563594818115234, "logps/chosen": -137.07177734375, "logps/rejected": -280.6447448730469, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -6.2894439697265625, "rewards/margins": 7.8913068771362305, "rewards/rejected": -14.18075180053711, "step": 16080 }, { "epoch": 2.5, "learning_rate": 2.352877223922403e-06, "logits/chosen": -2.4614064693450928, "logits/rejected": -1.9556618928909302, "logps/chosen": -288.86224365234375, "logps/rejected": -387.76715087890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.468991279602051, "rewards/margins": 9.699085235595703, "rewards/rejected": -17.168075561523438, "step": 16081 }, { "epoch": 2.5, "learning_rate": 2.3521437833912547e-06, "logits/chosen": -2.837623357772827, "logits/rejected": -2.600449562072754, "logps/chosen": -238.44479370117188, "logps/rejected": -246.506591796875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -4.933114051818848, "rewards/margins": 5.577383518218994, "rewards/rejected": -10.510498046875, "step": 16082 }, { "epoch": 2.5, "learning_rate": 2.351410342860107e-06, "logits/chosen": -2.8964650630950928, "logits/rejected": -2.9132015705108643, "logps/chosen": -338.7413330078125, "logps/rejected": -446.971923828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.970514297485352, "rewards/margins": 8.508171081542969, "rewards/rejected": -13.47868537902832, "step": 16083 }, { "epoch": 2.5, "learning_rate": 2.350676902328959e-06, "logits/chosen": -1.653620958328247, "logits/rejected": -2.4985921382904053, "logps/chosen": -221.2924346923828, "logps/rejected": -299.2003173828125, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -6.3132195472717285, "rewards/margins": 4.417932987213135, "rewards/rejected": -10.731152534484863, "step": 16084 }, { "epoch": 2.5, "learning_rate": 2.349943461797811e-06, "logits/chosen": -2.4974820613861084, "logits/rejected": -2.6987221240997314, "logps/chosen": -85.57090759277344, "logps/rejected": -345.9761962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.734500885009766, "rewards/margins": 9.89543628692627, "rewards/rejected": -15.629936218261719, "step": 16085 }, { "epoch": 2.5, "learning_rate": 2.349210021266663e-06, "logits/chosen": -2.6791086196899414, "logits/rejected": -2.2234702110290527, "logps/chosen": -870.7935180664062, "logps/rejected": -654.7833862304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.200325012207031, "rewards/margins": 12.745845794677734, "rewards/rejected": -18.946170806884766, "step": 16086 }, { "epoch": 2.5, "learning_rate": 2.3484765807355153e-06, "logits/chosen": -2.7901833057403564, "logits/rejected": -1.5534673929214478, "logps/chosen": -293.91082763671875, "logps/rejected": -233.5355987548828, "loss": 0.2448, "rewards/accuracies": 1.0, "rewards/chosen": -6.63727331161499, "rewards/margins": 5.346685409545898, "rewards/rejected": -11.983959197998047, "step": 16087 }, { "epoch": 2.5, "learning_rate": 2.3477431402043672e-06, "logits/chosen": -1.7661426067352295, "logits/rejected": -2.4738686084747314, "logps/chosen": -230.32156372070312, "logps/rejected": -337.11822509765625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -6.4560651779174805, "rewards/margins": 5.52006721496582, "rewards/rejected": -11.976133346557617, "step": 16088 }, { "epoch": 2.5, "learning_rate": 2.34700969967322e-06, "logits/chosen": -2.283198595046997, "logits/rejected": -1.510359525680542, "logps/chosen": -259.0531005859375, "logps/rejected": -239.97674560546875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.034443378448486, "rewards/margins": 6.9753289222717285, "rewards/rejected": -14.009772300720215, "step": 16089 }, { "epoch": 2.5, "learning_rate": 2.346276259142072e-06, "logits/chosen": -1.4686611890792847, "logits/rejected": -2.579627752304077, "logps/chosen": -129.44708251953125, "logps/rejected": -548.7391357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3707501888275146, "rewards/margins": 12.126449584960938, "rewards/rejected": -15.497200012207031, "step": 16090 }, { "epoch": 2.5, "learning_rate": 2.3455428186109237e-06, "logits/chosen": -0.7142592072486877, "logits/rejected": -2.5021936893463135, "logps/chosen": -177.49195861816406, "logps/rejected": -423.113525390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.192024230957031, "rewards/margins": 9.009211540222168, "rewards/rejected": -16.201236724853516, "step": 16091 }, { "epoch": 2.5, "learning_rate": 2.344809378079776e-06, "logits/chosen": -2.1028342247009277, "logits/rejected": -2.394190788269043, "logps/chosen": -161.94366455078125, "logps/rejected": -164.99905395507812, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.939851760864258, "rewards/margins": 6.131777763366699, "rewards/rejected": -11.071629524230957, "step": 16092 }, { "epoch": 2.5, "learning_rate": 2.344075937548628e-06, "logits/chosen": -2.0017309188842773, "logits/rejected": -2.905039072036743, "logps/chosen": -301.9315490722656, "logps/rejected": -557.4381103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.354433536529541, "rewards/margins": 14.609814643859863, "rewards/rejected": -20.964248657226562, "step": 16093 }, { "epoch": 2.5, "learning_rate": 2.34334249701748e-06, "logits/chosen": -1.8655401468276978, "logits/rejected": -2.724614381790161, "logps/chosen": -235.49392700195312, "logps/rejected": -650.5977783203125, "loss": 0.2573, "rewards/accuracies": 1.0, "rewards/chosen": -11.528687477111816, "rewards/margins": 5.692839622497559, "rewards/rejected": -17.221527099609375, "step": 16094 }, { "epoch": 2.5, "learning_rate": 2.342609056486332e-06, "logits/chosen": -3.1345062255859375, "logits/rejected": -2.685082197189331, "logps/chosen": -355.70574951171875, "logps/rejected": -235.20083618164062, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.3762397766113281, "rewards/margins": 7.90489387512207, "rewards/rejected": -9.281133651733398, "step": 16095 }, { "epoch": 2.5, "learning_rate": 2.3418756159551844e-06, "logits/chosen": -2.678041458129883, "logits/rejected": -2.2916791439056396, "logps/chosen": -450.9559326171875, "logps/rejected": -364.3013610839844, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": -8.090890884399414, "rewards/margins": 6.775085926055908, "rewards/rejected": -14.865976333618164, "step": 16096 }, { "epoch": 2.5, "learning_rate": 2.3411421754240367e-06, "logits/chosen": -1.645491123199463, "logits/rejected": -2.531233787536621, "logps/chosen": -144.5970458984375, "logps/rejected": -386.74163818359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.631041049957275, "rewards/margins": 9.386104583740234, "rewards/rejected": -15.017145156860352, "step": 16097 }, { "epoch": 2.5, "learning_rate": 2.340408734892889e-06, "logits/chosen": -2.4609742164611816, "logits/rejected": -2.2632787227630615, "logps/chosen": -861.9390258789062, "logps/rejected": -690.4605712890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.455216407775879, "rewards/margins": 9.899946212768555, "rewards/rejected": -16.355161666870117, "step": 16098 }, { "epoch": 2.5, "learning_rate": 2.339675294361741e-06, "logits/chosen": -2.8710174560546875, "logits/rejected": -2.458590030670166, "logps/chosen": -278.0926818847656, "logps/rejected": -270.21533203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.529315948486328, "rewards/margins": 8.59617805480957, "rewards/rejected": -18.125492095947266, "step": 16099 }, { "epoch": 2.5, "learning_rate": 2.338941853830593e-06, "logits/chosen": -1.5072277784347534, "logits/rejected": -2.395901679992676, "logps/chosen": -572.75927734375, "logps/rejected": -875.4269409179688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.990057945251465, "rewards/margins": 13.198342323303223, "rewards/rejected": -20.188400268554688, "step": 16100 }, { "epoch": 2.5, "learning_rate": 2.338208413299445e-06, "logits/chosen": -2.50765323638916, "logits/rejected": -2.819143772125244, "logps/chosen": -370.8671875, "logps/rejected": -499.01629638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.3231377601623535, "rewards/margins": 11.345418930053711, "rewards/rejected": -16.66855812072754, "step": 16101 }, { "epoch": 2.5, "learning_rate": 2.337474972768297e-06, "logits/chosen": -2.2699804306030273, "logits/rejected": -1.867967963218689, "logps/chosen": -229.22305297851562, "logps/rejected": -319.10040283203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.98845100402832, "rewards/margins": 9.166074752807617, "rewards/rejected": -17.154525756835938, "step": 16102 }, { "epoch": 2.5, "learning_rate": 2.336741532237149e-06, "logits/chosen": -3.057422399520874, "logits/rejected": -2.3088629245758057, "logps/chosen": -500.45562744140625, "logps/rejected": -409.6593322753906, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.043910503387451, "rewards/margins": 9.221416473388672, "rewards/rejected": -16.26532745361328, "step": 16103 }, { "epoch": 2.5, "learning_rate": 2.336008091706001e-06, "logits/chosen": -2.4447948932647705, "logits/rejected": -2.6841511726379395, "logps/chosen": -87.35545349121094, "logps/rejected": -244.54745483398438, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.900784015655518, "rewards/margins": 7.868473529815674, "rewards/rejected": -13.769257545471191, "step": 16104 }, { "epoch": 2.5, "learning_rate": 2.3352746511748534e-06, "logits/chosen": -1.795166254043579, "logits/rejected": -2.8012802600860596, "logps/chosen": -143.90487670898438, "logps/rejected": -432.63916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.00074577331543, "rewards/margins": 12.252717971801758, "rewards/rejected": -17.253463745117188, "step": 16105 }, { "epoch": 2.5, "learning_rate": 2.3345412106437057e-06, "logits/chosen": -2.8004629611968994, "logits/rejected": -2.270062208175659, "logps/chosen": -314.6951904296875, "logps/rejected": -395.8330078125, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -9.855031967163086, "rewards/margins": 6.946773529052734, "rewards/rejected": -16.80180549621582, "step": 16106 }, { "epoch": 2.5, "learning_rate": 2.333807770112558e-06, "logits/chosen": -1.745685338973999, "logits/rejected": -2.7774600982666016, "logps/chosen": -164.89208984375, "logps/rejected": -565.9725341796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.795069217681885, "rewards/margins": 12.136226654052734, "rewards/rejected": -17.93129539489746, "step": 16107 }, { "epoch": 2.51, "learning_rate": 2.33307432958141e-06, "logits/chosen": -2.0965397357940674, "logits/rejected": -2.670487880706787, "logps/chosen": -251.32241821289062, "logps/rejected": -419.67730712890625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.4861555099487305, "rewards/margins": 8.456552505493164, "rewards/rejected": -13.942708969116211, "step": 16108 }, { "epoch": 2.51, "learning_rate": 2.332340889050262e-06, "logits/chosen": -2.1663873195648193, "logits/rejected": -2.8769102096557617, "logps/chosen": -168.25787353515625, "logps/rejected": -439.1759033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.860295295715332, "rewards/margins": 12.860172271728516, "rewards/rejected": -18.720468521118164, "step": 16109 }, { "epoch": 2.51, "learning_rate": 2.331607448519114e-06, "logits/chosen": -1.870709776878357, "logits/rejected": -2.3785667419433594, "logps/chosen": -163.3603515625, "logps/rejected": -371.0934143066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.027369499206543, "rewards/margins": 14.33827018737793, "rewards/rejected": -20.36564064025879, "step": 16110 }, { "epoch": 2.51, "learning_rate": 2.330874007987966e-06, "logits/chosen": -2.522984504699707, "logits/rejected": -3.016282558441162, "logps/chosen": -247.80581665039062, "logps/rejected": -442.4182434082031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.586841583251953, "rewards/margins": 10.242626190185547, "rewards/rejected": -14.8294677734375, "step": 16111 }, { "epoch": 2.51, "learning_rate": 2.3301405674568182e-06, "logits/chosen": -2.639906644821167, "logits/rejected": -2.846604824066162, "logps/chosen": -174.92910766601562, "logps/rejected": -521.3447265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.406291961669922, "rewards/margins": 12.07846736907959, "rewards/rejected": -17.484760284423828, "step": 16112 }, { "epoch": 2.51, "learning_rate": 2.32940712692567e-06, "logits/chosen": -1.7967606782913208, "logits/rejected": -2.8030033111572266, "logps/chosen": -237.74087524414062, "logps/rejected": -504.9996643066406, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.21389627456665, "rewards/margins": 7.102327346801758, "rewards/rejected": -11.31622314453125, "step": 16113 }, { "epoch": 2.51, "learning_rate": 2.328673686394523e-06, "logits/chosen": -1.0383176803588867, "logits/rejected": -2.6071369647979736, "logps/chosen": -124.16050720214844, "logps/rejected": -444.0946044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.174276828765869, "rewards/margins": 11.090993881225586, "rewards/rejected": -18.265270233154297, "step": 16114 }, { "epoch": 2.51, "learning_rate": 2.3279402458633747e-06, "logits/chosen": -2.90346622467041, "logits/rejected": -2.0292067527770996, "logps/chosen": -533.2047119140625, "logps/rejected": -293.3703918457031, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -3.3388092517852783, "rewards/margins": 8.802440643310547, "rewards/rejected": -12.141249656677246, "step": 16115 }, { "epoch": 2.51, "learning_rate": 2.327206805332227e-06, "logits/chosen": -2.7282183170318604, "logits/rejected": -1.4308316707611084, "logps/chosen": -327.56573486328125, "logps/rejected": -276.7109375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -3.5003480911254883, "rewards/margins": 6.1714019775390625, "rewards/rejected": -9.67175006866455, "step": 16116 }, { "epoch": 2.51, "learning_rate": 2.326473364801079e-06, "logits/chosen": -2.7256205081939697, "logits/rejected": -2.839341163635254, "logps/chosen": -119.85077667236328, "logps/rejected": -523.9600830078125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.863152980804443, "rewards/margins": 11.187054634094238, "rewards/rejected": -18.050209045410156, "step": 16117 }, { "epoch": 2.51, "learning_rate": 2.325739924269931e-06, "logits/chosen": -2.8159449100494385, "logits/rejected": -2.6700456142425537, "logps/chosen": -351.4178161621094, "logps/rejected": -441.5364990234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.791707038879395, "rewards/margins": 8.816292762756348, "rewards/rejected": -18.607999801635742, "step": 16118 }, { "epoch": 2.51, "learning_rate": 2.325006483738783e-06, "logits/chosen": -2.534515619277954, "logits/rejected": -2.101715326309204, "logps/chosen": -202.8311309814453, "logps/rejected": -220.48117065429688, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -3.716355800628662, "rewards/margins": 7.171740531921387, "rewards/rejected": -10.88809585571289, "step": 16119 }, { "epoch": 2.51, "learning_rate": 2.3242730432076354e-06, "logits/chosen": -2.70938777923584, "logits/rejected": -2.7107033729553223, "logps/chosen": -291.9294128417969, "logps/rejected": -378.0029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.254878044128418, "rewards/margins": 12.649353981018066, "rewards/rejected": -17.904232025146484, "step": 16120 }, { "epoch": 2.51, "learning_rate": 2.3235396026764873e-06, "logits/chosen": -1.453850269317627, "logits/rejected": -2.7384347915649414, "logps/chosen": -569.26806640625, "logps/rejected": -690.0463256835938, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.719970703125, "rewards/margins": 10.38354778289795, "rewards/rejected": -16.103519439697266, "step": 16121 }, { "epoch": 2.51, "learning_rate": 2.3228061621453396e-06, "logits/chosen": -2.0635199546813965, "logits/rejected": -2.789713144302368, "logps/chosen": -295.91424560546875, "logps/rejected": -309.4520568847656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.405374526977539, "rewards/margins": 9.62447738647461, "rewards/rejected": -14.029851913452148, "step": 16122 }, { "epoch": 2.51, "learning_rate": 2.322072721614192e-06, "logits/chosen": -1.831244707107544, "logits/rejected": -2.053370475769043, "logps/chosen": -627.8221435546875, "logps/rejected": -713.58642578125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -12.168279647827148, "rewards/margins": 12.020439147949219, "rewards/rejected": -24.188720703125, "step": 16123 }, { "epoch": 2.51, "learning_rate": 2.3213392810830437e-06, "logits/chosen": -1.9991037845611572, "logits/rejected": -2.6628425121307373, "logps/chosen": -102.8438720703125, "logps/rejected": -272.06463623046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.782812118530273, "rewards/margins": 8.084564208984375, "rewards/rejected": -15.867376327514648, "step": 16124 }, { "epoch": 2.51, "learning_rate": 2.320605840551896e-06, "logits/chosen": -1.3861278295516968, "logits/rejected": -2.3989298343658447, "logps/chosen": -175.98883056640625, "logps/rejected": -545.364990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.888933181762695, "rewards/margins": 14.281655311584473, "rewards/rejected": -19.170589447021484, "step": 16125 }, { "epoch": 2.51, "learning_rate": 2.319872400020748e-06, "logits/chosen": -1.5086390972137451, "logits/rejected": -2.937086343765259, "logps/chosen": -225.4060516357422, "logps/rejected": -946.648193359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.028689384460449, "rewards/margins": 15.128870010375977, "rewards/rejected": -20.15755844116211, "step": 16126 }, { "epoch": 2.51, "learning_rate": 2.3191389594896002e-06, "logits/chosen": -1.8092797994613647, "logits/rejected": -2.66086745262146, "logps/chosen": -379.6266174316406, "logps/rejected": -565.2454833984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -8.702768325805664, "rewards/margins": 9.565826416015625, "rewards/rejected": -18.26859474182129, "step": 16127 }, { "epoch": 2.51, "learning_rate": 2.318405518958452e-06, "logits/chosen": -2.7989981174468994, "logits/rejected": -2.706998109817505, "logps/chosen": -213.82420349121094, "logps/rejected": -339.66705322265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.560044765472412, "rewards/margins": 7.9746623039245605, "rewards/rejected": -11.534708023071289, "step": 16128 }, { "epoch": 2.51, "learning_rate": 2.3176720784273044e-06, "logits/chosen": -2.6304824352264404, "logits/rejected": -1.36959969997406, "logps/chosen": -460.36700439453125, "logps/rejected": -401.5082092285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.957241058349609, "rewards/margins": 11.987823486328125, "rewards/rejected": -17.945064544677734, "step": 16129 }, { "epoch": 2.51, "learning_rate": 2.3169386378961563e-06, "logits/chosen": -2.1656076908111572, "logits/rejected": -2.6798927783966064, "logps/chosen": -205.10421752929688, "logps/rejected": -279.5260009765625, "loss": 1.9087, "rewards/accuracies": 0.5, "rewards/chosen": -10.089080810546875, "rewards/margins": 3.971285343170166, "rewards/rejected": -14.060365676879883, "step": 16130 }, { "epoch": 2.51, "learning_rate": 2.3162051973650086e-06, "logits/chosen": -2.655550956726074, "logits/rejected": -2.7450063228607178, "logps/chosen": -142.87881469726562, "logps/rejected": -374.08209228515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.3864521980285645, "rewards/margins": 10.711055755615234, "rewards/rejected": -17.09750747680664, "step": 16131 }, { "epoch": 2.51, "learning_rate": 2.315471756833861e-06, "logits/chosen": -1.7570313215255737, "logits/rejected": -2.513765335083008, "logps/chosen": -186.64837646484375, "logps/rejected": -331.85467529296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.684410095214844, "rewards/margins": 7.916651725769043, "rewards/rejected": -12.601062774658203, "step": 16132 }, { "epoch": 2.51, "learning_rate": 2.3147383163027128e-06, "logits/chosen": -2.242030143737793, "logits/rejected": -2.859278917312622, "logps/chosen": -110.22886657714844, "logps/rejected": -301.2935791015625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -7.562440872192383, "rewards/margins": 8.058162689208984, "rewards/rejected": -15.620603561401367, "step": 16133 }, { "epoch": 2.51, "learning_rate": 2.314004875771565e-06, "logits/chosen": -2.6549482345581055, "logits/rejected": -2.6757900714874268, "logps/chosen": -525.7545166015625, "logps/rejected": -591.601806640625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.461524963378906, "rewards/margins": 9.457194328308105, "rewards/rejected": -17.918720245361328, "step": 16134 }, { "epoch": 2.51, "learning_rate": 2.313271435240417e-06, "logits/chosen": -2.6295645236968994, "logits/rejected": -2.9240360260009766, "logps/chosen": -899.7788696289062, "logps/rejected": -639.871826171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.329985618591309, "rewards/margins": 9.279767990112305, "rewards/rejected": -16.609752655029297, "step": 16135 }, { "epoch": 2.51, "learning_rate": 2.3125379947092692e-06, "logits/chosen": -2.7464606761932373, "logits/rejected": -2.769717216491699, "logps/chosen": -158.77940368652344, "logps/rejected": -361.51617431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.7060651779174805, "rewards/margins": 13.608509063720703, "rewards/rejected": -20.3145751953125, "step": 16136 }, { "epoch": 2.51, "learning_rate": 2.311804554178121e-06, "logits/chosen": -2.6455166339874268, "logits/rejected": -2.8431217670440674, "logps/chosen": -128.38233947753906, "logps/rejected": -523.9891967773438, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.641196250915527, "rewards/margins": 11.433154106140137, "rewards/rejected": -16.074350357055664, "step": 16137 }, { "epoch": 2.51, "learning_rate": 2.3110711136469734e-06, "logits/chosen": -2.5638058185577393, "logits/rejected": -2.2457408905029297, "logps/chosen": -362.3431701660156, "logps/rejected": -408.1824951171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.832183837890625, "rewards/margins": 12.038209915161133, "rewards/rejected": -18.870393753051758, "step": 16138 }, { "epoch": 2.51, "learning_rate": 2.3103376731158257e-06, "logits/chosen": -1.73542320728302, "logits/rejected": -2.9924862384796143, "logps/chosen": -353.53680419921875, "logps/rejected": -573.7255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.432995796203613, "rewards/margins": 10.76419734954834, "rewards/rejected": -18.197193145751953, "step": 16139 }, { "epoch": 2.51, "learning_rate": 2.3096042325846776e-06, "logits/chosen": -1.9026416540145874, "logits/rejected": -2.7757036685943604, "logps/chosen": -154.7590789794922, "logps/rejected": -563.06494140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.468621730804443, "rewards/margins": 10.520922660827637, "rewards/rejected": -17.989543914794922, "step": 16140 }, { "epoch": 2.51, "learning_rate": 2.30887079205353e-06, "logits/chosen": -2.915462017059326, "logits/rejected": -2.9047443866729736, "logps/chosen": -194.39044189453125, "logps/rejected": -286.78729248046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.633100986480713, "rewards/margins": 9.705726623535156, "rewards/rejected": -15.338827133178711, "step": 16141 }, { "epoch": 2.51, "learning_rate": 2.3081373515223818e-06, "logits/chosen": -2.5419135093688965, "logits/rejected": -2.797217607498169, "logps/chosen": -174.0013427734375, "logps/rejected": -311.2409362792969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.392498016357422, "rewards/margins": 8.69528579711914, "rewards/rejected": -18.087783813476562, "step": 16142 }, { "epoch": 2.51, "learning_rate": 2.307403910991234e-06, "logits/chosen": -2.5665206909179688, "logits/rejected": -2.110520362854004, "logps/chosen": -739.54931640625, "logps/rejected": -783.1426391601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.396071910858154, "rewards/margins": 13.070451736450195, "rewards/rejected": -18.466524124145508, "step": 16143 }, { "epoch": 2.51, "learning_rate": 2.306670470460086e-06, "logits/chosen": -3.039034843444824, "logits/rejected": -2.9688987731933594, "logps/chosen": -552.85986328125, "logps/rejected": -508.04547119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.08164119720459, "rewards/margins": 11.768594741821289, "rewards/rejected": -16.850234985351562, "step": 16144 }, { "epoch": 2.51, "learning_rate": 2.3059370299289383e-06, "logits/chosen": -2.6295437812805176, "logits/rejected": -2.8777642250061035, "logps/chosen": -160.9722137451172, "logps/rejected": -358.6514892578125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -9.419236183166504, "rewards/margins": 6.724297523498535, "rewards/rejected": -16.14353370666504, "step": 16145 }, { "epoch": 2.51, "learning_rate": 2.30520358939779e-06, "logits/chosen": -0.6168714165687561, "logits/rejected": -2.6131067276000977, "logps/chosen": -116.90400695800781, "logps/rejected": -588.7957763671875, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -8.462289810180664, "rewards/margins": 7.9112091064453125, "rewards/rejected": -16.373498916625977, "step": 16146 }, { "epoch": 2.51, "learning_rate": 2.3044701488666424e-06, "logits/chosen": -2.32281231880188, "logits/rejected": -1.4141201972961426, "logps/chosen": -271.27398681640625, "logps/rejected": -202.69395446777344, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.013324737548828, "rewards/margins": 7.280221939086914, "rewards/rejected": -13.293546676635742, "step": 16147 }, { "epoch": 2.51, "learning_rate": 2.3037367083354947e-06, "logits/chosen": -1.8167610168457031, "logits/rejected": -2.6502983570098877, "logps/chosen": -132.7971954345703, "logps/rejected": -391.79644775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.304773330688477, "rewards/margins": 10.413808822631836, "rewards/rejected": -15.718582153320312, "step": 16148 }, { "epoch": 2.51, "learning_rate": 2.303003267804347e-06, "logits/chosen": -2.895235300064087, "logits/rejected": -2.2073073387145996, "logps/chosen": -317.99285888671875, "logps/rejected": -356.2146301269531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.094097852706909, "rewards/margins": 8.497159957885742, "rewards/rejected": -11.591257095336914, "step": 16149 }, { "epoch": 2.51, "learning_rate": 2.302269827273199e-06, "logits/chosen": -1.8272252082824707, "logits/rejected": -2.7294981479644775, "logps/chosen": -131.92445373535156, "logps/rejected": -447.633056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.277163505554199, "rewards/margins": 11.533354759216309, "rewards/rejected": -16.810518264770508, "step": 16150 }, { "epoch": 2.51, "learning_rate": 2.301536386742051e-06, "logits/chosen": -2.503034830093384, "logits/rejected": -0.7108560800552368, "logps/chosen": -224.6235809326172, "logps/rejected": -159.05908203125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.838907718658447, "rewards/margins": 6.996635437011719, "rewards/rejected": -12.835543632507324, "step": 16151 }, { "epoch": 2.51, "learning_rate": 2.300802946210903e-06, "logits/chosen": -1.5163065195083618, "logits/rejected": -2.575392484664917, "logps/chosen": -237.8203125, "logps/rejected": -533.2208251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7446045875549316, "rewards/margins": 14.530946731567383, "rewards/rejected": -18.275550842285156, "step": 16152 }, { "epoch": 2.51, "learning_rate": 2.300069505679755e-06, "logits/chosen": -1.9066574573516846, "logits/rejected": -2.8481674194335938, "logps/chosen": -91.24652099609375, "logps/rejected": -303.7602844238281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7575225830078125, "rewards/margins": 10.543070793151855, "rewards/rejected": -16.300594329833984, "step": 16153 }, { "epoch": 2.51, "learning_rate": 2.2993360651486073e-06, "logits/chosen": -2.739184617996216, "logits/rejected": -1.5526396036148071, "logps/chosen": -463.49578857421875, "logps/rejected": -321.5601501464844, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/chosen": -6.745347023010254, "rewards/margins": 10.28671932220459, "rewards/rejected": -17.032066345214844, "step": 16154 }, { "epoch": 2.51, "learning_rate": 2.298602624617459e-06, "logits/chosen": -2.20511794090271, "logits/rejected": -2.0942494869232178, "logps/chosen": -276.5107421875, "logps/rejected": -385.85418701171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.5059356689453125, "rewards/margins": 8.527406692504883, "rewards/rejected": -16.033342361450195, "step": 16155 }, { "epoch": 2.51, "learning_rate": 2.297869184086312e-06, "logits/chosen": -1.2337472438812256, "logits/rejected": -2.50567626953125, "logps/chosen": -132.33401489257812, "logps/rejected": -306.65386962890625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.543718338012695, "rewards/margins": 7.6431427001953125, "rewards/rejected": -13.186861038208008, "step": 16156 }, { "epoch": 2.51, "learning_rate": 2.2971357435551638e-06, "logits/chosen": -2.393249034881592, "logits/rejected": -3.02103590965271, "logps/chosen": -117.56272888183594, "logps/rejected": -325.613037109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.512958526611328, "rewards/margins": 12.02532958984375, "rewards/rejected": -17.538288116455078, "step": 16157 }, { "epoch": 2.51, "learning_rate": 2.296402303024016e-06, "logits/chosen": -2.650836706161499, "logits/rejected": -1.8273751735687256, "logps/chosen": -186.92770385742188, "logps/rejected": -268.80377197265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.1508140563964844, "rewards/margins": 13.362236022949219, "rewards/rejected": -15.513050079345703, "step": 16158 }, { "epoch": 2.51, "learning_rate": 2.295668862492868e-06, "logits/chosen": -2.426229238510132, "logits/rejected": -2.7703349590301514, "logps/chosen": -106.1206283569336, "logps/rejected": -254.69869995117188, "loss": 0.0518, "rewards/accuracies": 1.0, "rewards/chosen": -7.936682224273682, "rewards/margins": 5.831719875335693, "rewards/rejected": -13.768402099609375, "step": 16159 }, { "epoch": 2.51, "learning_rate": 2.29493542196172e-06, "logits/chosen": -2.6815345287323, "logits/rejected": -2.5004963874816895, "logps/chosen": -472.35791015625, "logps/rejected": -638.225830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.599193572998047, "rewards/margins": 14.032581329345703, "rewards/rejected": -23.63177490234375, "step": 16160 }, { "epoch": 2.51, "learning_rate": 2.294201981430572e-06, "logits/chosen": -2.881413459777832, "logits/rejected": -3.091677188873291, "logps/chosen": -471.025634765625, "logps/rejected": -540.385009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.724875450134277, "rewards/margins": 12.529276847839355, "rewards/rejected": -18.254152297973633, "step": 16161 }, { "epoch": 2.51, "learning_rate": 2.293468540899424e-06, "logits/chosen": -2.6747541427612305, "logits/rejected": -2.848158121109009, "logps/chosen": -662.434814453125, "logps/rejected": -614.379150390625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -8.867291450500488, "rewards/margins": 8.557165145874023, "rewards/rejected": -17.424457550048828, "step": 16162 }, { "epoch": 2.51, "learning_rate": 2.2927351003682763e-06, "logits/chosen": -1.8218902349472046, "logits/rejected": -2.6392464637756348, "logps/chosen": -92.82591247558594, "logps/rejected": -361.36383056640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.87541389465332, "rewards/margins": 8.906835556030273, "rewards/rejected": -15.782249450683594, "step": 16163 }, { "epoch": 2.51, "learning_rate": 2.2920016598371286e-06, "logits/chosen": -2.614823341369629, "logits/rejected": -2.8476369380950928, "logps/chosen": -236.46194458007812, "logps/rejected": -454.50787353515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.959765434265137, "rewards/margins": 8.790102005004883, "rewards/rejected": -15.749866485595703, "step": 16164 }, { "epoch": 2.51, "learning_rate": 2.291268219305981e-06, "logits/chosen": -1.741084337234497, "logits/rejected": -2.703223705291748, "logps/chosen": -275.0670166015625, "logps/rejected": -549.9713134765625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.83120059967041, "rewards/margins": 10.537392616271973, "rewards/rejected": -20.368593215942383, "step": 16165 }, { "epoch": 2.51, "learning_rate": 2.290534778774833e-06, "logits/chosen": -1.658144235610962, "logits/rejected": -2.897505044937134, "logps/chosen": -230.55422973632812, "logps/rejected": -363.4496765136719, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.341881275177002, "rewards/margins": 7.442893981933594, "rewards/rejected": -13.784774780273438, "step": 16166 }, { "epoch": 2.51, "learning_rate": 2.289801338243685e-06, "logits/chosen": -2.8292922973632812, "logits/rejected": -2.960810899734497, "logps/chosen": -317.15191650390625, "logps/rejected": -374.8311462402344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.70310378074646, "rewards/margins": 12.879777908325195, "rewards/rejected": -15.582880973815918, "step": 16167 }, { "epoch": 2.51, "learning_rate": 2.289067897712537e-06, "logits/chosen": -1.989682912826538, "logits/rejected": -2.549792766571045, "logps/chosen": -255.8360595703125, "logps/rejected": -418.69952392578125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.742611885070801, "rewards/margins": 10.062872886657715, "rewards/rejected": -15.805484771728516, "step": 16168 }, { "epoch": 2.51, "learning_rate": 2.2883344571813893e-06, "logits/chosen": -1.573277235031128, "logits/rejected": -2.6893444061279297, "logps/chosen": -133.5255889892578, "logps/rejected": -407.242431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.573519229888916, "rewards/margins": 9.388799667358398, "rewards/rejected": -14.962318420410156, "step": 16169 }, { "epoch": 2.51, "learning_rate": 2.287601016650241e-06, "logits/chosen": -2.623579502105713, "logits/rejected": -2.819916248321533, "logps/chosen": -90.57417297363281, "logps/rejected": -289.0475769042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.197920322418213, "rewards/margins": 10.685054779052734, "rewards/rejected": -14.882976531982422, "step": 16170 }, { "epoch": 2.51, "learning_rate": 2.286867576119093e-06, "logits/chosen": -2.4592247009277344, "logits/rejected": -2.1302573680877686, "logps/chosen": -229.99911499023438, "logps/rejected": -312.2810974121094, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -6.144349098205566, "rewards/margins": 5.751093864440918, "rewards/rejected": -11.895442962646484, "step": 16171 }, { "epoch": 2.52, "learning_rate": 2.2861341355879453e-06, "logits/chosen": -2.196495532989502, "logits/rejected": -2.5540928840637207, "logps/chosen": -390.22265625, "logps/rejected": -456.84033203125, "loss": 1.389, "rewards/accuracies": 0.5, "rewards/chosen": -8.53526782989502, "rewards/margins": 3.578596591949463, "rewards/rejected": -12.11386489868164, "step": 16172 }, { "epoch": 2.52, "learning_rate": 2.2854006950567976e-06, "logits/chosen": -1.9124740362167358, "logits/rejected": -2.544508457183838, "logps/chosen": -86.5951156616211, "logps/rejected": -209.27151489257812, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -6.096132755279541, "rewards/margins": 6.220926761627197, "rewards/rejected": -12.317059516906738, "step": 16173 }, { "epoch": 2.52, "learning_rate": 2.28466725452565e-06, "logits/chosen": -2.3933115005493164, "logits/rejected": -1.228265643119812, "logps/chosen": -587.2518920898438, "logps/rejected": -410.7806396484375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -5.321779251098633, "rewards/margins": 11.628992080688477, "rewards/rejected": -16.95077133178711, "step": 16174 }, { "epoch": 2.52, "learning_rate": 2.283933813994502e-06, "logits/chosen": -2.487577438354492, "logits/rejected": -2.730470657348633, "logps/chosen": -123.30896759033203, "logps/rejected": -238.2313232421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.8676629066467285, "rewards/margins": 9.750216484069824, "rewards/rejected": -14.617879867553711, "step": 16175 }, { "epoch": 2.52, "learning_rate": 2.283200373463354e-06, "logits/chosen": -2.2987935543060303, "logits/rejected": -2.001887321472168, "logps/chosen": -206.43777465820312, "logps/rejected": -303.10797119140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.249213218688965, "rewards/margins": 8.434194564819336, "rewards/rejected": -13.6834077835083, "step": 16176 }, { "epoch": 2.52, "learning_rate": 2.282466932932206e-06, "logits/chosen": -2.0947365760803223, "logits/rejected": -2.7822275161743164, "logps/chosen": -116.33168029785156, "logps/rejected": -433.8941650390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.40179967880249, "rewards/margins": 10.560308456420898, "rewards/rejected": -15.96210765838623, "step": 16177 }, { "epoch": 2.52, "learning_rate": 2.2817334924010583e-06, "logits/chosen": -1.944169521331787, "logits/rejected": -2.696289539337158, "logps/chosen": -544.442626953125, "logps/rejected": -621.5506591796875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -9.450355529785156, "rewards/margins": 9.75438404083252, "rewards/rejected": -19.20473861694336, "step": 16178 }, { "epoch": 2.52, "learning_rate": 2.28100005186991e-06, "logits/chosen": -2.568596601486206, "logits/rejected": -2.7899508476257324, "logps/chosen": -120.51806640625, "logps/rejected": -321.5636291503906, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.184523105621338, "rewards/margins": 10.246856689453125, "rewards/rejected": -15.431379318237305, "step": 16179 }, { "epoch": 2.52, "learning_rate": 2.280266611338762e-06, "logits/chosen": -2.9024243354797363, "logits/rejected": -1.863043189048767, "logps/chosen": -548.1203002929688, "logps/rejected": -480.02545166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.369482040405273, "rewards/margins": 10.752058029174805, "rewards/rejected": -24.121540069580078, "step": 16180 }, { "epoch": 2.52, "learning_rate": 2.2795331708076144e-06, "logits/chosen": -2.460350513458252, "logits/rejected": -2.8186511993408203, "logps/chosen": -589.0078125, "logps/rejected": -668.9534912109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.010520935058594, "rewards/margins": 9.597965240478516, "rewards/rejected": -17.60848617553711, "step": 16181 }, { "epoch": 2.52, "learning_rate": 2.2787997302764667e-06, "logits/chosen": -2.3764352798461914, "logits/rejected": -2.7015140056610107, "logps/chosen": -556.1132202148438, "logps/rejected": -604.3405151367188, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.678399085998535, "rewards/margins": 11.356246948242188, "rewards/rejected": -21.034645080566406, "step": 16182 }, { "epoch": 2.52, "learning_rate": 2.278066289745319e-06, "logits/chosen": -2.989229679107666, "logits/rejected": -3.1350274085998535, "logps/chosen": -106.51544189453125, "logps/rejected": -219.95880126953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.199923515319824, "rewards/margins": 7.244040489196777, "rewards/rejected": -12.443964004516602, "step": 16183 }, { "epoch": 2.52, "learning_rate": 2.277332849214171e-06, "logits/chosen": -2.157961368560791, "logits/rejected": -2.6655921936035156, "logps/chosen": -511.7059326171875, "logps/rejected": -609.5343627929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.893854141235352, "rewards/margins": 12.656970977783203, "rewards/rejected": -20.550825119018555, "step": 16184 }, { "epoch": 2.52, "learning_rate": 2.276599408683023e-06, "logits/chosen": -1.2424858808517456, "logits/rejected": -2.6700098514556885, "logps/chosen": -247.9224090576172, "logps/rejected": -450.32684326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.352982521057129, "rewards/margins": 11.257536888122559, "rewards/rejected": -19.610519409179688, "step": 16185 }, { "epoch": 2.52, "learning_rate": 2.275865968151875e-06, "logits/chosen": -2.7475364208221436, "logits/rejected": -2.415372848510742, "logps/chosen": -220.27316284179688, "logps/rejected": -248.48240661621094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1876840591430664, "rewards/margins": 9.16441535949707, "rewards/rejected": -11.35209846496582, "step": 16186 }, { "epoch": 2.52, "learning_rate": 2.2751325276207273e-06, "logits/chosen": -2.16133451461792, "logits/rejected": -2.7072577476501465, "logps/chosen": -234.27938842773438, "logps/rejected": -387.6819763183594, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -8.126572608947754, "rewards/margins": 7.987183094024658, "rewards/rejected": -16.11375617980957, "step": 16187 }, { "epoch": 2.52, "learning_rate": 2.274399087089579e-06, "logits/chosen": -1.7137911319732666, "logits/rejected": -2.553279399871826, "logps/chosen": -282.9765625, "logps/rejected": -598.991943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.566078186035156, "rewards/margins": 14.407512664794922, "rewards/rejected": -22.973590850830078, "step": 16188 }, { "epoch": 2.52, "learning_rate": 2.2736656465584315e-06, "logits/chosen": -2.6256628036499023, "logits/rejected": -2.505141496658325, "logps/chosen": -203.17434692382812, "logps/rejected": -307.20654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.821624755859375, "rewards/margins": 10.614492416381836, "rewards/rejected": -14.436117172241211, "step": 16189 }, { "epoch": 2.52, "learning_rate": 2.272932206027284e-06, "logits/chosen": -2.4295942783355713, "logits/rejected": -2.884575128555298, "logps/chosen": -179.41050720214844, "logps/rejected": -433.28387451171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.229269981384277, "rewards/margins": 12.169837951660156, "rewards/rejected": -20.39910888671875, "step": 16190 }, { "epoch": 2.52, "learning_rate": 2.2721987654961357e-06, "logits/chosen": -2.1573727130889893, "logits/rejected": -2.4582297801971436, "logps/chosen": -533.022705078125, "logps/rejected": -656.7503051757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.052462577819824, "rewards/margins": 16.300111770629883, "rewards/rejected": -28.35257339477539, "step": 16191 }, { "epoch": 2.52, "learning_rate": 2.271465324964988e-06, "logits/chosen": -2.560413122177124, "logits/rejected": -2.382115125656128, "logps/chosen": -516.8004150390625, "logps/rejected": -570.5174560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.884920597076416, "rewards/margins": 13.19260025024414, "rewards/rejected": -18.07752227783203, "step": 16192 }, { "epoch": 2.52, "learning_rate": 2.27073188443384e-06, "logits/chosen": -2.765160083770752, "logits/rejected": -2.2781028747558594, "logps/chosen": -314.91925048828125, "logps/rejected": -135.79782104492188, "loss": 0.1701, "rewards/accuracies": 1.0, "rewards/chosen": -5.597856044769287, "rewards/margins": 3.616650342941284, "rewards/rejected": -9.214506149291992, "step": 16193 }, { "epoch": 2.52, "learning_rate": 2.269998443902692e-06, "logits/chosen": -1.6094774007797241, "logits/rejected": -2.5300087928771973, "logps/chosen": -180.06597900390625, "logps/rejected": -455.96630859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.50101089477539, "rewards/margins": 11.164154052734375, "rewards/rejected": -19.665164947509766, "step": 16194 }, { "epoch": 2.52, "learning_rate": 2.269265003371544e-06, "logits/chosen": -2.3468992710113525, "logits/rejected": -2.241419553756714, "logps/chosen": -295.203369140625, "logps/rejected": -355.27227783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.802995443344116, "rewards/margins": 11.874446868896484, "rewards/rejected": -15.67744255065918, "step": 16195 }, { "epoch": 2.52, "learning_rate": 2.2685315628403963e-06, "logits/chosen": -2.6642322540283203, "logits/rejected": -2.8503503799438477, "logps/chosen": -364.74029541015625, "logps/rejected": -487.6025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.998876571655273, "rewards/margins": 10.656118392944336, "rewards/rejected": -20.65499496459961, "step": 16196 }, { "epoch": 2.52, "learning_rate": 2.2677981223092482e-06, "logits/chosen": -2.9739646911621094, "logits/rejected": -1.9804795980453491, "logps/chosen": -716.5675659179688, "logps/rejected": -368.7427978515625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.921166896820068, "rewards/margins": 6.085409164428711, "rewards/rejected": -13.006576538085938, "step": 16197 }, { "epoch": 2.52, "learning_rate": 2.2670646817781005e-06, "logits/chosen": -2.7037510871887207, "logits/rejected": -2.5012154579162598, "logps/chosen": -182.324951171875, "logps/rejected": -285.75079345703125, "loss": 0.536, "rewards/accuracies": 0.5, "rewards/chosen": -5.78659200668335, "rewards/margins": 4.328597068786621, "rewards/rejected": -10.115188598632812, "step": 16198 }, { "epoch": 2.52, "learning_rate": 2.266331241246953e-06, "logits/chosen": -2.220243453979492, "logits/rejected": -2.65242338180542, "logps/chosen": -246.2112274169922, "logps/rejected": -426.712158203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.209983825683594, "rewards/margins": 8.806790351867676, "rewards/rejected": -15.01677417755127, "step": 16199 }, { "epoch": 2.52, "learning_rate": 2.2655978007158047e-06, "logits/chosen": -2.951219081878662, "logits/rejected": -2.9729182720184326, "logps/chosen": -170.60324096679688, "logps/rejected": -293.43988037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.042181968688965, "rewards/margins": 9.498010635375977, "rewards/rejected": -17.540191650390625, "step": 16200 }, { "epoch": 2.52, "learning_rate": 2.264864360184657e-06, "logits/chosen": -1.9930524826049805, "logits/rejected": -2.3492681980133057, "logps/chosen": -228.7803192138672, "logps/rejected": -540.2322387695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.523765563964844, "rewards/margins": 16.132997512817383, "rewards/rejected": -22.656763076782227, "step": 16201 }, { "epoch": 2.52, "learning_rate": 2.264130919653509e-06, "logits/chosen": -2.470191240310669, "logits/rejected": -0.7535619735717773, "logps/chosen": -580.3388061523438, "logps/rejected": -364.1169128417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.366260528564453, "rewards/margins": 15.582612037658691, "rewards/rejected": -18.94887351989746, "step": 16202 }, { "epoch": 2.52, "learning_rate": 2.263397479122361e-06, "logits/chosen": -1.3974274396896362, "logits/rejected": -2.2090659141540527, "logps/chosen": -124.63046264648438, "logps/rejected": -460.1787109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.928801536560059, "rewards/margins": 17.633819580078125, "rewards/rejected": -24.5626220703125, "step": 16203 }, { "epoch": 2.52, "learning_rate": 2.262664038591213e-06, "logits/chosen": -2.1366772651672363, "logits/rejected": -2.381134033203125, "logps/chosen": -708.468017578125, "logps/rejected": -731.6343383789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.896183967590332, "rewards/margins": 13.048467636108398, "rewards/rejected": -20.944652557373047, "step": 16204 }, { "epoch": 2.52, "learning_rate": 2.2619305980600654e-06, "logits/chosen": -2.5970852375030518, "logits/rejected": -2.631063222885132, "logps/chosen": -512.8718872070312, "logps/rejected": -579.35986328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.683088779449463, "rewards/margins": 11.868705749511719, "rewards/rejected": -19.551794052124023, "step": 16205 }, { "epoch": 2.52, "learning_rate": 2.2611971575289172e-06, "logits/chosen": -1.6707420349121094, "logits/rejected": -2.809359073638916, "logps/chosen": -268.9270324707031, "logps/rejected": -574.82861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9949951171875, "rewards/margins": 14.787662506103516, "rewards/rejected": -18.782657623291016, "step": 16206 }, { "epoch": 2.52, "learning_rate": 2.26046371699777e-06, "logits/chosen": -0.8272382020950317, "logits/rejected": -2.4142792224884033, "logps/chosen": -149.1813201904297, "logps/rejected": -606.6688232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.194488525390625, "rewards/margins": 11.837570190429688, "rewards/rejected": -20.032058715820312, "step": 16207 }, { "epoch": 2.52, "learning_rate": 2.259730276466622e-06, "logits/chosen": -3.018760919570923, "logits/rejected": -3.0488455295562744, "logps/chosen": -69.63749694824219, "logps/rejected": -221.40711975097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.028829097747803, "rewards/margins": 11.918766021728516, "rewards/rejected": -16.947595596313477, "step": 16208 }, { "epoch": 2.52, "learning_rate": 2.2589968359354737e-06, "logits/chosen": -2.5246310234069824, "logits/rejected": -0.7785763144493103, "logps/chosen": -304.3861999511719, "logps/rejected": -268.0767822265625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.570804595947266, "rewards/margins": 8.428762435913086, "rewards/rejected": -13.999567031860352, "step": 16209 }, { "epoch": 2.52, "learning_rate": 2.258263395404326e-06, "logits/chosen": -2.3359673023223877, "logits/rejected": -1.8347179889678955, "logps/chosen": -300.050048828125, "logps/rejected": -367.7984619140625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -7.513888835906982, "rewards/margins": 6.629563808441162, "rewards/rejected": -14.143452644348145, "step": 16210 }, { "epoch": 2.52, "learning_rate": 2.257529954873178e-06, "logits/chosen": -2.2316911220550537, "logits/rejected": -2.184628963470459, "logps/chosen": -275.16094970703125, "logps/rejected": -504.8083801269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.933746337890625, "rewards/margins": 14.761492729187012, "rewards/rejected": -21.695240020751953, "step": 16211 }, { "epoch": 2.52, "learning_rate": 2.25679651434203e-06, "logits/chosen": -2.2339413166046143, "logits/rejected": -2.709500551223755, "logps/chosen": -423.3507080078125, "logps/rejected": -474.0843505859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -9.306958198547363, "rewards/margins": 9.759888648986816, "rewards/rejected": -19.06684684753418, "step": 16212 }, { "epoch": 2.52, "learning_rate": 2.256063073810882e-06, "logits/chosen": -2.6282548904418945, "logits/rejected": -2.0379106998443604, "logps/chosen": -368.55291748046875, "logps/rejected": -355.41839599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.660277843475342, "rewards/margins": 13.001548767089844, "rewards/rejected": -20.661827087402344, "step": 16213 }, { "epoch": 2.52, "learning_rate": 2.2553296332797344e-06, "logits/chosen": -2.6571476459503174, "logits/rejected": -2.1626691818237305, "logps/chosen": -657.0001220703125, "logps/rejected": -630.5038452148438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.295686721801758, "rewards/margins": 9.330545425415039, "rewards/rejected": -19.626232147216797, "step": 16214 }, { "epoch": 2.52, "learning_rate": 2.2545961927485867e-06, "logits/chosen": -1.918991208076477, "logits/rejected": -2.7778916358947754, "logps/chosen": -209.08546447753906, "logps/rejected": -383.8167724609375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.624673843383789, "rewards/margins": 7.972451210021973, "rewards/rejected": -15.597124099731445, "step": 16215 }, { "epoch": 2.52, "learning_rate": 2.253862752217439e-06, "logits/chosen": -2.4637022018432617, "logits/rejected": -2.9202229976654053, "logps/chosen": -115.4661636352539, "logps/rejected": -334.23687744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.69236946105957, "rewards/margins": 12.25223159790039, "rewards/rejected": -16.94460105895996, "step": 16216 }, { "epoch": 2.52, "learning_rate": 2.253129311686291e-06, "logits/chosen": -2.755338430404663, "logits/rejected": -2.877683639526367, "logps/chosen": -80.0225830078125, "logps/rejected": -245.25570678710938, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.364223003387451, "rewards/margins": 8.949268341064453, "rewards/rejected": -13.313491821289062, "step": 16217 }, { "epoch": 2.52, "learning_rate": 2.252395871155143e-06, "logits/chosen": -1.3810302019119263, "logits/rejected": -2.602874755859375, "logps/chosen": -131.72642517089844, "logps/rejected": -501.223388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.722236156463623, "rewards/margins": 13.042488098144531, "rewards/rejected": -20.764724731445312, "step": 16218 }, { "epoch": 2.52, "learning_rate": 2.251662430623995e-06, "logits/chosen": -2.734992265701294, "logits/rejected": -2.250340461730957, "logps/chosen": -648.140625, "logps/rejected": -499.03216552734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -9.567058563232422, "rewards/margins": 9.78040885925293, "rewards/rejected": -19.34746551513672, "step": 16219 }, { "epoch": 2.52, "learning_rate": 2.250928990092847e-06, "logits/chosen": -2.598187208175659, "logits/rejected": -2.5570974349975586, "logps/chosen": -408.1058349609375, "logps/rejected": -489.32452392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.525684356689453, "rewards/margins": 14.536219596862793, "rewards/rejected": -23.061904907226562, "step": 16220 }, { "epoch": 2.52, "learning_rate": 2.2501955495616992e-06, "logits/chosen": -2.588146686553955, "logits/rejected": -1.995301365852356, "logps/chosen": -178.7183837890625, "logps/rejected": -409.79583740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3379225730896, "rewards/margins": 11.082756042480469, "rewards/rejected": -15.420679092407227, "step": 16221 }, { "epoch": 2.52, "learning_rate": 2.249462109030551e-06, "logits/chosen": -1.630388855934143, "logits/rejected": -2.6106934547424316, "logps/chosen": -126.37843322753906, "logps/rejected": -520.9520263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.068170547485352, "rewards/margins": 14.255437850952148, "rewards/rejected": -22.3236083984375, "step": 16222 }, { "epoch": 2.52, "learning_rate": 2.2487286684994034e-06, "logits/chosen": -1.9053277969360352, "logits/rejected": -2.714674472808838, "logps/chosen": -163.8948516845703, "logps/rejected": -375.1385498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.824332237243652, "rewards/margins": 14.669620513916016, "rewards/rejected": -19.493953704833984, "step": 16223 }, { "epoch": 2.52, "learning_rate": 2.2479952279682557e-06, "logits/chosen": -2.5768120288848877, "logits/rejected": -2.634059190750122, "logps/chosen": -332.73760986328125, "logps/rejected": -400.7923583984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.821431159973145, "rewards/margins": 8.26762580871582, "rewards/rejected": -17.08905601501465, "step": 16224 }, { "epoch": 2.52, "learning_rate": 2.247261787437108e-06, "logits/chosen": -2.31410813331604, "logits/rejected": -3.04025936126709, "logps/chosen": -104.88868713378906, "logps/rejected": -368.4352111816406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.02744197845459, "rewards/margins": 10.314408302307129, "rewards/rejected": -16.34185028076172, "step": 16225 }, { "epoch": 2.52, "learning_rate": 2.24652834690596e-06, "logits/chosen": -2.443418025970459, "logits/rejected": -2.609954833984375, "logps/chosen": -194.19671630859375, "logps/rejected": -326.3093566894531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.120863914489746, "rewards/margins": 12.241654396057129, "rewards/rejected": -18.362518310546875, "step": 16226 }, { "epoch": 2.52, "learning_rate": 2.245794906374812e-06, "logits/chosen": -2.427485227584839, "logits/rejected": -2.0211262702941895, "logps/chosen": -348.4638671875, "logps/rejected": -346.2208251953125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.656667232513428, "rewards/margins": 8.337556838989258, "rewards/rejected": -15.994224548339844, "step": 16227 }, { "epoch": 2.52, "learning_rate": 2.245061465843664e-06, "logits/chosen": -2.477099895477295, "logits/rejected": -2.669275999069214, "logps/chosen": -120.78640747070312, "logps/rejected": -319.74053955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.527585983276367, "rewards/margins": 12.069795608520508, "rewards/rejected": -19.597381591796875, "step": 16228 }, { "epoch": 2.52, "learning_rate": 2.244328025312516e-06, "logits/chosen": -2.4617059230804443, "logits/rejected": -2.6888608932495117, "logps/chosen": -384.5536193847656, "logps/rejected": -410.38812255859375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.049888610839844, "rewards/margins": 8.590593338012695, "rewards/rejected": -14.640481948852539, "step": 16229 }, { "epoch": 2.52, "learning_rate": 2.2435945847813682e-06, "logits/chosen": -2.464273452758789, "logits/rejected": -2.9702372550964355, "logps/chosen": -219.6769561767578, "logps/rejected": -514.5621337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.62451457977295, "rewards/margins": 13.786033630371094, "rewards/rejected": -22.41054916381836, "step": 16230 }, { "epoch": 2.52, "learning_rate": 2.24286114425022e-06, "logits/chosen": -1.7615644931793213, "logits/rejected": -2.573288917541504, "logps/chosen": -129.8964385986328, "logps/rejected": -280.16790771484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.576265335083008, "rewards/margins": 7.934467315673828, "rewards/rejected": -12.510732650756836, "step": 16231 }, { "epoch": 2.52, "learning_rate": 2.242127703719073e-06, "logits/chosen": -2.350853681564331, "logits/rejected": -2.7328593730926514, "logps/chosen": -236.4258575439453, "logps/rejected": -409.3717041015625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -8.240266799926758, "rewards/margins": 6.742215156555176, "rewards/rejected": -14.982481002807617, "step": 16232 }, { "epoch": 2.52, "learning_rate": 2.2413942631879247e-06, "logits/chosen": -2.3665380477905273, "logits/rejected": -2.6471827030181885, "logps/chosen": -180.6920623779297, "logps/rejected": -388.06756591796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.829227447509766, "rewards/margins": 9.175504684448242, "rewards/rejected": -16.004732131958008, "step": 16233 }, { "epoch": 2.52, "learning_rate": 2.240660822656777e-06, "logits/chosen": -2.842409372329712, "logits/rejected": -2.6000192165374756, "logps/chosen": -215.48118591308594, "logps/rejected": -181.6199951171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.1078720092773438, "rewards/margins": 9.224205017089844, "rewards/rejected": -11.332077026367188, "step": 16234 }, { "epoch": 2.52, "learning_rate": 2.239927382125629e-06, "logits/chosen": -2.8085062503814697, "logits/rejected": -2.5713822841644287, "logps/chosen": -983.14208984375, "logps/rejected": -705.8524169921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.100049018859863, "rewards/margins": 9.867082595825195, "rewards/rejected": -17.967130661010742, "step": 16235 }, { "epoch": 2.53, "learning_rate": 2.239193941594481e-06, "logits/chosen": -1.223750114440918, "logits/rejected": -2.3826518058776855, "logps/chosen": -111.65704345703125, "logps/rejected": -395.2994079589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.727718830108643, "rewards/margins": 10.876867294311523, "rewards/rejected": -16.60458755493164, "step": 16236 }, { "epoch": 2.53, "learning_rate": 2.238460501063333e-06, "logits/chosen": -2.7129437923431396, "logits/rejected": -2.6803269386291504, "logps/chosen": -291.3733825683594, "logps/rejected": -385.25592041015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.212289333343506, "rewards/margins": 8.910528182983398, "rewards/rejected": -15.122817993164062, "step": 16237 }, { "epoch": 2.53, "learning_rate": 2.2377270605321854e-06, "logits/chosen": -1.717462420463562, "logits/rejected": -2.783491611480713, "logps/chosen": -240.003173828125, "logps/rejected": -498.8634033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.238997459411621, "rewards/margins": 14.581850051879883, "rewards/rejected": -19.820846557617188, "step": 16238 }, { "epoch": 2.53, "learning_rate": 2.2369936200010373e-06, "logits/chosen": -2.7952945232391357, "logits/rejected": -2.9408822059631348, "logps/chosen": -561.6026611328125, "logps/rejected": -634.310791015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.041070938110352, "rewards/margins": 10.525799751281738, "rewards/rejected": -18.566871643066406, "step": 16239 }, { "epoch": 2.53, "learning_rate": 2.2362601794698896e-06, "logits/chosen": -2.366128921508789, "logits/rejected": -2.5331149101257324, "logps/chosen": -394.73284912109375, "logps/rejected": -563.176025390625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -5.623781681060791, "rewards/margins": 9.310018539428711, "rewards/rejected": -14.933799743652344, "step": 16240 }, { "epoch": 2.53, "learning_rate": 2.235526738938742e-06, "logits/chosen": -2.9356095790863037, "logits/rejected": -2.8274729251861572, "logps/chosen": -705.0169677734375, "logps/rejected": -302.43426513671875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -6.7044878005981445, "rewards/margins": 5.943277359008789, "rewards/rejected": -12.647765159606934, "step": 16241 }, { "epoch": 2.53, "learning_rate": 2.2347932984075938e-06, "logits/chosen": -3.1543495655059814, "logits/rejected": -3.000689744949341, "logps/chosen": -157.68478393554688, "logps/rejected": -253.60470581054688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.833799362182617, "rewards/margins": 8.687978744506836, "rewards/rejected": -18.521778106689453, "step": 16242 }, { "epoch": 2.53, "learning_rate": 2.234059857876446e-06, "logits/chosen": -2.5972981452941895, "logits/rejected": -1.5947529077529907, "logps/chosen": -435.28277587890625, "logps/rejected": -416.70623779296875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -7.327756404876709, "rewards/margins": 6.278552532196045, "rewards/rejected": -13.606308937072754, "step": 16243 }, { "epoch": 2.53, "learning_rate": 2.233326417345298e-06, "logits/chosen": -1.2235381603240967, "logits/rejected": -2.5843770503997803, "logps/chosen": -174.50555419921875, "logps/rejected": -485.6195068359375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -9.591561317443848, "rewards/margins": 5.900931358337402, "rewards/rejected": -15.49249267578125, "step": 16244 }, { "epoch": 2.53, "learning_rate": 2.2325929768141502e-06, "logits/chosen": -2.5794363021850586, "logits/rejected": -2.1219232082366943, "logps/chosen": -146.1962127685547, "logps/rejected": -278.23406982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.271087646484375, "rewards/margins": 10.840633392333984, "rewards/rejected": -15.11172103881836, "step": 16245 }, { "epoch": 2.53, "learning_rate": 2.231859536283002e-06, "logits/chosen": -2.54874587059021, "logits/rejected": -2.7650671005249023, "logps/chosen": -144.63455200195312, "logps/rejected": -412.7369079589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.19991683959961, "rewards/margins": 11.004383087158203, "rewards/rejected": -22.204299926757812, "step": 16246 }, { "epoch": 2.53, "learning_rate": 2.2311260957518544e-06, "logits/chosen": -2.061462163925171, "logits/rejected": -2.731039524078369, "logps/chosen": -194.69512939453125, "logps/rejected": -358.394775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.394776344299316, "rewards/margins": 10.476470947265625, "rewards/rejected": -15.871246337890625, "step": 16247 }, { "epoch": 2.53, "learning_rate": 2.2303926552207063e-06, "logits/chosen": -2.2760531902313232, "logits/rejected": -2.7295479774475098, "logps/chosen": -614.0733642578125, "logps/rejected": -749.5604248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.465353488922119, "rewards/margins": 12.276322364807129, "rewards/rejected": -19.741676330566406, "step": 16248 }, { "epoch": 2.53, "learning_rate": 2.2296592146895586e-06, "logits/chosen": -1.4666097164154053, "logits/rejected": -2.386589288711548, "logps/chosen": -132.61276245117188, "logps/rejected": -328.18914794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.284816741943359, "rewards/margins": 8.987836837768555, "rewards/rejected": -14.272653579711914, "step": 16249 }, { "epoch": 2.53, "learning_rate": 2.228925774158411e-06, "logits/chosen": -2.8481879234313965, "logits/rejected": -2.4517428874969482, "logps/chosen": -278.87860107421875, "logps/rejected": -394.93572998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.403986930847168, "rewards/margins": 13.790103912353516, "rewards/rejected": -19.194091796875, "step": 16250 }, { "epoch": 2.53, "learning_rate": 2.2281923336272628e-06, "logits/chosen": -2.482480764389038, "logits/rejected": -3.0664050579071045, "logps/chosen": -258.3614501953125, "logps/rejected": -463.5679626464844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.142870903015137, "rewards/margins": 8.251623153686523, "rewards/rejected": -15.39449405670166, "step": 16251 }, { "epoch": 2.53, "learning_rate": 2.227458893096115e-06, "logits/chosen": -2.6705663204193115, "logits/rejected": -1.4699736833572388, "logps/chosen": -480.331298828125, "logps/rejected": -171.84886169433594, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -6.0555877685546875, "rewards/margins": 4.35670280456543, "rewards/rejected": -10.412290573120117, "step": 16252 }, { "epoch": 2.53, "learning_rate": 2.226725452564967e-06, "logits/chosen": -2.4527997970581055, "logits/rejected": -2.632631301879883, "logps/chosen": -122.64904022216797, "logps/rejected": -382.7292175292969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.530858039855957, "rewards/margins": 11.696044921875, "rewards/rejected": -18.22690200805664, "step": 16253 }, { "epoch": 2.53, "learning_rate": 2.2259920120338193e-06, "logits/chosen": -2.4754574298858643, "logits/rejected": -2.873255968093872, "logps/chosen": -108.40825653076172, "logps/rejected": -247.39364624023438, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -8.57008171081543, "rewards/margins": 6.785058975219727, "rewards/rejected": -15.355140686035156, "step": 16254 }, { "epoch": 2.53, "learning_rate": 2.225258571502671e-06, "logits/chosen": -2.356086492538452, "logits/rejected": -2.4765689373016357, "logps/chosen": -116.78243255615234, "logps/rejected": -315.3873291015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.266683578491211, "rewards/margins": 11.685626029968262, "rewards/rejected": -18.95231056213379, "step": 16255 }, { "epoch": 2.53, "learning_rate": 2.2245251309715234e-06, "logits/chosen": -2.3708078861236572, "logits/rejected": -2.7984938621520996, "logps/chosen": -240.90310668945312, "logps/rejected": -533.4840698242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.448500633239746, "rewards/margins": 14.179174423217773, "rewards/rejected": -21.627676010131836, "step": 16256 }, { "epoch": 2.53, "learning_rate": 2.2237916904403757e-06, "logits/chosen": -2.3109371662139893, "logits/rejected": -2.7792277336120605, "logps/chosen": -453.95916748046875, "logps/rejected": -470.3297119140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.069782257080078, "rewards/margins": 7.583746910095215, "rewards/rejected": -15.65353012084961, "step": 16257 }, { "epoch": 2.53, "learning_rate": 2.2230582499092276e-06, "logits/chosen": -1.1672632694244385, "logits/rejected": -2.662292003631592, "logps/chosen": -162.83169555664062, "logps/rejected": -508.9385986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.644308090209961, "rewards/margins": 13.168149948120117, "rewards/rejected": -20.812458038330078, "step": 16258 }, { "epoch": 2.53, "learning_rate": 2.22232480937808e-06, "logits/chosen": -2.629070281982422, "logits/rejected": -2.593079090118408, "logps/chosen": -858.9493408203125, "logps/rejected": -626.188232421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.718106269836426, "rewards/margins": 8.849456787109375, "rewards/rejected": -14.567564010620117, "step": 16259 }, { "epoch": 2.53, "learning_rate": 2.221591368846932e-06, "logits/chosen": -1.902501106262207, "logits/rejected": -2.6660609245300293, "logps/chosen": -239.12191772460938, "logps/rejected": -420.17132568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.192456245422363, "rewards/margins": 10.662827491760254, "rewards/rejected": -19.855283737182617, "step": 16260 }, { "epoch": 2.53, "learning_rate": 2.220857928315784e-06, "logits/chosen": -2.726863145828247, "logits/rejected": -3.071875810623169, "logps/chosen": -157.27012634277344, "logps/rejected": -306.97314453125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -9.365147590637207, "rewards/margins": 6.498831748962402, "rewards/rejected": -15.86397933959961, "step": 16261 }, { "epoch": 2.53, "learning_rate": 2.220124487784636e-06, "logits/chosen": -1.8870515823364258, "logits/rejected": -2.4575066566467285, "logps/chosen": -176.6042022705078, "logps/rejected": -360.2891540527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.304159641265869, "rewards/margins": 11.911893844604492, "rewards/rejected": -18.216053009033203, "step": 16262 }, { "epoch": 2.53, "learning_rate": 2.2193910472534883e-06, "logits/chosen": -2.3743629455566406, "logits/rejected": -2.558609962463379, "logps/chosen": -112.56675720214844, "logps/rejected": -248.68536376953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.429815292358398, "rewards/margins": 10.625871658325195, "rewards/rejected": -16.055686950683594, "step": 16263 }, { "epoch": 2.53, "learning_rate": 2.21865760672234e-06, "logits/chosen": -2.314181089401245, "logits/rejected": -2.6962637901306152, "logps/chosen": -321.99310302734375, "logps/rejected": -392.8904724121094, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.00541877746582, "rewards/margins": 6.431123733520508, "rewards/rejected": -13.436542510986328, "step": 16264 }, { "epoch": 2.53, "learning_rate": 2.2179241661911925e-06, "logits/chosen": -2.056704044342041, "logits/rejected": -2.8077216148376465, "logps/chosen": -130.397705078125, "logps/rejected": -333.9361572265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.430187225341797, "rewards/margins": 7.303127288818359, "rewards/rejected": -12.733314514160156, "step": 16265 }, { "epoch": 2.53, "learning_rate": 2.2171907256600448e-06, "logits/chosen": -2.430680751800537, "logits/rejected": -2.8469150066375732, "logps/chosen": -73.40302276611328, "logps/rejected": -224.18045043945312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -3.924978733062744, "rewards/margins": 9.6608247756958, "rewards/rejected": -13.585803985595703, "step": 16266 }, { "epoch": 2.53, "learning_rate": 2.216457285128897e-06, "logits/chosen": -2.5837275981903076, "logits/rejected": -2.2447729110717773, "logps/chosen": -178.87982177734375, "logps/rejected": -138.77685546875, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": -8.857566833496094, "rewards/margins": 2.225447654724121, "rewards/rejected": -11.083014488220215, "step": 16267 }, { "epoch": 2.53, "learning_rate": 2.215723844597749e-06, "logits/chosen": -2.6540372371673584, "logits/rejected": -2.984607696533203, "logps/chosen": -86.902099609375, "logps/rejected": -526.0931396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.889225482940674, "rewards/margins": 14.090487480163574, "rewards/rejected": -20.979713439941406, "step": 16268 }, { "epoch": 2.53, "learning_rate": 2.214990404066601e-06, "logits/chosen": -2.846406936645508, "logits/rejected": -2.8955636024475098, "logps/chosen": -532.072021484375, "logps/rejected": -627.489501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.875397682189941, "rewards/margins": 12.54748821258545, "rewards/rejected": -19.42288589477539, "step": 16269 }, { "epoch": 2.53, "learning_rate": 2.214256963535453e-06, "logits/chosen": -1.918795108795166, "logits/rejected": -2.4788177013397217, "logps/chosen": -212.6544189453125, "logps/rejected": -435.5365295410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.838299751281738, "rewards/margins": 11.175131797790527, "rewards/rejected": -19.013431549072266, "step": 16270 }, { "epoch": 2.53, "learning_rate": 2.213523523004305e-06, "logits/chosen": -1.0322846174240112, "logits/rejected": -1.917449712753296, "logps/chosen": -295.0729064941406, "logps/rejected": -706.9991455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.138165473937988, "rewards/margins": 15.894013404846191, "rewards/rejected": -24.03217887878418, "step": 16271 }, { "epoch": 2.53, "learning_rate": 2.2127900824731573e-06, "logits/chosen": -2.7222204208374023, "logits/rejected": -2.7767579555511475, "logps/chosen": -344.23553466796875, "logps/rejected": -454.1457824707031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.15501594543457, "rewards/margins": 17.728851318359375, "rewards/rejected": -27.883867263793945, "step": 16272 }, { "epoch": 2.53, "learning_rate": 2.212056641942009e-06, "logits/chosen": -2.3471758365631104, "logits/rejected": -2.6722865104675293, "logps/chosen": -180.24655151367188, "logps/rejected": -416.2773132324219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.172831535339355, "rewards/margins": 10.951099395751953, "rewards/rejected": -20.123931884765625, "step": 16273 }, { "epoch": 2.53, "learning_rate": 2.211323201410862e-06, "logits/chosen": -1.3732730150222778, "logits/rejected": -2.384744882583618, "logps/chosen": -266.8965148925781, "logps/rejected": -605.9658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.02880859375, "rewards/margins": 12.066154479980469, "rewards/rejected": -24.09496307373047, "step": 16274 }, { "epoch": 2.53, "learning_rate": 2.2105897608797138e-06, "logits/chosen": -2.5097594261169434, "logits/rejected": -2.316927909851074, "logps/chosen": -210.96498107910156, "logps/rejected": -196.25155639648438, "loss": 1.3626, "rewards/accuracies": 0.5, "rewards/chosen": -8.943384170532227, "rewards/margins": 2.6033313274383545, "rewards/rejected": -11.54671573638916, "step": 16275 }, { "epoch": 2.53, "learning_rate": 2.209856320348566e-06, "logits/chosen": -2.70120906829834, "logits/rejected": -1.2138855457305908, "logps/chosen": -311.5644226074219, "logps/rejected": -287.6514587402344, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.735238075256348, "rewards/margins": 7.738580703735352, "rewards/rejected": -13.473817825317383, "step": 16276 }, { "epoch": 2.53, "learning_rate": 2.209122879817418e-06, "logits/chosen": -2.4013121128082275, "logits/rejected": -1.3464370965957642, "logps/chosen": -416.0802917480469, "logps/rejected": -374.42254638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.26638412475586, "rewards/margins": 10.132637023925781, "rewards/rejected": -20.39902114868164, "step": 16277 }, { "epoch": 2.53, "learning_rate": 2.20838943928627e-06, "logits/chosen": -2.828207015991211, "logits/rejected": -2.417022228240967, "logps/chosen": -257.45538330078125, "logps/rejected": -303.12823486328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.591947078704834, "rewards/margins": 8.06667709350586, "rewards/rejected": -11.658623695373535, "step": 16278 }, { "epoch": 2.53, "learning_rate": 2.207655998755122e-06, "logits/chosen": -2.7985706329345703, "logits/rejected": -2.249117851257324, "logps/chosen": -151.5526123046875, "logps/rejected": -228.85354614257812, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -5.559085845947266, "rewards/margins": 9.265443801879883, "rewards/rejected": -14.824529647827148, "step": 16279 }, { "epoch": 2.53, "learning_rate": 2.206922558223974e-06, "logits/chosen": -1.4494184255599976, "logits/rejected": -2.8903160095214844, "logps/chosen": -86.39157104492188, "logps/rejected": -437.7039794921875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.159712791442871, "rewards/margins": 8.897804260253906, "rewards/rejected": -16.057518005371094, "step": 16280 }, { "epoch": 2.53, "learning_rate": 2.2061891176928263e-06, "logits/chosen": -2.6390416622161865, "logits/rejected": -2.8375394344329834, "logps/chosen": -265.73419189453125, "logps/rejected": -412.8053894042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.532184600830078, "rewards/margins": 13.613237380981445, "rewards/rejected": -22.145421981811523, "step": 16281 }, { "epoch": 2.53, "learning_rate": 2.2054556771616786e-06, "logits/chosen": -3.134950637817383, "logits/rejected": -2.7905404567718506, "logps/chosen": -289.1159973144531, "logps/rejected": -383.7764892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.953401565551758, "rewards/margins": 9.046167373657227, "rewards/rejected": -12.999568939208984, "step": 16282 }, { "epoch": 2.53, "learning_rate": 2.204722236630531e-06, "logits/chosen": -2.3439173698425293, "logits/rejected": -2.669605016708374, "logps/chosen": -150.24053955078125, "logps/rejected": -335.4178771972656, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -8.669989585876465, "rewards/margins": 5.781745910644531, "rewards/rejected": -14.45173454284668, "step": 16283 }, { "epoch": 2.53, "learning_rate": 2.203988796099383e-06, "logits/chosen": -2.6498658657073975, "logits/rejected": -1.8093026876449585, "logps/chosen": -264.1363830566406, "logps/rejected": -368.68035888671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.338685989379883, "rewards/margins": 13.023192405700684, "rewards/rejected": -18.36187744140625, "step": 16284 }, { "epoch": 2.53, "learning_rate": 2.203255355568235e-06, "logits/chosen": -2.8095571994781494, "logits/rejected": -3.1080336570739746, "logps/chosen": -74.6536865234375, "logps/rejected": -222.7059783935547, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -4.138144016265869, "rewards/margins": 7.922339916229248, "rewards/rejected": -12.060483932495117, "step": 16285 }, { "epoch": 2.53, "learning_rate": 2.202521915037087e-06, "logits/chosen": -2.1088192462921143, "logits/rejected": -2.727057695388794, "logps/chosen": -180.91241455078125, "logps/rejected": -259.65557861328125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -9.012578010559082, "rewards/margins": 5.496596336364746, "rewards/rejected": -14.509174346923828, "step": 16286 }, { "epoch": 2.53, "learning_rate": 2.2017884745059393e-06, "logits/chosen": -2.581019639968872, "logits/rejected": -2.5696489810943604, "logps/chosen": -398.31695556640625, "logps/rejected": -519.6141357421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.560206413269043, "rewards/margins": 11.907581329345703, "rewards/rejected": -18.467788696289062, "step": 16287 }, { "epoch": 2.53, "learning_rate": 2.201055033974791e-06, "logits/chosen": -2.6246466636657715, "logits/rejected": -2.7581310272216797, "logps/chosen": -202.69378662109375, "logps/rejected": -297.8868103027344, "loss": 0.0221, "rewards/accuracies": 1.0, "rewards/chosen": -6.407179355621338, "rewards/margins": 8.552074432373047, "rewards/rejected": -14.959254264831543, "step": 16288 }, { "epoch": 2.53, "learning_rate": 2.200321593443643e-06, "logits/chosen": -2.588408946990967, "logits/rejected": -2.4407742023468018, "logps/chosen": -244.57687377929688, "logps/rejected": -377.9720153808594, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.758625507354736, "rewards/margins": 8.455123901367188, "rewards/rejected": -15.213749885559082, "step": 16289 }, { "epoch": 2.53, "learning_rate": 2.1995881529124953e-06, "logits/chosen": -2.7096285820007324, "logits/rejected": -2.1997556686401367, "logps/chosen": -258.8411865234375, "logps/rejected": -379.1422119140625, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/chosen": -4.989390850067139, "rewards/margins": 12.02279281616211, "rewards/rejected": -17.012184143066406, "step": 16290 }, { "epoch": 2.53, "learning_rate": 2.1988547123813476e-06, "logits/chosen": -2.6779372692108154, "logits/rejected": -2.6368942260742188, "logps/chosen": -388.6666259765625, "logps/rejected": -485.6123962402344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.594629287719727, "rewards/margins": 8.534152030944824, "rewards/rejected": -17.128780364990234, "step": 16291 }, { "epoch": 2.53, "learning_rate": 2.1981212718502e-06, "logits/chosen": -2.692939519882202, "logits/rejected": -2.4880731105804443, "logps/chosen": -367.60205078125, "logps/rejected": -523.839111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.5139219760894775, "rewards/margins": 12.73655891418457, "rewards/rejected": -16.25048065185547, "step": 16292 }, { "epoch": 2.53, "learning_rate": 2.197387831319052e-06, "logits/chosen": -0.933421790599823, "logits/rejected": -2.6396872997283936, "logps/chosen": -186.21298217773438, "logps/rejected": -571.8851318359375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.4542412757873535, "rewards/margins": 7.5575270652771, "rewards/rejected": -15.011768341064453, "step": 16293 }, { "epoch": 2.53, "learning_rate": 2.196654390787904e-06, "logits/chosen": -2.5689146518707275, "logits/rejected": -2.195233106613159, "logps/chosen": -187.60952758789062, "logps/rejected": -184.17626953125, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/chosen": -7.483362674713135, "rewards/margins": 4.535086631774902, "rewards/rejected": -12.018449783325195, "step": 16294 }, { "epoch": 2.53, "learning_rate": 2.195920950256756e-06, "logits/chosen": -1.5415230989456177, "logits/rejected": -2.543926477432251, "logps/chosen": -182.02578735351562, "logps/rejected": -488.04669189453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.711663246154785, "rewards/margins": 12.200729370117188, "rewards/rejected": -18.91239356994629, "step": 16295 }, { "epoch": 2.53, "learning_rate": 2.1951875097256083e-06, "logits/chosen": -2.515780448913574, "logits/rejected": -2.7770655155181885, "logps/chosen": -104.62174987792969, "logps/rejected": -357.74517822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.154911041259766, "rewards/margins": 12.363624572753906, "rewards/rejected": -18.518535614013672, "step": 16296 }, { "epoch": 2.53, "learning_rate": 2.19445406919446e-06, "logits/chosen": -2.5141494274139404, "logits/rejected": -2.272331953048706, "logps/chosen": -148.91317749023438, "logps/rejected": -362.8155517578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.457508087158203, "rewards/margins": 13.12208080291748, "rewards/rejected": -19.57958984375, "step": 16297 }, { "epoch": 2.53, "learning_rate": 2.193720628663312e-06, "logits/chosen": -2.097501516342163, "logits/rejected": -2.4581947326660156, "logps/chosen": -405.099365234375, "logps/rejected": -533.4151611328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.940626621246338, "rewards/margins": 8.145135879516602, "rewards/rejected": -16.08576202392578, "step": 16298 }, { "epoch": 2.53, "learning_rate": 2.192987188132165e-06, "logits/chosen": -2.3718807697296143, "logits/rejected": -2.6230127811431885, "logps/chosen": -391.4169616699219, "logps/rejected": -428.6670227050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.549269676208496, "rewards/margins": 14.509936332702637, "rewards/rejected": -20.059206008911133, "step": 16299 }, { "epoch": 2.53, "learning_rate": 2.1922537476010167e-06, "logits/chosen": -2.627732515335083, "logits/rejected": -2.4441564083099365, "logps/chosen": -223.5440216064453, "logps/rejected": -209.2459716796875, "loss": 0.1806, "rewards/accuracies": 1.0, "rewards/chosen": -5.7868757247924805, "rewards/margins": 4.561254501342773, "rewards/rejected": -10.348130226135254, "step": 16300 }, { "epoch": 2.54, "learning_rate": 2.191520307069869e-06, "logits/chosen": -2.4437098503112793, "logits/rejected": -2.6828839778900146, "logps/chosen": -556.0083618164062, "logps/rejected": -563.7861938476562, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.539235591888428, "rewards/margins": 10.212173461914062, "rewards/rejected": -16.75140953063965, "step": 16301 }, { "epoch": 2.54, "learning_rate": 2.190786866538721e-06, "logits/chosen": -3.0002799034118652, "logits/rejected": -2.3142471313476562, "logps/chosen": -225.7303009033203, "logps/rejected": -255.92709350585938, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.0930399894714355, "rewards/margins": 6.157904148101807, "rewards/rejected": -11.250944137573242, "step": 16302 }, { "epoch": 2.54, "learning_rate": 2.190053426007573e-06, "logits/chosen": -2.2917697429656982, "logits/rejected": -1.5735228061676025, "logps/chosen": -311.64208984375, "logps/rejected": -287.4620056152344, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": -3.5687179565429688, "rewards/margins": 6.733981132507324, "rewards/rejected": -10.302699089050293, "step": 16303 }, { "epoch": 2.54, "learning_rate": 2.189319985476425e-06, "logits/chosen": -2.312878131866455, "logits/rejected": -2.6063737869262695, "logps/chosen": -145.1436309814453, "logps/rejected": -320.9412841796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.984185695648193, "rewards/margins": 9.45587158203125, "rewards/rejected": -14.440056800842285, "step": 16304 }, { "epoch": 2.54, "learning_rate": 2.1885865449452773e-06, "logits/chosen": -2.3716554641723633, "logits/rejected": -2.7341506481170654, "logps/chosen": -152.80274963378906, "logps/rejected": -317.2113037109375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -10.033506393432617, "rewards/margins": 5.791357040405273, "rewards/rejected": -15.82486343383789, "step": 16305 }, { "epoch": 2.54, "learning_rate": 2.187853104414129e-06, "logits/chosen": -2.0381267070770264, "logits/rejected": -2.728213310241699, "logps/chosen": -174.90350341796875, "logps/rejected": -452.50506591796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.479574203491211, "rewards/margins": 10.134357452392578, "rewards/rejected": -18.61393165588379, "step": 16306 }, { "epoch": 2.54, "learning_rate": 2.1871196638829815e-06, "logits/chosen": -1.9074716567993164, "logits/rejected": -2.7545483112335205, "logps/chosen": -194.36865234375, "logps/rejected": -384.1281433105469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.478438854217529, "rewards/margins": 9.75751781463623, "rewards/rejected": -15.235957145690918, "step": 16307 }, { "epoch": 2.54, "learning_rate": 2.186386223351834e-06, "logits/chosen": -2.370321750640869, "logits/rejected": -2.6958847045898438, "logps/chosen": -246.79750061035156, "logps/rejected": -359.80145263671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.535765171051025, "rewards/margins": 8.39422607421875, "rewards/rejected": -12.929990768432617, "step": 16308 }, { "epoch": 2.54, "learning_rate": 2.1856527828206857e-06, "logits/chosen": -1.6638782024383545, "logits/rejected": -1.5106486082077026, "logps/chosen": -881.96337890625, "logps/rejected": -700.5077514648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.320662498474121, "rewards/margins": 13.640213012695312, "rewards/rejected": -19.96087646484375, "step": 16309 }, { "epoch": 2.54, "learning_rate": 2.184919342289538e-06, "logits/chosen": -2.6511025428771973, "logits/rejected": -2.4424798488616943, "logps/chosen": -238.26556396484375, "logps/rejected": -270.7486572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.887822151184082, "rewards/margins": 8.874675750732422, "rewards/rejected": -11.762497901916504, "step": 16310 }, { "epoch": 2.54, "learning_rate": 2.18418590175839e-06, "logits/chosen": -2.791966199874878, "logits/rejected": -2.408695936203003, "logps/chosen": -303.1324462890625, "logps/rejected": -343.0687255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.29432487487793, "rewards/margins": 11.84793472290039, "rewards/rejected": -20.14225959777832, "step": 16311 }, { "epoch": 2.54, "learning_rate": 2.183452461227242e-06, "logits/chosen": -2.3377342224121094, "logits/rejected": -2.5159640312194824, "logps/chosen": -288.4803466796875, "logps/rejected": -428.92083740234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.7617392539978027, "rewards/margins": 9.209295272827148, "rewards/rejected": -12.971034049987793, "step": 16312 }, { "epoch": 2.54, "learning_rate": 2.182719020696094e-06, "logits/chosen": -2.637964963912964, "logits/rejected": -2.0009560585021973, "logps/chosen": -325.6917419433594, "logps/rejected": -312.63275146484375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.442094802856445, "rewards/margins": 6.841670036315918, "rewards/rejected": -11.283764839172363, "step": 16313 }, { "epoch": 2.54, "learning_rate": 2.1819855801649464e-06, "logits/chosen": -1.5250831842422485, "logits/rejected": -2.5739123821258545, "logps/chosen": -186.43789672851562, "logps/rejected": -477.1725769042969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.10047435760498, "rewards/margins": 11.988876342773438, "rewards/rejected": -20.089351654052734, "step": 16314 }, { "epoch": 2.54, "learning_rate": 2.1812521396337982e-06, "logits/chosen": -2.2960333824157715, "logits/rejected": -2.8024628162384033, "logps/chosen": -229.9453125, "logps/rejected": -357.45458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.2212564945220947, "rewards/margins": 10.390334129333496, "rewards/rejected": -13.611590385437012, "step": 16315 }, { "epoch": 2.54, "learning_rate": 2.180518699102651e-06, "logits/chosen": -2.2829701900482178, "logits/rejected": -2.6862454414367676, "logps/chosen": -185.64283752441406, "logps/rejected": -387.20526123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7035698890686035, "rewards/margins": 11.942360877990723, "rewards/rejected": -17.645931243896484, "step": 16316 }, { "epoch": 2.54, "learning_rate": 2.179785258571503e-06, "logits/chosen": -2.8812780380249023, "logits/rejected": -2.9509382247924805, "logps/chosen": -121.91874694824219, "logps/rejected": -177.23077392578125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.247077941894531, "rewards/margins": 7.281766414642334, "rewards/rejected": -13.528844833374023, "step": 16317 }, { "epoch": 2.54, "learning_rate": 2.1790518180403547e-06, "logits/chosen": -1.179476022720337, "logits/rejected": -1.9758522510528564, "logps/chosen": -234.3818817138672, "logps/rejected": -521.8115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.843477249145508, "rewards/margins": 11.095963478088379, "rewards/rejected": -19.939441680908203, "step": 16318 }, { "epoch": 2.54, "learning_rate": 2.178318377509207e-06, "logits/chosen": -2.6354832649230957, "logits/rejected": -3.0601449012756348, "logps/chosen": -142.99049377441406, "logps/rejected": -354.6404113769531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.815974235534668, "rewards/margins": 10.181825637817383, "rewards/rejected": -18.997798919677734, "step": 16319 }, { "epoch": 2.54, "learning_rate": 2.177584936978059e-06, "logits/chosen": -2.4547410011291504, "logits/rejected": -1.8587156534194946, "logps/chosen": -162.86627197265625, "logps/rejected": -301.8865661621094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.366231918334961, "rewards/margins": 7.93039083480835, "rewards/rejected": -16.29662322998047, "step": 16320 }, { "epoch": 2.54, "learning_rate": 2.176851496446911e-06, "logits/chosen": -2.6149063110351562, "logits/rejected": -2.2102348804473877, "logps/chosen": -306.830322265625, "logps/rejected": -485.2123107910156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.756648063659668, "rewards/margins": 11.630132675170898, "rewards/rejected": -19.38677978515625, "step": 16321 }, { "epoch": 2.54, "learning_rate": 2.176118055915763e-06, "logits/chosen": -2.5297460556030273, "logits/rejected": -0.6570301651954651, "logps/chosen": -390.17730712890625, "logps/rejected": -241.88336181640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.234240531921387, "rewards/margins": 8.769125938415527, "rewards/rejected": -15.003366470336914, "step": 16322 }, { "epoch": 2.54, "learning_rate": 2.1753846153846154e-06, "logits/chosen": -2.0195438861846924, "logits/rejected": -2.790644884109497, "logps/chosen": -107.46670532226562, "logps/rejected": -264.8704833984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.156132698059082, "rewards/margins": 7.706416130065918, "rewards/rejected": -13.862548828125, "step": 16323 }, { "epoch": 2.54, "learning_rate": 2.1746511748534677e-06, "logits/chosen": -2.487734079360962, "logits/rejected": -2.8468265533447266, "logps/chosen": -244.60830688476562, "logps/rejected": -320.265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -8.359423637390137, "rewards/margins": 6.607171058654785, "rewards/rejected": -14.966594696044922, "step": 16324 }, { "epoch": 2.54, "learning_rate": 2.17391773432232e-06, "logits/chosen": -2.7893922328948975, "logits/rejected": -2.1501832008361816, "logps/chosen": -230.24630737304688, "logps/rejected": -286.5735168457031, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.367980003356934, "rewards/margins": 7.553443908691406, "rewards/rejected": -12.921424865722656, "step": 16325 }, { "epoch": 2.54, "learning_rate": 2.173184293791172e-06, "logits/chosen": -2.7712182998657227, "logits/rejected": -1.2901995182037354, "logps/chosen": -453.010009765625, "logps/rejected": -254.77719116210938, "loss": 0.4794, "rewards/accuracies": 0.5, "rewards/chosen": -8.3473482131958, "rewards/margins": 2.809865713119507, "rewards/rejected": -11.15721321105957, "step": 16326 }, { "epoch": 2.54, "learning_rate": 2.1724508532600237e-06, "logits/chosen": -1.9605220556259155, "logits/rejected": -2.729987144470215, "logps/chosen": -403.4268493652344, "logps/rejected": -862.256103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.854059219360352, "rewards/margins": 13.044079780578613, "rewards/rejected": -20.89813995361328, "step": 16327 }, { "epoch": 2.54, "learning_rate": 2.171717412728876e-06, "logits/chosen": -1.2494162321090698, "logits/rejected": -2.6259233951568604, "logps/chosen": -117.39768981933594, "logps/rejected": -416.49005126953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.543411731719971, "rewards/margins": 12.87843132019043, "rewards/rejected": -19.421844482421875, "step": 16328 }, { "epoch": 2.54, "learning_rate": 2.170983972197728e-06, "logits/chosen": -2.863679885864258, "logits/rejected": -2.6324236392974854, "logps/chosen": -882.70263671875, "logps/rejected": -553.3096313476562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.4785614013671875, "rewards/margins": 7.307361602783203, "rewards/rejected": -14.78592300415039, "step": 16329 }, { "epoch": 2.54, "learning_rate": 2.1702505316665802e-06, "logits/chosen": -1.5092191696166992, "logits/rejected": -2.708775043487549, "logps/chosen": -288.086669921875, "logps/rejected": -406.4814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.178894996643066, "rewards/margins": 11.159825325012207, "rewards/rejected": -19.338720321655273, "step": 16330 }, { "epoch": 2.54, "learning_rate": 2.169517091135432e-06, "logits/chosen": -2.008162021636963, "logits/rejected": -3.0051562786102295, "logps/chosen": -154.93365478515625, "logps/rejected": -458.43707275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.5599565505981445, "rewards/margins": 9.758831024169922, "rewards/rejected": -15.318788528442383, "step": 16331 }, { "epoch": 2.54, "learning_rate": 2.1687836506042844e-06, "logits/chosen": -1.7034237384796143, "logits/rejected": -2.6862940788269043, "logps/chosen": -138.3419647216797, "logps/rejected": -432.4060974121094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.727862358093262, "rewards/margins": 11.248416900634766, "rewards/rejected": -16.97627830505371, "step": 16332 }, { "epoch": 2.54, "learning_rate": 2.1680502100731367e-06, "logits/chosen": -1.8049148321151733, "logits/rejected": -2.2851626873016357, "logps/chosen": -240.59963989257812, "logps/rejected": -411.56329345703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.243230819702148, "rewards/margins": 9.619585990905762, "rewards/rejected": -17.862817764282227, "step": 16333 }, { "epoch": 2.54, "learning_rate": 2.167316769541989e-06, "logits/chosen": -2.5942118167877197, "logits/rejected": -2.792569398880005, "logps/chosen": -262.85870361328125, "logps/rejected": -368.85272216796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.600736618041992, "rewards/margins": 7.922609329223633, "rewards/rejected": -16.523345947265625, "step": 16334 }, { "epoch": 2.54, "learning_rate": 2.166583329010841e-06, "logits/chosen": -2.4479517936706543, "logits/rejected": -2.321500301361084, "logps/chosen": -231.57838439941406, "logps/rejected": -424.0727844238281, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.202110290527344, "rewards/margins": 9.518423080444336, "rewards/rejected": -16.72053337097168, "step": 16335 }, { "epoch": 2.54, "learning_rate": 2.165849888479693e-06, "logits/chosen": -2.5010058879852295, "logits/rejected": -2.501758098602295, "logps/chosen": -373.9494323730469, "logps/rejected": -384.90972900390625, "loss": 1.4924, "rewards/accuracies": 0.5, "rewards/chosen": -10.537117004394531, "rewards/margins": 5.971816539764404, "rewards/rejected": -16.508934020996094, "step": 16336 }, { "epoch": 2.54, "learning_rate": 2.165116447948545e-06, "logits/chosen": -1.4333583116531372, "logits/rejected": -2.50309681892395, "logps/chosen": -296.37042236328125, "logps/rejected": -455.6224365234375, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/chosen": -7.145269870758057, "rewards/margins": 5.206945896148682, "rewards/rejected": -12.352215766906738, "step": 16337 }, { "epoch": 2.54, "learning_rate": 2.164383007417397e-06, "logits/chosen": -1.8409295082092285, "logits/rejected": -2.8648602962493896, "logps/chosen": -180.73095703125, "logps/rejected": -405.29498291015625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -11.63700008392334, "rewards/margins": 7.779789924621582, "rewards/rejected": -19.416790008544922, "step": 16338 }, { "epoch": 2.54, "learning_rate": 2.1636495668862492e-06, "logits/chosen": -2.6536154747009277, "logits/rejected": -2.9965691566467285, "logps/chosen": -463.9883117675781, "logps/rejected": -523.7752685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.98733139038086, "rewards/margins": 10.277545928955078, "rewards/rejected": -19.264877319335938, "step": 16339 }, { "epoch": 2.54, "learning_rate": 2.162916126355101e-06, "logits/chosen": -1.4909346103668213, "logits/rejected": -2.6643221378326416, "logps/chosen": -185.96873474121094, "logps/rejected": -491.13531494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.772950172424316, "rewards/margins": 12.285279273986816, "rewards/rejected": -19.058229446411133, "step": 16340 }, { "epoch": 2.54, "learning_rate": 2.162182685823954e-06, "logits/chosen": -1.3203991651535034, "logits/rejected": -2.658546209335327, "logps/chosen": -269.14019775390625, "logps/rejected": -549.333984375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -5.7780375480651855, "rewards/margins": 12.011815071105957, "rewards/rejected": -17.789852142333984, "step": 16341 }, { "epoch": 2.54, "learning_rate": 2.1614492452928057e-06, "logits/chosen": -2.515748977661133, "logits/rejected": -2.2484633922576904, "logps/chosen": -690.9325561523438, "logps/rejected": -585.0916748046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.856406211853027, "rewards/margins": 8.93050765991211, "rewards/rejected": -15.78691291809082, "step": 16342 }, { "epoch": 2.54, "learning_rate": 2.160715804761658e-06, "logits/chosen": -2.6008431911468506, "logits/rejected": -2.1228554248809814, "logps/chosen": -222.57119750976562, "logps/rejected": -265.3187561035156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.744896650314331, "rewards/margins": 7.860786437988281, "rewards/rejected": -11.605683326721191, "step": 16343 }, { "epoch": 2.54, "learning_rate": 2.15998236423051e-06, "logits/chosen": -2.5484488010406494, "logits/rejected": -2.6900012493133545, "logps/chosen": -164.4736328125, "logps/rejected": -293.4989013671875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -7.163869380950928, "rewards/margins": 6.246906757354736, "rewards/rejected": -13.410776138305664, "step": 16344 }, { "epoch": 2.54, "learning_rate": 2.159248923699362e-06, "logits/chosen": -2.843169927597046, "logits/rejected": -2.5778744220733643, "logps/chosen": -413.0101318359375, "logps/rejected": -428.639404296875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.943719387054443, "rewards/margins": 8.30013370513916, "rewards/rejected": -15.243852615356445, "step": 16345 }, { "epoch": 2.54, "learning_rate": 2.158515483168214e-06, "logits/chosen": -2.518010377883911, "logits/rejected": -2.9609367847442627, "logps/chosen": -218.52374267578125, "logps/rejected": -368.1602478027344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.1865997314453125, "rewards/margins": 10.018941879272461, "rewards/rejected": -17.205541610717773, "step": 16346 }, { "epoch": 2.54, "learning_rate": 2.157782042637066e-06, "logits/chosen": -1.2815521955490112, "logits/rejected": -2.250128746032715, "logps/chosen": -255.04803466796875, "logps/rejected": -439.7169189453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.884562969207764, "rewards/margins": 10.201900482177734, "rewards/rejected": -18.086463928222656, "step": 16347 }, { "epoch": 2.54, "learning_rate": 2.1570486021059183e-06, "logits/chosen": -2.751821756362915, "logits/rejected": -1.7405786514282227, "logps/chosen": -461.2257995605469, "logps/rejected": -376.90032958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.358174324035645, "rewards/margins": 12.501928329467773, "rewards/rejected": -21.860103607177734, "step": 16348 }, { "epoch": 2.54, "learning_rate": 2.1563151615747706e-06, "logits/chosen": -1.56485116481781, "logits/rejected": -2.5991852283477783, "logps/chosen": -108.12176513671875, "logps/rejected": -435.7602233886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.726376056671143, "rewards/margins": 12.7374267578125, "rewards/rejected": -18.463802337646484, "step": 16349 }, { "epoch": 2.54, "learning_rate": 2.155581721043623e-06, "logits/chosen": -1.7422914505004883, "logits/rejected": -2.563735246658325, "logps/chosen": -264.301513671875, "logps/rejected": -484.59942626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.633999824523926, "rewards/margins": 14.724461555480957, "rewards/rejected": -22.358461380004883, "step": 16350 }, { "epoch": 2.54, "learning_rate": 2.1548482805124747e-06, "logits/chosen": -2.412458896636963, "logits/rejected": -1.9822174310684204, "logps/chosen": -162.1600341796875, "logps/rejected": -237.54031372070312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.240635633468628, "rewards/margins": 8.745023727416992, "rewards/rejected": -11.9856595993042, "step": 16351 }, { "epoch": 2.54, "learning_rate": 2.154114839981327e-06, "logits/chosen": -2.4029438495635986, "logits/rejected": -2.38140869140625, "logps/chosen": -176.8284912109375, "logps/rejected": -323.19232177734375, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -5.789827346801758, "rewards/margins": 5.940524101257324, "rewards/rejected": -11.730351448059082, "step": 16352 }, { "epoch": 2.54, "learning_rate": 2.153381399450179e-06, "logits/chosen": -2.863241195678711, "logits/rejected": -2.946004629135132, "logps/chosen": -108.15200805664062, "logps/rejected": -204.0374755859375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -7.747515678405762, "rewards/margins": 5.999246597290039, "rewards/rejected": -13.746763229370117, "step": 16353 }, { "epoch": 2.54, "learning_rate": 2.1526479589190312e-06, "logits/chosen": -2.38769268989563, "logits/rejected": -2.241644859313965, "logps/chosen": -128.1687469482422, "logps/rejected": -294.000244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.847996711730957, "rewards/margins": 9.188310623168945, "rewards/rejected": -15.036308288574219, "step": 16354 }, { "epoch": 2.54, "learning_rate": 2.151914518387883e-06, "logits/chosen": -2.6176905632019043, "logits/rejected": -2.860976457595825, "logps/chosen": -292.34051513671875, "logps/rejected": -452.96923828125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.673606872558594, "rewards/margins": 10.274674415588379, "rewards/rejected": -16.94828224182129, "step": 16355 }, { "epoch": 2.54, "learning_rate": 2.1511810778567354e-06, "logits/chosen": -2.174919605255127, "logits/rejected": -2.774789333343506, "logps/chosen": -174.49496459960938, "logps/rejected": -522.330078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.586417198181152, "rewards/margins": 10.647700309753418, "rewards/rejected": -18.23411750793457, "step": 16356 }, { "epoch": 2.54, "learning_rate": 2.1504476373255873e-06, "logits/chosen": -2.4999756813049316, "logits/rejected": -1.8146276473999023, "logps/chosen": -248.6737060546875, "logps/rejected": -308.9245910644531, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.066211700439453, "rewards/margins": 8.725178718566895, "rewards/rejected": -14.791390419006348, "step": 16357 }, { "epoch": 2.54, "learning_rate": 2.1497141967944396e-06, "logits/chosen": -1.8250181674957275, "logits/rejected": -2.757120132446289, "logps/chosen": -101.36807250976562, "logps/rejected": -317.7994384765625, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -7.893619537353516, "rewards/margins": 4.549944877624512, "rewards/rejected": -12.443563461303711, "step": 16358 }, { "epoch": 2.54, "learning_rate": 2.148980756263292e-06, "logits/chosen": -2.4732766151428223, "logits/rejected": -2.0582330226898193, "logps/chosen": -245.94766235351562, "logps/rejected": -408.488037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.083134651184082, "rewards/margins": 11.526281356811523, "rewards/rejected": -18.609416961669922, "step": 16359 }, { "epoch": 2.54, "learning_rate": 2.1482473157321438e-06, "logits/chosen": -2.225341558456421, "logits/rejected": -3.031641721725464, "logps/chosen": -273.0883483886719, "logps/rejected": -635.5699462890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.807778358459473, "rewards/margins": 13.040860176086426, "rewards/rejected": -20.848636627197266, "step": 16360 }, { "epoch": 2.54, "learning_rate": 2.147513875200996e-06, "logits/chosen": -1.3772211074829102, "logits/rejected": -2.962763786315918, "logps/chosen": -180.76614379882812, "logps/rejected": -450.9398498535156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.109551429748535, "rewards/margins": 9.277409553527832, "rewards/rejected": -14.386960983276367, "step": 16361 }, { "epoch": 2.54, "learning_rate": 2.146780434669848e-06, "logits/chosen": -2.7346725463867188, "logits/rejected": -2.52437424659729, "logps/chosen": -557.7666015625, "logps/rejected": -462.449462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0514373779296875, "rewards/margins": 14.617246627807617, "rewards/rejected": -20.668685913085938, "step": 16362 }, { "epoch": 2.54, "learning_rate": 2.1460469941387002e-06, "logits/chosen": -2.3901076316833496, "logits/rejected": -2.5553672313690186, "logps/chosen": -296.6062316894531, "logps/rejected": -499.4246826171875, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -5.71945858001709, "rewards/margins": 10.531129837036133, "rewards/rejected": -16.250587463378906, "step": 16363 }, { "epoch": 2.54, "learning_rate": 2.145313553607552e-06, "logits/chosen": -2.180872678756714, "logits/rejected": -2.4516944885253906, "logps/chosen": -119.9403076171875, "logps/rejected": -273.2301330566406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -9.211753845214844, "rewards/margins": 7.78100061416626, "rewards/rejected": -16.992753982543945, "step": 16364 }, { "epoch": 2.55, "learning_rate": 2.1445801130764044e-06, "logits/chosen": -0.9626340866088867, "logits/rejected": -2.8071563243865967, "logps/chosen": -190.7628173828125, "logps/rejected": -657.822998046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.17997407913208, "rewards/margins": 9.834212303161621, "rewards/rejected": -16.01418685913086, "step": 16365 }, { "epoch": 2.55, "learning_rate": 2.1438466725452563e-06, "logits/chosen": -1.4148774147033691, "logits/rejected": -2.3801891803741455, "logps/chosen": -157.54434204101562, "logps/rejected": -383.17095947265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.244978904724121, "rewards/margins": 9.863749504089355, "rewards/rejected": -16.108728408813477, "step": 16366 }, { "epoch": 2.55, "learning_rate": 2.1431132320141086e-06, "logits/chosen": -0.9428678154945374, "logits/rejected": -2.254296064376831, "logps/chosen": -140.4408416748047, "logps/rejected": -534.7859497070312, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.398722171783447, "rewards/margins": 10.59310245513916, "rewards/rejected": -17.991825103759766, "step": 16367 }, { "epoch": 2.55, "learning_rate": 2.142379791482961e-06, "logits/chosen": -2.183161735534668, "logits/rejected": -2.7767062187194824, "logps/chosen": -464.03143310546875, "logps/rejected": -488.2408752441406, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -10.801575660705566, "rewards/margins": 6.217106819152832, "rewards/rejected": -17.0186824798584, "step": 16368 }, { "epoch": 2.55, "learning_rate": 2.1416463509518128e-06, "logits/chosen": -2.224856376647949, "logits/rejected": -2.7788774967193604, "logps/chosen": -119.20606231689453, "logps/rejected": -321.75274658203125, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -8.320914268493652, "rewards/margins": 8.052165031433105, "rewards/rejected": -16.373079299926758, "step": 16369 }, { "epoch": 2.55, "learning_rate": 2.140912910420665e-06, "logits/chosen": -2.2952051162719727, "logits/rejected": -2.8755576610565186, "logps/chosen": -201.2054443359375, "logps/rejected": -458.2743225097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.706647872924805, "rewards/margins": 10.419374465942383, "rewards/rejected": -17.126022338867188, "step": 16370 }, { "epoch": 2.55, "learning_rate": 2.140179469889517e-06, "logits/chosen": -2.4277091026306152, "logits/rejected": -2.8618006706237793, "logps/chosen": -142.89170837402344, "logps/rejected": -381.0480651855469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.2183637619018555, "rewards/margins": 10.242106437683105, "rewards/rejected": -14.460470199584961, "step": 16371 }, { "epoch": 2.55, "learning_rate": 2.1394460293583693e-06, "logits/chosen": -2.7815942764282227, "logits/rejected": -2.8970682621002197, "logps/chosen": -316.74920654296875, "logps/rejected": -354.58905029296875, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -6.828531265258789, "rewards/margins": 5.758599758148193, "rewards/rejected": -12.587130546569824, "step": 16372 }, { "epoch": 2.55, "learning_rate": 2.138712588827221e-06, "logits/chosen": -2.332355499267578, "logits/rejected": -2.568378210067749, "logps/chosen": -301.48809814453125, "logps/rejected": -474.95220947265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.333724975585938, "rewards/margins": 13.421686172485352, "rewards/rejected": -23.75541114807129, "step": 16373 }, { "epoch": 2.55, "learning_rate": 2.1379791482960734e-06, "logits/chosen": -1.6975277662277222, "logits/rejected": -2.244277238845825, "logps/chosen": -213.88876342773438, "logps/rejected": -440.510009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.682397842407227, "rewards/margins": 14.887112617492676, "rewards/rejected": -21.56951141357422, "step": 16374 }, { "epoch": 2.55, "learning_rate": 2.1372457077649258e-06, "logits/chosen": -0.8980825543403625, "logits/rejected": -1.150726318359375, "logps/chosen": -403.75030517578125, "logps/rejected": -723.3115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.389070987701416, "rewards/margins": 11.062458038330078, "rewards/rejected": -17.45153045654297, "step": 16375 }, { "epoch": 2.55, "learning_rate": 2.1365122672337776e-06, "logits/chosen": -2.3210113048553467, "logits/rejected": -1.065010905265808, "logps/chosen": -367.1111145019531, "logps/rejected": -292.343017578125, "loss": 0.0443, "rewards/accuracies": 1.0, "rewards/chosen": -7.695423603057861, "rewards/margins": 6.879024028778076, "rewards/rejected": -14.574447631835938, "step": 16376 }, { "epoch": 2.55, "learning_rate": 2.13577882670263e-06, "logits/chosen": -2.1364195346832275, "logits/rejected": -2.6867775917053223, "logps/chosen": -124.77398681640625, "logps/rejected": -372.3518981933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.964400291442871, "rewards/margins": 10.894179344177246, "rewards/rejected": -16.858579635620117, "step": 16377 }, { "epoch": 2.55, "learning_rate": 2.135045386171482e-06, "logits/chosen": -2.152792453765869, "logits/rejected": -2.846170425415039, "logps/chosen": -190.69866943359375, "logps/rejected": -338.5523376464844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.700512409210205, "rewards/margins": 9.927821159362793, "rewards/rejected": -16.628334045410156, "step": 16378 }, { "epoch": 2.55, "learning_rate": 2.134311945640334e-06, "logits/chosen": -1.1695263385772705, "logits/rejected": -2.6456410884857178, "logps/chosen": -124.91313934326172, "logps/rejected": -534.2449951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.388962745666504, "rewards/margins": 12.223261833190918, "rewards/rejected": -18.612224578857422, "step": 16379 }, { "epoch": 2.55, "learning_rate": 2.133578505109186e-06, "logits/chosen": -2.8497254848480225, "logits/rejected": -2.774625301361084, "logps/chosen": -168.77386474609375, "logps/rejected": -254.32266235351562, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -7.02370548248291, "rewards/margins": 8.976244926452637, "rewards/rejected": -15.999950408935547, "step": 16380 }, { "epoch": 2.55, "learning_rate": 2.1328450645780383e-06, "logits/chosen": -1.0303953886032104, "logits/rejected": -2.351966619491577, "logps/chosen": -111.22747802734375, "logps/rejected": -346.48126220703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.414676666259766, "rewards/margins": 10.899385452270508, "rewards/rejected": -17.314062118530273, "step": 16381 }, { "epoch": 2.55, "learning_rate": 2.13211162404689e-06, "logits/chosen": -2.6970033645629883, "logits/rejected": -2.1982009410858154, "logps/chosen": -139.8845977783203, "logps/rejected": -219.69667053222656, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -8.46478271484375, "rewards/margins": 6.3365912437438965, "rewards/rejected": -14.801374435424805, "step": 16382 }, { "epoch": 2.55, "learning_rate": 2.1313781835157425e-06, "logits/chosen": -0.9149779677391052, "logits/rejected": -2.3878679275512695, "logps/chosen": -134.9204559326172, "logps/rejected": -617.8818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.100709438323975, "rewards/margins": 13.344354629516602, "rewards/rejected": -19.445064544677734, "step": 16383 }, { "epoch": 2.55, "learning_rate": 2.1306447429845948e-06, "logits/chosen": -2.89155912399292, "logits/rejected": -2.5994338989257812, "logps/chosen": -667.1242065429688, "logps/rejected": -557.186279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.968391418457031, "rewards/margins": 12.213672637939453, "rewards/rejected": -18.182064056396484, "step": 16384 }, { "epoch": 2.55, "learning_rate": 2.129911302453447e-06, "logits/chosen": -2.4413020610809326, "logits/rejected": -2.573619842529297, "logps/chosen": -115.8974609375, "logps/rejected": -209.409423828125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -7.555572509765625, "rewards/margins": 9.023136138916016, "rewards/rejected": -16.57870864868164, "step": 16385 }, { "epoch": 2.55, "learning_rate": 2.129177861922299e-06, "logits/chosen": -2.6183629035949707, "logits/rejected": -2.77531361579895, "logps/chosen": -285.680908203125, "logps/rejected": -400.74493408203125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -7.61363410949707, "rewards/margins": 6.708348274230957, "rewards/rejected": -14.321981430053711, "step": 16386 }, { "epoch": 2.55, "learning_rate": 2.128444421391151e-06, "logits/chosen": -1.5115375518798828, "logits/rejected": -2.5509421825408936, "logps/chosen": -119.67304992675781, "logps/rejected": -365.09814453125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -9.06615924835205, "rewards/margins": 7.801142692565918, "rewards/rejected": -16.86730194091797, "step": 16387 }, { "epoch": 2.55, "learning_rate": 2.127710980860003e-06, "logits/chosen": -2.3446879386901855, "logits/rejected": -2.5458357334136963, "logps/chosen": -200.9605255126953, "logps/rejected": -341.62579345703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.597741603851318, "rewards/margins": 7.518245220184326, "rewards/rejected": -13.115986824035645, "step": 16388 }, { "epoch": 2.55, "learning_rate": 2.126977540328855e-06, "logits/chosen": -2.390565872192383, "logits/rejected": -2.5399258136749268, "logps/chosen": -542.0136108398438, "logps/rejected": -433.71966552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.254095554351807, "rewards/margins": 10.273855209350586, "rewards/rejected": -16.527950286865234, "step": 16389 }, { "epoch": 2.55, "learning_rate": 2.1262440997977073e-06, "logits/chosen": -2.1123709678649902, "logits/rejected": -2.662527322769165, "logps/chosen": -187.3770751953125, "logps/rejected": -401.47222900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.966340065002441, "rewards/margins": 11.99367904663086, "rewards/rejected": -16.960020065307617, "step": 16390 }, { "epoch": 2.55, "learning_rate": 2.125510659266559e-06, "logits/chosen": -2.6403439044952393, "logits/rejected": -2.6036040782928467, "logps/chosen": -525.3199462890625, "logps/rejected": -782.601806640625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.317623138427734, "rewards/margins": 8.230215072631836, "rewards/rejected": -15.54783821105957, "step": 16391 }, { "epoch": 2.55, "learning_rate": 2.124777218735412e-06, "logits/chosen": -2.5274174213409424, "logits/rejected": -1.2868151664733887, "logps/chosen": -743.8009033203125, "logps/rejected": -515.5352172851562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.769426345825195, "rewards/margins": 8.850184440612793, "rewards/rejected": -16.619611740112305, "step": 16392 }, { "epoch": 2.55, "learning_rate": 2.124043778204264e-06, "logits/chosen": -1.9307429790496826, "logits/rejected": -2.397475004196167, "logps/chosen": -351.3345642089844, "logps/rejected": -633.6636962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.906208038330078, "rewards/margins": 11.257286071777344, "rewards/rejected": -18.163494110107422, "step": 16393 }, { "epoch": 2.55, "learning_rate": 2.123310337673116e-06, "logits/chosen": -1.4656531810760498, "logits/rejected": -2.9484610557556152, "logps/chosen": -195.04957580566406, "logps/rejected": -557.2800903320312, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -9.2875394821167, "rewards/margins": 8.140686988830566, "rewards/rejected": -17.428226470947266, "step": 16394 }, { "epoch": 2.55, "learning_rate": 2.122576897141968e-06, "logits/chosen": -2.4159774780273438, "logits/rejected": -2.320249319076538, "logps/chosen": -480.0859680175781, "logps/rejected": -424.48828125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -10.271492004394531, "rewards/margins": 6.367575645446777, "rewards/rejected": -16.639068603515625, "step": 16395 }, { "epoch": 2.55, "learning_rate": 2.12184345661082e-06, "logits/chosen": -2.532902956008911, "logits/rejected": -1.805821418762207, "logps/chosen": -404.19720458984375, "logps/rejected": -495.5665283203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.534634590148926, "rewards/margins": 9.119026184082031, "rewards/rejected": -16.65365982055664, "step": 16396 }, { "epoch": 2.55, "learning_rate": 2.121110016079672e-06, "logits/chosen": -1.3344154357910156, "logits/rejected": -2.4553933143615723, "logps/chosen": -111.20841217041016, "logps/rejected": -441.3567199707031, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.555790901184082, "rewards/margins": 9.743038177490234, "rewards/rejected": -17.298830032348633, "step": 16397 }, { "epoch": 2.55, "learning_rate": 2.120376575548524e-06, "logits/chosen": -2.619462728500366, "logits/rejected": -0.7708542346954346, "logps/chosen": -724.6928100585938, "logps/rejected": -473.68292236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.328392028808594, "rewards/margins": 11.99051284790039, "rewards/rejected": -20.318904876708984, "step": 16398 }, { "epoch": 2.55, "learning_rate": 2.1196431350173763e-06, "logits/chosen": -2.071319818496704, "logits/rejected": -2.697938919067383, "logps/chosen": -418.8975524902344, "logps/rejected": -621.6286010742188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.482733249664307, "rewards/margins": 9.981194496154785, "rewards/rejected": -14.46392822265625, "step": 16399 }, { "epoch": 2.55, "learning_rate": 2.1189096944862286e-06, "logits/chosen": -1.423163890838623, "logits/rejected": -2.3135862350463867, "logps/chosen": -126.69110870361328, "logps/rejected": -354.07781982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.082399368286133, "rewards/margins": 11.839533805847168, "rewards/rejected": -16.921932220458984, "step": 16400 }, { "epoch": 2.55, "learning_rate": 2.118176253955081e-06, "logits/chosen": -2.10848069190979, "logits/rejected": -2.764552354812622, "logps/chosen": -130.69741821289062, "logps/rejected": -431.656005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.710909366607666, "rewards/margins": 11.56171989440918, "rewards/rejected": -18.272628784179688, "step": 16401 }, { "epoch": 2.55, "learning_rate": 2.117442813423933e-06, "logits/chosen": -2.4810657501220703, "logits/rejected": -3.0551302433013916, "logps/chosen": -173.1905517578125, "logps/rejected": -480.00152587890625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.091612339019775, "rewards/margins": 10.5651216506958, "rewards/rejected": -17.656734466552734, "step": 16402 }, { "epoch": 2.55, "learning_rate": 2.116709372892785e-06, "logits/chosen": -2.437865972518921, "logits/rejected": -2.6225457191467285, "logps/chosen": -282.0316162109375, "logps/rejected": -476.26806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.717878818511963, "rewards/margins": 13.213634490966797, "rewards/rejected": -18.9315128326416, "step": 16403 }, { "epoch": 2.55, "learning_rate": 2.115975932361637e-06, "logits/chosen": -1.8465474843978882, "logits/rejected": -2.321258544921875, "logps/chosen": -468.120361328125, "logps/rejected": -471.82879638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.8788739442825317, "rewards/margins": 14.240190505981445, "rewards/rejected": -16.119064331054688, "step": 16404 }, { "epoch": 2.55, "learning_rate": 2.1152424918304893e-06, "logits/chosen": -2.7848172187805176, "logits/rejected": -2.785454273223877, "logps/chosen": -353.0394287109375, "logps/rejected": -304.6156311035156, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -10.514298439025879, "rewards/margins": 7.605311870574951, "rewards/rejected": -18.119609832763672, "step": 16405 }, { "epoch": 2.55, "learning_rate": 2.114509051299341e-06, "logits/chosen": -2.9152283668518066, "logits/rejected": -2.5252389907836914, "logps/chosen": -478.18896484375, "logps/rejected": -334.4737243652344, "loss": 2.52, "rewards/accuracies": 0.5, "rewards/chosen": -10.626221656799316, "rewards/margins": 1.360950231552124, "rewards/rejected": -11.98717212677002, "step": 16406 }, { "epoch": 2.55, "learning_rate": 2.113775610768193e-06, "logits/chosen": -1.597339391708374, "logits/rejected": -2.532681941986084, "logps/chosen": -200.5730743408203, "logps/rejected": -321.69781494140625, "loss": 0.0669, "rewards/accuracies": 1.0, "rewards/chosen": -6.550559043884277, "rewards/margins": 6.627539157867432, "rewards/rejected": -13.17809772491455, "step": 16407 }, { "epoch": 2.55, "learning_rate": 2.1130421702370454e-06, "logits/chosen": -2.0432820320129395, "logits/rejected": -2.5184361934661865, "logps/chosen": -277.99102783203125, "logps/rejected": -471.3838195800781, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.558597087860107, "rewards/margins": 9.672463417053223, "rewards/rejected": -17.231060028076172, "step": 16408 }, { "epoch": 2.55, "learning_rate": 2.1123087297058977e-06, "logits/chosen": -1.8565529584884644, "logits/rejected": -2.5843303203582764, "logps/chosen": -300.0753173828125, "logps/rejected": -407.85693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.50196647644043, "rewards/margins": 9.71440315246582, "rewards/rejected": -19.21636962890625, "step": 16409 }, { "epoch": 2.55, "learning_rate": 2.11157528917475e-06, "logits/chosen": -2.3728415966033936, "logits/rejected": -2.491687536239624, "logps/chosen": -190.77569580078125, "logps/rejected": -348.11370849609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -9.008008003234863, "rewards/margins": 10.867262840270996, "rewards/rejected": -19.87527084350586, "step": 16410 }, { "epoch": 2.55, "learning_rate": 2.110841848643602e-06, "logits/chosen": -1.8715286254882812, "logits/rejected": -2.64959716796875, "logps/chosen": -161.40322875976562, "logps/rejected": -357.217041015625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -6.750655174255371, "rewards/margins": 8.744836807250977, "rewards/rejected": -15.495491981506348, "step": 16411 }, { "epoch": 2.55, "learning_rate": 2.110108408112454e-06, "logits/chosen": -1.9510802030563354, "logits/rejected": -2.5552871227264404, "logps/chosen": -174.35836791992188, "logps/rejected": -288.0927734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.119148254394531, "rewards/margins": 6.51516580581665, "rewards/rejected": -14.634313583374023, "step": 16412 }, { "epoch": 2.55, "learning_rate": 2.109374967581306e-06, "logits/chosen": -2.390913724899292, "logits/rejected": -2.686556100845337, "logps/chosen": -238.82382202148438, "logps/rejected": -285.8948974609375, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": -7.405523300170898, "rewards/margins": 5.038228988647461, "rewards/rejected": -12.44375228881836, "step": 16413 }, { "epoch": 2.55, "learning_rate": 2.1086415270501583e-06, "logits/chosen": -3.1806037425994873, "logits/rejected": -2.619922161102295, "logps/chosen": -365.58740234375, "logps/rejected": -350.4306335449219, "loss": 0.3028, "rewards/accuracies": 1.0, "rewards/chosen": -8.448562622070312, "rewards/margins": 4.807219982147217, "rewards/rejected": -13.255783081054688, "step": 16414 }, { "epoch": 2.55, "learning_rate": 2.10790808651901e-06, "logits/chosen": -1.1326266527175903, "logits/rejected": -2.6839749813079834, "logps/chosen": -193.71795654296875, "logps/rejected": -529.9011840820312, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/chosen": -5.470680236816406, "rewards/margins": 10.489143371582031, "rewards/rejected": -15.959823608398438, "step": 16415 }, { "epoch": 2.55, "learning_rate": 2.107174645987862e-06, "logits/chosen": -2.6341805458068848, "logits/rejected": -1.8484700918197632, "logps/chosen": -781.092041015625, "logps/rejected": -558.5704345703125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -11.418710708618164, "rewards/margins": 8.991460800170898, "rewards/rejected": -20.410171508789062, "step": 16416 }, { "epoch": 2.55, "learning_rate": 2.106441205456715e-06, "logits/chosen": -1.7948170900344849, "logits/rejected": -2.452965021133423, "logps/chosen": -202.2453155517578, "logps/rejected": -458.0125732421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.776580810546875, "rewards/margins": 9.131877899169922, "rewards/rejected": -17.908458709716797, "step": 16417 }, { "epoch": 2.55, "learning_rate": 2.1057077649255667e-06, "logits/chosen": -0.9150079488754272, "logits/rejected": -2.5794825553894043, "logps/chosen": -121.28343200683594, "logps/rejected": -476.1378479003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.1691312789917, "rewards/margins": 14.430882453918457, "rewards/rejected": -23.600013732910156, "step": 16418 }, { "epoch": 2.55, "learning_rate": 2.104974324394419e-06, "logits/chosen": -2.654949426651001, "logits/rejected": -1.525406002998352, "logps/chosen": -564.0286865234375, "logps/rejected": -416.84710693359375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -10.306661605834961, "rewards/margins": 7.0105462074279785, "rewards/rejected": -17.31720733642578, "step": 16419 }, { "epoch": 2.55, "learning_rate": 2.104240883863271e-06, "logits/chosen": -2.5662543773651123, "logits/rejected": -2.610440492630005, "logps/chosen": -320.3973388671875, "logps/rejected": -498.434814453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.681857109069824, "rewards/margins": 12.44137954711914, "rewards/rejected": -22.12323760986328, "step": 16420 }, { "epoch": 2.55, "learning_rate": 2.103507443332123e-06, "logits/chosen": -2.4434256553649902, "logits/rejected": -2.3268392086029053, "logps/chosen": -212.88211059570312, "logps/rejected": -405.7999267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.708303451538086, "rewards/margins": 11.563678741455078, "rewards/rejected": -19.271984100341797, "step": 16421 }, { "epoch": 2.55, "learning_rate": 2.102774002800975e-06, "logits/chosen": -2.671848773956299, "logits/rejected": -2.5156922340393066, "logps/chosen": -377.7182312011719, "logps/rejected": -485.337646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.624610424041748, "rewards/margins": 10.071720123291016, "rewards/rejected": -17.696331024169922, "step": 16422 }, { "epoch": 2.55, "learning_rate": 2.1020405622698273e-06, "logits/chosen": -2.911076784133911, "logits/rejected": -1.9884381294250488, "logps/chosen": -328.73321533203125, "logps/rejected": -274.5779113769531, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.2293195724487305, "rewards/margins": 7.955560207366943, "rewards/rejected": -12.184880256652832, "step": 16423 }, { "epoch": 2.55, "learning_rate": 2.1013071217386792e-06, "logits/chosen": -2.57169246673584, "logits/rejected": -2.781228542327881, "logps/chosen": -423.20263671875, "logps/rejected": -523.085693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.747665405273438, "rewards/margins": 9.886377334594727, "rewards/rejected": -20.634042739868164, "step": 16424 }, { "epoch": 2.55, "learning_rate": 2.1005736812075315e-06, "logits/chosen": -1.4479209184646606, "logits/rejected": -2.436500072479248, "logps/chosen": -152.2353973388672, "logps/rejected": -473.6559143066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.106088161468506, "rewards/margins": 11.223591804504395, "rewards/rejected": -17.329681396484375, "step": 16425 }, { "epoch": 2.55, "learning_rate": 2.099840240676384e-06, "logits/chosen": -2.5549209117889404, "logits/rejected": -2.783137321472168, "logps/chosen": -234.68304443359375, "logps/rejected": -352.1002502441406, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -9.743160247802734, "rewards/margins": 5.620840549468994, "rewards/rejected": -15.364001274108887, "step": 16426 }, { "epoch": 2.55, "learning_rate": 2.0991068001452357e-06, "logits/chosen": -1.5192437171936035, "logits/rejected": -2.4761526584625244, "logps/chosen": -179.70208740234375, "logps/rejected": -360.3798522949219, "loss": 0.0686, "rewards/accuracies": 1.0, "rewards/chosen": -8.633410453796387, "rewards/margins": 4.320701599121094, "rewards/rejected": -12.95411205291748, "step": 16427 }, { "epoch": 2.55, "learning_rate": 2.098373359614088e-06, "logits/chosen": -2.9206113815307617, "logits/rejected": -2.477128028869629, "logps/chosen": -196.79360961914062, "logps/rejected": -230.60891723632812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.983419895172119, "rewards/margins": 8.987419128417969, "rewards/rejected": -11.97083854675293, "step": 16428 }, { "epoch": 2.56, "learning_rate": 2.09763991908294e-06, "logits/chosen": -2.337700128555298, "logits/rejected": -2.512308359146118, "logps/chosen": -223.48031616210938, "logps/rejected": -332.7402038574219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.452906131744385, "rewards/margins": 9.122467994689941, "rewards/rejected": -14.575374603271484, "step": 16429 }, { "epoch": 2.56, "learning_rate": 2.096906478551792e-06, "logits/chosen": -2.4943654537200928, "logits/rejected": -2.484921932220459, "logps/chosen": -686.0393676757812, "logps/rejected": -442.27520751953125, "loss": 0.1921, "rewards/accuracies": 1.0, "rewards/chosen": -9.084136009216309, "rewards/margins": 7.085748195648193, "rewards/rejected": -16.169883728027344, "step": 16430 }, { "epoch": 2.56, "learning_rate": 2.096173038020644e-06, "logits/chosen": -1.922105312347412, "logits/rejected": -2.5283584594726562, "logps/chosen": -255.6558074951172, "logps/rejected": -494.6512756347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.052637100219727, "rewards/margins": 15.177757263183594, "rewards/rejected": -22.230396270751953, "step": 16431 }, { "epoch": 2.56, "learning_rate": 2.0954395974894964e-06, "logits/chosen": -2.7629847526550293, "logits/rejected": -3.248967170715332, "logps/chosen": -147.6703643798828, "logps/rejected": -444.2021789550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.546924591064453, "rewards/margins": 13.597753524780273, "rewards/rejected": -19.144678115844727, "step": 16432 }, { "epoch": 2.56, "learning_rate": 2.0947061569583482e-06, "logits/chosen": -2.86845326423645, "logits/rejected": -2.577169418334961, "logps/chosen": -299.88458251953125, "logps/rejected": -304.8810119628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.575002431869507, "rewards/margins": 11.832717895507812, "rewards/rejected": -15.407720565795898, "step": 16433 }, { "epoch": 2.56, "learning_rate": 2.093972716427201e-06, "logits/chosen": -1.538394808769226, "logits/rejected": -2.7458598613739014, "logps/chosen": -150.34609985351562, "logps/rejected": -611.31201171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.975305080413818, "rewards/margins": 12.580451011657715, "rewards/rejected": -17.555755615234375, "step": 16434 }, { "epoch": 2.56, "learning_rate": 2.093239275896053e-06, "logits/chosen": -2.6157891750335693, "logits/rejected": -2.6751909255981445, "logps/chosen": -120.03919982910156, "logps/rejected": -222.73031616210938, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -8.062878608703613, "rewards/margins": 5.976656436920166, "rewards/rejected": -14.039535522460938, "step": 16435 }, { "epoch": 2.56, "learning_rate": 2.0925058353649047e-06, "logits/chosen": -1.266979455947876, "logits/rejected": -2.340294599533081, "logps/chosen": -148.8779296875, "logps/rejected": -451.91436767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.101248741149902, "rewards/margins": 10.614794731140137, "rewards/rejected": -15.716043472290039, "step": 16436 }, { "epoch": 2.56, "learning_rate": 2.091772394833757e-06, "logits/chosen": -2.413069486618042, "logits/rejected": -2.6014654636383057, "logps/chosen": -525.8084716796875, "logps/rejected": -731.731689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.103613376617432, "rewards/margins": 12.949504852294922, "rewards/rejected": -17.053117752075195, "step": 16437 }, { "epoch": 2.56, "learning_rate": 2.091038954302609e-06, "logits/chosen": -0.9126859307289124, "logits/rejected": -2.085767984390259, "logps/chosen": -151.89553833007812, "logps/rejected": -528.300537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.998320579528809, "rewards/margins": 11.96418571472168, "rewards/rejected": -19.962505340576172, "step": 16438 }, { "epoch": 2.56, "learning_rate": 2.090305513771461e-06, "logits/chosen": -2.9419116973876953, "logits/rejected": -3.0391907691955566, "logps/chosen": -91.6025390625, "logps/rejected": -287.9605712890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.751264572143555, "rewards/margins": 10.31411361694336, "rewards/rejected": -17.065378189086914, "step": 16439 }, { "epoch": 2.56, "learning_rate": 2.089572073240313e-06, "logits/chosen": -2.529510021209717, "logits/rejected": -2.478872299194336, "logps/chosen": -275.9850158691406, "logps/rejected": -271.1374816894531, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.221693992614746, "rewards/margins": 7.600309371948242, "rewards/rejected": -11.822003364562988, "step": 16440 }, { "epoch": 2.56, "learning_rate": 2.0888386327091654e-06, "logits/chosen": -2.767287015914917, "logits/rejected": -2.873771905899048, "logps/chosen": -225.50631713867188, "logps/rejected": -343.2681884765625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.440353393554688, "rewards/margins": 8.268828392028809, "rewards/rejected": -16.709182739257812, "step": 16441 }, { "epoch": 2.56, "learning_rate": 2.0881051921780177e-06, "logits/chosen": -2.0473101139068604, "logits/rejected": -2.6444170475006104, "logps/chosen": -161.3920135498047, "logps/rejected": -315.3839111328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.534711837768555, "rewards/margins": 9.231483459472656, "rewards/rejected": -14.766195297241211, "step": 16442 }, { "epoch": 2.56, "learning_rate": 2.08737175164687e-06, "logits/chosen": -2.951145648956299, "logits/rejected": -2.1165482997894287, "logps/chosen": -466.4289245605469, "logps/rejected": -240.05722045898438, "loss": 0.1764, "rewards/accuracies": 1.0, "rewards/chosen": -8.458742141723633, "rewards/margins": 3.310326337814331, "rewards/rejected": -11.769067764282227, "step": 16443 }, { "epoch": 2.56, "learning_rate": 2.086638311115722e-06, "logits/chosen": -1.9130887985229492, "logits/rejected": -2.865119457244873, "logps/chosen": -282.5662841796875, "logps/rejected": -540.6314086914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.099252700805664, "rewards/margins": 10.124273300170898, "rewards/rejected": -18.223526000976562, "step": 16444 }, { "epoch": 2.56, "learning_rate": 2.0859048705845737e-06, "logits/chosen": -1.720452070236206, "logits/rejected": -2.7789297103881836, "logps/chosen": -199.91470336914062, "logps/rejected": -463.026611328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.03110933303833, "rewards/margins": 9.15018081665039, "rewards/rejected": -15.181289672851562, "step": 16445 }, { "epoch": 2.56, "learning_rate": 2.085171430053426e-06, "logits/chosen": -1.8815741539001465, "logits/rejected": -2.838132619857788, "logps/chosen": -287.67333984375, "logps/rejected": -595.9746704101562, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -10.347541809082031, "rewards/margins": 5.894961833953857, "rewards/rejected": -16.242504119873047, "step": 16446 }, { "epoch": 2.56, "learning_rate": 2.084437989522278e-06, "logits/chosen": -2.3780007362365723, "logits/rejected": -2.698124647140503, "logps/chosen": -179.11422729492188, "logps/rejected": -323.564208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.045782566070557, "rewards/margins": 10.352228164672852, "rewards/rejected": -16.39801025390625, "step": 16447 }, { "epoch": 2.56, "learning_rate": 2.0837045489911302e-06, "logits/chosen": -2.58333683013916, "logits/rejected": -2.067199468612671, "logps/chosen": -216.51889038085938, "logps/rejected": -343.85833740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.745620012283325, "rewards/margins": 12.77688980102539, "rewards/rejected": -16.52250862121582, "step": 16448 }, { "epoch": 2.56, "learning_rate": 2.082971108459982e-06, "logits/chosen": -2.6444225311279297, "logits/rejected": -2.4727625846862793, "logps/chosen": -187.1267852783203, "logps/rejected": -305.25799560546875, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -8.564536094665527, "rewards/margins": 5.373994827270508, "rewards/rejected": -13.938530921936035, "step": 16449 }, { "epoch": 2.56, "learning_rate": 2.0822376679288344e-06, "logits/chosen": -2.2554099559783936, "logits/rejected": -2.761884927749634, "logps/chosen": -208.3511199951172, "logps/rejected": -324.8597412109375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.641172885894775, "rewards/margins": 9.040202140808105, "rewards/rejected": -16.68137550354004, "step": 16450 }, { "epoch": 2.56, "learning_rate": 2.0815042273976867e-06, "logits/chosen": -2.5854506492614746, "logits/rejected": -2.8538105487823486, "logps/chosen": -94.42277526855469, "logps/rejected": -202.28732299804688, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -7.004286766052246, "rewards/margins": 6.026137351989746, "rewards/rejected": -13.030424118041992, "step": 16451 }, { "epoch": 2.56, "learning_rate": 2.080770786866539e-06, "logits/chosen": -2.6838724613189697, "logits/rejected": -2.887739419937134, "logps/chosen": -116.0876235961914, "logps/rejected": -316.22857666015625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.503548622131348, "rewards/margins": 9.477829933166504, "rewards/rejected": -14.981378555297852, "step": 16452 }, { "epoch": 2.56, "learning_rate": 2.080037346335391e-06, "logits/chosen": -1.7037830352783203, "logits/rejected": -2.4247004985809326, "logps/chosen": -129.74964904785156, "logps/rejected": -414.13262939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.656180381774902, "rewards/margins": 9.735814094543457, "rewards/rejected": -17.39199447631836, "step": 16453 }, { "epoch": 2.56, "learning_rate": 2.079303905804243e-06, "logits/chosen": -2.0404062271118164, "logits/rejected": -2.9997897148132324, "logps/chosen": -115.14795684814453, "logps/rejected": -457.34649658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.195951461791992, "rewards/margins": 11.431291580200195, "rewards/rejected": -17.627243041992188, "step": 16454 }, { "epoch": 2.56, "learning_rate": 2.078570465273095e-06, "logits/chosen": -1.9872841835021973, "logits/rejected": -2.889937162399292, "logps/chosen": -112.60155487060547, "logps/rejected": -403.1058044433594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.141020774841309, "rewards/margins": 8.087933540344238, "rewards/rejected": -14.228954315185547, "step": 16455 }, { "epoch": 2.56, "learning_rate": 2.077837024741947e-06, "logits/chosen": -1.9300185441970825, "logits/rejected": -2.59233021736145, "logps/chosen": -185.18748474121094, "logps/rejected": -417.0220947265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -12.607355117797852, "rewards/margins": 7.3618483543396, "rewards/rejected": -19.96920394897461, "step": 16456 }, { "epoch": 2.56, "learning_rate": 2.0771035842107993e-06, "logits/chosen": -2.4190592765808105, "logits/rejected": -2.7610533237457275, "logps/chosen": -302.1109924316406, "logps/rejected": -454.90545654296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -10.753472328186035, "rewards/margins": 8.978278160095215, "rewards/rejected": -19.73175048828125, "step": 16457 }, { "epoch": 2.56, "learning_rate": 2.076370143679651e-06, "logits/chosen": -2.051292657852173, "logits/rejected": -2.741521120071411, "logps/chosen": -526.739501953125, "logps/rejected": -723.23388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.62801742553711, "rewards/margins": 9.595176696777344, "rewards/rejected": -18.223194122314453, "step": 16458 }, { "epoch": 2.56, "learning_rate": 2.075636703148504e-06, "logits/chosen": -3.1080756187438965, "logits/rejected": -2.2925684452056885, "logps/chosen": -840.134033203125, "logps/rejected": -500.6102600097656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.7242798805236816, "rewards/margins": 8.609599113464355, "rewards/rejected": -12.333879470825195, "step": 16459 }, { "epoch": 2.56, "learning_rate": 2.0749032626173557e-06, "logits/chosen": -2.0277554988861084, "logits/rejected": -2.6482863426208496, "logps/chosen": -483.87255859375, "logps/rejected": -535.7501220703125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.903617858886719, "rewards/margins": 9.164392471313477, "rewards/rejected": -17.068010330200195, "step": 16460 }, { "epoch": 2.56, "learning_rate": 2.074169822086208e-06, "logits/chosen": -2.730093240737915, "logits/rejected": -2.8203177452087402, "logps/chosen": -256.409912109375, "logps/rejected": -343.2366638183594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.8220109939575195, "rewards/margins": 12.146829605102539, "rewards/rejected": -16.968841552734375, "step": 16461 }, { "epoch": 2.56, "learning_rate": 2.07343638155506e-06, "logits/chosen": -2.8390743732452393, "logits/rejected": -1.8585789203643799, "logps/chosen": -793.4993286132812, "logps/rejected": -485.62646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.912256240844727, "rewards/margins": 11.887022972106934, "rewards/rejected": -19.799278259277344, "step": 16462 }, { "epoch": 2.56, "learning_rate": 2.0727029410239122e-06, "logits/chosen": -2.3847665786743164, "logits/rejected": -2.4865059852600098, "logps/chosen": -204.210205078125, "logps/rejected": -216.85035705566406, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.600185394287109, "rewards/margins": 7.590108394622803, "rewards/rejected": -14.19029426574707, "step": 16463 }, { "epoch": 2.56, "learning_rate": 2.071969500492764e-06, "logits/chosen": -2.421949625015259, "logits/rejected": -2.9177989959716797, "logps/chosen": -640.22314453125, "logps/rejected": -721.856201171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.95134162902832, "rewards/margins": 9.459901809692383, "rewards/rejected": -18.411243438720703, "step": 16464 }, { "epoch": 2.56, "learning_rate": 2.071236059961616e-06, "logits/chosen": -2.955263376235962, "logits/rejected": -2.006330966949463, "logps/chosen": -192.25518798828125, "logps/rejected": -140.10171508789062, "loss": 0.1908, "rewards/accuracies": 1.0, "rewards/chosen": -5.315733432769775, "rewards/margins": 5.986953258514404, "rewards/rejected": -11.30268669128418, "step": 16465 }, { "epoch": 2.56, "learning_rate": 2.0705026194304683e-06, "logits/chosen": -2.842217445373535, "logits/rejected": -1.7807741165161133, "logps/chosen": -301.1910705566406, "logps/rejected": -207.61981201171875, "loss": 0.2485, "rewards/accuracies": 1.0, "rewards/chosen": -2.9425742626190186, "rewards/margins": 7.2910284996032715, "rewards/rejected": -10.233602523803711, "step": 16466 }, { "epoch": 2.56, "learning_rate": 2.0697691788993206e-06, "logits/chosen": -0.966378927230835, "logits/rejected": -2.7001914978027344, "logps/chosen": -201.3350830078125, "logps/rejected": -598.7818603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.334665298461914, "rewards/margins": 10.577484130859375, "rewards/rejected": -16.91214942932129, "step": 16467 }, { "epoch": 2.56, "learning_rate": 2.069035738368173e-06, "logits/chosen": -2.8124186992645264, "logits/rejected": -2.3005619049072266, "logps/chosen": -336.13714599609375, "logps/rejected": -258.64251708984375, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -7.081315040588379, "rewards/margins": 6.142060279846191, "rewards/rejected": -13.22337532043457, "step": 16468 }, { "epoch": 2.56, "learning_rate": 2.0683022978370248e-06, "logits/chosen": -1.810505986213684, "logits/rejected": -2.9658193588256836, "logps/chosen": -117.50947570800781, "logps/rejected": -364.12835693359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.895961761474609, "rewards/margins": 8.580580711364746, "rewards/rejected": -14.476543426513672, "step": 16469 }, { "epoch": 2.56, "learning_rate": 2.067568857305877e-06, "logits/chosen": -2.7027032375335693, "logits/rejected": -3.2354533672332764, "logps/chosen": -95.87065887451172, "logps/rejected": -296.7553405761719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.747134685516357, "rewards/margins": 8.627546310424805, "rewards/rejected": -16.374679565429688, "step": 16470 }, { "epoch": 2.56, "learning_rate": 2.066835416774729e-06, "logits/chosen": -1.9997254610061646, "logits/rejected": -2.846940040588379, "logps/chosen": -117.6885986328125, "logps/rejected": -408.83404541015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.246031761169434, "rewards/margins": 12.54387378692627, "rewards/rejected": -20.789905548095703, "step": 16471 }, { "epoch": 2.56, "learning_rate": 2.0661019762435812e-06, "logits/chosen": -1.871186375617981, "logits/rejected": -2.3739216327667236, "logps/chosen": -152.5679168701172, "logps/rejected": -453.18035888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.142003536224365, "rewards/margins": 13.039846420288086, "rewards/rejected": -20.18185043334961, "step": 16472 }, { "epoch": 2.56, "learning_rate": 2.065368535712433e-06, "logits/chosen": -2.9721622467041016, "logits/rejected": -2.7444093227386475, "logps/chosen": -619.5692138671875, "logps/rejected": -295.8753967285156, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.973521709442139, "rewards/margins": 7.260943412780762, "rewards/rejected": -12.234465599060059, "step": 16473 }, { "epoch": 2.56, "learning_rate": 2.0646350951812854e-06, "logits/chosen": -2.2697479724884033, "logits/rejected": -2.748424768447876, "logps/chosen": -180.63165283203125, "logps/rejected": -310.696533203125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.537676811218262, "rewards/margins": 7.656059265136719, "rewards/rejected": -13.193737030029297, "step": 16474 }, { "epoch": 2.56, "learning_rate": 2.0639016546501373e-06, "logits/chosen": -2.334468126296997, "logits/rejected": -2.4170784950256348, "logps/chosen": -111.33135986328125, "logps/rejected": -332.8787841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.8159878253936768, "rewards/margins": 11.634066581726074, "rewards/rejected": -14.450054168701172, "step": 16475 }, { "epoch": 2.56, "learning_rate": 2.0631682141189896e-06, "logits/chosen": -2.593928575515747, "logits/rejected": -1.9301645755767822, "logps/chosen": -318.26611328125, "logps/rejected": -326.633544921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.418837070465088, "rewards/margins": 9.872095108032227, "rewards/rejected": -16.290931701660156, "step": 16476 }, { "epoch": 2.56, "learning_rate": 2.062434773587842e-06, "logits/chosen": -1.1345921754837036, "logits/rejected": -1.3158104419708252, "logps/chosen": -373.43963623046875, "logps/rejected": -632.414306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.96363639831543, "rewards/margins": 11.055988311767578, "rewards/rejected": -22.019624710083008, "step": 16477 }, { "epoch": 2.56, "learning_rate": 2.0617013330566938e-06, "logits/chosen": -2.3420941829681396, "logits/rejected": -2.9701642990112305, "logps/chosen": -293.0096130371094, "logps/rejected": -483.11376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.173630237579346, "rewards/margins": 11.429506301879883, "rewards/rejected": -18.60313606262207, "step": 16478 }, { "epoch": 2.56, "learning_rate": 2.060967892525546e-06, "logits/chosen": -2.5282142162323, "logits/rejected": -2.5720129013061523, "logps/chosen": -149.85995483398438, "logps/rejected": -236.79840087890625, "loss": 0.2484, "rewards/accuracies": 1.0, "rewards/chosen": -6.723274230957031, "rewards/margins": 6.293042182922363, "rewards/rejected": -13.016316413879395, "step": 16479 }, { "epoch": 2.56, "learning_rate": 2.060234451994398e-06, "logits/chosen": -2.7096986770629883, "logits/rejected": -0.9690898060798645, "logps/chosen": -280.1070556640625, "logps/rejected": -163.86802673339844, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -6.27944803237915, "rewards/margins": 5.58279275894165, "rewards/rejected": -11.8622407913208, "step": 16480 }, { "epoch": 2.56, "learning_rate": 2.0595010114632503e-06, "logits/chosen": -1.4057072401046753, "logits/rejected": -2.59797739982605, "logps/chosen": -119.35797119140625, "logps/rejected": -405.454833984375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -7.908262252807617, "rewards/margins": 8.585431098937988, "rewards/rejected": -16.493694305419922, "step": 16481 }, { "epoch": 2.56, "learning_rate": 2.058767570932102e-06, "logits/chosen": -2.024066209793091, "logits/rejected": -2.655810832977295, "logps/chosen": -101.68582153320312, "logps/rejected": -387.34521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.599607467651367, "rewards/margins": 12.548059463500977, "rewards/rejected": -21.147666931152344, "step": 16482 }, { "epoch": 2.56, "learning_rate": 2.0580341304009544e-06, "logits/chosen": -1.3104151487350464, "logits/rejected": -2.66810941696167, "logps/chosen": -189.01063537597656, "logps/rejected": -503.4561767578125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.646656036376953, "rewards/margins": 9.257608413696289, "rewards/rejected": -17.904264450073242, "step": 16483 }, { "epoch": 2.56, "learning_rate": 2.0573006898698067e-06, "logits/chosen": -2.155641555786133, "logits/rejected": -2.696908950805664, "logps/chosen": -171.522705078125, "logps/rejected": -204.64805603027344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.407827377319336, "rewards/margins": 7.7943830490112305, "rewards/rejected": -13.202210426330566, "step": 16484 }, { "epoch": 2.56, "learning_rate": 2.0565672493386586e-06, "logits/chosen": -2.6063568592071533, "logits/rejected": -2.813262701034546, "logps/chosen": -703.6331176757812, "logps/rejected": -755.2181396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.854408264160156, "rewards/margins": 15.358465194702148, "rewards/rejected": -23.212871551513672, "step": 16485 }, { "epoch": 2.56, "learning_rate": 2.055833808807511e-06, "logits/chosen": -2.453770637512207, "logits/rejected": -2.7005562782287598, "logps/chosen": -283.50836181640625, "logps/rejected": -521.19677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.24188756942749, "rewards/margins": 9.919530868530273, "rewards/rejected": -15.161418914794922, "step": 16486 }, { "epoch": 2.56, "learning_rate": 2.055100368276363e-06, "logits/chosen": -2.4267594814300537, "logits/rejected": -2.7829415798187256, "logps/chosen": -735.0446166992188, "logps/rejected": -776.3450927734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -9.21340560913086, "rewards/margins": 12.198375701904297, "rewards/rejected": -21.411781311035156, "step": 16487 }, { "epoch": 2.56, "learning_rate": 2.054366927745215e-06, "logits/chosen": -1.7460871934890747, "logits/rejected": -2.6210734844207764, "logps/chosen": -246.57810974121094, "logps/rejected": -466.958740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.494961738586426, "rewards/margins": 11.487020492553711, "rewards/rejected": -18.98198127746582, "step": 16488 }, { "epoch": 2.56, "learning_rate": 2.053633487214067e-06, "logits/chosen": -2.625520706176758, "logits/rejected": -1.7733160257339478, "logps/chosen": -243.6314697265625, "logps/rejected": -135.47300720214844, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": -4.304888725280762, "rewards/margins": 7.415313243865967, "rewards/rejected": -11.72020149230957, "step": 16489 }, { "epoch": 2.56, "learning_rate": 2.0529000466829193e-06, "logits/chosen": -2.518444776535034, "logits/rejected": -2.8906545639038086, "logps/chosen": -108.19168090820312, "logps/rejected": -359.85614013671875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.711458683013916, "rewards/margins": 13.118073463439941, "rewards/rejected": -18.829532623291016, "step": 16490 }, { "epoch": 2.56, "learning_rate": 2.052166606151771e-06, "logits/chosen": -2.549835443496704, "logits/rejected": -2.741990327835083, "logps/chosen": -316.09393310546875, "logps/rejected": -488.962646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.0127482414245605, "rewards/margins": 12.932714462280273, "rewards/rejected": -14.945463180541992, "step": 16491 }, { "epoch": 2.56, "learning_rate": 2.0514331656206235e-06, "logits/chosen": -2.3008692264556885, "logits/rejected": -2.657423734664917, "logps/chosen": -95.76417541503906, "logps/rejected": -217.68389892578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.642345428466797, "rewards/margins": 9.000547409057617, "rewards/rejected": -15.642892837524414, "step": 16492 }, { "epoch": 2.57, "learning_rate": 2.0506997250894758e-06, "logits/chosen": -2.1462807655334473, "logits/rejected": -2.6691110134124756, "logps/chosen": -96.03384399414062, "logps/rejected": -219.87477111816406, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.145622253417969, "rewards/margins": 8.181045532226562, "rewards/rejected": -15.326667785644531, "step": 16493 }, { "epoch": 2.57, "learning_rate": 2.049966284558328e-06, "logits/chosen": -2.0221080780029297, "logits/rejected": -2.4388909339904785, "logps/chosen": -222.0833740234375, "logps/rejected": -537.123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.592592716217041, "rewards/margins": 11.1924467086792, "rewards/rejected": -17.7850399017334, "step": 16494 }, { "epoch": 2.57, "learning_rate": 2.04923284402718e-06, "logits/chosen": -2.780268669128418, "logits/rejected": -2.429253101348877, "logps/chosen": -236.1278076171875, "logps/rejected": -238.070068359375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -1.4223015308380127, "rewards/margins": 9.795920372009277, "rewards/rejected": -11.218221664428711, "step": 16495 }, { "epoch": 2.57, "learning_rate": 2.048499403496032e-06, "logits/chosen": -1.8220881223678589, "logits/rejected": -2.7378406524658203, "logps/chosen": -255.0189971923828, "logps/rejected": -649.8416748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.102940559387207, "rewards/margins": 12.301861763000488, "rewards/rejected": -18.404802322387695, "step": 16496 }, { "epoch": 2.57, "learning_rate": 2.047765962964884e-06, "logits/chosen": -1.2463736534118652, "logits/rejected": -2.16858172416687, "logps/chosen": -207.5187530517578, "logps/rejected": -567.1326904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.540409564971924, "rewards/margins": 13.000051498413086, "rewards/rejected": -18.54046058654785, "step": 16497 }, { "epoch": 2.57, "learning_rate": 2.047032522433736e-06, "logits/chosen": -1.8751469850540161, "logits/rejected": -2.834958553314209, "logps/chosen": -195.04327392578125, "logps/rejected": -524.8568725585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.173855781555176, "rewards/margins": 15.008591651916504, "rewards/rejected": -21.18244743347168, "step": 16498 }, { "epoch": 2.57, "learning_rate": 2.0462990819025883e-06, "logits/chosen": -1.703989028930664, "logits/rejected": -2.7237966060638428, "logps/chosen": -316.7331848144531, "logps/rejected": -655.2774658203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -10.705347061157227, "rewards/margins": 9.326871871948242, "rewards/rejected": -20.03221893310547, "step": 16499 }, { "epoch": 2.57, "learning_rate": 2.04556564137144e-06, "logits/chosen": -2.2233428955078125, "logits/rejected": -2.686326742172241, "logps/chosen": -74.19215393066406, "logps/rejected": -249.79595947265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.087131023406982, "rewards/margins": 10.045464515686035, "rewards/rejected": -16.13259506225586, "step": 16500 }, { "epoch": 2.57, "learning_rate": 2.044832200840293e-06, "logits/chosen": -1.2580467462539673, "logits/rejected": -2.3691086769104004, "logps/chosen": -121.87262725830078, "logps/rejected": -598.0460815429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.806445121765137, "rewards/margins": 12.608757019042969, "rewards/rejected": -20.415203094482422, "step": 16501 }, { "epoch": 2.57, "learning_rate": 2.0440987603091448e-06, "logits/chosen": -1.6947342157363892, "logits/rejected": -2.5029609203338623, "logps/chosen": -167.2061767578125, "logps/rejected": -340.13531494140625, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/chosen": -7.785600662231445, "rewards/margins": 8.72105598449707, "rewards/rejected": -16.506656646728516, "step": 16502 }, { "epoch": 2.57, "learning_rate": 2.043365319777997e-06, "logits/chosen": -1.877046823501587, "logits/rejected": -2.6367764472961426, "logps/chosen": -233.5955047607422, "logps/rejected": -486.95367431640625, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -6.849905014038086, "rewards/margins": 10.940547943115234, "rewards/rejected": -17.790454864501953, "step": 16503 }, { "epoch": 2.57, "learning_rate": 2.042631879246849e-06, "logits/chosen": -2.6733617782592773, "logits/rejected": -2.5102503299713135, "logps/chosen": -289.60498046875, "logps/rejected": -266.77874755859375, "loss": 0.0289, "rewards/accuracies": 1.0, "rewards/chosen": -9.226975440979004, "rewards/margins": 5.5920515060424805, "rewards/rejected": -14.819026947021484, "step": 16504 }, { "epoch": 2.57, "learning_rate": 2.041898438715701e-06, "logits/chosen": -1.8123090267181396, "logits/rejected": -2.542020559310913, "logps/chosen": -325.9603271484375, "logps/rejected": -414.7511901855469, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.250231742858887, "rewards/margins": 8.892461776733398, "rewards/rejected": -14.142693519592285, "step": 16505 }, { "epoch": 2.57, "learning_rate": 2.041164998184553e-06, "logits/chosen": -2.1811492443084717, "logits/rejected": -2.6395840644836426, "logps/chosen": -581.9378051757812, "logps/rejected": -604.6168212890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.966957092285156, "rewards/margins": 8.680564880371094, "rewards/rejected": -16.64752197265625, "step": 16506 }, { "epoch": 2.57, "learning_rate": 2.040431557653405e-06, "logits/chosen": -2.078747034072876, "logits/rejected": -2.572261095046997, "logps/chosen": -161.77027893066406, "logps/rejected": -289.347412109375, "loss": 0.0375, "rewards/accuracies": 1.0, "rewards/chosen": -8.84282112121582, "rewards/margins": 6.55475378036499, "rewards/rejected": -15.397575378417969, "step": 16507 }, { "epoch": 2.57, "learning_rate": 2.0396981171222573e-06, "logits/chosen": -2.7800049781799316, "logits/rejected": -2.7059550285339355, "logps/chosen": -280.25177001953125, "logps/rejected": -528.84423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.135211944580078, "rewards/margins": 12.668840408325195, "rewards/rejected": -20.804054260253906, "step": 16508 }, { "epoch": 2.57, "learning_rate": 2.0389646765911096e-06, "logits/chosen": -2.7588577270507812, "logits/rejected": -1.0661195516586304, "logps/chosen": -495.09539794921875, "logps/rejected": -242.4453582763672, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.88814926147461, "rewards/margins": 8.083135604858398, "rewards/rejected": -16.971284866333008, "step": 16509 }, { "epoch": 2.57, "learning_rate": 2.038231236059962e-06, "logits/chosen": -2.825976610183716, "logits/rejected": -2.0322721004486084, "logps/chosen": -346.9197998046875, "logps/rejected": -348.73565673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.025615692138672, "rewards/margins": 13.280391693115234, "rewards/rejected": -22.306007385253906, "step": 16510 }, { "epoch": 2.57, "learning_rate": 2.037497795528814e-06, "logits/chosen": -2.952347755432129, "logits/rejected": -2.7276017665863037, "logps/chosen": -204.37344360351562, "logps/rejected": -243.46859741210938, "loss": 0.3277, "rewards/accuracies": 1.0, "rewards/chosen": -6.0767717361450195, "rewards/margins": 7.710274696350098, "rewards/rejected": -13.787046432495117, "step": 16511 }, { "epoch": 2.57, "learning_rate": 2.036764354997666e-06, "logits/chosen": -2.2786366939544678, "logits/rejected": -2.9304120540618896, "logps/chosen": -121.17546081542969, "logps/rejected": -382.17437744140625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -5.093618392944336, "rewards/margins": 5.995204925537109, "rewards/rejected": -11.088823318481445, "step": 16512 }, { "epoch": 2.57, "learning_rate": 2.036030914466518e-06, "logits/chosen": -2.5991077423095703, "logits/rejected": -1.1790803670883179, "logps/chosen": -250.1184539794922, "logps/rejected": -179.94271850585938, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -6.027395248413086, "rewards/margins": 4.847412586212158, "rewards/rejected": -10.874807357788086, "step": 16513 }, { "epoch": 2.57, "learning_rate": 2.03529747393537e-06, "logits/chosen": -1.485515832901001, "logits/rejected": -2.870821237564087, "logps/chosen": -129.2147216796875, "logps/rejected": -423.2939453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.329032897949219, "rewards/margins": 7.684065818786621, "rewards/rejected": -17.013097763061523, "step": 16514 }, { "epoch": 2.57, "learning_rate": 2.034564033404222e-06, "logits/chosen": -1.5242552757263184, "logits/rejected": -2.6420223712921143, "logps/chosen": -115.24382019042969, "logps/rejected": -284.9739990234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.551406860351562, "rewards/margins": 8.942842483520508, "rewards/rejected": -17.49424934387207, "step": 16515 }, { "epoch": 2.57, "learning_rate": 2.033830592873074e-06, "logits/chosen": -2.624039888381958, "logits/rejected": -2.5601868629455566, "logps/chosen": -168.5383758544922, "logps/rejected": -270.33209228515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.151977062225342, "rewards/margins": 8.938238143920898, "rewards/rejected": -14.090215682983398, "step": 16516 }, { "epoch": 2.57, "learning_rate": 2.0330971523419263e-06, "logits/chosen": -2.571136713027954, "logits/rejected": -2.651995897293091, "logps/chosen": -344.2766418457031, "logps/rejected": -613.8301391601562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.471133232116699, "rewards/margins": 13.108013153076172, "rewards/rejected": -20.579147338867188, "step": 16517 }, { "epoch": 2.57, "learning_rate": 2.0323637118107787e-06, "logits/chosen": -0.8695247769355774, "logits/rejected": -1.8986625671386719, "logps/chosen": -220.19252014160156, "logps/rejected": -540.3067626953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.422746658325195, "rewards/margins": 11.936483383178711, "rewards/rejected": -18.359230041503906, "step": 16518 }, { "epoch": 2.57, "learning_rate": 2.031630271279631e-06, "logits/chosen": -2.6060383319854736, "logits/rejected": -2.924015522003174, "logps/chosen": -511.223876953125, "logps/rejected": -535.6787109375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.356040000915527, "rewards/margins": 8.617191314697266, "rewards/rejected": -14.973231315612793, "step": 16519 }, { "epoch": 2.57, "learning_rate": 2.030896830748483e-06, "logits/chosen": -2.0532805919647217, "logits/rejected": -2.9657669067382812, "logps/chosen": -348.8101501464844, "logps/rejected": -525.573974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6814591884613037, "rewards/margins": 11.449516296386719, "rewards/rejected": -15.130974769592285, "step": 16520 }, { "epoch": 2.57, "learning_rate": 2.030163390217335e-06, "logits/chosen": -2.5234904289245605, "logits/rejected": -2.871260643005371, "logps/chosen": -186.92088317871094, "logps/rejected": -336.8364562988281, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -9.418432235717773, "rewards/margins": 7.224917411804199, "rewards/rejected": -16.643348693847656, "step": 16521 }, { "epoch": 2.57, "learning_rate": 2.029429949686187e-06, "logits/chosen": -2.567049264907837, "logits/rejected": -1.7637308835983276, "logps/chosen": -242.04779052734375, "logps/rejected": -257.8377990722656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.941656589508057, "rewards/margins": 8.115643501281738, "rewards/rejected": -14.057300567626953, "step": 16522 }, { "epoch": 2.57, "learning_rate": 2.0286965091550393e-06, "logits/chosen": -2.673506021499634, "logits/rejected": -2.140052080154419, "logps/chosen": -323.44921875, "logps/rejected": -284.3552551269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3044586181640625, "rewards/margins": 11.565208435058594, "rewards/rejected": -14.869667053222656, "step": 16523 }, { "epoch": 2.57, "learning_rate": 2.027963068623891e-06, "logits/chosen": -0.9617611169815063, "logits/rejected": -1.113632321357727, "logps/chosen": -478.2814636230469, "logps/rejected": -429.911376953125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.205039978027344, "rewards/margins": 8.92297077178955, "rewards/rejected": -14.128010749816895, "step": 16524 }, { "epoch": 2.57, "learning_rate": 2.027229628092743e-06, "logits/chosen": -2.173588752746582, "logits/rejected": -1.303149938583374, "logps/chosen": -298.1812744140625, "logps/rejected": -308.619384765625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.921319007873535, "rewards/margins": 7.124998092651367, "rewards/rejected": -13.046318054199219, "step": 16525 }, { "epoch": 2.57, "learning_rate": 2.026496187561596e-06, "logits/chosen": -2.148298978805542, "logits/rejected": -2.746284008026123, "logps/chosen": -130.57424926757812, "logps/rejected": -291.45831298828125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.237022399902344, "rewards/margins": 8.893241882324219, "rewards/rejected": -13.130264282226562, "step": 16526 }, { "epoch": 2.57, "learning_rate": 2.0257627470304477e-06, "logits/chosen": -2.6630067825317383, "logits/rejected": -2.0341551303863525, "logps/chosen": -328.58282470703125, "logps/rejected": -340.752197265625, "loss": 0.3107, "rewards/accuracies": 1.0, "rewards/chosen": -10.402504920959473, "rewards/margins": 2.3827261924743652, "rewards/rejected": -12.78523063659668, "step": 16527 }, { "epoch": 2.57, "learning_rate": 2.0250293064993e-06, "logits/chosen": -2.394996166229248, "logits/rejected": -2.418102741241455, "logps/chosen": -191.46693420410156, "logps/rejected": -257.1961975097656, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -9.81373405456543, "rewards/margins": 5.747634410858154, "rewards/rejected": -15.561368942260742, "step": 16528 }, { "epoch": 2.57, "learning_rate": 2.024295865968152e-06, "logits/chosen": -2.358184576034546, "logits/rejected": -2.838961124420166, "logps/chosen": -156.3459930419922, "logps/rejected": -367.1457214355469, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.27999210357666, "rewards/margins": 8.381653785705566, "rewards/rejected": -15.661645889282227, "step": 16529 }, { "epoch": 2.57, "learning_rate": 2.023562425437004e-06, "logits/chosen": -1.6228142976760864, "logits/rejected": -2.6258349418640137, "logps/chosen": -248.304443359375, "logps/rejected": -656.60498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.523266792297363, "rewards/margins": 11.580963134765625, "rewards/rejected": -21.104228973388672, "step": 16530 }, { "epoch": 2.57, "learning_rate": 2.022828984905856e-06, "logits/chosen": -3.136237382888794, "logits/rejected": -2.3772799968719482, "logps/chosen": -316.84039306640625, "logps/rejected": -170.17361450195312, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.532585144042969, "rewards/margins": 6.018557548522949, "rewards/rejected": -12.551141738891602, "step": 16531 }, { "epoch": 2.57, "learning_rate": 2.0220955443747083e-06, "logits/chosen": -1.7264474630355835, "logits/rejected": -2.376070499420166, "logps/chosen": -333.0243835449219, "logps/rejected": -661.1092529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.434787750244141, "rewards/margins": 12.033068656921387, "rewards/rejected": -19.467857360839844, "step": 16532 }, { "epoch": 2.57, "learning_rate": 2.0213621038435602e-06, "logits/chosen": -2.2457900047302246, "logits/rejected": -2.668837547302246, "logps/chosen": -134.07669067382812, "logps/rejected": -227.56283569335938, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -5.148864269256592, "rewards/margins": 6.809300422668457, "rewards/rejected": -11.95816421508789, "step": 16533 }, { "epoch": 2.57, "learning_rate": 2.0206286633124125e-06, "logits/chosen": -2.4642932415008545, "logits/rejected": -1.8974924087524414, "logps/chosen": -360.13641357421875, "logps/rejected": -296.8638610839844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.381714105606079, "rewards/margins": 9.795520782470703, "rewards/rejected": -13.177234649658203, "step": 16534 }, { "epoch": 2.57, "learning_rate": 2.019895222781265e-06, "logits/chosen": -2.8333067893981934, "logits/rejected": -3.0357367992401123, "logps/chosen": -126.51676940917969, "logps/rejected": -327.27276611328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.501978397369385, "rewards/margins": 10.238519668579102, "rewards/rejected": -17.740497589111328, "step": 16535 }, { "epoch": 2.57, "learning_rate": 2.0191617822501167e-06, "logits/chosen": -2.8397467136383057, "logits/rejected": -2.9492571353912354, "logps/chosen": -490.1441955566406, "logps/rejected": -591.18212890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.283198356628418, "rewards/margins": 8.930315017700195, "rewards/rejected": -14.213513374328613, "step": 16536 }, { "epoch": 2.57, "learning_rate": 2.018428341718969e-06, "logits/chosen": -1.8511766195297241, "logits/rejected": -2.735175371170044, "logps/chosen": -131.69065856933594, "logps/rejected": -503.3685302734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.663727283477783, "rewards/margins": 10.917672157287598, "rewards/rejected": -17.58139991760254, "step": 16537 }, { "epoch": 2.57, "learning_rate": 2.017694901187821e-06, "logits/chosen": -2.8545687198638916, "logits/rejected": -2.8728981018066406, "logps/chosen": -175.28517150878906, "logps/rejected": -237.15660095214844, "loss": 0.027, "rewards/accuracies": 1.0, "rewards/chosen": -5.125103950500488, "rewards/margins": 4.4766082763671875, "rewards/rejected": -9.601712226867676, "step": 16538 }, { "epoch": 2.57, "learning_rate": 2.016961460656673e-06, "logits/chosen": -2.669564962387085, "logits/rejected": -3.11828351020813, "logps/chosen": -419.86737060546875, "logps/rejected": -491.3373107910156, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/chosen": -4.82834005355835, "rewards/margins": 8.673480987548828, "rewards/rejected": -13.501821517944336, "step": 16539 }, { "epoch": 2.57, "learning_rate": 2.016228020125525e-06, "logits/chosen": -2.246047258377075, "logits/rejected": -2.782291889190674, "logps/chosen": -178.9170684814453, "logps/rejected": -384.3060302734375, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -6.052125930786133, "rewards/margins": 6.0770978927612305, "rewards/rejected": -12.129223823547363, "step": 16540 }, { "epoch": 2.57, "learning_rate": 2.0154945795943774e-06, "logits/chosen": -2.6750245094299316, "logits/rejected": -1.9266300201416016, "logps/chosen": -254.61819458007812, "logps/rejected": -244.959716796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.97688364982605, "rewards/margins": 9.7907133102417, "rewards/rejected": -12.767597198486328, "step": 16541 }, { "epoch": 2.57, "learning_rate": 2.0147611390632292e-06, "logits/chosen": -2.611415386199951, "logits/rejected": -1.6909114122390747, "logps/chosen": -151.70660400390625, "logps/rejected": -194.2599639892578, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.4601874351501465, "rewards/margins": 8.54530143737793, "rewards/rejected": -16.005489349365234, "step": 16542 }, { "epoch": 2.57, "learning_rate": 2.014027698532082e-06, "logits/chosen": -2.589491605758667, "logits/rejected": -2.901716470718384, "logps/chosen": -194.45211791992188, "logps/rejected": -408.20037841796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.012975215911865, "rewards/margins": 8.507810592651367, "rewards/rejected": -15.52078628540039, "step": 16543 }, { "epoch": 2.57, "learning_rate": 2.013294258000934e-06, "logits/chosen": -1.8615095615386963, "logits/rejected": -2.901189088821411, "logps/chosen": -192.94627380371094, "logps/rejected": -496.9442138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.337396621704102, "rewards/margins": 11.20408821105957, "rewards/rejected": -19.541484832763672, "step": 16544 }, { "epoch": 2.57, "learning_rate": 2.0125608174697857e-06, "logits/chosen": -2.02785587310791, "logits/rejected": -2.795811891555786, "logps/chosen": -437.51629638671875, "logps/rejected": -602.6133422851562, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.150285720825195, "rewards/margins": 11.530660629272461, "rewards/rejected": -18.680946350097656, "step": 16545 }, { "epoch": 2.57, "learning_rate": 2.011827376938638e-06, "logits/chosen": -2.113480567932129, "logits/rejected": -2.5844688415527344, "logps/chosen": -176.6771240234375, "logps/rejected": -418.3087463378906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.575582504272461, "rewards/margins": 11.132867813110352, "rewards/rejected": -15.708450317382812, "step": 16546 }, { "epoch": 2.57, "learning_rate": 2.01109393640749e-06, "logits/chosen": -1.8836702108383179, "logits/rejected": -2.758301258087158, "logps/chosen": -106.04267883300781, "logps/rejected": -318.31829833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6811366081237793, "rewards/margins": 10.228740692138672, "rewards/rejected": -13.909876823425293, "step": 16547 }, { "epoch": 2.57, "learning_rate": 2.010360495876342e-06, "logits/chosen": -2.7231013774871826, "logits/rejected": -2.63051438331604, "logps/chosen": -283.44476318359375, "logps/rejected": -313.79473876953125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.280276298522949, "rewards/margins": 7.685881614685059, "rewards/rejected": -12.966157913208008, "step": 16548 }, { "epoch": 2.57, "learning_rate": 2.009627055345194e-06, "logits/chosen": -2.8630590438842773, "logits/rejected": -3.108534336090088, "logps/chosen": -179.39541625976562, "logps/rejected": -320.3103332519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.490300178527832, "rewards/margins": 12.737348556518555, "rewards/rejected": -18.227649688720703, "step": 16549 }, { "epoch": 2.57, "learning_rate": 2.0088936148140464e-06, "logits/chosen": -1.3526993989944458, "logits/rejected": -2.7787301540374756, "logps/chosen": -96.4324722290039, "logps/rejected": -415.16229248046875, "loss": 0.0282, "rewards/accuracies": 1.0, "rewards/chosen": -6.470996856689453, "rewards/margins": 10.880327224731445, "rewards/rejected": -17.351322174072266, "step": 16550 }, { "epoch": 2.57, "learning_rate": 2.0081601742828983e-06, "logits/chosen": -2.707277297973633, "logits/rejected": -2.64943528175354, "logps/chosen": -177.2969970703125, "logps/rejected": -281.7457275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.9792375564575195, "rewards/margins": 9.44508171081543, "rewards/rejected": -16.424320220947266, "step": 16551 }, { "epoch": 2.57, "learning_rate": 2.007426733751751e-06, "logits/chosen": -0.6623971462249756, "logits/rejected": -2.546158790588379, "logps/chosen": -104.05903625488281, "logps/rejected": -609.5358276367188, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.757986068725586, "rewards/margins": 11.967621803283691, "rewards/rejected": -18.72560691833496, "step": 16552 }, { "epoch": 2.57, "learning_rate": 2.006693293220603e-06, "logits/chosen": -2.073845386505127, "logits/rejected": -2.74184250831604, "logps/chosen": -406.59490966796875, "logps/rejected": -624.3784790039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0395307540893555, "rewards/margins": 15.448657989501953, "rewards/rejected": -22.488189697265625, "step": 16553 }, { "epoch": 2.57, "learning_rate": 2.0059598526894547e-06, "logits/chosen": -0.9660642743110657, "logits/rejected": -2.1764063835144043, "logps/chosen": -166.52377319335938, "logps/rejected": -411.890869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.401552200317383, "rewards/margins": 14.910028457641602, "rewards/rejected": -21.311580657958984, "step": 16554 }, { "epoch": 2.57, "learning_rate": 2.005226412158307e-06, "logits/chosen": -1.7229087352752686, "logits/rejected": -2.5813546180725098, "logps/chosen": -161.21200561523438, "logps/rejected": -409.6759033203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.5679497718811035, "rewards/margins": 9.647928237915039, "rewards/rejected": -16.215877532958984, "step": 16555 }, { "epoch": 2.57, "learning_rate": 2.004492971627159e-06, "logits/chosen": -2.034794330596924, "logits/rejected": -2.505366802215576, "logps/chosen": -411.5043029785156, "logps/rejected": -450.3171081542969, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.7344236373901367, "rewards/margins": 7.244296550750732, "rewards/rejected": -10.978719711303711, "step": 16556 }, { "epoch": 2.57, "learning_rate": 2.0037595310960112e-06, "logits/chosen": -2.393141746520996, "logits/rejected": -2.4140636920928955, "logps/chosen": -174.76731872558594, "logps/rejected": -252.9165802001953, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -9.099952697753906, "rewards/margins": 5.766983509063721, "rewards/rejected": -14.866935729980469, "step": 16557 }, { "epoch": 2.58, "learning_rate": 2.003026090564863e-06, "logits/chosen": -1.7533036470413208, "logits/rejected": -2.842581272125244, "logps/chosen": -257.4517822265625, "logps/rejected": -568.223876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.789794445037842, "rewards/margins": 11.253241539001465, "rewards/rejected": -17.04303550720215, "step": 16558 }, { "epoch": 2.58, "learning_rate": 2.0022926500337154e-06, "logits/chosen": -1.877882480621338, "logits/rejected": -2.821437358856201, "logps/chosen": -117.60438537597656, "logps/rejected": -316.18572998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.445707321166992, "rewards/margins": 10.35061264038086, "rewards/rejected": -19.79631996154785, "step": 16559 }, { "epoch": 2.58, "learning_rate": 2.0015592095025677e-06, "logits/chosen": -2.431898832321167, "logits/rejected": -2.150067090988159, "logps/chosen": -164.42306518554688, "logps/rejected": -439.2829284667969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.2447195053100586, "rewards/margins": 11.139581680297852, "rewards/rejected": -14.384302139282227, "step": 16560 }, { "epoch": 2.58, "learning_rate": 2.00082576897142e-06, "logits/chosen": -1.9960415363311768, "logits/rejected": -2.219836473464966, "logps/chosen": -178.37313842773438, "logps/rejected": -332.3913269042969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.290434837341309, "rewards/margins": 12.29611587524414, "rewards/rejected": -18.586551666259766, "step": 16561 }, { "epoch": 2.58, "learning_rate": 2.000092328440272e-06, "logits/chosen": -2.4255316257476807, "logits/rejected": -2.8182713985443115, "logps/chosen": -212.31907653808594, "logps/rejected": -386.7381591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.805351257324219, "rewards/margins": 11.391353607177734, "rewards/rejected": -18.196704864501953, "step": 16562 }, { "epoch": 2.58, "learning_rate": 1.9993588879091238e-06, "logits/chosen": -2.0875790119171143, "logits/rejected": -2.963892936706543, "logps/chosen": -90.28801727294922, "logps/rejected": -416.9926452636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.089199542999268, "rewards/margins": 11.744783401489258, "rewards/rejected": -17.833982467651367, "step": 16563 }, { "epoch": 2.58, "learning_rate": 1.998625447377976e-06, "logits/chosen": -2.3704581260681152, "logits/rejected": -2.7947893142700195, "logps/chosen": -133.01266479492188, "logps/rejected": -455.22332763671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.094285011291504, "rewards/margins": 11.6985445022583, "rewards/rejected": -15.792829513549805, "step": 16564 }, { "epoch": 2.58, "learning_rate": 1.997892006846828e-06, "logits/chosen": -2.259878635406494, "logits/rejected": -2.86820650100708, "logps/chosen": -174.36996459960938, "logps/rejected": -481.16668701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.140687942504883, "rewards/margins": 14.485300064086914, "rewards/rejected": -20.625988006591797, "step": 16565 }, { "epoch": 2.58, "learning_rate": 1.9971585663156802e-06, "logits/chosen": -2.5610721111297607, "logits/rejected": -2.752429485321045, "logps/chosen": -111.504150390625, "logps/rejected": -444.6339111328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.361484527587891, "rewards/margins": 15.902368545532227, "rewards/rejected": -21.263853073120117, "step": 16566 }, { "epoch": 2.58, "learning_rate": 1.996425125784532e-06, "logits/chosen": -2.6007022857666016, "logits/rejected": -2.430079221725464, "logps/chosen": -353.5359802246094, "logps/rejected": -346.9452209472656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.042971134185791, "rewards/margins": 9.246903419494629, "rewards/rejected": -15.289875030517578, "step": 16567 }, { "epoch": 2.58, "learning_rate": 1.9956916852533844e-06, "logits/chosen": -2.2652292251586914, "logits/rejected": -2.571603536605835, "logps/chosen": -156.40185546875, "logps/rejected": -281.703369140625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -8.011983871459961, "rewards/margins": 9.078537940979004, "rewards/rejected": -17.09052276611328, "step": 16568 }, { "epoch": 2.58, "learning_rate": 1.9949582447222367e-06, "logits/chosen": -1.4332613945007324, "logits/rejected": -2.4838085174560547, "logps/chosen": -194.46759033203125, "logps/rejected": -552.8094482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.058454513549805, "rewards/margins": 9.090170860290527, "rewards/rejected": -14.148625373840332, "step": 16569 }, { "epoch": 2.58, "learning_rate": 1.994224804191089e-06, "logits/chosen": -2.837183952331543, "logits/rejected": -3.077192783355713, "logps/chosen": -93.21635437011719, "logps/rejected": -306.3946838378906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.992936134338379, "rewards/margins": 12.949504852294922, "rewards/rejected": -16.942441940307617, "step": 16570 }, { "epoch": 2.58, "learning_rate": 1.993491363659941e-06, "logits/chosen": -2.297123908996582, "logits/rejected": -2.9105570316314697, "logps/chosen": -362.9768371582031, "logps/rejected": -416.98455810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.494067668914795, "rewards/margins": 12.442861557006836, "rewards/rejected": -18.93692970275879, "step": 16571 }, { "epoch": 2.58, "learning_rate": 1.992757923128793e-06, "logits/chosen": -2.726912260055542, "logits/rejected": -2.3658878803253174, "logps/chosen": -651.51220703125, "logps/rejected": -516.2579956054688, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -9.49504280090332, "rewards/margins": 6.84976053237915, "rewards/rejected": -16.344802856445312, "step": 16572 }, { "epoch": 2.58, "learning_rate": 1.992024482597645e-06, "logits/chosen": -2.5741071701049805, "logits/rejected": -1.4561823606491089, "logps/chosen": -825.521240234375, "logps/rejected": -493.8493957519531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.9038758277893066, "rewards/margins": 10.325189590454102, "rewards/rejected": -14.22906494140625, "step": 16573 }, { "epoch": 2.58, "learning_rate": 1.991291042066497e-06, "logits/chosen": -1.8810923099517822, "logits/rejected": -2.8851377964019775, "logps/chosen": -196.2381134033203, "logps/rejected": -515.302490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.794143676757812, "rewards/margins": 9.703142166137695, "rewards/rejected": -18.497285842895508, "step": 16574 }, { "epoch": 2.58, "learning_rate": 1.9905576015353493e-06, "logits/chosen": -2.5956313610076904, "logits/rejected": -1.7684227228164673, "logps/chosen": -761.4071655273438, "logps/rejected": -479.0263671875, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -11.64137077331543, "rewards/margins": 7.941972255706787, "rewards/rejected": -19.583343505859375, "step": 16575 }, { "epoch": 2.58, "learning_rate": 1.989824161004201e-06, "logits/chosen": -2.1115360260009766, "logits/rejected": -2.670901298522949, "logps/chosen": -417.61968994140625, "logps/rejected": -529.9757690429688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.718509674072266, "rewards/margins": 8.921470642089844, "rewards/rejected": -16.63998031616211, "step": 16576 }, { "epoch": 2.58, "learning_rate": 1.989090720473054e-06, "logits/chosen": -2.513408660888672, "logits/rejected": -2.365316390991211, "logps/chosen": -185.40005493164062, "logps/rejected": -327.4760437011719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.115947723388672, "rewards/margins": 9.307868003845215, "rewards/rejected": -17.423816680908203, "step": 16577 }, { "epoch": 2.58, "learning_rate": 1.9883572799419057e-06, "logits/chosen": -1.7551567554473877, "logits/rejected": -2.692389488220215, "logps/chosen": -370.1916809082031, "logps/rejected": -698.121337890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.847767353057861, "rewards/margins": 16.69209861755371, "rewards/rejected": -21.539867401123047, "step": 16578 }, { "epoch": 2.58, "learning_rate": 1.987623839410758e-06, "logits/chosen": -1.5277527570724487, "logits/rejected": -2.893627166748047, "logps/chosen": -129.91006469726562, "logps/rejected": -372.4377746582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.774947166442871, "rewards/margins": 11.171664237976074, "rewards/rejected": -17.946611404418945, "step": 16579 }, { "epoch": 2.58, "learning_rate": 1.98689039887961e-06, "logits/chosen": -2.3519248962402344, "logits/rejected": -2.383324384689331, "logps/chosen": -210.23838806152344, "logps/rejected": -294.3699951171875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.7630934715271, "rewards/margins": 7.907491683959961, "rewards/rejected": -13.670585632324219, "step": 16580 }, { "epoch": 2.58, "learning_rate": 1.9861569583484622e-06, "logits/chosen": -2.8254261016845703, "logits/rejected": -2.937208414077759, "logps/chosen": -554.77685546875, "logps/rejected": -632.75439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.025012969970703, "rewards/margins": 12.92874526977539, "rewards/rejected": -20.953758239746094, "step": 16581 }, { "epoch": 2.58, "learning_rate": 1.985423517817314e-06, "logits/chosen": -2.9906668663024902, "logits/rejected": -1.6186175346374512, "logps/chosen": -323.31884765625, "logps/rejected": -173.18930053710938, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -2.608053684234619, "rewards/margins": 6.388537406921387, "rewards/rejected": -8.996591567993164, "step": 16582 }, { "epoch": 2.58, "learning_rate": 1.984690077286166e-06, "logits/chosen": -2.6521496772766113, "logits/rejected": -2.8712711334228516, "logps/chosen": -131.9733428955078, "logps/rejected": -410.2291259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.880858421325684, "rewards/margins": 11.771610260009766, "rewards/rejected": -19.652469635009766, "step": 16583 }, { "epoch": 2.58, "learning_rate": 1.9839566367550183e-06, "logits/chosen": -2.795238971710205, "logits/rejected": -2.9206864833831787, "logps/chosen": -363.4005126953125, "logps/rejected": -481.11431884765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.440573692321777, "rewards/margins": 7.565481185913086, "rewards/rejected": -14.006054878234863, "step": 16584 }, { "epoch": 2.58, "learning_rate": 1.9832231962238706e-06, "logits/chosen": -2.6599175930023193, "logits/rejected": -2.734100341796875, "logps/chosen": -209.49546813964844, "logps/rejected": -328.3084411621094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.558738708496094, "rewards/margins": 10.054322242736816, "rewards/rejected": -15.61306095123291, "step": 16585 }, { "epoch": 2.58, "learning_rate": 1.982489755692723e-06, "logits/chosen": -1.7212660312652588, "logits/rejected": -2.2958550453186035, "logps/chosen": -175.24151611328125, "logps/rejected": -438.5028381347656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.086850166320801, "rewards/margins": 16.326614379882812, "rewards/rejected": -22.413463592529297, "step": 16586 }, { "epoch": 2.58, "learning_rate": 1.9817563151615748e-06, "logits/chosen": -2.6901891231536865, "logits/rejected": -2.520087957382202, "logps/chosen": -472.34375, "logps/rejected": -706.3043212890625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.156329154968262, "rewards/margins": 10.27308177947998, "rewards/rejected": -17.429410934448242, "step": 16587 }, { "epoch": 2.58, "learning_rate": 1.981022874630427e-06, "logits/chosen": -2.736952781677246, "logits/rejected": -1.79780912399292, "logps/chosen": -339.1156311035156, "logps/rejected": -402.68475341796875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -5.132920742034912, "rewards/margins": 7.590005874633789, "rewards/rejected": -12.72292709350586, "step": 16588 }, { "epoch": 2.58, "learning_rate": 1.980289434099279e-06, "logits/chosen": -1.3196579217910767, "logits/rejected": -2.7444300651550293, "logps/chosen": -148.3043975830078, "logps/rejected": -533.5025024414062, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.06937313079834, "rewards/margins": 10.516773223876953, "rewards/rejected": -14.58614730834961, "step": 16589 }, { "epoch": 2.58, "learning_rate": 1.9795559935681313e-06, "logits/chosen": -1.9708657264709473, "logits/rejected": -2.6889989376068115, "logps/chosen": -141.8493194580078, "logps/rejected": -459.68817138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.0096435546875, "rewards/margins": 10.736730575561523, "rewards/rejected": -19.746374130249023, "step": 16590 }, { "epoch": 2.58, "learning_rate": 1.978822553036983e-06, "logits/chosen": -2.881870985031128, "logits/rejected": -2.2037341594696045, "logps/chosen": -420.02252197265625, "logps/rejected": -400.7598876953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.90584135055542, "rewards/margins": 10.498883247375488, "rewards/rejected": -18.40472412109375, "step": 16591 }, { "epoch": 2.58, "learning_rate": 1.9780891125058354e-06, "logits/chosen": -1.5979158878326416, "logits/rejected": -2.5586493015289307, "logps/chosen": -132.57574462890625, "logps/rejected": -385.70306396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1634111404418945, "rewards/margins": 15.287288665771484, "rewards/rejected": -21.450698852539062, "step": 16592 }, { "epoch": 2.58, "learning_rate": 1.9773556719746873e-06, "logits/chosen": -0.9943456053733826, "logits/rejected": -2.6266891956329346, "logps/chosen": -164.1954803466797, "logps/rejected": -607.3421020507812, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -9.034492492675781, "rewards/margins": 8.675108909606934, "rewards/rejected": -17.70960235595703, "step": 16593 }, { "epoch": 2.58, "learning_rate": 1.9766222314435396e-06, "logits/chosen": -1.0423338413238525, "logits/rejected": -2.2794532775878906, "logps/chosen": -125.89234161376953, "logps/rejected": -391.806884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.063085079193115, "rewards/margins": 15.262815475463867, "rewards/rejected": -19.32590103149414, "step": 16594 }, { "epoch": 2.58, "learning_rate": 1.975888790912392e-06, "logits/chosen": -1.1229368448257446, "logits/rejected": -2.1210439205169678, "logps/chosen": -296.3232421875, "logps/rejected": -718.2281494140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.925450325012207, "rewards/margins": 12.842072486877441, "rewards/rejected": -20.76752281188965, "step": 16595 }, { "epoch": 2.58, "learning_rate": 1.975155350381244e-06, "logits/chosen": -2.788182020187378, "logits/rejected": -2.350811719894409, "logps/chosen": -216.49420166015625, "logps/rejected": -233.5283203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.3282670974731445, "rewards/margins": 10.46215534210205, "rewards/rejected": -15.790422439575195, "step": 16596 }, { "epoch": 2.58, "learning_rate": 1.974421909850096e-06, "logits/chosen": -2.632840394973755, "logits/rejected": -2.0492632389068604, "logps/chosen": -207.38494873046875, "logps/rejected": -318.990478515625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.405277729034424, "rewards/margins": 11.71823787689209, "rewards/rejected": -17.123516082763672, "step": 16597 }, { "epoch": 2.58, "learning_rate": 1.973688469318948e-06, "logits/chosen": -2.45131778717041, "logits/rejected": -2.163299083709717, "logps/chosen": -675.1469116210938, "logps/rejected": -583.4027099609375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -7.287381172180176, "rewards/margins": 6.484941482543945, "rewards/rejected": -13.772322654724121, "step": 16598 }, { "epoch": 2.58, "learning_rate": 1.9729550287878003e-06, "logits/chosen": -2.9835944175720215, "logits/rejected": -3.0673346519470215, "logps/chosen": -184.12966918945312, "logps/rejected": -183.23422241210938, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -5.2000226974487305, "rewards/margins": 6.45718240737915, "rewards/rejected": -11.657205581665039, "step": 16599 }, { "epoch": 2.58, "learning_rate": 1.972221588256652e-06, "logits/chosen": -1.5364221334457397, "logits/rejected": -2.644120931625366, "logps/chosen": -247.0432586669922, "logps/rejected": -515.7743530273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2040510177612305, "rewards/margins": 13.382131576538086, "rewards/rejected": -20.586181640625, "step": 16600 }, { "epoch": 2.58, "learning_rate": 1.9714881477255045e-06, "logits/chosen": -0.9504373073577881, "logits/rejected": -1.0122193098068237, "logps/chosen": -438.7043151855469, "logps/rejected": -528.549560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.689260482788086, "rewards/margins": 11.405261993408203, "rewards/rejected": -19.09452247619629, "step": 16601 }, { "epoch": 2.58, "learning_rate": 1.9707547071943568e-06, "logits/chosen": -2.648219108581543, "logits/rejected": -2.3619537353515625, "logps/chosen": -273.42742919921875, "logps/rejected": -329.5799865722656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.950326919555664, "rewards/margins": 9.642642974853516, "rewards/rejected": -14.59296989440918, "step": 16602 }, { "epoch": 2.58, "learning_rate": 1.9700212666632086e-06, "logits/chosen": -2.342582941055298, "logits/rejected": -2.5154380798339844, "logps/chosen": -137.57965087890625, "logps/rejected": -282.0332946777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.868630409240723, "rewards/margins": 12.394781112670898, "rewards/rejected": -17.263412475585938, "step": 16603 }, { "epoch": 2.58, "learning_rate": 1.969287826132061e-06, "logits/chosen": -1.3148126602172852, "logits/rejected": -2.597856044769287, "logps/chosen": -239.26956176757812, "logps/rejected": -513.1948852539062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.608015060424805, "rewards/margins": 11.825565338134766, "rewards/rejected": -18.433578491210938, "step": 16604 }, { "epoch": 2.58, "learning_rate": 1.968554385600913e-06, "logits/chosen": -2.905642509460449, "logits/rejected": -2.185353994369507, "logps/chosen": -322.1253662109375, "logps/rejected": -427.6993713378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.148416042327881, "rewards/margins": 13.417884826660156, "rewards/rejected": -17.566301345825195, "step": 16605 }, { "epoch": 2.58, "learning_rate": 1.967820945069765e-06, "logits/chosen": -1.4828580617904663, "logits/rejected": -2.913027286529541, "logps/chosen": -126.4359130859375, "logps/rejected": -570.85107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.538393020629883, "rewards/margins": 14.616934776306152, "rewards/rejected": -23.15532684326172, "step": 16606 }, { "epoch": 2.58, "learning_rate": 1.967087504538617e-06, "logits/chosen": -2.8130109310150146, "logits/rejected": -2.817976951599121, "logps/chosen": -295.5155944824219, "logps/rejected": -354.10400390625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.268416404724121, "rewards/margins": 8.469322204589844, "rewards/rejected": -15.737737655639648, "step": 16607 }, { "epoch": 2.58, "learning_rate": 1.9663540640074693e-06, "logits/chosen": -2.465564489364624, "logits/rejected": -1.9524725675582886, "logps/chosen": -260.73486328125, "logps/rejected": -259.8924865722656, "loss": 0.3663, "rewards/accuracies": 0.5, "rewards/chosen": -8.781578063964844, "rewards/margins": 5.433779239654541, "rewards/rejected": -14.215356826782227, "step": 16608 }, { "epoch": 2.58, "learning_rate": 1.965620623476321e-06, "logits/chosen": -2.250486135482788, "logits/rejected": -1.8666934967041016, "logps/chosen": -208.01707458496094, "logps/rejected": -372.0177917480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.962532043457031, "rewards/margins": 13.766120910644531, "rewards/rejected": -19.728652954101562, "step": 16609 }, { "epoch": 2.58, "learning_rate": 1.9648871829451735e-06, "logits/chosen": -1.766935110092163, "logits/rejected": -2.405362129211426, "logps/chosen": -209.14779663085938, "logps/rejected": -315.269287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1926956176757812, "rewards/margins": 13.656036376953125, "rewards/rejected": -16.848731994628906, "step": 16610 }, { "epoch": 2.58, "learning_rate": 1.9641537424140258e-06, "logits/chosen": -1.7713208198547363, "logits/rejected": -2.5573081970214844, "logps/chosen": -189.50807189941406, "logps/rejected": -387.7647705078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.475468635559082, "rewards/margins": 9.765920639038086, "rewards/rejected": -18.241390228271484, "step": 16611 }, { "epoch": 2.58, "learning_rate": 1.963420301882878e-06, "logits/chosen": -2.0088398456573486, "logits/rejected": -2.562016725540161, "logps/chosen": -214.25, "logps/rejected": -341.3078308105469, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.187380313873291, "rewards/margins": 8.524944305419922, "rewards/rejected": -14.712325096130371, "step": 16612 }, { "epoch": 2.58, "learning_rate": 1.96268686135173e-06, "logits/chosen": -2.648261547088623, "logits/rejected": -2.2558419704437256, "logps/chosen": -225.20481872558594, "logps/rejected": -328.34722900390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -9.400430679321289, "rewards/margins": 9.31001091003418, "rewards/rejected": -18.71044158935547, "step": 16613 }, { "epoch": 2.58, "learning_rate": 1.961953420820582e-06, "logits/chosen": -2.7334368228912354, "logits/rejected": -2.4580657482147217, "logps/chosen": -348.7125549316406, "logps/rejected": -385.838623046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.588726997375488, "rewards/margins": 9.503653526306152, "rewards/rejected": -17.09238052368164, "step": 16614 }, { "epoch": 2.58, "learning_rate": 1.961219980289434e-06, "logits/chosen": -2.3847837448120117, "logits/rejected": -2.916494846343994, "logps/chosen": -101.2562026977539, "logps/rejected": -366.2724914550781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.682888507843018, "rewards/margins": 14.297739028930664, "rewards/rejected": -19.980628967285156, "step": 16615 }, { "epoch": 2.58, "learning_rate": 1.960486539758286e-06, "logits/chosen": -2.2298965454101562, "logits/rejected": -2.7815725803375244, "logps/chosen": -113.66724395751953, "logps/rejected": -355.0146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.306905746459961, "rewards/margins": 9.819093704223633, "rewards/rejected": -18.125999450683594, "step": 16616 }, { "epoch": 2.58, "learning_rate": 1.9597530992271383e-06, "logits/chosen": -2.412381410598755, "logits/rejected": -2.919790267944336, "logps/chosen": -230.68014526367188, "logps/rejected": -340.24853515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.173274040222168, "rewards/margins": 8.569854736328125, "rewards/rejected": -14.743127822875977, "step": 16617 }, { "epoch": 2.58, "learning_rate": 1.95901965869599e-06, "logits/chosen": -2.237773895263672, "logits/rejected": -2.7040858268737793, "logps/chosen": -588.67529296875, "logps/rejected": -615.0849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.012296676635742, "rewards/margins": 13.053044319152832, "rewards/rejected": -22.06534194946289, "step": 16618 }, { "epoch": 2.58, "learning_rate": 1.958286218164843e-06, "logits/chosen": -2.072230100631714, "logits/rejected": -2.3381779193878174, "logps/chosen": -205.36807250976562, "logps/rejected": -442.05462646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.424556732177734, "rewards/margins": 12.48314094543457, "rewards/rejected": -17.907699584960938, "step": 16619 }, { "epoch": 2.58, "learning_rate": 1.957552777633695e-06, "logits/chosen": -1.1158982515335083, "logits/rejected": -2.573171854019165, "logps/chosen": -123.30455017089844, "logps/rejected": -483.73724365234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.493358135223389, "rewards/margins": 9.892674446105957, "rewards/rejected": -16.386032104492188, "step": 16620 }, { "epoch": 2.58, "learning_rate": 1.956819337102547e-06, "logits/chosen": -2.7403836250305176, "logits/rejected": -2.5809590816497803, "logps/chosen": -104.14328002929688, "logps/rejected": -248.02615356445312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.278478145599365, "rewards/margins": 8.702943801879883, "rewards/rejected": -12.981422424316406, "step": 16621 }, { "epoch": 2.59, "learning_rate": 1.956085896571399e-06, "logits/chosen": -2.550429582595825, "logits/rejected": -2.73105525970459, "logps/chosen": -217.08041381835938, "logps/rejected": -532.0380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7757463455200195, "rewards/margins": 11.995841979980469, "rewards/rejected": -16.771589279174805, "step": 16622 }, { "epoch": 2.59, "learning_rate": 1.955352456040251e-06, "logits/chosen": -2.640235662460327, "logits/rejected": -1.7852290868759155, "logps/chosen": -163.21878051757812, "logps/rejected": -120.81988525390625, "loss": 0.3724, "rewards/accuracies": 0.5, "rewards/chosen": -5.455868244171143, "rewards/margins": 5.152801513671875, "rewards/rejected": -10.608670234680176, "step": 16623 }, { "epoch": 2.59, "learning_rate": 1.954619015509103e-06, "logits/chosen": -1.6602298021316528, "logits/rejected": -2.3621292114257812, "logps/chosen": -147.70968627929688, "logps/rejected": -414.74072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.338907241821289, "rewards/margins": 10.653982162475586, "rewards/rejected": -19.992889404296875, "step": 16624 }, { "epoch": 2.59, "learning_rate": 1.953885574977955e-06, "logits/chosen": -2.4388222694396973, "logits/rejected": -2.5404510498046875, "logps/chosen": -76.96864318847656, "logps/rejected": -274.6394348144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.033659934997559, "rewards/margins": 11.31253719329834, "rewards/rejected": -16.3461971282959, "step": 16625 }, { "epoch": 2.59, "learning_rate": 1.9531521344468073e-06, "logits/chosen": -2.8531832695007324, "logits/rejected": -2.6364047527313232, "logps/chosen": -247.57745361328125, "logps/rejected": -356.94622802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.515217781066895, "rewards/margins": 10.758920669555664, "rewards/rejected": -19.274139404296875, "step": 16626 }, { "epoch": 2.59, "learning_rate": 1.9524186939156596e-06, "logits/chosen": -2.773851156234741, "logits/rejected": -1.7253328561782837, "logps/chosen": -354.08013916015625, "logps/rejected": -285.89306640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.340022563934326, "rewards/margins": 6.61559534072876, "rewards/rejected": -13.955617904663086, "step": 16627 }, { "epoch": 2.59, "learning_rate": 1.951685253384512e-06, "logits/chosen": -2.075813055038452, "logits/rejected": -2.9623537063598633, "logps/chosen": -216.85386657714844, "logps/rejected": -422.6984558105469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.8131184577941895, "rewards/margins": 9.711617469787598, "rewards/rejected": -14.524736404418945, "step": 16628 }, { "epoch": 2.59, "learning_rate": 1.950951812853364e-06, "logits/chosen": -2.0680441856384277, "logits/rejected": -2.6943633556365967, "logps/chosen": -597.90771484375, "logps/rejected": -520.4641723632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.420771837234497, "rewards/margins": 11.424455642700195, "rewards/rejected": -14.845227241516113, "step": 16629 }, { "epoch": 2.59, "learning_rate": 1.950218372322216e-06, "logits/chosen": -1.8050072193145752, "logits/rejected": -2.7307989597320557, "logps/chosen": -124.53191375732422, "logps/rejected": -437.2034912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.05433464050293, "rewards/margins": 13.393744468688965, "rewards/rejected": -18.448078155517578, "step": 16630 }, { "epoch": 2.59, "learning_rate": 1.949484931791068e-06, "logits/chosen": -1.5394039154052734, "logits/rejected": -2.6588265895843506, "logps/chosen": -137.46832275390625, "logps/rejected": -493.856689453125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.809530258178711, "rewards/margins": 11.067692756652832, "rewards/rejected": -17.87722396850586, "step": 16631 }, { "epoch": 2.59, "learning_rate": 1.94875149125992e-06, "logits/chosen": -2.3172414302825928, "logits/rejected": -2.799649715423584, "logps/chosen": -141.83187866210938, "logps/rejected": -296.65447998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.827999114990234, "rewards/margins": 11.757776260375977, "rewards/rejected": -18.58577537536621, "step": 16632 }, { "epoch": 2.59, "learning_rate": 1.948018050728772e-06, "logits/chosen": -2.525266408920288, "logits/rejected": -2.1373727321624756, "logps/chosen": -477.42718505859375, "logps/rejected": -506.38970947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1726531982421875, "rewards/margins": 12.0443115234375, "rewards/rejected": -17.216964721679688, "step": 16633 }, { "epoch": 2.59, "learning_rate": 1.947284610197624e-06, "logits/chosen": -2.6988778114318848, "logits/rejected": -2.365736961364746, "logps/chosen": -439.0900573730469, "logps/rejected": -618.5155029296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.050278663635254, "rewards/margins": 8.72834587097168, "rewards/rejected": -16.77862548828125, "step": 16634 }, { "epoch": 2.59, "learning_rate": 1.9465511696664764e-06, "logits/chosen": -2.57846999168396, "logits/rejected": -2.7519044876098633, "logps/chosen": -142.4738311767578, "logps/rejected": -383.30279541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.491169452667236, "rewards/margins": 13.765340805053711, "rewards/rejected": -20.25650978088379, "step": 16635 }, { "epoch": 2.59, "learning_rate": 1.9458177291353287e-06, "logits/chosen": -1.4367393255233765, "logits/rejected": -1.872045874595642, "logps/chosen": -273.47613525390625, "logps/rejected": -647.5671997070312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.126468658447266, "rewards/margins": 7.937684059143066, "rewards/rejected": -17.06415367126465, "step": 16636 }, { "epoch": 2.59, "learning_rate": 1.945084288604181e-06, "logits/chosen": -2.8459484577178955, "logits/rejected": -2.889760732650757, "logps/chosen": -213.54129028320312, "logps/rejected": -400.515869140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.762828826904297, "rewards/margins": 11.811320304870605, "rewards/rejected": -17.574148178100586, "step": 16637 }, { "epoch": 2.59, "learning_rate": 1.944350848073033e-06, "logits/chosen": -0.733718991279602, "logits/rejected": -2.7125301361083984, "logps/chosen": -136.90103149414062, "logps/rejected": -525.2223510742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.030513763427734, "rewards/margins": 10.08619499206543, "rewards/rejected": -16.116708755493164, "step": 16638 }, { "epoch": 2.59, "learning_rate": 1.943617407541885e-06, "logits/chosen": -1.6802494525909424, "logits/rejected": -3.082186698913574, "logps/chosen": -114.42283630371094, "logps/rejected": -503.3873596191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.050045013427734, "rewards/margins": 14.654032707214355, "rewards/rejected": -20.704078674316406, "step": 16639 }, { "epoch": 2.59, "learning_rate": 1.942883967010737e-06, "logits/chosen": -1.3002828359603882, "logits/rejected": -2.8017663955688477, "logps/chosen": -109.9325180053711, "logps/rejected": -454.4282531738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.863536834716797, "rewards/margins": 10.123920440673828, "rewards/rejected": -14.987457275390625, "step": 16640 }, { "epoch": 2.59, "learning_rate": 1.9421505264795893e-06, "logits/chosen": -0.5835743546485901, "logits/rejected": -2.521679401397705, "logps/chosen": -124.500244140625, "logps/rejected": -374.09442138671875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -9.232730865478516, "rewards/margins": 9.038299560546875, "rewards/rejected": -18.27103042602539, "step": 16641 }, { "epoch": 2.59, "learning_rate": 1.941417085948441e-06, "logits/chosen": -1.4378453493118286, "logits/rejected": -2.6225671768188477, "logps/chosen": -345.52734375, "logps/rejected": -575.6463623046875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -10.4576416015625, "rewards/margins": 7.44289493560791, "rewards/rejected": -17.900535583496094, "step": 16642 }, { "epoch": 2.59, "learning_rate": 1.940683645417293e-06, "logits/chosen": -2.4141685962677, "logits/rejected": -3.145880937576294, "logps/chosen": -95.29693603515625, "logps/rejected": -507.35504150390625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.360191345214844, "rewards/margins": 10.889101028442383, "rewards/rejected": -18.249292373657227, "step": 16643 }, { "epoch": 2.59, "learning_rate": 1.939950204886146e-06, "logits/chosen": -2.468742609024048, "logits/rejected": -2.8339264392852783, "logps/chosen": -102.83267211914062, "logps/rejected": -184.29953002929688, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -7.345068454742432, "rewards/margins": 5.456364631652832, "rewards/rejected": -12.801433563232422, "step": 16644 }, { "epoch": 2.59, "learning_rate": 1.9392167643549977e-06, "logits/chosen": -1.6740167140960693, "logits/rejected": -2.573474407196045, "logps/chosen": -224.89932250976562, "logps/rejected": -587.732421875, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -11.93221664428711, "rewards/margins": 7.114818096160889, "rewards/rejected": -19.047035217285156, "step": 16645 }, { "epoch": 2.59, "learning_rate": 1.93848332382385e-06, "logits/chosen": -2.5650670528411865, "logits/rejected": -1.6858677864074707, "logps/chosen": -109.15999603271484, "logps/rejected": -243.7626190185547, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.919843673706055, "rewards/margins": 9.873697280883789, "rewards/rejected": -15.793540954589844, "step": 16646 }, { "epoch": 2.59, "learning_rate": 1.937749883292702e-06, "logits/chosen": -2.7718966007232666, "logits/rejected": -2.572429895401001, "logps/chosen": -693.9765625, "logps/rejected": -565.1304931640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.526010990142822, "rewards/margins": 9.587223052978516, "rewards/rejected": -16.11323356628418, "step": 16647 }, { "epoch": 2.59, "learning_rate": 1.937016442761554e-06, "logits/chosen": -0.7658627033233643, "logits/rejected": -1.1142083406448364, "logps/chosen": -134.73377990722656, "logps/rejected": -372.7737731933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.344338417053223, "rewards/margins": 12.640809059143066, "rewards/rejected": -16.98514747619629, "step": 16648 }, { "epoch": 2.59, "learning_rate": 1.936283002230406e-06, "logits/chosen": -1.120762825012207, "logits/rejected": -2.5006325244903564, "logps/chosen": -202.63510131835938, "logps/rejected": -607.4706420898438, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.999714851379395, "rewards/margins": 12.196239471435547, "rewards/rejected": -22.195953369140625, "step": 16649 }, { "epoch": 2.59, "learning_rate": 1.9355495616992583e-06, "logits/chosen": -2.0944406986236572, "logits/rejected": -2.883574962615967, "logps/chosen": -177.37649536132812, "logps/rejected": -303.31103515625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.417715549468994, "rewards/margins": 8.385763168334961, "rewards/rejected": -14.803478240966797, "step": 16650 }, { "epoch": 2.59, "learning_rate": 1.9348161211681102e-06, "logits/chosen": -2.497041702270508, "logits/rejected": -2.0944576263427734, "logps/chosen": -203.87753295898438, "logps/rejected": -252.33633422851562, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.845267295837402, "rewards/margins": 7.982510566711426, "rewards/rejected": -13.827777862548828, "step": 16651 }, { "epoch": 2.59, "learning_rate": 1.9340826806369625e-06, "logits/chosen": -1.9453201293945312, "logits/rejected": -2.642427921295166, "logps/chosen": -203.8590850830078, "logps/rejected": -312.83892822265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.97916316986084, "rewards/margins": 7.640857219696045, "rewards/rejected": -14.620019912719727, "step": 16652 }, { "epoch": 2.59, "learning_rate": 1.933349240105815e-06, "logits/chosen": -2.4396777153015137, "logits/rejected": -2.761800527572632, "logps/chosen": -70.88674926757812, "logps/rejected": -190.7705841064453, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -5.773227691650391, "rewards/margins": 4.379487037658691, "rewards/rejected": -10.152713775634766, "step": 16653 }, { "epoch": 2.59, "learning_rate": 1.9326157995746667e-06, "logits/chosen": -2.062177896499634, "logits/rejected": -2.7833073139190674, "logps/chosen": -289.1366882324219, "logps/rejected": -447.79815673828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.41967248916626, "rewards/margins": 8.54725456237793, "rewards/rejected": -15.966926574707031, "step": 16654 }, { "epoch": 2.59, "learning_rate": 1.931882359043519e-06, "logits/chosen": -1.7988590002059937, "logits/rejected": -2.2331249713897705, "logps/chosen": -112.7455062866211, "logps/rejected": -320.6102294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.690077781677246, "rewards/margins": 14.73375129699707, "rewards/rejected": -20.423828125, "step": 16655 }, { "epoch": 2.59, "learning_rate": 1.931148918512371e-06, "logits/chosen": -2.8101186752319336, "logits/rejected": -2.531559944152832, "logps/chosen": -290.14404296875, "logps/rejected": -268.4446716308594, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -2.6690871715545654, "rewards/margins": 9.992948532104492, "rewards/rejected": -12.66203498840332, "step": 16656 }, { "epoch": 2.59, "learning_rate": 1.930415477981223e-06, "logits/chosen": -2.1971263885498047, "logits/rejected": -2.7323873043060303, "logps/chosen": -187.43365478515625, "logps/rejected": -520.686279296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.729349613189697, "rewards/margins": 11.241140365600586, "rewards/rejected": -15.970489501953125, "step": 16657 }, { "epoch": 2.59, "learning_rate": 1.929682037450075e-06, "logits/chosen": -2.810072660446167, "logits/rejected": -2.1151418685913086, "logps/chosen": -232.889892578125, "logps/rejected": -295.24981689453125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -8.922780990600586, "rewards/margins": 7.161803722381592, "rewards/rejected": -16.084585189819336, "step": 16658 }, { "epoch": 2.59, "learning_rate": 1.9289485969189274e-06, "logits/chosen": -2.5461275577545166, "logits/rejected": -2.7833704948425293, "logps/chosen": -198.18052673339844, "logps/rejected": -393.36822509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -1.9157074689865112, "rewards/margins": 11.479207992553711, "rewards/rejected": -13.394914627075195, "step": 16659 }, { "epoch": 2.59, "learning_rate": 1.9282151563877792e-06, "logits/chosen": -2.531351327896118, "logits/rejected": -2.1672043800354004, "logps/chosen": -549.5422973632812, "logps/rejected": -561.8701171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.092113494873047, "rewards/margins": 8.680663108825684, "rewards/rejected": -13.772777557373047, "step": 16660 }, { "epoch": 2.59, "learning_rate": 1.927481715856632e-06, "logits/chosen": -2.369041681289673, "logits/rejected": -2.86360502243042, "logps/chosen": -280.5708923339844, "logps/rejected": -432.291259765625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.634265422821045, "rewards/margins": 8.65457534790039, "rewards/rejected": -15.288841247558594, "step": 16661 }, { "epoch": 2.59, "learning_rate": 1.926748275325484e-06, "logits/chosen": -1.8552018404006958, "logits/rejected": -2.631124973297119, "logps/chosen": -121.89155578613281, "logps/rejected": -277.61529541015625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/chosen": -8.486920356750488, "rewards/margins": 6.1063995361328125, "rewards/rejected": -14.593318939208984, "step": 16662 }, { "epoch": 2.59, "learning_rate": 1.9260148347943357e-06, "logits/chosen": -2.6754133701324463, "logits/rejected": -2.9348859786987305, "logps/chosen": -239.66586303710938, "logps/rejected": -417.5463562011719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6331095695495605, "rewards/margins": 14.372570037841797, "rewards/rejected": -19.005680084228516, "step": 16663 }, { "epoch": 2.59, "learning_rate": 1.925281394263188e-06, "logits/chosen": -1.8764046430587769, "logits/rejected": -2.700063467025757, "logps/chosen": -242.33621215820312, "logps/rejected": -516.7779541015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.420361518859863, "rewards/margins": 12.720929145812988, "rewards/rejected": -19.14129066467285, "step": 16664 }, { "epoch": 2.59, "learning_rate": 1.92454795373204e-06, "logits/chosen": -2.8337137699127197, "logits/rejected": -1.7615021467208862, "logps/chosen": -521.7340698242188, "logps/rejected": -249.61328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.579473495483398, "rewards/margins": 8.950039863586426, "rewards/rejected": -14.529513359069824, "step": 16665 }, { "epoch": 2.59, "learning_rate": 1.9238145132008922e-06, "logits/chosen": -2.7214157581329346, "logits/rejected": -3.0160419940948486, "logps/chosen": -765.0885009765625, "logps/rejected": -758.1251220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.367898941040039, "rewards/margins": 11.206911087036133, "rewards/rejected": -17.574810028076172, "step": 16666 }, { "epoch": 2.59, "learning_rate": 1.923081072669744e-06, "logits/chosen": -2.345447301864624, "logits/rejected": -2.6849007606506348, "logps/chosen": -256.169677734375, "logps/rejected": -537.8909301757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6460466384887695, "rewards/margins": 11.010980606079102, "rewards/rejected": -17.657028198242188, "step": 16667 }, { "epoch": 2.59, "learning_rate": 1.9223476321385964e-06, "logits/chosen": -2.4409432411193848, "logits/rejected": -1.8530840873718262, "logps/chosen": -287.9298400878906, "logps/rejected": -390.89642333984375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -7.993751525878906, "rewards/margins": 7.849679946899414, "rewards/rejected": -15.84343147277832, "step": 16668 }, { "epoch": 2.59, "learning_rate": 1.9216141916074487e-06, "logits/chosen": -2.7855064868927, "logits/rejected": -2.6233277320861816, "logps/chosen": -825.9176635742188, "logps/rejected": -662.5704345703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.501398086547852, "rewards/margins": 9.954618453979492, "rewards/rejected": -16.456016540527344, "step": 16669 }, { "epoch": 2.59, "learning_rate": 1.920880751076301e-06, "logits/chosen": -2.7118453979492188, "logits/rejected": -2.521482467651367, "logps/chosen": -408.79119873046875, "logps/rejected": -346.5254821777344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.953788757324219, "rewards/margins": 9.60340404510498, "rewards/rejected": -16.557193756103516, "step": 16670 }, { "epoch": 2.59, "learning_rate": 1.920147310545153e-06, "logits/chosen": -1.7198140621185303, "logits/rejected": -2.5656416416168213, "logps/chosen": -145.41009521484375, "logps/rejected": -360.12896728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.359816551208496, "rewards/margins": 9.728402137756348, "rewards/rejected": -16.088218688964844, "step": 16671 }, { "epoch": 2.59, "learning_rate": 1.9194138700140048e-06, "logits/chosen": -2.3244426250457764, "logits/rejected": -2.7205827236175537, "logps/chosen": -213.45269775390625, "logps/rejected": -473.9008483886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.523320198059082, "rewards/margins": 13.769679069519043, "rewards/rejected": -21.292999267578125, "step": 16672 }, { "epoch": 2.59, "learning_rate": 1.918680429482857e-06, "logits/chosen": -3.0089972019195557, "logits/rejected": -2.5452024936676025, "logps/chosen": -446.38671875, "logps/rejected": -510.566650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.716983318328857, "rewards/margins": 11.466169357299805, "rewards/rejected": -18.18315315246582, "step": 16673 }, { "epoch": 2.59, "learning_rate": 1.917946988951709e-06, "logits/chosen": -2.22451114654541, "logits/rejected": -0.8716727495193481, "logps/chosen": -226.6825408935547, "logps/rejected": -116.03703308105469, "loss": 0.3599, "rewards/accuracies": 0.5, "rewards/chosen": -6.188459396362305, "rewards/margins": 3.077869415283203, "rewards/rejected": -9.266328811645508, "step": 16674 }, { "epoch": 2.59, "learning_rate": 1.9172135484205612e-06, "logits/chosen": -2.5349340438842773, "logits/rejected": -1.337984561920166, "logps/chosen": -234.73388671875, "logps/rejected": -224.30465698242188, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": -10.797728538513184, "rewards/margins": 4.30557107925415, "rewards/rejected": -15.103300094604492, "step": 16675 }, { "epoch": 2.59, "learning_rate": 1.916480107889413e-06, "logits/chosen": -2.7979793548583984, "logits/rejected": -1.8710052967071533, "logps/chosen": -1321.404541015625, "logps/rejected": -785.04931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.976229667663574, "rewards/margins": 12.81991958618164, "rewards/rejected": -17.79615020751953, "step": 16676 }, { "epoch": 2.59, "learning_rate": 1.9157466673582654e-06, "logits/chosen": -2.0416605472564697, "logits/rejected": -0.7636486887931824, "logps/chosen": -215.4527587890625, "logps/rejected": -164.0913848876953, "loss": 0.5458, "rewards/accuracies": 0.5, "rewards/chosen": -9.745450019836426, "rewards/margins": 0.5118050575256348, "rewards/rejected": -10.257255554199219, "step": 16677 }, { "epoch": 2.59, "learning_rate": 1.9150132268271177e-06, "logits/chosen": -2.603980541229248, "logits/rejected": -2.3224921226501465, "logps/chosen": -487.6522521972656, "logps/rejected": -497.2016296386719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.610777854919434, "rewards/margins": 9.658507347106934, "rewards/rejected": -17.269285202026367, "step": 16678 }, { "epoch": 2.59, "learning_rate": 1.91427978629597e-06, "logits/chosen": -2.412351608276367, "logits/rejected": -2.8260302543640137, "logps/chosen": -153.64715576171875, "logps/rejected": -354.27032470703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.5471906661987305, "rewards/margins": 9.167845726013184, "rewards/rejected": -13.715036392211914, "step": 16679 }, { "epoch": 2.59, "learning_rate": 1.913546345764822e-06, "logits/chosen": -1.8938491344451904, "logits/rejected": -2.6749427318573, "logps/chosen": -215.0372772216797, "logps/rejected": -349.09222412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.475075721740723, "rewards/margins": 9.472723007202148, "rewards/rejected": -15.947799682617188, "step": 16680 }, { "epoch": 2.59, "learning_rate": 1.912812905233674e-06, "logits/chosen": -2.3150546550750732, "logits/rejected": -2.5818142890930176, "logps/chosen": -260.7916259765625, "logps/rejected": -502.6190185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.462295055389404, "rewards/margins": 12.005159378051758, "rewards/rejected": -19.46745491027832, "step": 16681 }, { "epoch": 2.59, "learning_rate": 1.912079464702526e-06, "logits/chosen": -2.801024913787842, "logits/rejected": -3.094114303588867, "logps/chosen": -209.20309448242188, "logps/rejected": -293.24005126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.828538417816162, "rewards/margins": 9.490486145019531, "rewards/rejected": -16.31902503967285, "step": 16682 }, { "epoch": 2.59, "learning_rate": 1.911346024171378e-06, "logits/chosen": -2.6011128425598145, "logits/rejected": -1.1215540170669556, "logps/chosen": -208.09243774414062, "logps/rejected": -222.69842529296875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.667228698730469, "rewards/margins": 7.072240829467773, "rewards/rejected": -12.739469528198242, "step": 16683 }, { "epoch": 2.59, "learning_rate": 1.9106125836402303e-06, "logits/chosen": -2.2430264949798584, "logits/rejected": -2.6809887886047363, "logps/chosen": -139.86846923828125, "logps/rejected": -403.74066162109375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.912483215332031, "rewards/margins": 11.760459899902344, "rewards/rejected": -20.672943115234375, "step": 16684 }, { "epoch": 2.59, "learning_rate": 1.909879143109082e-06, "logits/chosen": -2.8402962684631348, "logits/rejected": -2.9998111724853516, "logps/chosen": -276.96917724609375, "logps/rejected": -356.2091369628906, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.612387180328369, "rewards/margins": 7.719785690307617, "rewards/rejected": -15.332172393798828, "step": 16685 }, { "epoch": 2.6, "learning_rate": 1.909145702577935e-06, "logits/chosen": -1.6351630687713623, "logits/rejected": -2.6914753913879395, "logps/chosen": -513.625, "logps/rejected": -795.7005615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.900041580200195, "rewards/margins": 13.095454216003418, "rewards/rejected": -21.995494842529297, "step": 16686 }, { "epoch": 2.6, "learning_rate": 1.9084122620467867e-06, "logits/chosen": -1.78864586353302, "logits/rejected": -2.61372709274292, "logps/chosen": -256.45025634765625, "logps/rejected": -566.8108520507812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.520486831665039, "rewards/margins": 11.049544334411621, "rewards/rejected": -20.570030212402344, "step": 16687 }, { "epoch": 2.6, "learning_rate": 1.907678821515639e-06, "logits/chosen": -1.7238746881484985, "logits/rejected": -2.6807620525360107, "logps/chosen": -150.3837890625, "logps/rejected": -245.69711303710938, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/chosen": -6.609652519226074, "rewards/margins": 4.235301971435547, "rewards/rejected": -10.844953536987305, "step": 16688 }, { "epoch": 2.6, "learning_rate": 1.906945380984491e-06, "logits/chosen": -2.087293863296509, "logits/rejected": -2.9806103706359863, "logps/chosen": -122.81221008300781, "logps/rejected": -427.06817626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.130720138549805, "rewards/margins": 11.757329940795898, "rewards/rejected": -21.888050079345703, "step": 16689 }, { "epoch": 2.6, "learning_rate": 1.9062119404533432e-06, "logits/chosen": -2.688831090927124, "logits/rejected": -2.5410614013671875, "logps/chosen": -454.5641174316406, "logps/rejected": -725.930908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4947614669799805, "rewards/margins": 14.205368995666504, "rewards/rejected": -19.700130462646484, "step": 16690 }, { "epoch": 2.6, "learning_rate": 1.905478499922195e-06, "logits/chosen": -1.7759097814559937, "logits/rejected": -2.63928484916687, "logps/chosen": -165.6707000732422, "logps/rejected": -557.047119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.460823059082031, "rewards/margins": 14.299518585205078, "rewards/rejected": -22.76034164428711, "step": 16691 }, { "epoch": 2.6, "learning_rate": 1.9047450593910472e-06, "logits/chosen": -1.8623429536819458, "logits/rejected": -2.680682420730591, "logps/chosen": -173.45579528808594, "logps/rejected": -326.7562561035156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.38386344909668, "rewards/margins": 8.681877136230469, "rewards/rejected": -16.06574058532715, "step": 16692 }, { "epoch": 2.6, "learning_rate": 1.9040116188598995e-06, "logits/chosen": -2.513343334197998, "logits/rejected": -2.178661584854126, "logps/chosen": -194.4022216796875, "logps/rejected": -228.73365783691406, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/chosen": -8.864424705505371, "rewards/margins": 3.230558395385742, "rewards/rejected": -12.094983100891113, "step": 16693 }, { "epoch": 2.6, "learning_rate": 1.9032781783287514e-06, "logits/chosen": -1.5923672914505005, "logits/rejected": -2.217048406600952, "logps/chosen": -296.72467041015625, "logps/rejected": -619.6156005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.185522079467773, "rewards/margins": 13.589685440063477, "rewards/rejected": -20.77520751953125, "step": 16694 }, { "epoch": 2.6, "learning_rate": 1.9025447377976037e-06, "logits/chosen": -2.4869415760040283, "logits/rejected": -2.3019275665283203, "logps/chosen": -622.447021484375, "logps/rejected": -562.308837890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.103141784667969, "rewards/margins": 9.977333068847656, "rewards/rejected": -19.080474853515625, "step": 16695 }, { "epoch": 2.6, "learning_rate": 1.9018112972664555e-06, "logits/chosen": -2.701840877532959, "logits/rejected": -2.7976877689361572, "logps/chosen": -207.28897094726562, "logps/rejected": -318.5569152832031, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -7.720223426818848, "rewards/margins": 10.505261421203613, "rewards/rejected": -18.22548484802246, "step": 16696 }, { "epoch": 2.6, "learning_rate": 1.901077856735308e-06, "logits/chosen": -1.3139817714691162, "logits/rejected": -1.9309790134429932, "logps/chosen": -148.12115478515625, "logps/rejected": -316.4150695800781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.31375503540039, "rewards/margins": 9.439369201660156, "rewards/rejected": -17.753124237060547, "step": 16697 }, { "epoch": 2.6, "learning_rate": 1.90034441620416e-06, "logits/chosen": -2.51517653465271, "logits/rejected": -2.2409300804138184, "logps/chosen": -378.11724853515625, "logps/rejected": -596.8460693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.764692306518555, "rewards/margins": 14.761500358581543, "rewards/rejected": -24.52619171142578, "step": 16698 }, { "epoch": 2.6, "learning_rate": 1.8996109756730122e-06, "logits/chosen": -2.4926741123199463, "logits/rejected": -3.015657901763916, "logps/chosen": -870.111083984375, "logps/rejected": -751.3123779296875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -5.259851455688477, "rewards/margins": 5.477145195007324, "rewards/rejected": -10.7369966506958, "step": 16699 }, { "epoch": 2.6, "learning_rate": 1.8988775351418641e-06, "logits/chosen": -2.031087875366211, "logits/rejected": -2.949127674102783, "logps/chosen": -499.5980224609375, "logps/rejected": -587.298583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.62047290802002, "rewards/margins": 15.425857543945312, "rewards/rejected": -24.046329498291016, "step": 16700 }, { "epoch": 2.6, "learning_rate": 1.8981440946107162e-06, "logits/chosen": -1.6124539375305176, "logits/rejected": -2.0681540966033936, "logps/chosen": -218.78248596191406, "logps/rejected": -452.92803955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.655189514160156, "rewards/margins": 16.270244598388672, "rewards/rejected": -20.925434112548828, "step": 16701 }, { "epoch": 2.6, "learning_rate": 1.8974106540795685e-06, "logits/chosen": -2.847003221511841, "logits/rejected": -2.4319496154785156, "logps/chosen": -537.470703125, "logps/rejected": -363.5711364746094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.0786848068237305, "rewards/margins": 11.113689422607422, "rewards/rejected": -16.19237518310547, "step": 16702 }, { "epoch": 2.6, "learning_rate": 1.8966772135484204e-06, "logits/chosen": -2.3324763774871826, "logits/rejected": -2.7650294303894043, "logps/chosen": -120.18638610839844, "logps/rejected": -344.0592041015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.864266872406006, "rewards/margins": 9.177915573120117, "rewards/rejected": -15.042182922363281, "step": 16703 }, { "epoch": 2.6, "learning_rate": 1.8959437730172727e-06, "logits/chosen": -1.5622872114181519, "logits/rejected": -2.3201982975006104, "logps/chosen": -123.95478820800781, "logps/rejected": -399.29339599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.806249141693115, "rewards/margins": 14.095853805541992, "rewards/rejected": -18.902103424072266, "step": 16704 }, { "epoch": 2.6, "learning_rate": 1.8952103324861248e-06, "logits/chosen": -1.9259251356124878, "logits/rejected": -2.2870841026306152, "logps/chosen": -345.06488037109375, "logps/rejected": -407.53387451171875, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": -9.562615394592285, "rewards/margins": 7.719806671142578, "rewards/rejected": -17.282421112060547, "step": 16705 }, { "epoch": 2.6, "learning_rate": 1.894476891954977e-06, "logits/chosen": -2.740943670272827, "logits/rejected": -1.2890558242797852, "logps/chosen": -361.96453857421875, "logps/rejected": -231.67974853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.01130747795105, "rewards/margins": 9.719573974609375, "rewards/rejected": -12.730881690979004, "step": 16706 }, { "epoch": 2.6, "learning_rate": 1.893743451423829e-06, "logits/chosen": -2.7306621074676514, "logits/rejected": -3.179018497467041, "logps/chosen": -262.67059326171875, "logps/rejected": -460.7657470703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.754976272583008, "rewards/margins": 7.735230445861816, "rewards/rejected": -16.49020767211914, "step": 16707 }, { "epoch": 2.6, "learning_rate": 1.8930100108926813e-06, "logits/chosen": -2.7038583755493164, "logits/rejected": -2.217238664627075, "logps/chosen": -636.2611694335938, "logps/rejected": -472.2854309082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.05888557434082, "rewards/margins": 10.967063903808594, "rewards/rejected": -17.025949478149414, "step": 16708 }, { "epoch": 2.6, "learning_rate": 1.8922765703615334e-06, "logits/chosen": -1.4936262369155884, "logits/rejected": -2.9831740856170654, "logps/chosen": -155.556884765625, "logps/rejected": -643.663818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3203582763671875, "rewards/margins": 12.38177490234375, "rewards/rejected": -19.702133178710938, "step": 16709 }, { "epoch": 2.6, "learning_rate": 1.8915431298303857e-06, "logits/chosen": -2.7225875854492188, "logits/rejected": -2.804367780685425, "logps/chosen": -486.09088134765625, "logps/rejected": -659.2974243164062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.138423919677734, "rewards/margins": 12.85797119140625, "rewards/rejected": -19.996395111083984, "step": 16710 }, { "epoch": 2.6, "learning_rate": 1.8908096892992375e-06, "logits/chosen": -2.5041708946228027, "logits/rejected": -2.5324795246124268, "logps/chosen": -357.7908935546875, "logps/rejected": -610.785888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.328784942626953, "rewards/margins": 12.034957885742188, "rewards/rejected": -17.36374282836914, "step": 16711 }, { "epoch": 2.6, "learning_rate": 1.8900762487680894e-06, "logits/chosen": -2.7178585529327393, "logits/rejected": -2.7551395893096924, "logps/chosen": -85.72303009033203, "logps/rejected": -300.4425354003906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.144275665283203, "rewards/margins": 12.874568939208984, "rewards/rejected": -19.018844604492188, "step": 16712 }, { "epoch": 2.6, "learning_rate": 1.8893428082369417e-06, "logits/chosen": -1.3792812824249268, "logits/rejected": -2.3968636989593506, "logps/chosen": -275.8586120605469, "logps/rejected": -509.560302734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.193317890167236, "rewards/margins": 10.858671188354492, "rewards/rejected": -17.05198860168457, "step": 16713 }, { "epoch": 2.6, "learning_rate": 1.8886093677057938e-06, "logits/chosen": -1.7395299673080444, "logits/rejected": -2.5288262367248535, "logps/chosen": -112.65673828125, "logps/rejected": -427.46197509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.158847332000732, "rewards/margins": 13.99969482421875, "rewards/rejected": -18.15854263305664, "step": 16714 }, { "epoch": 2.6, "learning_rate": 1.8878759271746461e-06, "logits/chosen": -2.5949411392211914, "logits/rejected": -2.445939064025879, "logps/chosen": -285.4551086425781, "logps/rejected": -389.3743896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.978816986083984, "rewards/margins": 10.771785736083984, "rewards/rejected": -18.75060272216797, "step": 16715 }, { "epoch": 2.6, "learning_rate": 1.887142486643498e-06, "logits/chosen": -1.3899691104888916, "logits/rejected": -2.7412521839141846, "logps/chosen": -141.86929321289062, "logps/rejected": -374.17633056640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.856009483337402, "rewards/margins": 10.080815315246582, "rewards/rejected": -16.936824798583984, "step": 16716 }, { "epoch": 2.6, "learning_rate": 1.8864090461123503e-06, "logits/chosen": -2.7228057384490967, "logits/rejected": -1.0134330987930298, "logps/chosen": -267.2933654785156, "logps/rejected": -197.86959838867188, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -6.265183448791504, "rewards/margins": 8.854308128356934, "rewards/rejected": -15.119491577148438, "step": 16717 }, { "epoch": 2.6, "learning_rate": 1.8856756055812024e-06, "logits/chosen": -2.177398920059204, "logits/rejected": -2.491175889968872, "logps/chosen": -153.8772735595703, "logps/rejected": -255.40090942382812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.353354454040527, "rewards/margins": 9.100798606872559, "rewards/rejected": -13.454153060913086, "step": 16718 }, { "epoch": 2.6, "learning_rate": 1.8849421650500547e-06, "logits/chosen": -1.9477280378341675, "logits/rejected": -2.715137004852295, "logps/chosen": -117.1321029663086, "logps/rejected": -414.0886535644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.410800933837891, "rewards/margins": 12.735797882080078, "rewards/rejected": -20.14659881591797, "step": 16719 }, { "epoch": 2.6, "learning_rate": 1.8842087245189066e-06, "logits/chosen": -2.368633508682251, "logits/rejected": -2.888812780380249, "logps/chosen": -100.42962646484375, "logps/rejected": -362.40570068359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.591511249542236, "rewards/margins": 7.602633476257324, "rewards/rejected": -15.194145202636719, "step": 16720 }, { "epoch": 2.6, "learning_rate": 1.8834752839877584e-06, "logits/chosen": -2.757148265838623, "logits/rejected": -2.8433473110198975, "logps/chosen": -78.92024993896484, "logps/rejected": -221.37103271484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.222952842712402, "rewards/margins": 10.119003295898438, "rewards/rejected": -15.341957092285156, "step": 16721 }, { "epoch": 2.6, "learning_rate": 1.882741843456611e-06, "logits/chosen": -2.348503589630127, "logits/rejected": -2.6537609100341797, "logps/chosen": -359.07666015625, "logps/rejected": -445.1943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.71183967590332, "rewards/margins": 9.967992782592773, "rewards/rejected": -18.679832458496094, "step": 16722 }, { "epoch": 2.6, "learning_rate": 1.8820084029254628e-06, "logits/chosen": -2.4106943607330322, "logits/rejected": -2.8060739040374756, "logps/chosen": -192.29661560058594, "logps/rejected": -306.93896484375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.130965232849121, "rewards/margins": 7.366610527038574, "rewards/rejected": -12.497575759887695, "step": 16723 }, { "epoch": 2.6, "learning_rate": 1.8812749623943151e-06, "logits/chosen": -2.5556907653808594, "logits/rejected": -2.493307113647461, "logps/chosen": -522.8466796875, "logps/rejected": -547.2012939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.479839324951172, "rewards/margins": 10.721324920654297, "rewards/rejected": -19.20116424560547, "step": 16724 }, { "epoch": 2.6, "learning_rate": 1.880541521863167e-06, "logits/chosen": -2.554638624191284, "logits/rejected": -2.704484224319458, "logps/chosen": -305.358154296875, "logps/rejected": -359.62493896484375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -11.924005508422852, "rewards/margins": 6.735687255859375, "rewards/rejected": -18.659692764282227, "step": 16725 }, { "epoch": 2.6, "learning_rate": 1.8798080813320195e-06, "logits/chosen": -1.8303947448730469, "logits/rejected": -2.0527374744415283, "logps/chosen": -229.65460205078125, "logps/rejected": -374.6183166503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.533425331115723, "rewards/margins": 10.885231018066406, "rewards/rejected": -15.418657302856445, "step": 16726 }, { "epoch": 2.6, "learning_rate": 1.8790746408008714e-06, "logits/chosen": -2.0093870162963867, "logits/rejected": -2.6322405338287354, "logps/chosen": -227.19422912597656, "logps/rejected": -441.173583984375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.501373291015625, "rewards/margins": 10.202823638916016, "rewards/rejected": -17.70419692993164, "step": 16727 }, { "epoch": 2.6, "learning_rate": 1.8783412002697237e-06, "logits/chosen": -2.7214813232421875, "logits/rejected": -2.34842586517334, "logps/chosen": -616.08349609375, "logps/rejected": -565.667236328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.314630508422852, "rewards/margins": 10.566858291625977, "rewards/rejected": -20.881488800048828, "step": 16728 }, { "epoch": 2.6, "learning_rate": 1.8776077597385756e-06, "logits/chosen": -1.192735195159912, "logits/rejected": -2.645880937576294, "logps/chosen": -127.04374694824219, "logps/rejected": -371.8955078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.400881290435791, "rewards/margins": 9.782571792602539, "rewards/rejected": -17.183452606201172, "step": 16729 }, { "epoch": 2.6, "learning_rate": 1.8768743192074279e-06, "logits/chosen": -2.5574231147766113, "logits/rejected": -2.4471817016601562, "logps/chosen": -336.31134033203125, "logps/rejected": -368.6667175292969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.598047256469727, "rewards/margins": 11.491613388061523, "rewards/rejected": -18.08966064453125, "step": 16730 }, { "epoch": 2.6, "learning_rate": 1.87614087867628e-06, "logits/chosen": -1.9898486137390137, "logits/rejected": -2.8755111694335938, "logps/chosen": -109.15679931640625, "logps/rejected": -364.78497314453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.612135410308838, "rewards/margins": 9.959412574768066, "rewards/rejected": -15.571548461914062, "step": 16731 }, { "epoch": 2.6, "learning_rate": 1.8754074381451318e-06, "logits/chosen": -3.0151796340942383, "logits/rejected": -2.6519014835357666, "logps/chosen": -517.0486450195312, "logps/rejected": -509.6134033203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.804227828979492, "rewards/margins": 8.553855895996094, "rewards/rejected": -16.35808563232422, "step": 16732 }, { "epoch": 2.6, "learning_rate": 1.8746739976139842e-06, "logits/chosen": -2.7894492149353027, "logits/rejected": -2.4957916736602783, "logps/chosen": -531.843505859375, "logps/rejected": -404.9800720214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.442814350128174, "rewards/margins": 10.009709358215332, "rewards/rejected": -15.452524185180664, "step": 16733 }, { "epoch": 2.6, "learning_rate": 1.8739405570828362e-06, "logits/chosen": -1.1456592082977295, "logits/rejected": -2.061129093170166, "logps/chosen": -209.4379425048828, "logps/rejected": -566.4061889648438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.663105010986328, "rewards/margins": 14.442167282104492, "rewards/rejected": -19.10527229309082, "step": 16734 }, { "epoch": 2.6, "learning_rate": 1.8732071165516885e-06, "logits/chosen": -1.6523274183273315, "logits/rejected": -1.9165887832641602, "logps/chosen": -455.6592102050781, "logps/rejected": -464.919189453125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.746622562408447, "rewards/margins": 7.480655670166016, "rewards/rejected": -13.227277755737305, "step": 16735 }, { "epoch": 2.6, "learning_rate": 1.8724736760205404e-06, "logits/chosen": -1.7881393432617188, "logits/rejected": -2.960883140563965, "logps/chosen": -254.12460327148438, "logps/rejected": -752.961669921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.865260601043701, "rewards/margins": 14.263994216918945, "rewards/rejected": -21.129255294799805, "step": 16736 }, { "epoch": 2.6, "learning_rate": 1.8717402354893927e-06, "logits/chosen": -2.2312161922454834, "logits/rejected": -0.6839694380760193, "logps/chosen": -199.96266174316406, "logps/rejected": -136.6630401611328, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -4.293546676635742, "rewards/margins": 7.080674648284912, "rewards/rejected": -11.374221801757812, "step": 16737 }, { "epoch": 2.6, "learning_rate": 1.8710067949582446e-06, "logits/chosen": -2.6273481845855713, "logits/rejected": -2.497793436050415, "logps/chosen": -439.6952819824219, "logps/rejected": -504.09747314453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.91670036315918, "rewards/margins": 9.52379035949707, "rewards/rejected": -17.44049072265625, "step": 16738 }, { "epoch": 2.6, "learning_rate": 1.8702733544270971e-06, "logits/chosen": -2.7613298892974854, "logits/rejected": -2.466064691543579, "logps/chosen": -152.4622802734375, "logps/rejected": -271.90283203125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -5.01314640045166, "rewards/margins": 5.762701034545898, "rewards/rejected": -10.775847434997559, "step": 16739 }, { "epoch": 2.6, "learning_rate": 1.869539913895949e-06, "logits/chosen": -2.840479612350464, "logits/rejected": -2.808021068572998, "logps/chosen": -562.4857177734375, "logps/rejected": -456.1230163574219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.790692329406738, "rewards/margins": 11.267134666442871, "rewards/rejected": -17.05782699584961, "step": 16740 }, { "epoch": 2.6, "learning_rate": 1.8688064733648009e-06, "logits/chosen": -2.4828054904937744, "logits/rejected": -2.946911096572876, "logps/chosen": -98.9938735961914, "logps/rejected": -417.81256103515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.270969867706299, "rewards/margins": 10.504354476928711, "rewards/rejected": -17.77532386779785, "step": 16741 }, { "epoch": 2.6, "learning_rate": 1.8680730328336532e-06, "logits/chosen": -1.8540080785751343, "logits/rejected": -2.3980510234832764, "logps/chosen": -156.99005126953125, "logps/rejected": -391.40802001953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.5255446434021, "rewards/margins": 10.42607593536377, "rewards/rejected": -14.951620101928711, "step": 16742 }, { "epoch": 2.6, "learning_rate": 1.8673395923025053e-06, "logits/chosen": -2.7500410079956055, "logits/rejected": -1.610192060470581, "logps/chosen": -311.62939453125, "logps/rejected": -254.67379760742188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.599600315093994, "rewards/margins": 8.005865097045898, "rewards/rejected": -14.60546588897705, "step": 16743 }, { "epoch": 2.6, "learning_rate": 1.8666061517713576e-06, "logits/chosen": -2.2306201457977295, "logits/rejected": -2.4789469242095947, "logps/chosen": -162.00538635253906, "logps/rejected": -307.7043151855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.5123815536499023, "rewards/margins": 11.75071907043457, "rewards/rejected": -14.263099670410156, "step": 16744 }, { "epoch": 2.6, "learning_rate": 1.8658727112402094e-06, "logits/chosen": -2.852141857147217, "logits/rejected": -2.894986867904663, "logps/chosen": -199.7691650390625, "logps/rejected": -311.78167724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.447139024734497, "rewards/margins": 12.192131042480469, "rewards/rejected": -14.639269828796387, "step": 16745 }, { "epoch": 2.6, "learning_rate": 1.8651392707090617e-06, "logits/chosen": -1.8857842683792114, "logits/rejected": -2.2617642879486084, "logps/chosen": -171.1951904296875, "logps/rejected": -322.0089416503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.689914703369141, "rewards/margins": 9.691758155822754, "rewards/rejected": -14.381673812866211, "step": 16746 }, { "epoch": 2.6, "learning_rate": 1.8644058301779138e-06, "logits/chosen": -2.723564624786377, "logits/rejected": -2.7672550678253174, "logps/chosen": -225.59539794921875, "logps/rejected": -272.41510009765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -2.2767295837402344, "rewards/margins": 8.238807678222656, "rewards/rejected": -10.51553726196289, "step": 16747 }, { "epoch": 2.6, "learning_rate": 1.8636723896467661e-06, "logits/chosen": -2.4181132316589355, "logits/rejected": -1.9222370386123657, "logps/chosen": -407.08642578125, "logps/rejected": -513.4775390625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -6.836787223815918, "rewards/margins": 11.60188102722168, "rewards/rejected": -18.438669204711914, "step": 16748 }, { "epoch": 2.6, "learning_rate": 1.862938949115618e-06, "logits/chosen": -2.4296956062316895, "logits/rejected": -2.8654303550720215, "logps/chosen": -488.9466552734375, "logps/rejected": -632.3276977539062, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.840566635131836, "rewards/margins": 8.163337707519531, "rewards/rejected": -14.003904342651367, "step": 16749 }, { "epoch": 2.6, "learning_rate": 1.8622055085844699e-06, "logits/chosen": -2.4482533931732178, "logits/rejected": -2.608140707015991, "logps/chosen": -489.1031188964844, "logps/rejected": -565.2505493164062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.022545337677002, "rewards/margins": 13.659822463989258, "rewards/rejected": -20.6823673248291, "step": 16750 }, { "epoch": 2.61, "learning_rate": 1.8614720680533224e-06, "logits/chosen": -2.001474380493164, "logits/rejected": -2.7203047275543213, "logps/chosen": -143.9085235595703, "logps/rejected": -445.021484375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -8.048760414123535, "rewards/margins": 9.092757225036621, "rewards/rejected": -17.141517639160156, "step": 16751 }, { "epoch": 2.61, "learning_rate": 1.8607386275221743e-06, "logits/chosen": -3.073500871658325, "logits/rejected": -2.796221971511841, "logps/chosen": -254.85133361816406, "logps/rejected": -171.50790405273438, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.735681533813477, "rewards/margins": 7.475322723388672, "rewards/rejected": -14.211004257202148, "step": 16752 }, { "epoch": 2.61, "learning_rate": 1.8600051869910266e-06, "logits/chosen": -2.4147870540618896, "logits/rejected": -2.6888272762298584, "logps/chosen": -189.78964233398438, "logps/rejected": -340.7301025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.991440773010254, "rewards/margins": 10.784093856811523, "rewards/rejected": -16.775535583496094, "step": 16753 }, { "epoch": 2.61, "learning_rate": 1.8592717464598785e-06, "logits/chosen": -2.483137369155884, "logits/rejected": -2.6662189960479736, "logps/chosen": -101.23907470703125, "logps/rejected": -283.71319580078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.495255947113037, "rewards/margins": 10.129658699035645, "rewards/rejected": -14.624914169311523, "step": 16754 }, { "epoch": 2.61, "learning_rate": 1.8585383059287308e-06, "logits/chosen": -2.693310022354126, "logits/rejected": -2.7910640239715576, "logps/chosen": -250.69363403320312, "logps/rejected": -563.2655029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.061610698699951, "rewards/margins": 15.43165397644043, "rewards/rejected": -22.49326515197754, "step": 16755 }, { "epoch": 2.61, "learning_rate": 1.8578048653975829e-06, "logits/chosen": -1.7557415962219238, "logits/rejected": -2.58554744720459, "logps/chosen": -188.73605346679688, "logps/rejected": -494.0545959472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.443153381347656, "rewards/margins": 12.070626258850098, "rewards/rejected": -16.51378059387207, "step": 16756 }, { "epoch": 2.61, "learning_rate": 1.8570714248664352e-06, "logits/chosen": -2.808270215988159, "logits/rejected": -3.055021286010742, "logps/chosen": -216.85049438476562, "logps/rejected": -530.420166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.982280254364014, "rewards/margins": 11.726612091064453, "rewards/rejected": -16.708892822265625, "step": 16757 }, { "epoch": 2.61, "learning_rate": 1.856337984335287e-06, "logits/chosen": -3.2167716026306152, "logits/rejected": -2.9954488277435303, "logps/chosen": -336.74755859375, "logps/rejected": -364.45458984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.90024471282959, "rewards/margins": 8.4785795211792, "rewards/rejected": -16.37882423400879, "step": 16758 }, { "epoch": 2.61, "learning_rate": 1.8556045438041393e-06, "logits/chosen": -2.587672472000122, "logits/rejected": -1.7520735263824463, "logps/chosen": -176.5685272216797, "logps/rejected": -307.0781555175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.154775619506836, "rewards/margins": 13.768538475036621, "rewards/rejected": -19.92331314086914, "step": 16759 }, { "epoch": 2.61, "learning_rate": 1.8548711032729914e-06, "logits/chosen": -2.7552530765533447, "logits/rejected": -1.8317707777023315, "logps/chosen": -193.7589874267578, "logps/rejected": -317.26715087890625, "loss": 0.7739, "rewards/accuracies": 0.5, "rewards/chosen": -7.271651268005371, "rewards/margins": 7.599189758300781, "rewards/rejected": -14.870841979980469, "step": 16760 }, { "epoch": 2.61, "learning_rate": 1.8541376627418433e-06, "logits/chosen": -2.735093116760254, "logits/rejected": -2.159916400909424, "logps/chosen": -428.3175048828125, "logps/rejected": -436.22283935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.564156532287598, "rewards/margins": 11.651872634887695, "rewards/rejected": -18.21603012084961, "step": 16761 }, { "epoch": 2.61, "learning_rate": 1.8534042222106956e-06, "logits/chosen": -3.0481748580932617, "logits/rejected": -2.829740047454834, "logps/chosen": -273.7103576660156, "logps/rejected": -242.95782470703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.495980262756348, "rewards/margins": 8.603792190551758, "rewards/rejected": -14.099772453308105, "step": 16762 }, { "epoch": 2.61, "learning_rate": 1.8526707816795475e-06, "logits/chosen": -1.8121471405029297, "logits/rejected": -2.692286968231201, "logps/chosen": -202.10317993164062, "logps/rejected": -556.16650390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.067470550537109, "rewards/margins": 11.183409690856934, "rewards/rejected": -18.25088119506836, "step": 16763 }, { "epoch": 2.61, "learning_rate": 1.8519373411484e-06, "logits/chosen": -2.7438507080078125, "logits/rejected": -1.7559998035430908, "logps/chosen": -234.4447021484375, "logps/rejected": -259.35882568359375, "loss": 0.1582, "rewards/accuracies": 1.0, "rewards/chosen": -8.170662879943848, "rewards/margins": 3.6294991970062256, "rewards/rejected": -11.800161361694336, "step": 16764 }, { "epoch": 2.61, "learning_rate": 1.8512039006172519e-06, "logits/chosen": -2.8886353969573975, "logits/rejected": -2.9364635944366455, "logps/chosen": -145.24502563476562, "logps/rejected": -257.6347961425781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.826472282409668, "rewards/margins": 9.170592308044434, "rewards/rejected": -16.9970645904541, "step": 16765 }, { "epoch": 2.61, "learning_rate": 1.8504704600861042e-06, "logits/chosen": -2.5043039321899414, "logits/rejected": -2.5426948070526123, "logps/chosen": -131.83782958984375, "logps/rejected": -283.57647705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.056709289550781, "rewards/margins": 9.143831253051758, "rewards/rejected": -16.20054054260254, "step": 16766 }, { "epoch": 2.61, "learning_rate": 1.849737019554956e-06, "logits/chosen": -2.4178860187530518, "logits/rejected": -2.649671792984009, "logps/chosen": -198.2717742919922, "logps/rejected": -416.1784362792969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.236270904541016, "rewards/margins": 11.784709930419922, "rewards/rejected": -18.020980834960938, "step": 16767 }, { "epoch": 2.61, "learning_rate": 1.8490035790238086e-06, "logits/chosen": -1.4582509994506836, "logits/rejected": -2.5338971614837646, "logps/chosen": -135.63821411132812, "logps/rejected": -361.82513427734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.861812591552734, "rewards/margins": 10.037697792053223, "rewards/rejected": -14.89950942993164, "step": 16768 }, { "epoch": 2.61, "learning_rate": 1.8482701384926605e-06, "logits/chosen": -1.662063717842102, "logits/rejected": -2.6878604888916016, "logps/chosen": -332.9442138671875, "logps/rejected": -590.478515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -11.661283493041992, "rewards/margins": 9.321590423583984, "rewards/rejected": -20.982873916625977, "step": 16769 }, { "epoch": 2.61, "learning_rate": 1.8475366979615123e-06, "logits/chosen": -1.422638177871704, "logits/rejected": -2.196368932723999, "logps/chosen": -212.1138916015625, "logps/rejected": -528.7398681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.323882102966309, "rewards/margins": 12.812259674072266, "rewards/rejected": -20.13614273071289, "step": 16770 }, { "epoch": 2.61, "learning_rate": 1.8468032574303646e-06, "logits/chosen": -2.631463050842285, "logits/rejected": -2.6122851371765137, "logps/chosen": -769.2520751953125, "logps/rejected": -697.927978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.67657470703125, "rewards/margins": 13.278078079223633, "rewards/rejected": -24.954652786254883, "step": 16771 }, { "epoch": 2.61, "learning_rate": 1.8460698168992167e-06, "logits/chosen": -2.5082569122314453, "logits/rejected": -2.637565851211548, "logps/chosen": -111.34205627441406, "logps/rejected": -281.7666320800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.567318439483643, "rewards/margins": 11.030263900756836, "rewards/rejected": -16.59758186340332, "step": 16772 }, { "epoch": 2.61, "learning_rate": 1.845336376368069e-06, "logits/chosen": -2.6075210571289062, "logits/rejected": -1.7308553457260132, "logps/chosen": -359.20904541015625, "logps/rejected": -570.249755859375, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -6.817861080169678, "rewards/margins": 9.491108894348145, "rewards/rejected": -16.308969497680664, "step": 16773 }, { "epoch": 2.61, "learning_rate": 1.844602935836921e-06, "logits/chosen": -2.684483528137207, "logits/rejected": -2.127067804336548, "logps/chosen": -610.9458618164062, "logps/rejected": -579.9520263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.399744033813477, "rewards/margins": 14.386589050292969, "rewards/rejected": -18.786333084106445, "step": 16774 }, { "epoch": 2.61, "learning_rate": 1.8438694953057732e-06, "logits/chosen": -2.6206023693084717, "logits/rejected": -2.106405735015869, "logps/chosen": -231.7751922607422, "logps/rejected": -239.18951416015625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -9.481504440307617, "rewards/margins": 6.333007335662842, "rewards/rejected": -15.8145112991333, "step": 16775 }, { "epoch": 2.61, "learning_rate": 1.8431360547746253e-06, "logits/chosen": -1.6359995603561401, "logits/rejected": -2.6761701107025146, "logps/chosen": -230.99893188476562, "logps/rejected": -401.3340148925781, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -6.106675624847412, "rewards/margins": 5.85065221786499, "rewards/rejected": -11.957327842712402, "step": 16776 }, { "epoch": 2.61, "learning_rate": 1.8424026142434776e-06, "logits/chosen": -2.641695976257324, "logits/rejected": -2.716413974761963, "logps/chosen": -247.13787841796875, "logps/rejected": -290.35479736328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.4178595542907715, "rewards/margins": 8.261698722839355, "rewards/rejected": -14.679557800292969, "step": 16777 }, { "epoch": 2.61, "learning_rate": 1.8416691737123295e-06, "logits/chosen": -2.7643678188323975, "logits/rejected": -1.4652572870254517, "logps/chosen": -601.1422119140625, "logps/rejected": -403.9222717285156, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -9.935744285583496, "rewards/margins": 9.666515350341797, "rewards/rejected": -19.60226058959961, "step": 16778 }, { "epoch": 2.61, "learning_rate": 1.8409357331811818e-06, "logits/chosen": -2.495130777359009, "logits/rejected": -2.6639809608459473, "logps/chosen": -229.9478759765625, "logps/rejected": -464.654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.298701763153076, "rewards/margins": 14.825000762939453, "rewards/rejected": -21.123703002929688, "step": 16779 }, { "epoch": 2.61, "learning_rate": 1.8402022926500337e-06, "logits/chosen": -2.269991397857666, "logits/rejected": -3.157604694366455, "logps/chosen": -152.84837341308594, "logps/rejected": -492.0140380859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.197817325592041, "rewards/margins": 11.995521545410156, "rewards/rejected": -17.19333839416504, "step": 16780 }, { "epoch": 2.61, "learning_rate": 1.8394688521188857e-06, "logits/chosen": -1.847519040107727, "logits/rejected": -2.87783145904541, "logps/chosen": -207.57272338867188, "logps/rejected": -580.5018310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.697004795074463, "rewards/margins": 10.15379524230957, "rewards/rejected": -14.850799560546875, "step": 16781 }, { "epoch": 2.61, "learning_rate": 1.838735411587738e-06, "logits/chosen": -0.4699716567993164, "logits/rejected": -2.555307149887085, "logps/chosen": -137.05572509765625, "logps/rejected": -563.129150390625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -10.260324478149414, "rewards/margins": 8.049295425415039, "rewards/rejected": -18.309619903564453, "step": 16782 }, { "epoch": 2.61, "learning_rate": 1.83800197105659e-06, "logits/chosen": -2.6994423866271973, "logits/rejected": -1.9442766904830933, "logps/chosen": -336.6299743652344, "logps/rejected": -237.6126251220703, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.737709045410156, "rewards/margins": 6.365941524505615, "rewards/rejected": -12.10365104675293, "step": 16783 }, { "epoch": 2.61, "learning_rate": 1.8372685305254422e-06, "logits/chosen": -2.784928321838379, "logits/rejected": -2.5438437461853027, "logps/chosen": -168.9696044921875, "logps/rejected": -211.38963317871094, "loss": 0.1484, "rewards/accuracies": 1.0, "rewards/chosen": -7.382498264312744, "rewards/margins": 6.086729526519775, "rewards/rejected": -13.46922779083252, "step": 16784 }, { "epoch": 2.61, "learning_rate": 1.8365350899942943e-06, "logits/chosen": -2.3225362300872803, "logits/rejected": -2.714726448059082, "logps/chosen": -162.5205078125, "logps/rejected": -365.47161865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.363945007324219, "rewards/margins": 10.770360946655273, "rewards/rejected": -19.134305953979492, "step": 16785 }, { "epoch": 2.61, "learning_rate": 1.8358016494631466e-06, "logits/chosen": -0.9428485035896301, "logits/rejected": -1.732920527458191, "logps/chosen": -176.62998962402344, "logps/rejected": -493.53057861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.942909240722656, "rewards/margins": 16.990278244018555, "rewards/rejected": -21.93318748474121, "step": 16786 }, { "epoch": 2.61, "learning_rate": 1.8350682089319985e-06, "logits/chosen": -2.5033328533172607, "logits/rejected": -2.2203190326690674, "logps/chosen": -390.86346435546875, "logps/rejected": -441.7369384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.104668617248535, "rewards/margins": 9.07516098022461, "rewards/rejected": -18.17983055114746, "step": 16787 }, { "epoch": 2.61, "learning_rate": 1.8343347684008508e-06, "logits/chosen": -2.234626531600952, "logits/rejected": -2.565378189086914, "logps/chosen": -221.048095703125, "logps/rejected": -429.8882751464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.641225814819336, "rewards/margins": 13.105273246765137, "rewards/rejected": -17.746498107910156, "step": 16788 }, { "epoch": 2.61, "learning_rate": 1.8336013278697029e-06, "logits/chosen": -2.6731224060058594, "logits/rejected": -1.5735132694244385, "logps/chosen": -231.0552978515625, "logps/rejected": -164.50714111328125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.468221664428711, "rewards/margins": 6.3842549324035645, "rewards/rejected": -10.852476119995117, "step": 16789 }, { "epoch": 2.61, "learning_rate": 1.8328678873385548e-06, "logits/chosen": -1.1024245023727417, "logits/rejected": -1.3075212240219116, "logps/chosen": -77.29551696777344, "logps/rejected": -316.1849670410156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.848974704742432, "rewards/margins": 12.591226577758789, "rewards/rejected": -18.440200805664062, "step": 16790 }, { "epoch": 2.61, "learning_rate": 1.832134446807407e-06, "logits/chosen": -2.8140223026275635, "logits/rejected": -2.8942980766296387, "logps/chosen": -115.41797637939453, "logps/rejected": -216.5284881591797, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.4009575843811035, "rewards/margins": 7.430964946746826, "rewards/rejected": -13.83192253112793, "step": 16791 }, { "epoch": 2.61, "learning_rate": 1.831401006276259e-06, "logits/chosen": -1.6732832193374634, "logits/rejected": -2.7454135417938232, "logps/chosen": -102.1311264038086, "logps/rejected": -260.3973693847656, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/chosen": -8.662485122680664, "rewards/margins": 7.107378005981445, "rewards/rejected": -15.76986312866211, "step": 16792 }, { "epoch": 2.61, "learning_rate": 1.8306675657451112e-06, "logits/chosen": -1.9056999683380127, "logits/rejected": -2.6524605751037598, "logps/chosen": -119.350341796875, "logps/rejected": -409.19000244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.377121448516846, "rewards/margins": 12.058270454406738, "rewards/rejected": -18.43539047241211, "step": 16793 }, { "epoch": 2.61, "learning_rate": 1.8299341252139633e-06, "logits/chosen": -1.9089889526367188, "logits/rejected": -2.2753098011016846, "logps/chosen": -244.8517608642578, "logps/rejected": -239.59706115722656, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/chosen": -10.580377578735352, "rewards/margins": 4.264339447021484, "rewards/rejected": -14.844717025756836, "step": 16794 }, { "epoch": 2.61, "learning_rate": 1.8292006846828156e-06, "logits/chosen": -2.7663919925689697, "logits/rejected": -2.8130953311920166, "logps/chosen": -151.71734619140625, "logps/rejected": -251.00091552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.002693176269531, "rewards/margins": 9.937438011169434, "rewards/rejected": -14.940131187438965, "step": 16795 }, { "epoch": 2.61, "learning_rate": 1.8284672441516675e-06, "logits/chosen": -2.4135615825653076, "logits/rejected": -2.786120891571045, "logps/chosen": -204.88531494140625, "logps/rejected": -505.4384460449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.0631685256958, "rewards/margins": 14.890172958374023, "rewards/rejected": -22.953340530395508, "step": 16796 }, { "epoch": 2.61, "learning_rate": 1.8277338036205198e-06, "logits/chosen": -2.6645143032073975, "logits/rejected": -1.545164942741394, "logps/chosen": -274.50640869140625, "logps/rejected": -176.32479858398438, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -6.90330696105957, "rewards/margins": 4.856044292449951, "rewards/rejected": -11.75935173034668, "step": 16797 }, { "epoch": 2.61, "learning_rate": 1.827000363089372e-06, "logits/chosen": -2.6558520793914795, "logits/rejected": -2.520148992538452, "logps/chosen": -181.67063903808594, "logps/rejected": -210.11424255371094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.329100131988525, "rewards/margins": 8.523906707763672, "rewards/rejected": -14.853007316589355, "step": 16798 }, { "epoch": 2.61, "learning_rate": 1.8262669225582242e-06, "logits/chosen": -1.2742446660995483, "logits/rejected": -2.5572826862335205, "logps/chosen": -134.988037109375, "logps/rejected": -520.2684326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.859589576721191, "rewards/margins": 15.060428619384766, "rewards/rejected": -22.920019149780273, "step": 16799 }, { "epoch": 2.61, "learning_rate": 1.825533482027076e-06, "logits/chosen": -1.952410340309143, "logits/rejected": -2.7755377292633057, "logps/chosen": -332.2262878417969, "logps/rejected": -685.933837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1336822509765625, "rewards/margins": 12.490213394165039, "rewards/rejected": -19.6238956451416, "step": 16800 }, { "epoch": 2.61, "learning_rate": 1.824800041495928e-06, "logits/chosen": -2.176948308944702, "logits/rejected": -2.7860724925994873, "logps/chosen": -167.85238647460938, "logps/rejected": -470.00677490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.145700454711914, "rewards/margins": 11.747082710266113, "rewards/rejected": -19.892784118652344, "step": 16801 }, { "epoch": 2.61, "learning_rate": 1.8240666009647805e-06, "logits/chosen": -2.043844699859619, "logits/rejected": -2.1435494422912598, "logps/chosen": -310.02764892578125, "logps/rejected": -725.1400146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6332597732543945, "rewards/margins": 15.45811939239502, "rewards/rejected": -20.091379165649414, "step": 16802 }, { "epoch": 2.61, "learning_rate": 1.8233331604336324e-06, "logits/chosen": -2.0877768993377686, "logits/rejected": -2.9449148178100586, "logps/chosen": -370.2454833984375, "logps/rejected": -758.7330322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.694201469421387, "rewards/margins": 16.27972412109375, "rewards/rejected": -23.97392463684082, "step": 16803 }, { "epoch": 2.61, "learning_rate": 1.8225997199024847e-06, "logits/chosen": -2.444044351577759, "logits/rejected": -2.198753595352173, "logps/chosen": -146.41207885742188, "logps/rejected": -192.15895080566406, "loss": 0.3591, "rewards/accuracies": 0.5, "rewards/chosen": -10.058609008789062, "rewards/margins": 4.552149295806885, "rewards/rejected": -14.610757827758789, "step": 16804 }, { "epoch": 2.61, "learning_rate": 1.8218662793713365e-06, "logits/chosen": -2.5931594371795654, "logits/rejected": -2.7029638290405273, "logps/chosen": -210.65411376953125, "logps/rejected": -325.4053649902344, "loss": 0.2153, "rewards/accuracies": 1.0, "rewards/chosen": -5.495445728302002, "rewards/margins": 8.796430587768555, "rewards/rejected": -14.291875839233398, "step": 16805 }, { "epoch": 2.61, "learning_rate": 1.821132838840189e-06, "logits/chosen": -1.31240975856781, "logits/rejected": -2.8107521533966064, "logps/chosen": -139.46786499023438, "logps/rejected": -515.3549194335938, "loss": 0.0292, "rewards/accuracies": 1.0, "rewards/chosen": -6.268886089324951, "rewards/margins": 10.53525161743164, "rewards/rejected": -16.80413818359375, "step": 16806 }, { "epoch": 2.61, "learning_rate": 1.820399398309041e-06, "logits/chosen": -2.8234622478485107, "logits/rejected": -2.892214775085449, "logps/chosen": -107.30369567871094, "logps/rejected": -244.65667724609375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -5.802091598510742, "rewards/margins": 8.899613380432129, "rewards/rejected": -14.701704978942871, "step": 16807 }, { "epoch": 2.61, "learning_rate": 1.8196659577778932e-06, "logits/chosen": -2.7609517574310303, "logits/rejected": -2.3187854290008545, "logps/chosen": -854.931640625, "logps/rejected": -567.6998901367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4976396560668945, "rewards/margins": 12.741106033325195, "rewards/rejected": -16.238744735717773, "step": 16808 }, { "epoch": 2.61, "learning_rate": 1.8189325172467451e-06, "logits/chosen": -0.8761144876480103, "logits/rejected": -2.6004467010498047, "logps/chosen": -117.2658920288086, "logps/rejected": -467.76153564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.556173324584961, "rewards/margins": 9.745594024658203, "rewards/rejected": -15.301767349243164, "step": 16809 }, { "epoch": 2.61, "learning_rate": 1.8181990767155972e-06, "logits/chosen": -2.6654305458068848, "logits/rejected": -2.4294116497039795, "logps/chosen": -350.93157958984375, "logps/rejected": -447.94183349609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.651947975158691, "rewards/margins": 11.72887897491455, "rewards/rejected": -17.380826950073242, "step": 16810 }, { "epoch": 2.61, "learning_rate": 1.8174656361844495e-06, "logits/chosen": -1.2259173393249512, "logits/rejected": -2.281146764755249, "logps/chosen": -117.00286865234375, "logps/rejected": -265.4563903808594, "loss": 0.0429, "rewards/accuracies": 1.0, "rewards/chosen": -5.832647323608398, "rewards/margins": 3.934394121170044, "rewards/rejected": -9.767041206359863, "step": 16811 }, { "epoch": 2.61, "learning_rate": 1.8167321956533014e-06, "logits/chosen": -1.734749436378479, "logits/rejected": -2.696916341781616, "logps/chosen": -149.55130004882812, "logps/rejected": -386.49468994140625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -9.948944091796875, "rewards/margins": 11.484308242797852, "rewards/rejected": -21.433252334594727, "step": 16812 }, { "epoch": 2.61, "learning_rate": 1.8159987551221537e-06, "logits/chosen": -2.8804900646209717, "logits/rejected": -2.883798837661743, "logps/chosen": -235.2254638671875, "logps/rejected": -274.13006591796875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -7.688989162445068, "rewards/margins": 5.2243876457214355, "rewards/rejected": -12.913376808166504, "step": 16813 }, { "epoch": 2.61, "learning_rate": 1.8152653145910058e-06, "logits/chosen": -2.7282357215881348, "logits/rejected": -2.4917666912078857, "logps/chosen": -169.9542694091797, "logps/rejected": -211.96949768066406, "loss": 0.9243, "rewards/accuracies": 0.5, "rewards/chosen": -8.064836502075195, "rewards/margins": 2.56705904006958, "rewards/rejected": -10.631896018981934, "step": 16814 }, { "epoch": 2.62, "learning_rate": 1.814531874059858e-06, "logits/chosen": -2.2406656742095947, "logits/rejected": -2.002645969390869, "logps/chosen": -305.49114990234375, "logps/rejected": -395.6452331542969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.529136657714844, "rewards/margins": 9.82477855682373, "rewards/rejected": -19.35391616821289, "step": 16815 }, { "epoch": 2.62, "learning_rate": 1.81379843352871e-06, "logits/chosen": -2.2303714752197266, "logits/rejected": -2.424572467803955, "logps/chosen": -242.63458251953125, "logps/rejected": -382.1899719238281, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -13.287684440612793, "rewards/margins": 6.875067710876465, "rewards/rejected": -20.162752151489258, "step": 16816 }, { "epoch": 2.62, "learning_rate": 1.8130649929975623e-06, "logits/chosen": -1.2768179178237915, "logits/rejected": -2.7613580226898193, "logps/chosen": -126.52203369140625, "logps/rejected": -437.51251220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.088218688964844, "rewards/margins": 10.822492599487305, "rewards/rejected": -18.91071128845215, "step": 16817 }, { "epoch": 2.62, "learning_rate": 1.8123315524664141e-06, "logits/chosen": -2.650212287902832, "logits/rejected": -2.5167901515960693, "logps/chosen": -478.28466796875, "logps/rejected": -545.2429809570312, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -10.044560432434082, "rewards/margins": 10.262338638305664, "rewards/rejected": -20.306900024414062, "step": 16818 }, { "epoch": 2.62, "learning_rate": 1.8115981119352662e-06, "logits/chosen": -0.5438604354858398, "logits/rejected": -2.63600754737854, "logps/chosen": -166.55288696289062, "logps/rejected": -644.7265014648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.951561450958252, "rewards/margins": 12.410888671875, "rewards/rejected": -19.362449645996094, "step": 16819 }, { "epoch": 2.62, "learning_rate": 1.8108646714041185e-06, "logits/chosen": -1.6449229717254639, "logits/rejected": -2.690058708190918, "logps/chosen": -183.2230682373047, "logps/rejected": -536.99755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.263976573944092, "rewards/margins": 14.471153259277344, "rewards/rejected": -21.735130310058594, "step": 16820 }, { "epoch": 2.62, "learning_rate": 1.8101312308729704e-06, "logits/chosen": -2.31868577003479, "logits/rejected": -2.537018299102783, "logps/chosen": -210.44171142578125, "logps/rejected": -329.83746337890625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.8212890625, "rewards/margins": 8.171987533569336, "rewards/rejected": -10.993276596069336, "step": 16821 }, { "epoch": 2.62, "learning_rate": 1.8093977903418227e-06, "logits/chosen": -3.064302444458008, "logits/rejected": -2.9177939891815186, "logps/chosen": -210.00059509277344, "logps/rejected": -236.67318725585938, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.363813400268555, "rewards/margins": 7.272865295410156, "rewards/rejected": -13.636678695678711, "step": 16822 }, { "epoch": 2.62, "learning_rate": 1.8086643498106748e-06, "logits/chosen": -2.450770854949951, "logits/rejected": -2.6729753017425537, "logps/chosen": -246.12396240234375, "logps/rejected": -447.642822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.08865213394165, "rewards/margins": 11.900896072387695, "rewards/rejected": -18.989547729492188, "step": 16823 }, { "epoch": 2.62, "learning_rate": 1.807930909279527e-06, "logits/chosen": -2.978415012359619, "logits/rejected": -2.402024269104004, "logps/chosen": -552.317138671875, "logps/rejected": -620.260986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.337020874023438, "rewards/margins": 14.182514190673828, "rewards/rejected": -24.519535064697266, "step": 16824 }, { "epoch": 2.62, "learning_rate": 1.807197468748379e-06, "logits/chosen": -2.892334461212158, "logits/rejected": -1.628758430480957, "logps/chosen": -749.6503295898438, "logps/rejected": -436.55450439453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.023413181304932, "rewards/margins": 9.531534194946289, "rewards/rejected": -15.554946899414062, "step": 16825 }, { "epoch": 2.62, "learning_rate": 1.8064640282172313e-06, "logits/chosen": -1.5350127220153809, "logits/rejected": -2.742696762084961, "logps/chosen": -141.5802764892578, "logps/rejected": -645.195556640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.519035339355469, "rewards/margins": 11.664352416992188, "rewards/rejected": -19.183387756347656, "step": 16826 }, { "epoch": 2.62, "learning_rate": 1.8057305876860834e-06, "logits/chosen": -2.951747417449951, "logits/rejected": -1.994324803352356, "logps/chosen": -267.4512023925781, "logps/rejected": -205.1637725830078, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -2.9951977729797363, "rewards/margins": 6.513064861297607, "rewards/rejected": -9.508262634277344, "step": 16827 }, { "epoch": 2.62, "learning_rate": 1.8049971471549357e-06, "logits/chosen": -2.159506320953369, "logits/rejected": -2.8264129161834717, "logps/chosen": -106.6022720336914, "logps/rejected": -258.17266845703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.237686157226562, "rewards/margins": 8.524896621704102, "rewards/rejected": -16.762582778930664, "step": 16828 }, { "epoch": 2.62, "learning_rate": 1.8042637066237875e-06, "logits/chosen": -1.9946173429489136, "logits/rejected": -2.7040512561798096, "logps/chosen": -376.6959228515625, "logps/rejected": -418.789794921875, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/chosen": -7.828607082366943, "rewards/margins": 4.716351509094238, "rewards/rejected": -12.544958114624023, "step": 16829 }, { "epoch": 2.62, "learning_rate": 1.8035302660926394e-06, "logits/chosen": -1.7524281740188599, "logits/rejected": -2.9128129482269287, "logps/chosen": -192.6138916015625, "logps/rejected": -551.3411254882812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.900580883026123, "rewards/margins": 11.633481979370117, "rewards/rejected": -18.5340633392334, "step": 16830 }, { "epoch": 2.62, "learning_rate": 1.802796825561492e-06, "logits/chosen": -1.807752251625061, "logits/rejected": -2.5721399784088135, "logps/chosen": -125.31315612792969, "logps/rejected": -353.91046142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.268680572509766, "rewards/margins": 11.143964767456055, "rewards/rejected": -19.41264533996582, "step": 16831 }, { "epoch": 2.62, "learning_rate": 1.8020633850303438e-06, "logits/chosen": -2.5753276348114014, "logits/rejected": -2.047426700592041, "logps/chosen": -188.5225372314453, "logps/rejected": -207.57374572753906, "loss": 0.0461, "rewards/accuracies": 1.0, "rewards/chosen": -7.464123249053955, "rewards/margins": 4.183311939239502, "rewards/rejected": -11.647435188293457, "step": 16832 }, { "epoch": 2.62, "learning_rate": 1.8013299444991961e-06, "logits/chosen": -2.6267192363739014, "logits/rejected": -2.9511680603027344, "logps/chosen": -156.8882293701172, "logps/rejected": -384.10540771484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.074753761291504, "rewards/margins": 10.764183044433594, "rewards/rejected": -15.838936805725098, "step": 16833 }, { "epoch": 2.62, "learning_rate": 1.800596503968048e-06, "logits/chosen": -0.8524703979492188, "logits/rejected": -2.3570456504821777, "logps/chosen": -212.6046142578125, "logps/rejected": -531.384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.947233200073242, "rewards/margins": 14.333952903747559, "rewards/rejected": -21.281185150146484, "step": 16834 }, { "epoch": 2.62, "learning_rate": 1.7998630634369003e-06, "logits/chosen": -2.8767309188842773, "logits/rejected": -2.568850040435791, "logps/chosen": -268.1503601074219, "logps/rejected": -304.0208740234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.934494972229004, "rewards/margins": 9.158515930175781, "rewards/rejected": -15.093011856079102, "step": 16835 }, { "epoch": 2.62, "learning_rate": 1.7991296229057524e-06, "logits/chosen": -2.62253999710083, "logits/rejected": -2.9300615787506104, "logps/chosen": -127.07038879394531, "logps/rejected": -412.5679931640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.896091461181641, "rewards/margins": 14.692890167236328, "rewards/rejected": -19.58898162841797, "step": 16836 }, { "epoch": 2.62, "learning_rate": 1.7983961823746047e-06, "logits/chosen": -2.306628465652466, "logits/rejected": -2.8546864986419678, "logps/chosen": -94.01028442382812, "logps/rejected": -394.02386474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7540946006774902, "rewards/margins": 13.508295059204102, "rewards/rejected": -17.26239013671875, "step": 16837 }, { "epoch": 2.62, "learning_rate": 1.7976627418434566e-06, "logits/chosen": -2.734403371810913, "logits/rejected": -2.795301675796509, "logps/chosen": -306.58148193359375, "logps/rejected": -332.99639892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.3267502784729004, "rewards/margins": 9.613128662109375, "rewards/rejected": -11.939878463745117, "step": 16838 }, { "epoch": 2.62, "learning_rate": 1.7969293013123087e-06, "logits/chosen": -2.865902900695801, "logits/rejected": -2.7025513648986816, "logps/chosen": -501.4271545410156, "logps/rejected": -473.1678771972656, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -10.449020385742188, "rewards/margins": 6.912173271179199, "rewards/rejected": -17.361194610595703, "step": 16839 }, { "epoch": 2.62, "learning_rate": 1.796195860781161e-06, "logits/chosen": -2.4123542308807373, "logits/rejected": -2.6644370555877686, "logps/chosen": -414.9920349121094, "logps/rejected": -490.34423828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.497014999389648, "rewards/margins": 8.154095649719238, "rewards/rejected": -17.651111602783203, "step": 16840 }, { "epoch": 2.62, "learning_rate": 1.7954624202500128e-06, "logits/chosen": -2.676262617111206, "logits/rejected": -2.73561954498291, "logps/chosen": -266.84515380859375, "logps/rejected": -295.1195068359375, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -5.692415237426758, "rewards/margins": 5.26922082901001, "rewards/rejected": -10.961636543273926, "step": 16841 }, { "epoch": 2.62, "learning_rate": 1.7947289797188651e-06, "logits/chosen": -2.5641212463378906, "logits/rejected": -2.457876205444336, "logps/chosen": -555.145751953125, "logps/rejected": -520.9339599609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.575957298278809, "rewards/margins": 9.351929664611816, "rewards/rejected": -17.927886962890625, "step": 16842 }, { "epoch": 2.62, "learning_rate": 1.793995539187717e-06, "logits/chosen": -2.4880752563476562, "logits/rejected": -2.7768893241882324, "logps/chosen": -289.3993835449219, "logps/rejected": -447.78546142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.28889274597168, "rewards/margins": 11.495525360107422, "rewards/rejected": -18.7844181060791, "step": 16843 }, { "epoch": 2.62, "learning_rate": 1.7932620986565695e-06, "logits/chosen": -1.9405577182769775, "logits/rejected": -2.22255539894104, "logps/chosen": -165.04364013671875, "logps/rejected": -340.42901611328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.0122246742248535, "rewards/margins": 9.968289375305176, "rewards/rejected": -15.980514526367188, "step": 16844 }, { "epoch": 2.62, "learning_rate": 1.7925286581254214e-06, "logits/chosen": -2.2497618198394775, "logits/rejected": -2.4856250286102295, "logps/chosen": -160.09243774414062, "logps/rejected": -334.2690734863281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.115693092346191, "rewards/margins": 10.24055004119873, "rewards/rejected": -16.356243133544922, "step": 16845 }, { "epoch": 2.62, "learning_rate": 1.7917952175942737e-06, "logits/chosen": -2.284517526626587, "logits/rejected": -2.7197184562683105, "logps/chosen": -101.3817138671875, "logps/rejected": -388.4378662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.933327674865723, "rewards/margins": 13.23324203491211, "rewards/rejected": -18.16657066345215, "step": 16846 }, { "epoch": 2.62, "learning_rate": 1.7910617770631256e-06, "logits/chosen": -2.788761615753174, "logits/rejected": -2.575951099395752, "logps/chosen": -624.538818359375, "logps/rejected": -536.7060546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.004208087921143, "rewards/margins": 10.5787353515625, "rewards/rejected": -15.582942962646484, "step": 16847 }, { "epoch": 2.62, "learning_rate": 1.7903283365319781e-06, "logits/chosen": -2.2285516262054443, "logits/rejected": -2.698307752609253, "logps/chosen": -109.33307647705078, "logps/rejected": -345.1863098144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.085211277008057, "rewards/margins": 12.711116790771484, "rewards/rejected": -17.796327590942383, "step": 16848 }, { "epoch": 2.62, "learning_rate": 1.78959489600083e-06, "logits/chosen": -2.61954402923584, "logits/rejected": -2.789705276489258, "logps/chosen": -355.2852783203125, "logps/rejected": -479.91119384765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -9.106340408325195, "rewards/margins": 9.833322525024414, "rewards/rejected": -18.93966293334961, "step": 16849 }, { "epoch": 2.62, "learning_rate": 1.7888614554696819e-06, "logits/chosen": -2.0247910022735596, "logits/rejected": -2.707653522491455, "logps/chosen": -132.2423095703125, "logps/rejected": -347.87481689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.01785945892334, "rewards/margins": 10.664308547973633, "rewards/rejected": -17.68216896057129, "step": 16850 }, { "epoch": 2.62, "learning_rate": 1.7881280149385342e-06, "logits/chosen": -2.117605686187744, "logits/rejected": -2.7239859104156494, "logps/chosen": -228.77127075195312, "logps/rejected": -596.9482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.00396728515625, "rewards/margins": 17.343303680419922, "rewards/rejected": -25.347270965576172, "step": 16851 }, { "epoch": 2.62, "learning_rate": 1.7873945744073863e-06, "logits/chosen": -1.2354124784469604, "logits/rejected": -2.4532718658447266, "logps/chosen": -136.84584045410156, "logps/rejected": -313.16119384765625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -10.489255905151367, "rewards/margins": 5.486105918884277, "rewards/rejected": -15.975360870361328, "step": 16852 }, { "epoch": 2.62, "learning_rate": 1.7866611338762386e-06, "logits/chosen": -2.8084592819213867, "logits/rejected": -2.8265297412872314, "logps/chosen": -428.8029479980469, "logps/rejected": -519.8543090820312, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.915369033813477, "rewards/margins": 10.921958923339844, "rewards/rejected": -16.83732795715332, "step": 16853 }, { "epoch": 2.62, "learning_rate": 1.7859276933450904e-06, "logits/chosen": -2.0499987602233887, "logits/rejected": -2.5416107177734375, "logps/chosen": -161.8787841796875, "logps/rejected": -443.4693603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1223955154418945, "rewards/margins": 15.995113372802734, "rewards/rejected": -22.117507934570312, "step": 16854 }, { "epoch": 2.62, "learning_rate": 1.7851942528139427e-06, "logits/chosen": -2.9253039360046387, "logits/rejected": -2.744112491607666, "logps/chosen": -379.2087707519531, "logps/rejected": -613.2847290039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.066595554351807, "rewards/margins": 16.03132438659668, "rewards/rejected": -23.097919464111328, "step": 16855 }, { "epoch": 2.62, "learning_rate": 1.7844608122827948e-06, "logits/chosen": -1.3454997539520264, "logits/rejected": -2.6529829502105713, "logps/chosen": -200.62973022460938, "logps/rejected": -453.43463134765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.750720024108887, "rewards/margins": 8.280654907226562, "rewards/rejected": -15.031375885009766, "step": 16856 }, { "epoch": 2.62, "learning_rate": 1.7837273717516471e-06, "logits/chosen": -2.8115766048431396, "logits/rejected": -2.18369722366333, "logps/chosen": -190.0242919921875, "logps/rejected": -141.01345825195312, "loss": 0.0808, "rewards/accuracies": 1.0, "rewards/chosen": -7.6034746170043945, "rewards/margins": 4.739177703857422, "rewards/rejected": -12.342653274536133, "step": 16857 }, { "epoch": 2.62, "learning_rate": 1.782993931220499e-06, "logits/chosen": -2.4523119926452637, "logits/rejected": -2.8405885696411133, "logps/chosen": -67.47612762451172, "logps/rejected": -269.834228515625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.220844745635986, "rewards/margins": 9.77565860748291, "rewards/rejected": -14.996503829956055, "step": 16858 }, { "epoch": 2.62, "learning_rate": 1.7822604906893509e-06, "logits/chosen": -2.932234764099121, "logits/rejected": -2.4595096111297607, "logps/chosen": -255.84527587890625, "logps/rejected": -521.8701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.051939964294434, "rewards/margins": 13.961268424987793, "rewards/rejected": -21.013208389282227, "step": 16859 }, { "epoch": 2.62, "learning_rate": 1.7815270501582032e-06, "logits/chosen": -2.696749448776245, "logits/rejected": -1.931891918182373, "logps/chosen": -492.2081298828125, "logps/rejected": -439.7247314453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -10.304326057434082, "rewards/margins": 12.928150177001953, "rewards/rejected": -23.23247718811035, "step": 16860 }, { "epoch": 2.62, "learning_rate": 1.7807936096270553e-06, "logits/chosen": -2.4434144496917725, "logits/rejected": -2.064732313156128, "logps/chosen": -206.76400756835938, "logps/rejected": -412.20367431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.615105628967285, "rewards/margins": 11.727774620056152, "rewards/rejected": -18.342880249023438, "step": 16861 }, { "epoch": 2.62, "learning_rate": 1.7800601690959076e-06, "logits/chosen": -1.735826015472412, "logits/rejected": -2.335514545440674, "logps/chosen": -151.6429901123047, "logps/rejected": -437.4096984863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.486754417419434, "rewards/margins": 13.76640510559082, "rewards/rejected": -21.25316047668457, "step": 16862 }, { "epoch": 2.62, "learning_rate": 1.7793267285647595e-06, "logits/chosen": -2.674724578857422, "logits/rejected": -1.900948405265808, "logps/chosen": -249.20814514160156, "logps/rejected": -279.8853759765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.3368332386016846, "rewards/margins": 11.67436408996582, "rewards/rejected": -15.011198043823242, "step": 16863 }, { "epoch": 2.62, "learning_rate": 1.7785932880336118e-06, "logits/chosen": -1.6307672262191772, "logits/rejected": -2.653634548187256, "logps/chosen": -158.47781372070312, "logps/rejected": -408.9083251953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.569245338439941, "rewards/margins": 11.524148941040039, "rewards/rejected": -17.093395233154297, "step": 16864 }, { "epoch": 2.62, "learning_rate": 1.7778598475024638e-06, "logits/chosen": -2.012510061264038, "logits/rejected": -2.4989776611328125, "logps/chosen": -174.4178466796875, "logps/rejected": -409.4429016113281, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -5.952889919281006, "rewards/margins": 7.34928035736084, "rewards/rejected": -13.302169799804688, "step": 16865 }, { "epoch": 2.62, "learning_rate": 1.7771264069713162e-06, "logits/chosen": -1.2369552850723267, "logits/rejected": -2.548967123031616, "logps/chosen": -134.26174926757812, "logps/rejected": -350.3880920410156, "loss": 0.1262, "rewards/accuracies": 1.0, "rewards/chosen": -7.58419132232666, "rewards/margins": 7.348808288574219, "rewards/rejected": -14.932999610900879, "step": 16866 }, { "epoch": 2.62, "learning_rate": 1.776392966440168e-06, "logits/chosen": -2.8195557594299316, "logits/rejected": -2.8163881301879883, "logps/chosen": -190.66627502441406, "logps/rejected": -342.69134521484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.7896270751953125, "rewards/margins": 9.741629600524902, "rewards/rejected": -17.53125762939453, "step": 16867 }, { "epoch": 2.62, "learning_rate": 1.7756595259090203e-06, "logits/chosen": -2.585015058517456, "logits/rejected": -2.0413458347320557, "logps/chosen": -359.31915283203125, "logps/rejected": -514.0135498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.863155364990234, "rewards/margins": 13.134180068969727, "rewards/rejected": -18.997333526611328, "step": 16868 }, { "epoch": 2.62, "learning_rate": 1.7749260853778724e-06, "logits/chosen": -1.1938246488571167, "logits/rejected": -2.0531606674194336, "logps/chosen": -146.58473205566406, "logps/rejected": -386.6916809082031, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.5059814453125, "rewards/margins": 11.95824146270752, "rewards/rejected": -18.464221954345703, "step": 16869 }, { "epoch": 2.62, "learning_rate": 1.7741926448467243e-06, "logits/chosen": -2.2504734992980957, "logits/rejected": -2.611334800720215, "logps/chosen": -66.15031433105469, "logps/rejected": -327.884033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.085901737213135, "rewards/margins": 13.291393280029297, "rewards/rejected": -17.377294540405273, "step": 16870 }, { "epoch": 2.62, "learning_rate": 1.7734592043155766e-06, "logits/chosen": -2.7400777339935303, "logits/rejected": -2.4497289657592773, "logps/chosen": -722.448486328125, "logps/rejected": -783.3749389648438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.410262107849121, "rewards/margins": 8.639947891235352, "rewards/rejected": -17.050209045410156, "step": 16871 }, { "epoch": 2.62, "learning_rate": 1.7727257637844285e-06, "logits/chosen": -2.1239991188049316, "logits/rejected": -2.6237170696258545, "logps/chosen": -163.61639404296875, "logps/rejected": -275.35650634765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.003142356872559, "rewards/margins": 8.877456665039062, "rewards/rejected": -14.880598068237305, "step": 16872 }, { "epoch": 2.62, "learning_rate": 1.771992323253281e-06, "logits/chosen": -2.317397117614746, "logits/rejected": -2.7740819454193115, "logps/chosen": -210.6331787109375, "logps/rejected": -410.40631103515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.572885513305664, "rewards/margins": 9.756951332092285, "rewards/rejected": -15.329837799072266, "step": 16873 }, { "epoch": 2.62, "learning_rate": 1.7712588827221329e-06, "logits/chosen": -2.377713918685913, "logits/rejected": -2.6299517154693604, "logps/chosen": -186.09844970703125, "logps/rejected": -330.1495056152344, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.801076889038086, "rewards/margins": 9.1920166015625, "rewards/rejected": -16.993093490600586, "step": 16874 }, { "epoch": 2.62, "learning_rate": 1.7705254421909852e-06, "logits/chosen": -2.659789800643921, "logits/rejected": -2.615107774734497, "logps/chosen": -225.87075805664062, "logps/rejected": -291.99957275390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.723395347595215, "rewards/margins": 8.702581405639648, "rewards/rejected": -17.42597770690918, "step": 16875 }, { "epoch": 2.62, "learning_rate": 1.769792001659837e-06, "logits/chosen": -2.326138734817505, "logits/rejected": -2.6839382648468018, "logps/chosen": -462.1985168457031, "logps/rejected": -689.5294799804688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.331953048706055, "rewards/margins": 8.87820053100586, "rewards/rejected": -18.210155487060547, "step": 16876 }, { "epoch": 2.62, "learning_rate": 1.7690585611286894e-06, "logits/chosen": -2.598890781402588, "logits/rejected": -3.12396502494812, "logps/chosen": -113.17529296875, "logps/rejected": -303.98944091796875, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.990206718444824, "rewards/margins": 6.342277526855469, "rewards/rejected": -13.332484245300293, "step": 16877 }, { "epoch": 2.62, "learning_rate": 1.7683251205975414e-06, "logits/chosen": -2.905010461807251, "logits/rejected": -2.365121364593506, "logps/chosen": -229.79400634765625, "logps/rejected": -193.57965087890625, "loss": 0.6111, "rewards/accuracies": 0.5, "rewards/chosen": -8.034139633178711, "rewards/margins": 3.712198495864868, "rewards/rejected": -11.746338844299316, "step": 16878 }, { "epoch": 2.63, "learning_rate": 1.7675916800663933e-06, "logits/chosen": -1.2324436902999878, "logits/rejected": -2.0322301387786865, "logps/chosen": -147.6557159423828, "logps/rejected": -278.6839599609375, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -5.588696479797363, "rewards/margins": 9.312573432922363, "rewards/rejected": -14.901269912719727, "step": 16879 }, { "epoch": 2.63, "learning_rate": 1.7668582395352456e-06, "logits/chosen": -2.7743263244628906, "logits/rejected": -2.238088369369507, "logps/chosen": -702.1466064453125, "logps/rejected": -416.7985534667969, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": -8.618908882141113, "rewards/margins": 3.149305582046509, "rewards/rejected": -11.768214225769043, "step": 16880 }, { "epoch": 2.63, "learning_rate": 1.7661247990040977e-06, "logits/chosen": -2.2699339389801025, "logits/rejected": -2.859224557876587, "logps/chosen": -167.083740234375, "logps/rejected": -407.76190185546875, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -6.510105133056641, "rewards/margins": 6.516589641571045, "rewards/rejected": -13.026695251464844, "step": 16881 }, { "epoch": 2.63, "learning_rate": 1.76539135847295e-06, "logits/chosen": -3.0374176502227783, "logits/rejected": -2.925891160964966, "logps/chosen": -551.0902099609375, "logps/rejected": -475.3338928222656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.130148410797119, "rewards/margins": 11.250343322753906, "rewards/rejected": -18.3804931640625, "step": 16882 }, { "epoch": 2.63, "learning_rate": 1.7646579179418019e-06, "logits/chosen": -2.3958334922790527, "logits/rejected": -2.9902641773223877, "logps/chosen": -332.58868408203125, "logps/rejected": -749.9033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.77839183807373, "rewards/margins": 13.848227500915527, "rewards/rejected": -22.62662124633789, "step": 16883 }, { "epoch": 2.63, "learning_rate": 1.763924477410654e-06, "logits/chosen": -1.242271065711975, "logits/rejected": -2.371473789215088, "logps/chosen": -163.12371826171875, "logps/rejected": -444.93682861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.288087844848633, "rewards/margins": 14.179576873779297, "rewards/rejected": -20.467666625976562, "step": 16884 }, { "epoch": 2.63, "learning_rate": 1.763191036879506e-06, "logits/chosen": -1.740395426750183, "logits/rejected": -2.692898750305176, "logps/chosen": -146.50604248046875, "logps/rejected": -369.10137939453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.5426483154296875, "rewards/margins": 9.804948806762695, "rewards/rejected": -17.347597122192383, "step": 16885 }, { "epoch": 2.63, "learning_rate": 1.7624575963483584e-06, "logits/chosen": -2.529012441635132, "logits/rejected": -2.254075288772583, "logps/chosen": -479.6207275390625, "logps/rejected": -468.0551452636719, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.886645317077637, "rewards/margins": 8.504180908203125, "rewards/rejected": -17.390825271606445, "step": 16886 }, { "epoch": 2.63, "learning_rate": 1.7617241558172105e-06, "logits/chosen": -1.37208890914917, "logits/rejected": -2.6399405002593994, "logps/chosen": -220.68601989746094, "logps/rejected": -541.4981689453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.558346271514893, "rewards/margins": 12.663509368896484, "rewards/rejected": -19.22185516357422, "step": 16887 }, { "epoch": 2.63, "learning_rate": 1.7609907152860626e-06, "logits/chosen": -2.4089343547821045, "logits/rejected": -2.7099714279174805, "logps/chosen": -117.95948791503906, "logps/rejected": -338.4873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.38643741607666, "rewards/margins": 11.883844375610352, "rewards/rejected": -17.270280838012695, "step": 16888 }, { "epoch": 2.63, "learning_rate": 1.7602572747549146e-06, "logits/chosen": -2.3915085792541504, "logits/rejected": -2.655817747116089, "logps/chosen": -103.86993408203125, "logps/rejected": -266.30078125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.049018859863281, "rewards/margins": 9.736376762390137, "rewards/rejected": -15.785394668579102, "step": 16889 }, { "epoch": 2.63, "learning_rate": 1.759523834223767e-06, "logits/chosen": -2.1097731590270996, "logits/rejected": -2.579742908477783, "logps/chosen": -123.70648956298828, "logps/rejected": -345.6839599609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -9.238511085510254, "rewards/margins": 10.868598937988281, "rewards/rejected": -20.10710906982422, "step": 16890 }, { "epoch": 2.63, "learning_rate": 1.758790393692619e-06, "logits/chosen": -2.6809592247009277, "logits/rejected": -2.821113109588623, "logps/chosen": -106.30287170410156, "logps/rejected": -343.01416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.870388031005859, "rewards/margins": 12.513571739196777, "rewards/rejected": -18.38395881652832, "step": 16891 }, { "epoch": 2.63, "learning_rate": 1.7580569531614711e-06, "logits/chosen": -1.9837485551834106, "logits/rejected": -2.5701005458831787, "logps/chosen": -382.2773132324219, "logps/rejected": -589.6114501953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -14.651294708251953, "rewards/margins": 8.117677688598633, "rewards/rejected": -22.76897430419922, "step": 16892 }, { "epoch": 2.63, "learning_rate": 1.757323512630323e-06, "logits/chosen": -2.5758421421051025, "logits/rejected": -0.8934893608093262, "logps/chosen": -204.40184020996094, "logps/rejected": -259.23468017578125, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/chosen": -8.08023738861084, "rewards/margins": 7.968259334564209, "rewards/rejected": -16.04849624633789, "step": 16893 }, { "epoch": 2.63, "learning_rate": 1.7565900720991753e-06, "logits/chosen": -2.6588211059570312, "logits/rejected": -1.7213127613067627, "logps/chosen": -322.4154052734375, "logps/rejected": -366.43951416015625, "loss": 0.5847, "rewards/accuracies": 0.5, "rewards/chosen": -9.119760513305664, "rewards/margins": 6.449923038482666, "rewards/rejected": -15.569684028625488, "step": 16894 }, { "epoch": 2.63, "learning_rate": 1.7558566315680274e-06, "logits/chosen": -2.7007203102111816, "logits/rejected": -2.411025285720825, "logps/chosen": -647.9363403320312, "logps/rejected": -971.7564697265625, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -7.772923469543457, "rewards/margins": 8.361748695373535, "rewards/rejected": -16.134672164916992, "step": 16895 }, { "epoch": 2.63, "learning_rate": 1.7551231910368795e-06, "logits/chosen": -2.3540213108062744, "logits/rejected": -2.650317907333374, "logps/chosen": -213.18321228027344, "logps/rejected": -396.34869384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.469002723693848, "rewards/margins": 10.027811050415039, "rewards/rejected": -18.496814727783203, "step": 16896 }, { "epoch": 2.63, "learning_rate": 1.7543897505057316e-06, "logits/chosen": -2.1589386463165283, "logits/rejected": -2.7728207111358643, "logps/chosen": -136.15255737304688, "logps/rejected": -284.3775939941406, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -7.87437105178833, "rewards/margins": 7.439687728881836, "rewards/rejected": -15.314058303833008, "step": 16897 }, { "epoch": 2.63, "learning_rate": 1.7536563099745837e-06, "logits/chosen": -2.449148654937744, "logits/rejected": -2.6461925506591797, "logps/chosen": -210.50173950195312, "logps/rejected": -366.17706298828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.226962089538574, "rewards/margins": 8.099618911743164, "rewards/rejected": -14.326580047607422, "step": 16898 }, { "epoch": 2.63, "learning_rate": 1.752922869443436e-06, "logits/chosen": -2.540235996246338, "logits/rejected": -2.889531373977661, "logps/chosen": -172.4678955078125, "logps/rejected": -427.01959228515625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.367332458496094, "rewards/margins": 6.680539131164551, "rewards/rejected": -15.047871589660645, "step": 16899 }, { "epoch": 2.63, "learning_rate": 1.752189428912288e-06, "logits/chosen": -2.6450183391571045, "logits/rejected": -2.421081304550171, "logps/chosen": -514.88427734375, "logps/rejected": -492.93255615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.244440078735352, "rewards/margins": 16.012653350830078, "rewards/rejected": -20.25709342956543, "step": 16900 }, { "epoch": 2.63, "learning_rate": 1.7514559883811401e-06, "logits/chosen": -2.2161343097686768, "logits/rejected": -1.9422225952148438, "logps/chosen": -435.065673828125, "logps/rejected": -524.3683471679688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.545202255249023, "rewards/margins": 13.470571517944336, "rewards/rejected": -23.01577377319336, "step": 16901 }, { "epoch": 2.63, "learning_rate": 1.7507225478499922e-06, "logits/chosen": -2.0278406143188477, "logits/rejected": -2.6410698890686035, "logps/chosen": -148.7569580078125, "logps/rejected": -434.10772705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.142547607421875, "rewards/margins": 12.253606796264648, "rewards/rejected": -17.396154403686523, "step": 16902 }, { "epoch": 2.63, "learning_rate": 1.7499891073188443e-06, "logits/chosen": -2.338376760482788, "logits/rejected": -2.6164214611053467, "logps/chosen": -247.26968383789062, "logps/rejected": -465.91546630859375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.555992126464844, "rewards/margins": 7.6873297691345215, "rewards/rejected": -15.243322372436523, "step": 16903 }, { "epoch": 2.63, "learning_rate": 1.7492556667876964e-06, "logits/chosen": -2.8029935359954834, "logits/rejected": -2.8189454078674316, "logps/chosen": -847.6209106445312, "logps/rejected": -869.5172729492188, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -8.82569694519043, "rewards/margins": 6.123775482177734, "rewards/rejected": -14.949472427368164, "step": 16904 }, { "epoch": 2.63, "learning_rate": 1.7485222262565485e-06, "logits/chosen": -2.6503639221191406, "logits/rejected": -2.623166799545288, "logps/chosen": -99.31431579589844, "logps/rejected": -264.802001953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.86626672744751, "rewards/margins": 10.305423736572266, "rewards/rejected": -16.171689987182617, "step": 16905 }, { "epoch": 2.63, "learning_rate": 1.7477887857254006e-06, "logits/chosen": -2.5324084758758545, "logits/rejected": -2.7434494495391846, "logps/chosen": -118.44406127929688, "logps/rejected": -331.1024169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.773637771606445, "rewards/margins": 12.800056457519531, "rewards/rejected": -18.573694229125977, "step": 16906 }, { "epoch": 2.63, "learning_rate": 1.747055345194253e-06, "logits/chosen": -2.061070680618286, "logits/rejected": -2.6139559745788574, "logps/chosen": -190.0841522216797, "logps/rejected": -441.29931640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.568975448608398, "rewards/margins": 8.839619636535645, "rewards/rejected": -19.40859603881836, "step": 16907 }, { "epoch": 2.63, "learning_rate": 1.746321904663105e-06, "logits/chosen": -2.254284143447876, "logits/rejected": -2.758585214614868, "logps/chosen": -188.40283203125, "logps/rejected": -523.8679809570312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.39249324798584, "rewards/margins": 14.09980583190918, "rewards/rejected": -22.492298126220703, "step": 16908 }, { "epoch": 2.63, "learning_rate": 1.745588464131957e-06, "logits/chosen": -1.9171549081802368, "logits/rejected": -2.7014381885528564, "logps/chosen": -161.80728149414062, "logps/rejected": -552.6261596679688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.868902206420898, "rewards/margins": 12.011695861816406, "rewards/rejected": -18.880599975585938, "step": 16909 }, { "epoch": 2.63, "learning_rate": 1.7448550236008092e-06, "logits/chosen": -2.508537769317627, "logits/rejected": -2.42410945892334, "logps/chosen": -614.1065673828125, "logps/rejected": -518.4360961914062, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": -6.536778450012207, "rewards/margins": 10.12977409362793, "rewards/rejected": -16.66655158996582, "step": 16910 }, { "epoch": 2.63, "learning_rate": 1.7441215830696615e-06, "logits/chosen": -2.22251033782959, "logits/rejected": -2.5869853496551514, "logps/chosen": -229.55374145507812, "logps/rejected": -372.5030822753906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.915740013122559, "rewards/margins": 9.289908409118652, "rewards/rejected": -14.205648422241211, "step": 16911 }, { "epoch": 2.63, "learning_rate": 1.7433881425385136e-06, "logits/chosen": -1.5444378852844238, "logits/rejected": -2.5533158779144287, "logps/chosen": -197.42941284179688, "logps/rejected": -403.904541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.744096755981445, "rewards/margins": 11.877897262573242, "rewards/rejected": -18.621994018554688, "step": 16912 }, { "epoch": 2.63, "learning_rate": 1.7426547020073654e-06, "logits/chosen": -2.4204678535461426, "logits/rejected": -1.7341036796569824, "logps/chosen": -128.46018981933594, "logps/rejected": -174.83804321289062, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.412507057189941, "rewards/margins": 7.169063568115234, "rewards/rejected": -14.581571578979492, "step": 16913 }, { "epoch": 2.63, "learning_rate": 1.7419212614762175e-06, "logits/chosen": -1.9908090829849243, "logits/rejected": -2.6854562759399414, "logps/chosen": -122.16964721679688, "logps/rejected": -427.79840087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.740412712097168, "rewards/margins": 13.942481994628906, "rewards/rejected": -19.682893753051758, "step": 16914 }, { "epoch": 2.63, "learning_rate": 1.7411878209450698e-06, "logits/chosen": -2.4999232292175293, "logits/rejected": -2.9547626972198486, "logps/chosen": -79.02122497558594, "logps/rejected": -236.0140380859375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -6.20894193649292, "rewards/margins": 5.503905296325684, "rewards/rejected": -11.712846755981445, "step": 16915 }, { "epoch": 2.63, "learning_rate": 1.740454380413922e-06, "logits/chosen": -1.4387273788452148, "logits/rejected": -2.3403327465057373, "logps/chosen": -264.27398681640625, "logps/rejected": -658.7728271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.426172733306885, "rewards/margins": 18.090848922729492, "rewards/rejected": -23.51702117919922, "step": 16916 }, { "epoch": 2.63, "learning_rate": 1.739720939882774e-06, "logits/chosen": -1.298110842704773, "logits/rejected": -2.352846622467041, "logps/chosen": -241.8272705078125, "logps/rejected": -369.3843688964844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.763296127319336, "rewards/margins": 9.684135437011719, "rewards/rejected": -17.447433471679688, "step": 16917 }, { "epoch": 2.63, "learning_rate": 1.738987499351626e-06, "logits/chosen": -2.924165725708008, "logits/rejected": -3.1307990550994873, "logps/chosen": -101.4502182006836, "logps/rejected": -266.534912109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.688755989074707, "rewards/margins": 9.264904022216797, "rewards/rejected": -16.953659057617188, "step": 16918 }, { "epoch": 2.63, "learning_rate": 1.7382540588204782e-06, "logits/chosen": -2.430737018585205, "logits/rejected": -2.3954336643218994, "logps/chosen": -398.8959045410156, "logps/rejected": -434.6734924316406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.88614559173584, "rewards/margins": 11.324003219604492, "rewards/rejected": -18.210147857666016, "step": 16919 }, { "epoch": 2.63, "learning_rate": 1.7375206182893305e-06, "logits/chosen": -2.076533079147339, "logits/rejected": -2.458359718322754, "logps/chosen": -275.7616882324219, "logps/rejected": -449.39013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.812841892242432, "rewards/margins": 13.776310920715332, "rewards/rejected": -18.589153289794922, "step": 16920 }, { "epoch": 2.63, "learning_rate": 1.7367871777581826e-06, "logits/chosen": -1.4167999029159546, "logits/rejected": -2.672820568084717, "logps/chosen": -204.44769287109375, "logps/rejected": -582.5921020507812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.203591346740723, "rewards/margins": 11.648031234741211, "rewards/rejected": -18.85162353515625, "step": 16921 }, { "epoch": 2.63, "learning_rate": 1.7360537372270347e-06, "logits/chosen": -1.1896144151687622, "logits/rejected": -2.6718437671661377, "logps/chosen": -128.76531982421875, "logps/rejected": -414.38507080078125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -10.35365104675293, "rewards/margins": 10.101337432861328, "rewards/rejected": -20.45499038696289, "step": 16922 }, { "epoch": 2.63, "learning_rate": 1.7353202966958866e-06, "logits/chosen": -2.786126136779785, "logits/rejected": -2.7567830085754395, "logps/chosen": -355.24041748046875, "logps/rejected": -382.8056640625, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": -6.194590091705322, "rewards/margins": 8.464865684509277, "rewards/rejected": -14.659456253051758, "step": 16923 }, { "epoch": 2.63, "learning_rate": 1.7345868561647389e-06, "logits/chosen": -2.6681876182556152, "logits/rejected": -2.7347402572631836, "logps/chosen": -201.82565307617188, "logps/rejected": -339.97235107421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.135891914367676, "rewards/margins": 10.447624206542969, "rewards/rejected": -16.583515167236328, "step": 16924 }, { "epoch": 2.63, "learning_rate": 1.733853415633591e-06, "logits/chosen": -2.4859697818756104, "logits/rejected": -2.5355520248413086, "logps/chosen": -792.6480102539062, "logps/rejected": -636.2872924804688, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -9.617281913757324, "rewards/margins": 7.81333589553833, "rewards/rejected": -17.430618286132812, "step": 16925 }, { "epoch": 2.63, "learning_rate": 1.733119975102443e-06, "logits/chosen": -2.4758071899414062, "logits/rejected": -2.8360581398010254, "logps/chosen": -275.8247985839844, "logps/rejected": -497.5348815917969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.503288269042969, "rewards/margins": 12.667671203613281, "rewards/rejected": -22.17095947265625, "step": 16926 }, { "epoch": 2.63, "learning_rate": 1.7323865345712951e-06, "logits/chosen": -2.7123188972473145, "logits/rejected": -2.9036824703216553, "logps/chosen": -152.7908477783203, "logps/rejected": -310.2327880859375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.745560646057129, "rewards/margins": 7.812829494476318, "rewards/rejected": -14.558389663696289, "step": 16927 }, { "epoch": 2.63, "learning_rate": 1.7316530940401474e-06, "logits/chosen": -1.7941148281097412, "logits/rejected": -2.9129419326782227, "logps/chosen": -304.0684814453125, "logps/rejected": -552.216064453125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.780095100402832, "rewards/margins": 9.833658218383789, "rewards/rejected": -14.613753318786621, "step": 16928 }, { "epoch": 2.63, "learning_rate": 1.7309196535089995e-06, "logits/chosen": -2.162120819091797, "logits/rejected": -3.0191450119018555, "logps/chosen": -139.31637573242188, "logps/rejected": -340.6976318359375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.788543701171875, "rewards/margins": 8.144935607910156, "rewards/rejected": -13.933479309082031, "step": 16929 }, { "epoch": 2.63, "learning_rate": 1.7301862129778516e-06, "logits/chosen": -2.5797195434570312, "logits/rejected": -1.477509617805481, "logps/chosen": -441.3603515625, "logps/rejected": -366.4013671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -10.353261947631836, "rewards/margins": 7.313572883605957, "rewards/rejected": -17.66683578491211, "step": 16930 }, { "epoch": 2.63, "learning_rate": 1.7294527724467037e-06, "logits/chosen": -2.6074841022491455, "logits/rejected": -2.7058756351470947, "logps/chosen": -297.5967712402344, "logps/rejected": -481.05755615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.10350513458252, "rewards/margins": 11.558826446533203, "rewards/rejected": -20.662330627441406, "step": 16931 }, { "epoch": 2.63, "learning_rate": 1.728719331915556e-06, "logits/chosen": -2.5030295848846436, "logits/rejected": -2.744293212890625, "logps/chosen": -145.0732421875, "logps/rejected": -510.0738525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.177536487579346, "rewards/margins": 14.070024490356445, "rewards/rejected": -20.247562408447266, "step": 16932 }, { "epoch": 2.63, "learning_rate": 1.7279858913844079e-06, "logits/chosen": -2.4735400676727295, "logits/rejected": -1.4198558330535889, "logps/chosen": -261.8804931640625, "logps/rejected": -388.60699462890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.51854944229126, "rewards/margins": 8.632436752319336, "rewards/rejected": -16.150985717773438, "step": 16933 }, { "epoch": 2.63, "learning_rate": 1.72725245085326e-06, "logits/chosen": -2.8217453956604004, "logits/rejected": -2.340064287185669, "logps/chosen": -385.245849609375, "logps/rejected": -449.381103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.208928108215332, "rewards/margins": 13.533510208129883, "rewards/rejected": -16.74243927001953, "step": 16934 }, { "epoch": 2.63, "learning_rate": 1.726519010322112e-06, "logits/chosen": -2.340412139892578, "logits/rejected": -2.7579238414764404, "logps/chosen": -115.67341613769531, "logps/rejected": -255.16754150390625, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -7.805356979370117, "rewards/margins": 7.288456916809082, "rewards/rejected": -15.093814849853516, "step": 16935 }, { "epoch": 2.63, "learning_rate": 1.7257855697909644e-06, "logits/chosen": -2.9093575477600098, "logits/rejected": -2.123344659805298, "logps/chosen": -508.2451477050781, "logps/rejected": -502.6036376953125, "loss": 0.9553, "rewards/accuracies": 0.5, "rewards/chosen": -8.271857261657715, "rewards/margins": 5.22893762588501, "rewards/rejected": -13.500795364379883, "step": 16936 }, { "epoch": 2.63, "learning_rate": 1.7250521292598164e-06, "logits/chosen": -1.7971925735473633, "logits/rejected": -2.596698522567749, "logps/chosen": -169.0889892578125, "logps/rejected": -414.88604736328125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -9.843350410461426, "rewards/margins": 10.420116424560547, "rewards/rejected": -20.263465881347656, "step": 16937 }, { "epoch": 2.63, "learning_rate": 1.7243186887286685e-06, "logits/chosen": -2.2658562660217285, "logits/rejected": -2.8432819843292236, "logps/chosen": -172.41104125976562, "logps/rejected": -416.66632080078125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -7.779965400695801, "rewards/margins": 12.413283348083496, "rewards/rejected": -20.193248748779297, "step": 16938 }, { "epoch": 2.63, "learning_rate": 1.7235852481975206e-06, "logits/chosen": -2.08133864402771, "logits/rejected": -2.8746933937072754, "logps/chosen": -180.62600708007812, "logps/rejected": -411.6614990234375, "loss": 0.0147, "rewards/accuracies": 1.0, "rewards/chosen": -6.140601634979248, "rewards/margins": 4.736653804779053, "rewards/rejected": -10.8772554397583, "step": 16939 }, { "epoch": 2.63, "learning_rate": 1.7228518076663727e-06, "logits/chosen": -0.8705278038978577, "logits/rejected": -2.688979387283325, "logps/chosen": -217.7544403076172, "logps/rejected": -508.174072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.417834281921387, "rewards/margins": 12.122337341308594, "rewards/rejected": -19.540172576904297, "step": 16940 }, { "epoch": 2.63, "learning_rate": 1.722118367135225e-06, "logits/chosen": -1.7591123580932617, "logits/rejected": -2.564581871032715, "logps/chosen": -384.2232971191406, "logps/rejected": -682.5195922851562, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -8.590066909790039, "rewards/margins": 7.301435470581055, "rewards/rejected": -15.891502380371094, "step": 16941 }, { "epoch": 2.63, "learning_rate": 1.721384926604077e-06, "logits/chosen": -2.7344906330108643, "logits/rejected": -1.8956845998764038, "logps/chosen": -522.1799926757812, "logps/rejected": -340.6838073730469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.559531211853027, "rewards/margins": 8.64966869354248, "rewards/rejected": -18.209199905395508, "step": 16942 }, { "epoch": 2.63, "learning_rate": 1.720651486072929e-06, "logits/chosen": -2.848332166671753, "logits/rejected": -2.7690203189849854, "logps/chosen": -347.93072509765625, "logps/rejected": -432.19757080078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.435203552246094, "rewards/margins": 8.234888076782227, "rewards/rejected": -15.67009162902832, "step": 16943 }, { "epoch": 2.64, "learning_rate": 1.719918045541781e-06, "logits/chosen": -2.5995230674743652, "logits/rejected": -2.648895502090454, "logps/chosen": -175.97882080078125, "logps/rejected": -295.9432373046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.094301223754883, "rewards/margins": 9.277153015136719, "rewards/rejected": -16.37145233154297, "step": 16944 }, { "epoch": 2.64, "learning_rate": 1.7191846050106334e-06, "logits/chosen": -1.7298399209976196, "logits/rejected": -2.761026382446289, "logps/chosen": -284.4407043457031, "logps/rejected": -531.0947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.225665092468262, "rewards/margins": 10.373031616210938, "rewards/rejected": -20.598697662353516, "step": 16945 }, { "epoch": 2.64, "learning_rate": 1.7184511644794855e-06, "logits/chosen": -1.3366538286209106, "logits/rejected": -2.3994076251983643, "logps/chosen": -186.91128540039062, "logps/rejected": -323.7198486328125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -8.24445629119873, "rewards/margins": 6.037172794342041, "rewards/rejected": -14.28162956237793, "step": 16946 }, { "epoch": 2.64, "learning_rate": 1.7177177239483376e-06, "logits/chosen": -1.3092291355133057, "logits/rejected": -1.7768361568450928, "logps/chosen": -122.20735168457031, "logps/rejected": -360.49090576171875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.118890285491943, "rewards/margins": 7.732142448425293, "rewards/rejected": -13.851032257080078, "step": 16947 }, { "epoch": 2.64, "learning_rate": 1.7169842834171897e-06, "logits/chosen": -2.094271421432495, "logits/rejected": -2.5098562240600586, "logps/chosen": -295.6348571777344, "logps/rejected": -356.815185546875, "loss": 0.0836, "rewards/accuracies": 1.0, "rewards/chosen": -7.138999938964844, "rewards/margins": 6.1979265213012695, "rewards/rejected": -13.336926460266113, "step": 16948 }, { "epoch": 2.64, "learning_rate": 1.716250842886042e-06, "logits/chosen": -2.4038960933685303, "logits/rejected": -2.536105155944824, "logps/chosen": -263.93218994140625, "logps/rejected": -427.8774108886719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.642304420471191, "rewards/margins": 10.583760261535645, "rewards/rejected": -20.226064682006836, "step": 16949 }, { "epoch": 2.64, "learning_rate": 1.715517402354894e-06, "logits/chosen": -2.732198476791382, "logits/rejected": -3.1391587257385254, "logps/chosen": -111.28416442871094, "logps/rejected": -331.8604736328125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.912850379943848, "rewards/margins": 10.093423843383789, "rewards/rejected": -17.00627326965332, "step": 16950 }, { "epoch": 2.64, "learning_rate": 1.7147839618237461e-06, "logits/chosen": -2.7080941200256348, "logits/rejected": -2.7905945777893066, "logps/chosen": -378.3106994628906, "logps/rejected": -678.1533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.909956455230713, "rewards/margins": 12.875934600830078, "rewards/rejected": -19.785892486572266, "step": 16951 }, { "epoch": 2.64, "learning_rate": 1.714050521292598e-06, "logits/chosen": -1.3118135929107666, "logits/rejected": -2.680065393447876, "logps/chosen": -200.33152770996094, "logps/rejected": -596.8742065429688, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -8.993370056152344, "rewards/margins": 10.069234848022461, "rewards/rejected": -19.062604904174805, "step": 16952 }, { "epoch": 2.64, "learning_rate": 1.7133170807614503e-06, "logits/chosen": -1.9487711191177368, "logits/rejected": -2.693009853363037, "logps/chosen": -149.63880920410156, "logps/rejected": -465.001220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.513139247894287, "rewards/margins": 12.948324203491211, "rewards/rejected": -20.461463928222656, "step": 16953 }, { "epoch": 2.64, "learning_rate": 1.7125836402303024e-06, "logits/chosen": -2.348031997680664, "logits/rejected": -2.922089099884033, "logps/chosen": -214.69100952148438, "logps/rejected": -484.97991943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.2787699699401855, "rewards/margins": 16.980588912963867, "rewards/rejected": -20.259357452392578, "step": 16954 }, { "epoch": 2.64, "learning_rate": 1.7118501996991545e-06, "logits/chosen": -2.6530916690826416, "logits/rejected": -2.8780596256256104, "logps/chosen": -83.73605346679688, "logps/rejected": -382.98492431640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.925581932067871, "rewards/margins": 9.925683975219727, "rewards/rejected": -14.851265907287598, "step": 16955 }, { "epoch": 2.64, "learning_rate": 1.7111167591680066e-06, "logits/chosen": -2.6344008445739746, "logits/rejected": -2.8814895153045654, "logps/chosen": -139.02056884765625, "logps/rejected": -331.45465087890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.827210426330566, "rewards/margins": 9.506387710571289, "rewards/rejected": -18.333599090576172, "step": 16956 }, { "epoch": 2.64, "learning_rate": 1.7103833186368589e-06, "logits/chosen": -2.325016975402832, "logits/rejected": -2.3421106338500977, "logps/chosen": -361.0854187011719, "logps/rejected": -376.8212585449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.877713918685913, "rewards/margins": 12.486213684082031, "rewards/rejected": -15.363927841186523, "step": 16957 }, { "epoch": 2.64, "learning_rate": 1.709649878105711e-06, "logits/chosen": -1.8211885690689087, "logits/rejected": -2.7070279121398926, "logps/chosen": -103.33096313476562, "logps/rejected": -396.9007568359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.283435821533203, "rewards/margins": 12.451091766357422, "rewards/rejected": -19.734527587890625, "step": 16958 }, { "epoch": 2.64, "learning_rate": 1.708916437574563e-06, "logits/chosen": -2.9611921310424805, "logits/rejected": -2.170947551727295, "logps/chosen": -524.3153076171875, "logps/rejected": -344.7117919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.513679504394531, "rewards/margins": 10.042510986328125, "rewards/rejected": -14.556190490722656, "step": 16959 }, { "epoch": 2.64, "learning_rate": 1.7081829970434152e-06, "logits/chosen": -2.3628320693969727, "logits/rejected": -2.7312066555023193, "logps/chosen": -160.93460083007812, "logps/rejected": -254.55210876464844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.317654132843018, "rewards/margins": 8.24878978729248, "rewards/rejected": -13.566444396972656, "step": 16960 }, { "epoch": 2.64, "learning_rate": 1.7074495565122672e-06, "logits/chosen": -2.1277105808258057, "logits/rejected": -2.954648971557617, "logps/chosen": -114.94148254394531, "logps/rejected": -396.7531433105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0411458015441895, "rewards/margins": 10.566672325134277, "rewards/rejected": -14.607818603515625, "step": 16961 }, { "epoch": 2.64, "learning_rate": 1.7067161159811193e-06, "logits/chosen": -1.9624440670013428, "logits/rejected": -2.680771827697754, "logps/chosen": -490.06341552734375, "logps/rejected": -673.8306884765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.937597751617432, "rewards/margins": 10.05784797668457, "rewards/rejected": -16.995445251464844, "step": 16962 }, { "epoch": 2.64, "learning_rate": 1.7059826754499714e-06, "logits/chosen": -2.6147189140319824, "logits/rejected": -2.8605079650878906, "logps/chosen": -131.42037963867188, "logps/rejected": -437.04168701171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.007264137268066, "rewards/margins": 8.754125595092773, "rewards/rejected": -14.761390686035156, "step": 16963 }, { "epoch": 2.64, "learning_rate": 1.7052492349188235e-06, "logits/chosen": -2.6535017490386963, "logits/rejected": -2.9505574703216553, "logps/chosen": -281.0724792480469, "logps/rejected": -504.85552978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.987208843231201, "rewards/margins": 12.571229934692383, "rewards/rejected": -18.558439254760742, "step": 16964 }, { "epoch": 2.64, "learning_rate": 1.7045157943876756e-06, "logits/chosen": -2.827019691467285, "logits/rejected": -2.74326491355896, "logps/chosen": -360.69146728515625, "logps/rejected": -414.82427978515625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.752085208892822, "rewards/margins": 7.894351959228516, "rewards/rejected": -14.64643669128418, "step": 16965 }, { "epoch": 2.64, "learning_rate": 1.703782353856528e-06, "logits/chosen": -1.9256670475006104, "logits/rejected": -2.5344653129577637, "logps/chosen": -310.32647705078125, "logps/rejected": -584.3850708007812, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.140378952026367, "rewards/margins": 8.862319946289062, "rewards/rejected": -17.002700805664062, "step": 16966 }, { "epoch": 2.64, "learning_rate": 1.70304891332538e-06, "logits/chosen": -2.478374481201172, "logits/rejected": -2.01955509185791, "logps/chosen": -388.2587890625, "logps/rejected": -446.4840393066406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.413185119628906, "rewards/margins": 7.784149169921875, "rewards/rejected": -16.19733428955078, "step": 16967 }, { "epoch": 2.64, "learning_rate": 1.702315472794232e-06, "logits/chosen": -1.813652753829956, "logits/rejected": -2.9165329933166504, "logps/chosen": -171.1185302734375, "logps/rejected": -636.706298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.384941101074219, "rewards/margins": 14.858625411987305, "rewards/rejected": -25.243568420410156, "step": 16968 }, { "epoch": 2.64, "learning_rate": 1.7015820322630842e-06, "logits/chosen": -2.548438310623169, "logits/rejected": -2.692948579788208, "logps/chosen": -463.7573547363281, "logps/rejected": -592.4095458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.89066219329834, "rewards/margins": 11.906578063964844, "rewards/rejected": -17.797239303588867, "step": 16969 }, { "epoch": 2.64, "learning_rate": 1.7008485917319365e-06, "logits/chosen": -1.4932571649551392, "logits/rejected": -2.607961654663086, "logps/chosen": -323.834716796875, "logps/rejected": -507.62005615234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.869531154632568, "rewards/margins": 10.085886001586914, "rewards/rejected": -16.95541763305664, "step": 16970 }, { "epoch": 2.64, "learning_rate": 1.7001151512007886e-06, "logits/chosen": -2.307574987411499, "logits/rejected": -2.694448471069336, "logps/chosen": -108.02233123779297, "logps/rejected": -293.4241943359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.274667263031006, "rewards/margins": 11.658636093139648, "rewards/rejected": -15.933302879333496, "step": 16971 }, { "epoch": 2.64, "learning_rate": 1.6993817106696404e-06, "logits/chosen": -2.1437320709228516, "logits/rejected": -2.792315721511841, "logps/chosen": -217.267822265625, "logps/rejected": -400.53326416015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.80837631225586, "rewards/margins": 8.754805564880371, "rewards/rejected": -17.563182830810547, "step": 16972 }, { "epoch": 2.64, "learning_rate": 1.6986482701384925e-06, "logits/chosen": -2.3847038745880127, "logits/rejected": -2.62455415725708, "logps/chosen": -194.090087890625, "logps/rejected": -358.5391845703125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.126795768737793, "rewards/margins": 9.83779525756836, "rewards/rejected": -15.964591026306152, "step": 16973 }, { "epoch": 2.64, "learning_rate": 1.6979148296073448e-06, "logits/chosen": -0.7426198720932007, "logits/rejected": -1.5219029188156128, "logps/chosen": -328.36456298828125, "logps/rejected": -603.8242797851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.9761152267456055, "rewards/margins": 16.962554931640625, "rewards/rejected": -23.938671112060547, "step": 16974 }, { "epoch": 2.64, "learning_rate": 1.697181389076197e-06, "logits/chosen": -1.915511131286621, "logits/rejected": -2.7234692573547363, "logps/chosen": -243.6432647705078, "logps/rejected": -721.2913818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.324310779571533, "rewards/margins": 13.322790145874023, "rewards/rejected": -20.64710235595703, "step": 16975 }, { "epoch": 2.64, "learning_rate": 1.696447948545049e-06, "logits/chosen": -2.605567216873169, "logits/rejected": -1.4768474102020264, "logps/chosen": -300.57061767578125, "logps/rejected": -228.26971435546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.350217819213867, "rewards/margins": 9.351968765258789, "rewards/rejected": -12.702187538146973, "step": 16976 }, { "epoch": 2.64, "learning_rate": 1.6957145080139011e-06, "logits/chosen": -2.17441987991333, "logits/rejected": -2.5860884189605713, "logps/chosen": -427.67364501953125, "logps/rejected": -496.6029968261719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.8390045166015625, "rewards/margins": 8.914464950561523, "rewards/rejected": -15.753470420837402, "step": 16977 }, { "epoch": 2.64, "learning_rate": 1.6949810674827532e-06, "logits/chosen": -2.627084255218506, "logits/rejected": -1.7317858934402466, "logps/chosen": -273.9427490234375, "logps/rejected": -260.4024658203125, "loss": 0.0459, "rewards/accuracies": 1.0, "rewards/chosen": -4.619299411773682, "rewards/margins": 4.318796157836914, "rewards/rejected": -8.938095092773438, "step": 16978 }, { "epoch": 2.64, "learning_rate": 1.6942476269516055e-06, "logits/chosen": -1.0481600761413574, "logits/rejected": -2.367464303970337, "logps/chosen": -198.22467041015625, "logps/rejected": -459.1673278808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.152496337890625, "rewards/margins": 13.729658126831055, "rewards/rejected": -21.88215446472168, "step": 16979 }, { "epoch": 2.64, "learning_rate": 1.6935141864204576e-06, "logits/chosen": -1.5488487482070923, "logits/rejected": -2.6175951957702637, "logps/chosen": -197.57261657714844, "logps/rejected": -447.0373840332031, "loss": 0.1715, "rewards/accuracies": 1.0, "rewards/chosen": -8.72041130065918, "rewards/margins": 5.1579365730285645, "rewards/rejected": -13.878347396850586, "step": 16980 }, { "epoch": 2.64, "learning_rate": 1.6927807458893097e-06, "logits/chosen": -2.36918306350708, "logits/rejected": -1.7561140060424805, "logps/chosen": -687.9666137695312, "logps/rejected": -501.9669494628906, "loss": 0.2412, "rewards/accuracies": 1.0, "rewards/chosen": -8.243990898132324, "rewards/margins": 7.128864765167236, "rewards/rejected": -15.372856140136719, "step": 16981 }, { "epoch": 2.64, "learning_rate": 1.6920473053581616e-06, "logits/chosen": -2.705003499984741, "logits/rejected": -2.8057234287261963, "logps/chosen": -165.410888671875, "logps/rejected": -256.2174072265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.38097095489502, "rewards/margins": 9.06926155090332, "rewards/rejected": -17.450233459472656, "step": 16982 }, { "epoch": 2.64, "learning_rate": 1.6913138648270139e-06, "logits/chosen": -1.1367785930633545, "logits/rejected": -2.2927608489990234, "logps/chosen": -111.32652282714844, "logps/rejected": -294.52569580078125, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -8.160087585449219, "rewards/margins": 5.885125637054443, "rewards/rejected": -14.04521369934082, "step": 16983 }, { "epoch": 2.64, "learning_rate": 1.690580424295866e-06, "logits/chosen": -2.366673231124878, "logits/rejected": -2.9574735164642334, "logps/chosen": -190.2252655029297, "logps/rejected": -424.75885009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.601122856140137, "rewards/margins": 10.258102416992188, "rewards/rejected": -15.85922622680664, "step": 16984 }, { "epoch": 2.64, "learning_rate": 1.689846983764718e-06, "logits/chosen": -2.327440023422241, "logits/rejected": -2.8759818077087402, "logps/chosen": -185.74391174316406, "logps/rejected": -539.306396484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.152587890625, "rewards/margins": 8.019569396972656, "rewards/rejected": -16.172157287597656, "step": 16985 }, { "epoch": 2.64, "learning_rate": 1.6891135432335701e-06, "logits/chosen": -2.8196189403533936, "logits/rejected": -1.794019341468811, "logps/chosen": -418.8056335449219, "logps/rejected": -564.2073364257812, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.3125762939453125, "rewards/margins": 8.494937896728516, "rewards/rejected": -15.807514190673828, "step": 16986 }, { "epoch": 2.64, "learning_rate": 1.6883801027024224e-06, "logits/chosen": -1.6265078783035278, "logits/rejected": -2.3872859477996826, "logps/chosen": -200.8309326171875, "logps/rejected": -226.36602783203125, "loss": 0.9164, "rewards/accuracies": 0.5, "rewards/chosen": -9.92912769317627, "rewards/margins": 4.498574733734131, "rewards/rejected": -14.427701950073242, "step": 16987 }, { "epoch": 2.64, "learning_rate": 1.6876466621712745e-06, "logits/chosen": -2.567206859588623, "logits/rejected": -2.131657600402832, "logps/chosen": -363.716796875, "logps/rejected": -593.7919311523438, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -8.691032409667969, "rewards/margins": 8.392945289611816, "rewards/rejected": -17.08397674560547, "step": 16988 }, { "epoch": 2.64, "learning_rate": 1.6869132216401266e-06, "logits/chosen": -2.240121364593506, "logits/rejected": -2.468026638031006, "logps/chosen": -384.12860107421875, "logps/rejected": -387.78192138671875, "loss": 0.0731, "rewards/accuracies": 1.0, "rewards/chosen": -10.098066329956055, "rewards/margins": 5.935029029846191, "rewards/rejected": -16.03309440612793, "step": 16989 }, { "epoch": 2.64, "learning_rate": 1.6861797811089787e-06, "logits/chosen": -1.6904723644256592, "logits/rejected": -2.586430311203003, "logps/chosen": -114.8319091796875, "logps/rejected": -319.95025634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.554854393005371, "rewards/margins": 12.26253890991211, "rewards/rejected": -15.81739330291748, "step": 16990 }, { "epoch": 2.64, "learning_rate": 1.685446340577831e-06, "logits/chosen": -2.546020746231079, "logits/rejected": -2.0620737075805664, "logps/chosen": -412.59259033203125, "logps/rejected": -320.73944091796875, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -8.699892044067383, "rewards/margins": 4.8189897537231445, "rewards/rejected": -13.518880844116211, "step": 16991 }, { "epoch": 2.64, "learning_rate": 1.6847129000466829e-06, "logits/chosen": -1.8390086889266968, "logits/rejected": -2.8216633796691895, "logps/chosen": -115.6649398803711, "logps/rejected": -350.4314880371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.9938812255859375, "rewards/margins": 9.797103881835938, "rewards/rejected": -14.790985107421875, "step": 16992 }, { "epoch": 2.64, "learning_rate": 1.683979459515535e-06, "logits/chosen": -2.948681592941284, "logits/rejected": -2.9349911212921143, "logps/chosen": -148.04656982421875, "logps/rejected": -272.1623229980469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.970978736877441, "rewards/margins": 10.82947826385498, "rewards/rejected": -16.800457000732422, "step": 16993 }, { "epoch": 2.64, "learning_rate": 1.683246018984387e-06, "logits/chosen": -1.5866104364395142, "logits/rejected": -2.8562755584716797, "logps/chosen": -211.23336791992188, "logps/rejected": -710.115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.974161148071289, "rewards/margins": 14.826435089111328, "rewards/rejected": -23.80059814453125, "step": 16994 }, { "epoch": 2.64, "learning_rate": 1.6825125784532394e-06, "logits/chosen": -1.5729866027832031, "logits/rejected": -2.6608211994171143, "logps/chosen": -199.8121337890625, "logps/rejected": -508.66937255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.202545166015625, "rewards/margins": 15.118529319763184, "rewards/rejected": -22.321075439453125, "step": 16995 }, { "epoch": 2.64, "learning_rate": 1.6817791379220915e-06, "logits/chosen": -2.4951887130737305, "logits/rejected": -2.6314518451690674, "logps/chosen": -186.390625, "logps/rejected": -443.1374206542969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.541883945465088, "rewards/margins": 10.461901664733887, "rewards/rejected": -18.003786087036133, "step": 16996 }, { "epoch": 2.64, "learning_rate": 1.6810456973909435e-06, "logits/chosen": -2.650771379470825, "logits/rejected": -1.8324812650680542, "logps/chosen": -223.26092529296875, "logps/rejected": -230.08038330078125, "loss": 0.0436, "rewards/accuracies": 1.0, "rewards/chosen": -7.206402778625488, "rewards/margins": 6.786083221435547, "rewards/rejected": -13.992486000061035, "step": 16997 }, { "epoch": 2.64, "learning_rate": 1.6803122568597956e-06, "logits/chosen": -2.908609390258789, "logits/rejected": -3.2039992809295654, "logps/chosen": -113.63441467285156, "logps/rejected": -329.3846740722656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.705145835876465, "rewards/margins": 9.45182991027832, "rewards/rejected": -17.1569766998291, "step": 16998 }, { "epoch": 2.64, "learning_rate": 1.6795788163286477e-06, "logits/chosen": -1.9831464290618896, "logits/rejected": -2.7502059936523438, "logps/chosen": -206.2085723876953, "logps/rejected": -454.1553955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.492592811584473, "rewards/margins": 11.07225227355957, "rewards/rejected": -16.56484603881836, "step": 16999 }, { "epoch": 2.64, "learning_rate": 1.6788453757975e-06, "logits/chosen": -2.7165465354919434, "logits/rejected": -2.5090062618255615, "logps/chosen": -142.1387481689453, "logps/rejected": -298.19195556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.833630561828613, "rewards/margins": 10.819194793701172, "rewards/rejected": -16.6528263092041, "step": 17000 }, { "epoch": 2.64, "learning_rate": 1.678111935266352e-06, "logits/chosen": -2.0845065116882324, "logits/rejected": -2.598299980163574, "logps/chosen": -605.945068359375, "logps/rejected": -454.29559326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.827540874481201, "rewards/margins": 14.19105339050293, "rewards/rejected": -18.01859474182129, "step": 17001 }, { "epoch": 2.64, "learning_rate": 1.677378494735204e-06, "logits/chosen": -2.687863349914551, "logits/rejected": -2.171560525894165, "logps/chosen": -240.1136474609375, "logps/rejected": -322.0783386230469, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -5.648931503295898, "rewards/margins": 7.598231315612793, "rewards/rejected": -13.247162818908691, "step": 17002 }, { "epoch": 2.64, "learning_rate": 1.676645054204056e-06, "logits/chosen": -2.7979767322540283, "logits/rejected": -2.2758638858795166, "logps/chosen": -130.89312744140625, "logps/rejected": -160.9212646484375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.817088603973389, "rewards/margins": 6.1717915534973145, "rewards/rejected": -11.988880157470703, "step": 17003 }, { "epoch": 2.64, "learning_rate": 1.6759116136729084e-06, "logits/chosen": -2.523207902908325, "logits/rejected": -2.8441500663757324, "logps/chosen": -505.5196228027344, "logps/rejected": -638.39501953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.220455169677734, "rewards/margins": 8.372967720031738, "rewards/rejected": -16.593421936035156, "step": 17004 }, { "epoch": 2.64, "learning_rate": 1.6751781731417605e-06, "logits/chosen": -2.8172988891601562, "logits/rejected": -2.4857096672058105, "logps/chosen": -715.9312133789062, "logps/rejected": -750.658203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.3189697265625, "rewards/margins": 11.4622802734375, "rewards/rejected": -21.78125, "step": 17005 }, { "epoch": 2.64, "learning_rate": 1.6744447326106126e-06, "logits/chosen": -1.458654522895813, "logits/rejected": -2.2449748516082764, "logps/chosen": -253.78712463378906, "logps/rejected": -598.052001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.848862648010254, "rewards/margins": 18.100975036621094, "rewards/rejected": -25.949838638305664, "step": 17006 }, { "epoch": 2.64, "learning_rate": 1.6737112920794647e-06, "logits/chosen": -2.3133862018585205, "logits/rejected": -2.301969528198242, "logps/chosen": -397.5038146972656, "logps/rejected": -405.2459411621094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.159109115600586, "rewards/margins": 9.463006019592285, "rewards/rejected": -16.622114181518555, "step": 17007 }, { "epoch": 2.65, "learning_rate": 1.672977851548317e-06, "logits/chosen": -2.383652925491333, "logits/rejected": -2.8914923667907715, "logps/chosen": -221.28973388671875, "logps/rejected": -433.41058349609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.747406959533691, "rewards/margins": 8.651355743408203, "rewards/rejected": -15.398761749267578, "step": 17008 }, { "epoch": 2.65, "learning_rate": 1.672244411017169e-06, "logits/chosen": -1.3869516849517822, "logits/rejected": -2.629295587539673, "logps/chosen": -142.21656799316406, "logps/rejected": -421.7901916503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.566106796264648, "rewards/margins": 11.749994277954102, "rewards/rejected": -17.31610107421875, "step": 17009 }, { "epoch": 2.65, "learning_rate": 1.6715109704860211e-06, "logits/chosen": -2.7746894359588623, "logits/rejected": -2.90110445022583, "logps/chosen": -565.1762084960938, "logps/rejected": -399.6043701171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.956967353820801, "rewards/margins": 10.108174324035645, "rewards/rejected": -14.065141677856445, "step": 17010 }, { "epoch": 2.65, "learning_rate": 1.670777529954873e-06, "logits/chosen": -3.0906219482421875, "logits/rejected": -2.4414467811584473, "logps/chosen": -187.1669464111328, "logps/rejected": -155.94741821289062, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -4.702584266662598, "rewards/margins": 4.2870049476623535, "rewards/rejected": -8.989588737487793, "step": 17011 }, { "epoch": 2.65, "learning_rate": 1.6700440894237253e-06, "logits/chosen": -2.679126501083374, "logits/rejected": -2.9048473834991455, "logps/chosen": -336.3516845703125, "logps/rejected": -620.6923217773438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.3505754470825195, "rewards/margins": 10.216049194335938, "rewards/rejected": -16.56662368774414, "step": 17012 }, { "epoch": 2.65, "learning_rate": 1.6693106488925774e-06, "logits/chosen": -2.5595850944519043, "logits/rejected": -2.4143762588500977, "logps/chosen": -155.82565307617188, "logps/rejected": -252.78428649902344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.843005180358887, "rewards/margins": 9.70639705657959, "rewards/rejected": -14.549402236938477, "step": 17013 }, { "epoch": 2.65, "learning_rate": 1.6685772083614295e-06, "logits/chosen": -2.89037823677063, "logits/rejected": -2.1251373291015625, "logps/chosen": -196.06097412109375, "logps/rejected": -109.02821350097656, "loss": 0.5868, "rewards/accuracies": 0.5, "rewards/chosen": -4.8546037673950195, "rewards/margins": 3.039977550506592, "rewards/rejected": -7.894581317901611, "step": 17014 }, { "epoch": 2.65, "learning_rate": 1.6678437678302816e-06, "logits/chosen": -2.2919318675994873, "logits/rejected": -2.641961097717285, "logps/chosen": -608.2722778320312, "logps/rejected": -1017.80126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.910122871398926, "rewards/margins": 9.450933456420898, "rewards/rejected": -18.36105728149414, "step": 17015 }, { "epoch": 2.65, "learning_rate": 1.6671103272991339e-06, "logits/chosen": -2.00075364112854, "logits/rejected": -2.660104751586914, "logps/chosen": -161.62435913085938, "logps/rejected": -394.77789306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9095144271850586, "rewards/margins": 11.084874153137207, "rewards/rejected": -14.994388580322266, "step": 17016 }, { "epoch": 2.65, "learning_rate": 1.666376886767986e-06, "logits/chosen": -2.7897486686706543, "logits/rejected": -2.6777725219726562, "logps/chosen": -459.5422058105469, "logps/rejected": -299.8370361328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.7505035400390625, "rewards/margins": 8.954111099243164, "rewards/rejected": -12.704614639282227, "step": 17017 }, { "epoch": 2.65, "learning_rate": 1.665643446236838e-06, "logits/chosen": -2.342444658279419, "logits/rejected": -1.9354374408721924, "logps/chosen": -218.8560333251953, "logps/rejected": -250.71160888671875, "loss": 0.0347, "rewards/accuracies": 1.0, "rewards/chosen": -4.728382110595703, "rewards/margins": 6.9177961349487305, "rewards/rejected": -11.646178245544434, "step": 17018 }, { "epoch": 2.65, "learning_rate": 1.6649100057056902e-06, "logits/chosen": -2.5894687175750732, "logits/rejected": -2.479012966156006, "logps/chosen": -204.26272583007812, "logps/rejected": -326.822021484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.568695068359375, "rewards/margins": 11.99061393737793, "rewards/rejected": -18.559309005737305, "step": 17019 }, { "epoch": 2.65, "learning_rate": 1.6641765651745423e-06, "logits/chosen": -2.5393869876861572, "logits/rejected": -1.9600905179977417, "logps/chosen": -430.93359375, "logps/rejected": -316.36260986328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.515359878540039, "rewards/margins": 10.059473037719727, "rewards/rejected": -16.574832916259766, "step": 17020 }, { "epoch": 2.65, "learning_rate": 1.6634431246433943e-06, "logits/chosen": -2.108971357345581, "logits/rejected": -2.7077627182006836, "logps/chosen": -106.99537658691406, "logps/rejected": -251.32009887695312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.334444046020508, "rewards/margins": 9.879313468933105, "rewards/rejected": -16.21375846862793, "step": 17021 }, { "epoch": 2.65, "learning_rate": 1.6627096841122464e-06, "logits/chosen": -0.5977312326431274, "logits/rejected": -1.6036297082901, "logps/chosen": -284.85748291015625, "logps/rejected": -503.37994384765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.864851951599121, "rewards/margins": 15.750839233398438, "rewards/rejected": -21.615692138671875, "step": 17022 }, { "epoch": 2.65, "learning_rate": 1.6619762435810985e-06, "logits/chosen": -2.9686737060546875, "logits/rejected": -3.277000665664673, "logps/chosen": -81.04832458496094, "logps/rejected": -272.7261047363281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.671682834625244, "rewards/margins": 8.429037094116211, "rewards/rejected": -15.100719451904297, "step": 17023 }, { "epoch": 2.65, "learning_rate": 1.6612428030499506e-06, "logits/chosen": -2.3963778018951416, "logits/rejected": -2.6712372303009033, "logps/chosen": -430.64080810546875, "logps/rejected": -571.4857177734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.025555610656738, "rewards/margins": 10.429649353027344, "rewards/rejected": -16.455204010009766, "step": 17024 }, { "epoch": 2.65, "learning_rate": 1.660509362518803e-06, "logits/chosen": -2.2196905612945557, "logits/rejected": -2.444291830062866, "logps/chosen": -340.4537353515625, "logps/rejected": -400.8164367675781, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.4086456298828125, "rewards/margins": 7.2401533126831055, "rewards/rejected": -12.648799896240234, "step": 17025 }, { "epoch": 2.65, "learning_rate": 1.659775921987655e-06, "logits/chosen": -2.059189558029175, "logits/rejected": -2.39109468460083, "logps/chosen": -128.4241485595703, "logps/rejected": -282.80145263671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -8.678862571716309, "rewards/margins": 7.610567569732666, "rewards/rejected": -16.289430618286133, "step": 17026 }, { "epoch": 2.65, "learning_rate": 1.659042481456507e-06, "logits/chosen": -2.4330670833587646, "logits/rejected": -2.785081624984741, "logps/chosen": -404.8247985839844, "logps/rejected": -527.9830322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.613548278808594, "rewards/margins": 14.33243179321289, "rewards/rejected": -23.945980072021484, "step": 17027 }, { "epoch": 2.65, "learning_rate": 1.6583090409253592e-06, "logits/chosen": -2.345839023590088, "logits/rejected": -2.833319664001465, "logps/chosen": -147.575439453125, "logps/rejected": -302.16302490234375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -8.162087440490723, "rewards/margins": 9.004083633422852, "rewards/rejected": -17.16617202758789, "step": 17028 }, { "epoch": 2.65, "learning_rate": 1.6575756003942115e-06, "logits/chosen": -2.476804494857788, "logits/rejected": -3.1663970947265625, "logps/chosen": -127.85160064697266, "logps/rejected": -403.43359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.360095977783203, "rewards/margins": 12.018753051757812, "rewards/rejected": -19.378849029541016, "step": 17029 }, { "epoch": 2.65, "learning_rate": 1.6568421598630636e-06, "logits/chosen": -2.9029483795166016, "logits/rejected": -1.91348135471344, "logps/chosen": -565.5538330078125, "logps/rejected": -488.1956787109375, "loss": 0.0296, "rewards/accuracies": 1.0, "rewards/chosen": -8.184429168701172, "rewards/margins": 8.921005249023438, "rewards/rejected": -17.10543441772461, "step": 17030 }, { "epoch": 2.65, "learning_rate": 1.6561087193319155e-06, "logits/chosen": -2.5825343132019043, "logits/rejected": -2.9039289951324463, "logps/chosen": -84.95486450195312, "logps/rejected": -233.79302978515625, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -5.022430896759033, "rewards/margins": 6.876639366149902, "rewards/rejected": -11.899069786071777, "step": 17031 }, { "epoch": 2.65, "learning_rate": 1.6553752788007675e-06, "logits/chosen": -0.7193722724914551, "logits/rejected": -2.05513334274292, "logps/chosen": -112.30406188964844, "logps/rejected": -486.5380859375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -7.241400718688965, "rewards/margins": 10.825616836547852, "rewards/rejected": -18.0670166015625, "step": 17032 }, { "epoch": 2.65, "learning_rate": 1.6546418382696198e-06, "logits/chosen": -2.572850465774536, "logits/rejected": -2.7820098400115967, "logps/chosen": -111.2878646850586, "logps/rejected": -291.9154052734375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.960575580596924, "rewards/margins": 9.33835506439209, "rewards/rejected": -15.298931121826172, "step": 17033 }, { "epoch": 2.65, "learning_rate": 1.653908397738472e-06, "logits/chosen": -1.9157013893127441, "logits/rejected": -2.691929817199707, "logps/chosen": -99.76130676269531, "logps/rejected": -297.12298583984375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -5.586967468261719, "rewards/margins": 9.359095573425293, "rewards/rejected": -14.946063995361328, "step": 17034 }, { "epoch": 2.65, "learning_rate": 1.653174957207324e-06, "logits/chosen": -2.5130434036254883, "logits/rejected": -2.637489080429077, "logps/chosen": -178.40159606933594, "logps/rejected": -506.8869934082031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.997947692871094, "rewards/margins": 11.762357711791992, "rewards/rejected": -23.76030731201172, "step": 17035 }, { "epoch": 2.65, "learning_rate": 1.6524415166761761e-06, "logits/chosen": -2.3830480575561523, "logits/rejected": -2.9815421104431152, "logps/chosen": -167.50430297851562, "logps/rejected": -372.4385070800781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.068399906158447, "rewards/margins": 9.20372486114502, "rewards/rejected": -16.272125244140625, "step": 17036 }, { "epoch": 2.65, "learning_rate": 1.6517080761450284e-06, "logits/chosen": -2.291940450668335, "logits/rejected": -2.7187399864196777, "logps/chosen": -197.0264129638672, "logps/rejected": -253.10183715820312, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.100517272949219, "rewards/margins": 7.853528022766113, "rewards/rejected": -13.954046249389648, "step": 17037 }, { "epoch": 2.65, "learning_rate": 1.6509746356138805e-06, "logits/chosen": -1.4367785453796387, "logits/rejected": -2.1367557048797607, "logps/chosen": -424.0711669921875, "logps/rejected": -685.4805908203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.446028232574463, "rewards/margins": 11.803204536437988, "rewards/rejected": -18.24923324584961, "step": 17038 }, { "epoch": 2.65, "learning_rate": 1.6502411950827326e-06, "logits/chosen": -2.0053601264953613, "logits/rejected": -2.813044309616089, "logps/chosen": -202.83047485351562, "logps/rejected": -398.2078857421875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.419955253601074, "rewards/margins": 8.82477855682373, "rewards/rejected": -13.244733810424805, "step": 17039 }, { "epoch": 2.65, "learning_rate": 1.6495077545515847e-06, "logits/chosen": -2.009531259536743, "logits/rejected": -2.5938098430633545, "logps/chosen": -345.14288330078125, "logps/rejected": -575.9994506835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.740766525268555, "rewards/margins": 12.482248306274414, "rewards/rejected": -22.22301483154297, "step": 17040 }, { "epoch": 2.65, "learning_rate": 1.6487743140204368e-06, "logits/chosen": -2.4770631790161133, "logits/rejected": -2.7225818634033203, "logps/chosen": -311.12353515625, "logps/rejected": -343.8884582519531, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.418673515319824, "rewards/margins": 8.423907279968262, "rewards/rejected": -13.842580795288086, "step": 17041 }, { "epoch": 2.65, "learning_rate": 1.6480408734892889e-06, "logits/chosen": -2.0920042991638184, "logits/rejected": -2.361328363418579, "logps/chosen": -128.47727966308594, "logps/rejected": -377.39117431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.858262062072754, "rewards/margins": 10.696120262145996, "rewards/rejected": -16.55438232421875, "step": 17042 }, { "epoch": 2.65, "learning_rate": 1.647307432958141e-06, "logits/chosen": -2.815822124481201, "logits/rejected": -3.030480146408081, "logps/chosen": -259.90478515625, "logps/rejected": -441.59326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.233955383300781, "rewards/margins": 12.654033660888672, "rewards/rejected": -17.887989044189453, "step": 17043 }, { "epoch": 2.65, "learning_rate": 1.646573992426993e-06, "logits/chosen": -1.2349365949630737, "logits/rejected": -2.6862282752990723, "logps/chosen": -147.44810485839844, "logps/rejected": -551.365966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.757147312164307, "rewards/margins": 14.630563735961914, "rewards/rejected": -19.387710571289062, "step": 17044 }, { "epoch": 2.65, "learning_rate": 1.6458405518958451e-06, "logits/chosen": -2.7209012508392334, "logits/rejected": -3.193830728530884, "logps/chosen": -122.4860610961914, "logps/rejected": -344.90478515625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.466927528381348, "rewards/margins": 8.388540267944336, "rewards/rejected": -16.85546875, "step": 17045 }, { "epoch": 2.65, "learning_rate": 1.6451071113646974e-06, "logits/chosen": -1.2095513343811035, "logits/rejected": -2.483654499053955, "logps/chosen": -187.68283081054688, "logps/rejected": -459.1518859863281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.274176597595215, "rewards/margins": 11.915876388549805, "rewards/rejected": -18.190052032470703, "step": 17046 }, { "epoch": 2.65, "learning_rate": 1.6443736708335495e-06, "logits/chosen": -2.207026243209839, "logits/rejected": -2.7395801544189453, "logps/chosen": -134.9483642578125, "logps/rejected": -332.3683776855469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.42465353012085, "rewards/margins": 8.099517822265625, "rewards/rejected": -14.524171829223633, "step": 17047 }, { "epoch": 2.65, "learning_rate": 1.6436402303024016e-06, "logits/chosen": -2.1306135654449463, "logits/rejected": -2.658048152923584, "logps/chosen": -178.00100708007812, "logps/rejected": -433.484130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.624896049499512, "rewards/margins": 12.512460708618164, "rewards/rejected": -19.13735580444336, "step": 17048 }, { "epoch": 2.65, "learning_rate": 1.6429067897712537e-06, "logits/chosen": -2.620891571044922, "logits/rejected": -2.5081732273101807, "logps/chosen": -190.46817016601562, "logps/rejected": -263.5201416015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.6793317794799805, "rewards/margins": 9.51900577545166, "rewards/rejected": -17.19833755493164, "step": 17049 }, { "epoch": 2.65, "learning_rate": 1.642173349240106e-06, "logits/chosen": -2.580990791320801, "logits/rejected": -2.1858527660369873, "logps/chosen": -287.53228759765625, "logps/rejected": -560.9696044921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.576530456542969, "rewards/margins": 12.017319679260254, "rewards/rejected": -16.593849182128906, "step": 17050 }, { "epoch": 2.65, "learning_rate": 1.6414399087089579e-06, "logits/chosen": -2.5775389671325684, "logits/rejected": -2.299581289291382, "logps/chosen": -253.96041870117188, "logps/rejected": -324.8968505859375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -7.3806962966918945, "rewards/margins": 6.374139308929443, "rewards/rejected": -13.75483512878418, "step": 17051 }, { "epoch": 2.65, "learning_rate": 1.64070646817781e-06, "logits/chosen": -2.488206624984741, "logits/rejected": -2.7457947731018066, "logps/chosen": -130.41778564453125, "logps/rejected": -220.35752868652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9406824111938477, "rewards/margins": 10.081649780273438, "rewards/rejected": -14.022332191467285, "step": 17052 }, { "epoch": 2.65, "learning_rate": 1.639973027646662e-06, "logits/chosen": -2.5661609172821045, "logits/rejected": -2.3842928409576416, "logps/chosen": -409.14398193359375, "logps/rejected": -611.442138671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.476907253265381, "rewards/margins": 14.155351638793945, "rewards/rejected": -21.632259368896484, "step": 17053 }, { "epoch": 2.65, "learning_rate": 1.6392395871155144e-06, "logits/chosen": -2.653916835784912, "logits/rejected": -2.704594373703003, "logps/chosen": -364.5546875, "logps/rejected": -320.350830078125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.694094657897949, "rewards/margins": 7.0301055908203125, "rewards/rejected": -13.724201202392578, "step": 17054 }, { "epoch": 2.65, "learning_rate": 1.6385061465843665e-06, "logits/chosen": -1.7901180982589722, "logits/rejected": -3.060800552368164, "logps/chosen": -123.59007263183594, "logps/rejected": -417.8271484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.765047073364258, "rewards/margins": 8.863567352294922, "rewards/rejected": -16.628616333007812, "step": 17055 }, { "epoch": 2.65, "learning_rate": 1.6377727060532186e-06, "logits/chosen": -1.8291780948638916, "logits/rejected": -2.7670047283172607, "logps/chosen": -76.39590454101562, "logps/rejected": -350.81402587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.051786422729492, "rewards/margins": 10.234113693237305, "rewards/rejected": -16.285900115966797, "step": 17056 }, { "epoch": 2.65, "learning_rate": 1.6370392655220706e-06, "logits/chosen": -1.1033912897109985, "logits/rejected": -2.3308136463165283, "logps/chosen": -329.9871826171875, "logps/rejected": -697.8770751953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.887766361236572, "rewards/margins": 9.108552932739258, "rewards/rejected": -14.996319770812988, "step": 17057 }, { "epoch": 2.65, "learning_rate": 1.636305824990923e-06, "logits/chosen": -1.6890908479690552, "logits/rejected": -2.4823834896087646, "logps/chosen": -185.10690307617188, "logps/rejected": -334.35150146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.4710216522216797, "rewards/margins": 12.918221473693848, "rewards/rejected": -16.389244079589844, "step": 17058 }, { "epoch": 2.65, "learning_rate": 1.635572384459775e-06, "logits/chosen": -2.653832197189331, "logits/rejected": -1.6783427000045776, "logps/chosen": -254.35079956054688, "logps/rejected": -259.5495300292969, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.450144290924072, "rewards/margins": 7.5609893798828125, "rewards/rejected": -14.011133193969727, "step": 17059 }, { "epoch": 2.65, "learning_rate": 1.634838943928627e-06, "logits/chosen": -1.6796770095825195, "logits/rejected": -2.5520222187042236, "logps/chosen": -102.29953002929688, "logps/rejected": -327.18865966796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.564702987670898, "rewards/margins": 11.76380443572998, "rewards/rejected": -17.328506469726562, "step": 17060 }, { "epoch": 2.65, "learning_rate": 1.634105503397479e-06, "logits/chosen": -2.7357728481292725, "logits/rejected": -3.1372437477111816, "logps/chosen": -162.9439239501953, "logps/rejected": -384.50262451171875, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.2029571533203125, "rewards/margins": 7.340891361236572, "rewards/rejected": -12.543848991394043, "step": 17061 }, { "epoch": 2.65, "learning_rate": 1.6333720628663313e-06, "logits/chosen": -1.5610623359680176, "logits/rejected": -1.706628680229187, "logps/chosen": -511.2059631347656, "logps/rejected": -498.4701843261719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.730950355529785, "rewards/margins": 13.62094783782959, "rewards/rejected": -18.351898193359375, "step": 17062 }, { "epoch": 2.65, "learning_rate": 1.6326386223351834e-06, "logits/chosen": -2.5959372520446777, "logits/rejected": -2.738182783126831, "logps/chosen": -289.0155944824219, "logps/rejected": -362.88232421875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -4.285772800445557, "rewards/margins": 5.488898754119873, "rewards/rejected": -9.77467155456543, "step": 17063 }, { "epoch": 2.65, "learning_rate": 1.6319051818040355e-06, "logits/chosen": -2.6559793949127197, "logits/rejected": -1.7045173645019531, "logps/chosen": -579.5208740234375, "logps/rejected": -445.5393371582031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.313375473022461, "rewards/margins": 9.101511001586914, "rewards/rejected": -18.414886474609375, "step": 17064 }, { "epoch": 2.65, "learning_rate": 1.6311717412728876e-06, "logits/chosen": -1.7887953519821167, "logits/rejected": -1.922753095626831, "logps/chosen": -362.2706298828125, "logps/rejected": -629.0572509765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -11.54279899597168, "rewards/margins": 10.811721801757812, "rewards/rejected": -22.35451889038086, "step": 17065 }, { "epoch": 2.65, "learning_rate": 1.6304383007417397e-06, "logits/chosen": -2.633734703063965, "logits/rejected": -2.8116800785064697, "logps/chosen": -150.94432067871094, "logps/rejected": -298.8321533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1209349632263184, "rewards/margins": 11.413116455078125, "rewards/rejected": -14.534051895141602, "step": 17066 }, { "epoch": 2.65, "learning_rate": 1.629704860210592e-06, "logits/chosen": -2.2006099224090576, "logits/rejected": -2.738276243209839, "logps/chosen": -842.9306030273438, "logps/rejected": -697.9606323242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.964252471923828, "rewards/margins": 11.759796142578125, "rewards/rejected": -20.724048614501953, "step": 17067 }, { "epoch": 2.65, "learning_rate": 1.628971419679444e-06, "logits/chosen": -1.2974584102630615, "logits/rejected": -2.518597364425659, "logps/chosen": -309.53045654296875, "logps/rejected": -458.862548828125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -5.762548923492432, "rewards/margins": 8.221471786499023, "rewards/rejected": -13.984020233154297, "step": 17068 }, { "epoch": 2.65, "learning_rate": 1.6282379791482961e-06, "logits/chosen": -2.5767626762390137, "logits/rejected": -3.02548885345459, "logps/chosen": -167.15081787109375, "logps/rejected": -391.5685119628906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.597555160522461, "rewards/margins": 10.807111740112305, "rewards/rejected": -17.404666900634766, "step": 17069 }, { "epoch": 2.65, "learning_rate": 1.627504538617148e-06, "logits/chosen": -1.84296452999115, "logits/rejected": -2.594482898712158, "logps/chosen": -96.39010620117188, "logps/rejected": -271.15899658203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.84511137008667, "rewards/margins": 6.848134517669678, "rewards/rejected": -14.693245887756348, "step": 17070 }, { "epoch": 2.65, "learning_rate": 1.6267710980860003e-06, "logits/chosen": -1.3206359148025513, "logits/rejected": -2.7240097522735596, "logps/chosen": -141.66397094726562, "logps/rejected": -472.64605712890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.054486274719238, "rewards/margins": 8.536397933959961, "rewards/rejected": -15.590885162353516, "step": 17071 }, { "epoch": 2.66, "learning_rate": 1.6260376575548524e-06, "logits/chosen": -2.2443795204162598, "logits/rejected": -2.7434887886047363, "logps/chosen": -526.1978149414062, "logps/rejected": -475.4708251953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.2549409866333, "rewards/margins": 8.223062515258789, "rewards/rejected": -16.478002548217773, "step": 17072 }, { "epoch": 2.66, "learning_rate": 1.6253042170237045e-06, "logits/chosen": -1.355834722518921, "logits/rejected": -2.5737695693969727, "logps/chosen": -201.142822265625, "logps/rejected": -432.8499755859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.010614395141602, "rewards/margins": 8.969108581542969, "rewards/rejected": -16.97972297668457, "step": 17073 }, { "epoch": 2.66, "learning_rate": 1.6245707764925566e-06, "logits/chosen": -2.109039783477783, "logits/rejected": -2.52311372756958, "logps/chosen": -251.93846130371094, "logps/rejected": -487.0723876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.091158390045166, "rewards/margins": 14.312398910522461, "rewards/rejected": -21.40355682373047, "step": 17074 }, { "epoch": 2.66, "learning_rate": 1.623837335961409e-06, "logits/chosen": -2.4348325729370117, "logits/rejected": -2.9304168224334717, "logps/chosen": -69.32715606689453, "logps/rejected": -298.6280517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2375383377075195, "rewards/margins": 12.840093612670898, "rewards/rejected": -18.077632904052734, "step": 17075 }, { "epoch": 2.66, "learning_rate": 1.623103895430261e-06, "logits/chosen": -2.727159261703491, "logits/rejected": -2.716062068939209, "logps/chosen": -234.1700897216797, "logps/rejected": -406.351318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.356577396392822, "rewards/margins": 12.75052261352539, "rewards/rejected": -18.107099533081055, "step": 17076 }, { "epoch": 2.66, "learning_rate": 1.622370454899113e-06, "logits/chosen": -2.4332432746887207, "logits/rejected": -2.7694621086120605, "logps/chosen": -159.9586944580078, "logps/rejected": -331.1417541503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.440171241760254, "rewards/margins": 9.422300338745117, "rewards/rejected": -15.862472534179688, "step": 17077 }, { "epoch": 2.66, "learning_rate": 1.6216370143679652e-06, "logits/chosen": -0.9238719344139099, "logits/rejected": -2.1578292846679688, "logps/chosen": -230.18524169921875, "logps/rejected": -588.0865478515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.537656784057617, "rewards/margins": 9.678853988647461, "rewards/rejected": -18.216510772705078, "step": 17078 }, { "epoch": 2.66, "learning_rate": 1.6209035738368173e-06, "logits/chosen": -2.383331298828125, "logits/rejected": -2.8990719318389893, "logps/chosen": -316.91033935546875, "logps/rejected": -415.7230224609375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.165191650390625, "rewards/margins": 8.084554672241211, "rewards/rejected": -14.249746322631836, "step": 17079 }, { "epoch": 2.66, "learning_rate": 1.6201701333056693e-06, "logits/chosen": -1.385261058807373, "logits/rejected": -1.5980442762374878, "logps/chosen": -395.93365478515625, "logps/rejected": -440.0188903808594, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.068723678588867, "rewards/margins": 7.111690521240234, "rewards/rejected": -14.180414199829102, "step": 17080 }, { "epoch": 2.66, "learning_rate": 1.6194366927745214e-06, "logits/chosen": -1.9053043127059937, "logits/rejected": -2.5364010334014893, "logps/chosen": -326.0641784667969, "logps/rejected": -567.7816772460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.233463287353516, "rewards/margins": 13.324981689453125, "rewards/rejected": -22.55844497680664, "step": 17081 }, { "epoch": 2.66, "learning_rate": 1.6187032522433735e-06, "logits/chosen": -1.6919344663619995, "logits/rejected": -2.90948486328125, "logps/chosen": -119.94696044921875, "logps/rejected": -344.24981689453125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.894330978393555, "rewards/margins": 8.979975700378418, "rewards/rejected": -16.874305725097656, "step": 17082 }, { "epoch": 2.66, "learning_rate": 1.6179698117122256e-06, "logits/chosen": -2.5405828952789307, "logits/rejected": -2.7048165798187256, "logps/chosen": -247.32859802246094, "logps/rejected": -390.92626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.555697441101074, "rewards/margins": 9.99230670928955, "rewards/rejected": -17.548004150390625, "step": 17083 }, { "epoch": 2.66, "learning_rate": 1.617236371181078e-06, "logits/chosen": -2.225534200668335, "logits/rejected": -2.7148680686950684, "logps/chosen": -293.2198486328125, "logps/rejected": -453.271484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -9.435321807861328, "rewards/margins": 9.221941947937012, "rewards/rejected": -18.657264709472656, "step": 17084 }, { "epoch": 2.66, "learning_rate": 1.61650293064993e-06, "logits/chosen": -2.6589229106903076, "logits/rejected": -3.050722599029541, "logps/chosen": -102.07914733886719, "logps/rejected": -313.2995300292969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.680398941040039, "rewards/margins": 8.006790161132812, "rewards/rejected": -12.687189102172852, "step": 17085 }, { "epoch": 2.66, "learning_rate": 1.615769490118782e-06, "logits/chosen": -2.1031174659729004, "logits/rejected": -2.610821008682251, "logps/chosen": -122.76333618164062, "logps/rejected": -298.42120361328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.549796104431152, "rewards/margins": 9.253729820251465, "rewards/rejected": -17.803525924682617, "step": 17086 }, { "epoch": 2.66, "learning_rate": 1.6150360495876342e-06, "logits/chosen": -1.9853596687316895, "logits/rejected": -2.677492380142212, "logps/chosen": -366.129638671875, "logps/rejected": -591.0425415039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.2160005569458, "rewards/margins": 12.742197036743164, "rewards/rejected": -22.95819854736328, "step": 17087 }, { "epoch": 2.66, "learning_rate": 1.6143026090564865e-06, "logits/chosen": -1.8673912286758423, "logits/rejected": -1.5734410285949707, "logps/chosen": -345.9475402832031, "logps/rejected": -429.9720458984375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -7.887445449829102, "rewards/margins": 9.245376586914062, "rewards/rejected": -17.132822036743164, "step": 17088 }, { "epoch": 2.66, "learning_rate": 1.6135691685253386e-06, "logits/chosen": -1.650591254234314, "logits/rejected": -2.6790778636932373, "logps/chosen": -306.1416320800781, "logps/rejected": -725.9761352539062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.371243476867676, "rewards/margins": 10.81856918334961, "rewards/rejected": -19.18981170654297, "step": 17089 }, { "epoch": 2.66, "learning_rate": 1.6128357279941905e-06, "logits/chosen": -1.0840766429901123, "logits/rejected": -2.607940435409546, "logps/chosen": -181.1856231689453, "logps/rejected": -544.9891357421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.192407608032227, "rewards/margins": 9.44505500793457, "rewards/rejected": -19.637462615966797, "step": 17090 }, { "epoch": 2.66, "learning_rate": 1.6121022874630426e-06, "logits/chosen": -2.7527384757995605, "logits/rejected": -2.312825918197632, "logps/chosen": -304.4334716796875, "logps/rejected": -274.2926330566406, "loss": 0.0309, "rewards/accuracies": 1.0, "rewards/chosen": -6.601325988769531, "rewards/margins": 7.182132244110107, "rewards/rejected": -13.783458709716797, "step": 17091 }, { "epoch": 2.66, "learning_rate": 1.6113688469318949e-06, "logits/chosen": -2.304849147796631, "logits/rejected": -2.3942737579345703, "logps/chosen": -208.94091796875, "logps/rejected": -261.80224609375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -8.789619445800781, "rewards/margins": 5.428128242492676, "rewards/rejected": -14.217747688293457, "step": 17092 }, { "epoch": 2.66, "learning_rate": 1.610635406400747e-06, "logits/chosen": -2.327087640762329, "logits/rejected": -2.951265811920166, "logps/chosen": -395.96551513671875, "logps/rejected": -623.092041015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.233931541442871, "rewards/margins": 9.195690155029297, "rewards/rejected": -16.429622650146484, "step": 17093 }, { "epoch": 2.66, "learning_rate": 1.609901965869599e-06, "logits/chosen": -2.4085440635681152, "logits/rejected": -1.1240094900131226, "logps/chosen": -395.1148986816406, "logps/rejected": -262.9114990234375, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/chosen": -6.746914863586426, "rewards/margins": 5.653733730316162, "rewards/rejected": -12.40064811706543, "step": 17094 }, { "epoch": 2.66, "learning_rate": 1.6091685253384511e-06, "logits/chosen": -2.5419163703918457, "logits/rejected": -2.406524181365967, "logps/chosen": -216.86077880859375, "logps/rejected": -244.03622436523438, "loss": 0.2108, "rewards/accuracies": 1.0, "rewards/chosen": -8.550333023071289, "rewards/margins": 5.526699542999268, "rewards/rejected": -14.077033042907715, "step": 17095 }, { "epoch": 2.66, "learning_rate": 1.6084350848073034e-06, "logits/chosen": -2.0219054222106934, "logits/rejected": -2.7715511322021484, "logps/chosen": -87.78175354003906, "logps/rejected": -441.237548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.726812362670898, "rewards/margins": 12.689273834228516, "rewards/rejected": -19.416086196899414, "step": 17096 }, { "epoch": 2.66, "learning_rate": 1.6077016442761555e-06, "logits/chosen": -2.544459104537964, "logits/rejected": -2.5499937534332275, "logps/chosen": -456.5341796875, "logps/rejected": -450.2960205078125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.543653964996338, "rewards/margins": 9.847782135009766, "rewards/rejected": -16.391435623168945, "step": 17097 }, { "epoch": 2.66, "learning_rate": 1.6069682037450076e-06, "logits/chosen": -1.781331181526184, "logits/rejected": -2.406024694442749, "logps/chosen": -192.07797241210938, "logps/rejected": -347.20196533203125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.475381851196289, "rewards/margins": 8.798309326171875, "rewards/rejected": -16.273691177368164, "step": 17098 }, { "epoch": 2.66, "learning_rate": 1.6062347632138597e-06, "logits/chosen": -2.487673044204712, "logits/rejected": -2.7127060890197754, "logps/chosen": -222.33108520507812, "logps/rejected": -322.8884582519531, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.159275531768799, "rewards/margins": 8.83167839050293, "rewards/rejected": -12.99095344543457, "step": 17099 }, { "epoch": 2.66, "learning_rate": 1.6055013226827118e-06, "logits/chosen": -2.6095378398895264, "logits/rejected": -2.8814821243286133, "logps/chosen": -127.09672546386719, "logps/rejected": -301.4527587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7585396766662598, "rewards/margins": 12.100137710571289, "rewards/rejected": -15.85867691040039, "step": 17100 }, { "epoch": 2.66, "learning_rate": 1.6047678821515639e-06, "logits/chosen": -2.130178928375244, "logits/rejected": -2.3766720294952393, "logps/chosen": -210.57720947265625, "logps/rejected": -374.8035583496094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.261940956115723, "rewards/margins": 12.985118865966797, "rewards/rejected": -19.247058868408203, "step": 17101 }, { "epoch": 2.66, "learning_rate": 1.604034441620416e-06, "logits/chosen": -2.472799777984619, "logits/rejected": -2.334056854248047, "logps/chosen": -212.93136596679688, "logps/rejected": -474.6510314941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.227653503417969, "rewards/margins": 10.71954345703125, "rewards/rejected": -17.94719696044922, "step": 17102 }, { "epoch": 2.66, "learning_rate": 1.603301001089268e-06, "logits/chosen": -1.7786047458648682, "logits/rejected": -2.10711932182312, "logps/chosen": -246.82028198242188, "logps/rejected": -327.67315673828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.476934909820557, "rewards/margins": 8.357277870178223, "rewards/rejected": -14.834213256835938, "step": 17103 }, { "epoch": 2.66, "learning_rate": 1.6025675605581201e-06, "logits/chosen": -2.160349130630493, "logits/rejected": -2.554917097091675, "logps/chosen": -516.5017700195312, "logps/rejected": -627.4918212890625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.440512657165527, "rewards/margins": 9.235854148864746, "rewards/rejected": -13.676366806030273, "step": 17104 }, { "epoch": 2.66, "learning_rate": 1.6018341200269724e-06, "logits/chosen": -2.4499452114105225, "logits/rejected": -1.9188398122787476, "logps/chosen": -254.35177612304688, "logps/rejected": -338.794677734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.399781227111816, "rewards/margins": 7.778594493865967, "rewards/rejected": -13.178375244140625, "step": 17105 }, { "epoch": 2.66, "learning_rate": 1.6011006794958245e-06, "logits/chosen": -2.3670904636383057, "logits/rejected": -2.7621612548828125, "logps/chosen": -145.13613891601562, "logps/rejected": -352.23602294921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.407621383666992, "rewards/margins": 12.791116714477539, "rewards/rejected": -20.19873809814453, "step": 17106 }, { "epoch": 2.66, "learning_rate": 1.6003672389646766e-06, "logits/chosen": -0.9212031960487366, "logits/rejected": -2.468108654022217, "logps/chosen": -114.11761474609375, "logps/rejected": -390.8240051269531, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.327553749084473, "rewards/margins": 9.604717254638672, "rewards/rejected": -16.93227195739746, "step": 17107 }, { "epoch": 2.66, "learning_rate": 1.5996337984335287e-06, "logits/chosen": -2.2428159713745117, "logits/rejected": -2.802245616912842, "logps/chosen": -351.7940368652344, "logps/rejected": -536.576416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.150556564331055, "rewards/margins": 11.476825714111328, "rewards/rejected": -16.627382278442383, "step": 17108 }, { "epoch": 2.66, "learning_rate": 1.598900357902381e-06, "logits/chosen": -2.1868951320648193, "logits/rejected": -2.8001885414123535, "logps/chosen": -123.98681640625, "logps/rejected": -403.93621826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.16497802734375, "rewards/margins": 11.280376434326172, "rewards/rejected": -19.445354461669922, "step": 17109 }, { "epoch": 2.66, "learning_rate": 1.598166917371233e-06, "logits/chosen": -1.989107608795166, "logits/rejected": -2.6830668449401855, "logps/chosen": -260.21197509765625, "logps/rejected": -664.4689331054688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.327028751373291, "rewards/margins": 8.851444244384766, "rewards/rejected": -14.178472518920898, "step": 17110 }, { "epoch": 2.66, "learning_rate": 1.597433476840085e-06, "logits/chosen": -2.449131965637207, "logits/rejected": -2.675225257873535, "logps/chosen": -173.7332763671875, "logps/rejected": -357.19927978515625, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -11.560493469238281, "rewards/margins": 6.154597282409668, "rewards/rejected": -17.715091705322266, "step": 17111 }, { "epoch": 2.66, "learning_rate": 1.596700036308937e-06, "logits/chosen": -2.441164255142212, "logits/rejected": -2.618851661682129, "logps/chosen": -140.84640502929688, "logps/rejected": -385.3592834472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.299362659454346, "rewards/margins": 12.07778263092041, "rewards/rejected": -16.377145767211914, "step": 17112 }, { "epoch": 2.66, "learning_rate": 1.5959665957777894e-06, "logits/chosen": -2.4724233150482178, "logits/rejected": -2.9086034297943115, "logps/chosen": -186.72549438476562, "logps/rejected": -425.3843688964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4399375915527344, "rewards/margins": 13.712849617004395, "rewards/rejected": -16.152786254882812, "step": 17113 }, { "epoch": 2.66, "learning_rate": 1.5952331552466415e-06, "logits/chosen": -2.5524065494537354, "logits/rejected": -2.7483091354370117, "logps/chosen": -103.61166381835938, "logps/rejected": -265.536865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8932437896728516, "rewards/margins": 11.760783195495605, "rewards/rejected": -15.654026985168457, "step": 17114 }, { "epoch": 2.66, "learning_rate": 1.5944997147154936e-06, "logits/chosen": -2.3412601947784424, "logits/rejected": -2.7924561500549316, "logps/chosen": -332.53277587890625, "logps/rejected": -490.8114013671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.0347976684570312, "rewards/margins": 9.629598617553711, "rewards/rejected": -12.664396286010742, "step": 17115 }, { "epoch": 2.66, "learning_rate": 1.5937662741843457e-06, "logits/chosen": -2.7497572898864746, "logits/rejected": -2.018474817276001, "logps/chosen": -385.7314758300781, "logps/rejected": -353.41619873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.902444839477539, "rewards/margins": 9.227851867675781, "rewards/rejected": -16.13029670715332, "step": 17116 }, { "epoch": 2.66, "learning_rate": 1.593032833653198e-06, "logits/chosen": -1.9698171615600586, "logits/rejected": -2.673013925552368, "logps/chosen": -165.49322509765625, "logps/rejected": -548.14306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.64110803604126, "rewards/margins": 11.756837844848633, "rewards/rejected": -16.397945404052734, "step": 17117 }, { "epoch": 2.66, "learning_rate": 1.59229939312205e-06, "logits/chosen": -1.239991307258606, "logits/rejected": -2.248911142349243, "logps/chosen": -135.34616088867188, "logps/rejected": -579.8640747070312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.271085739135742, "rewards/margins": 16.396076202392578, "rewards/rejected": -23.667163848876953, "step": 17118 }, { "epoch": 2.66, "learning_rate": 1.5915659525909021e-06, "logits/chosen": -2.420398473739624, "logits/rejected": -2.5824477672576904, "logps/chosen": -199.48060607910156, "logps/rejected": -237.4579620361328, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.392544269561768, "rewards/margins": 8.87716007232666, "rewards/rejected": -15.269704818725586, "step": 17119 }, { "epoch": 2.66, "learning_rate": 1.590832512059754e-06, "logits/chosen": -2.5831387042999268, "logits/rejected": -3.036958694458008, "logps/chosen": -123.34031677246094, "logps/rejected": -310.35455322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.780447959899902, "rewards/margins": 10.360664367675781, "rewards/rejected": -18.14111328125, "step": 17120 }, { "epoch": 2.66, "learning_rate": 1.5900990715286063e-06, "logits/chosen": -2.175973653793335, "logits/rejected": -2.6634714603424072, "logps/chosen": -318.6313781738281, "logps/rejected": -388.47332763671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.958975791931152, "rewards/margins": 9.359683990478516, "rewards/rejected": -15.318660736083984, "step": 17121 }, { "epoch": 2.66, "learning_rate": 1.5893656309974584e-06, "logits/chosen": -2.0817456245422363, "logits/rejected": -2.753450393676758, "logps/chosen": -105.55123901367188, "logps/rejected": -518.7910766601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.506427764892578, "rewards/margins": 14.294209480285645, "rewards/rejected": -18.80063819885254, "step": 17122 }, { "epoch": 2.66, "learning_rate": 1.5886321904663105e-06, "logits/chosen": -1.7049508094787598, "logits/rejected": -2.7439076900482178, "logps/chosen": -269.44049072265625, "logps/rejected": -503.5588073730469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6240005493164062, "rewards/margins": 13.549033164978027, "rewards/rejected": -17.17303466796875, "step": 17123 }, { "epoch": 2.66, "learning_rate": 1.5878987499351626e-06, "logits/chosen": -2.44950008392334, "logits/rejected": -2.111039400100708, "logps/chosen": -184.0562744140625, "logps/rejected": -343.2936096191406, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -9.930853843688965, "rewards/margins": 9.366589546203613, "rewards/rejected": -19.297443389892578, "step": 17124 }, { "epoch": 2.66, "learning_rate": 1.5871653094040147e-06, "logits/chosen": -2.1101255416870117, "logits/rejected": -2.538238525390625, "logps/chosen": -357.353515625, "logps/rejected": -328.4557189941406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.957130432128906, "rewards/margins": 8.497884750366211, "rewards/rejected": -13.455015182495117, "step": 17125 }, { "epoch": 2.66, "learning_rate": 1.586431868872867e-06, "logits/chosen": -2.696956157684326, "logits/rejected": -2.4931576251983643, "logps/chosen": -190.350830078125, "logps/rejected": -385.64306640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.452262878417969, "rewards/margins": 8.644806861877441, "rewards/rejected": -16.097068786621094, "step": 17126 }, { "epoch": 2.66, "learning_rate": 1.585698428341719e-06, "logits/chosen": -3.0109517574310303, "logits/rejected": -2.517050266265869, "logps/chosen": -337.7158203125, "logps/rejected": -281.7474365234375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -9.737390518188477, "rewards/margins": 5.0285468101501465, "rewards/rejected": -14.765937805175781, "step": 17127 }, { "epoch": 2.66, "learning_rate": 1.5849649878105712e-06, "logits/chosen": -2.895714044570923, "logits/rejected": -2.8428843021392822, "logps/chosen": -254.70867919921875, "logps/rejected": -359.67327880859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.621687412261963, "rewards/margins": 10.094058990478516, "rewards/rejected": -15.71574592590332, "step": 17128 }, { "epoch": 2.66, "learning_rate": 1.584231547279423e-06, "logits/chosen": -2.379598379135132, "logits/rejected": -2.366633415222168, "logps/chosen": -248.21588134765625, "logps/rejected": -382.78424072265625, "loss": 0.0257, "rewards/accuracies": 1.0, "rewards/chosen": -10.321582794189453, "rewards/margins": 4.329891681671143, "rewards/rejected": -14.651474952697754, "step": 17129 }, { "epoch": 2.66, "learning_rate": 1.5834981067482753e-06, "logits/chosen": -2.3982481956481934, "logits/rejected": -2.7420332431793213, "logps/chosen": -204.4310302734375, "logps/rejected": -461.98651123046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.449856758117676, "rewards/margins": 11.481441497802734, "rewards/rejected": -20.931297302246094, "step": 17130 }, { "epoch": 2.66, "learning_rate": 1.5827646662171274e-06, "logits/chosen": -2.7476894855499268, "logits/rejected": -1.2635596990585327, "logps/chosen": -774.9354248046875, "logps/rejected": -661.7224731445312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.569116592407227, "rewards/margins": 9.594279289245605, "rewards/rejected": -16.163394927978516, "step": 17131 }, { "epoch": 2.66, "learning_rate": 1.5820312256859795e-06, "logits/chosen": -2.5919103622436523, "logits/rejected": -2.710122585296631, "logps/chosen": -200.16050720214844, "logps/rejected": -474.17974853515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.071378707885742, "rewards/margins": 7.953307151794434, "rewards/rejected": -13.024685859680176, "step": 17132 }, { "epoch": 2.66, "learning_rate": 1.5812977851548316e-06, "logits/chosen": -1.9321250915527344, "logits/rejected": -2.559253215789795, "logps/chosen": -93.49420166015625, "logps/rejected": -281.50628662109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.189825534820557, "rewards/margins": 8.7196044921875, "rewards/rejected": -13.909429550170898, "step": 17133 }, { "epoch": 2.66, "learning_rate": 1.580564344623684e-06, "logits/chosen": -2.543151617050171, "logits/rejected": -1.0478169918060303, "logps/chosen": -202.09112548828125, "logps/rejected": -191.73287963867188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.465125560760498, "rewards/margins": 8.56605052947998, "rewards/rejected": -13.03117561340332, "step": 17134 }, { "epoch": 2.66, "learning_rate": 1.579830904092536e-06, "logits/chosen": -1.609585165977478, "logits/rejected": -2.6177685260772705, "logps/chosen": -112.83676147460938, "logps/rejected": -589.8140869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.403048515319824, "rewards/margins": 13.462757110595703, "rewards/rejected": -20.865806579589844, "step": 17135 }, { "epoch": 2.67, "learning_rate": 1.579097463561388e-06, "logits/chosen": -0.8936829566955566, "logits/rejected": -2.5050883293151855, "logps/chosen": -135.84230041503906, "logps/rejected": -387.6817932128906, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -7.067843437194824, "rewards/margins": 7.326998233795166, "rewards/rejected": -14.394842147827148, "step": 17136 }, { "epoch": 2.67, "learning_rate": 1.5783640230302402e-06, "logits/chosen": -1.8281511068344116, "logits/rejected": -2.452352285385132, "logps/chosen": -276.973388671875, "logps/rejected": -402.7566833496094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.055444717407227, "rewards/margins": 8.725057601928711, "rewards/rejected": -15.780502319335938, "step": 17137 }, { "epoch": 2.67, "learning_rate": 1.5776305824990925e-06, "logits/chosen": -2.152869462966919, "logits/rejected": -3.0486032962799072, "logps/chosen": -93.75286865234375, "logps/rejected": -447.4267272949219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.555542469024658, "rewards/margins": 11.444145202636719, "rewards/rejected": -15.999688148498535, "step": 17138 }, { "epoch": 2.67, "learning_rate": 1.5768971419679444e-06, "logits/chosen": -2.420656442642212, "logits/rejected": -2.063804864883423, "logps/chosen": -378.09075927734375, "logps/rejected": -439.0145263671875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -2.1646156311035156, "rewards/margins": 7.168676376342773, "rewards/rejected": -9.333292007446289, "step": 17139 }, { "epoch": 2.67, "learning_rate": 1.5761637014367964e-06, "logits/chosen": -2.282099962234497, "logits/rejected": -2.6662323474884033, "logps/chosen": -199.19741821289062, "logps/rejected": -421.232421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.813473701477051, "rewards/margins": 9.79636001586914, "rewards/rejected": -15.609834671020508, "step": 17140 }, { "epoch": 2.67, "learning_rate": 1.5754302609056485e-06, "logits/chosen": -1.2747200727462769, "logits/rejected": -2.6741552352905273, "logps/chosen": -100.28396606445312, "logps/rejected": -579.164794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.644055366516113, "rewards/margins": 15.769184112548828, "rewards/rejected": -22.413238525390625, "step": 17141 }, { "epoch": 2.67, "learning_rate": 1.5746968203745008e-06, "logits/chosen": -2.4206881523132324, "logits/rejected": -2.8835387229919434, "logps/chosen": -330.23162841796875, "logps/rejected": -677.6460571289062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.615283489227295, "rewards/margins": 12.172117233276367, "rewards/rejected": -17.78740119934082, "step": 17142 }, { "epoch": 2.67, "learning_rate": 1.573963379843353e-06, "logits/chosen": -3.1996140480041504, "logits/rejected": -3.0332629680633545, "logps/chosen": -121.37571716308594, "logps/rejected": -159.24533081054688, "loss": 0.3925, "rewards/accuracies": 0.5, "rewards/chosen": -7.434543609619141, "rewards/margins": 4.702503204345703, "rewards/rejected": -12.137046813964844, "step": 17143 }, { "epoch": 2.67, "learning_rate": 1.573229939312205e-06, "logits/chosen": -1.3835304975509644, "logits/rejected": -2.7635719776153564, "logps/chosen": -166.20404052734375, "logps/rejected": -780.9812622070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.95084285736084, "rewards/margins": 12.200113296508789, "rewards/rejected": -19.150955200195312, "step": 17144 }, { "epoch": 2.67, "learning_rate": 1.5724964987810571e-06, "logits/chosen": -2.4348957538604736, "logits/rejected": -2.5431039333343506, "logps/chosen": -377.1542053222656, "logps/rejected": -458.7996826171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.229962348937988, "rewards/margins": 10.214485168457031, "rewards/rejected": -16.444446563720703, "step": 17145 }, { "epoch": 2.67, "learning_rate": 1.5717630582499092e-06, "logits/chosen": -2.570204734802246, "logits/rejected": -2.64994478225708, "logps/chosen": -175.2238311767578, "logps/rejected": -366.3286437988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.281314849853516, "rewards/margins": 11.996700286865234, "rewards/rejected": -18.27801513671875, "step": 17146 }, { "epoch": 2.67, "learning_rate": 1.5710296177187615e-06, "logits/chosen": -2.673290252685547, "logits/rejected": -2.283478260040283, "logps/chosen": -763.3277587890625, "logps/rejected": -644.924072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.248376846313477, "rewards/margins": 10.72240161895752, "rewards/rejected": -17.970779418945312, "step": 17147 }, { "epoch": 2.67, "learning_rate": 1.5702961771876136e-06, "logits/chosen": -2.626863479614258, "logits/rejected": -3.053309202194214, "logps/chosen": -80.8260726928711, "logps/rejected": -427.8707580566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.63380241394043, "rewards/margins": 11.342327117919922, "rewards/rejected": -16.97612953186035, "step": 17148 }, { "epoch": 2.67, "learning_rate": 1.5695627366564655e-06, "logits/chosen": -1.97859787940979, "logits/rejected": -2.5064661502838135, "logps/chosen": -106.69197845458984, "logps/rejected": -254.5743865966797, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.796713829040527, "rewards/margins": 9.107253074645996, "rewards/rejected": -16.903966903686523, "step": 17149 }, { "epoch": 2.67, "learning_rate": 1.5688292961253176e-06, "logits/chosen": -2.536141872406006, "logits/rejected": -2.5300142765045166, "logps/chosen": -841.561767578125, "logps/rejected": -828.1153564453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.086918830871582, "rewards/margins": 8.391181945800781, "rewards/rejected": -18.47810173034668, "step": 17150 }, { "epoch": 2.67, "learning_rate": 1.5680958555941699e-06, "logits/chosen": -2.5949525833129883, "logits/rejected": -1.2251311540603638, "logps/chosen": -593.4081420898438, "logps/rejected": -391.166259765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.589334011077881, "rewards/margins": 11.601964950561523, "rewards/rejected": -15.191299438476562, "step": 17151 }, { "epoch": 2.67, "learning_rate": 1.567362415063022e-06, "logits/chosen": -2.3177261352539062, "logits/rejected": -2.1587727069854736, "logps/chosen": -157.6920928955078, "logps/rejected": -387.0921630859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.24747371673584, "rewards/margins": 15.98573112487793, "rewards/rejected": -20.233203887939453, "step": 17152 }, { "epoch": 2.67, "learning_rate": 1.566628974531874e-06, "logits/chosen": -2.422116279602051, "logits/rejected": -2.614598035812378, "logps/chosen": -366.12030029296875, "logps/rejected": -414.9903564453125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -10.122428894042969, "rewards/margins": 9.619661331176758, "rewards/rejected": -19.742088317871094, "step": 17153 }, { "epoch": 2.67, "learning_rate": 1.5658955340007261e-06, "logits/chosen": -2.6667654514312744, "logits/rejected": -2.7500951290130615, "logps/chosen": -157.15228271484375, "logps/rejected": -313.69232177734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.9154558181762695, "rewards/margins": 7.4950103759765625, "rewards/rejected": -15.410467147827148, "step": 17154 }, { "epoch": 2.67, "learning_rate": 1.5651620934695784e-06, "logits/chosen": -2.524306297302246, "logits/rejected": -2.218564748764038, "logps/chosen": -175.38931274414062, "logps/rejected": -307.8247375488281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.673354148864746, "rewards/margins": 9.1122407913208, "rewards/rejected": -13.785594940185547, "step": 17155 }, { "epoch": 2.67, "learning_rate": 1.5644286529384305e-06, "logits/chosen": -2.44148850440979, "logits/rejected": -1.6426455974578857, "logps/chosen": -538.6846923828125, "logps/rejected": -421.73846435546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -10.90585708618164, "rewards/margins": 7.858692169189453, "rewards/rejected": -18.764549255371094, "step": 17156 }, { "epoch": 2.67, "learning_rate": 1.5636952124072826e-06, "logits/chosen": -2.1173975467681885, "logits/rejected": -2.4997780323028564, "logps/chosen": -242.13047790527344, "logps/rejected": -525.7411499023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.433474063873291, "rewards/margins": 13.55457592010498, "rewards/rejected": -17.988048553466797, "step": 17157 }, { "epoch": 2.67, "learning_rate": 1.5629617718761347e-06, "logits/chosen": -2.8438663482666016, "logits/rejected": -2.4378867149353027, "logps/chosen": -211.2776641845703, "logps/rejected": -231.29327392578125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.6034770011901855, "rewards/margins": 7.590180397033691, "rewards/rejected": -11.193657875061035, "step": 17158 }, { "epoch": 2.67, "learning_rate": 1.5622283313449868e-06, "logits/chosen": -1.2196416854858398, "logits/rejected": -2.2345614433288574, "logps/chosen": -424.599365234375, "logps/rejected": -683.09765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.521087646484375, "rewards/margins": 14.685347557067871, "rewards/rejected": -20.206436157226562, "step": 17159 }, { "epoch": 2.67, "learning_rate": 1.5614948908138389e-06, "logits/chosen": -2.9231414794921875, "logits/rejected": -2.7988760471343994, "logps/chosen": -112.339111328125, "logps/rejected": -254.53207397460938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.281280994415283, "rewards/margins": 12.639552116394043, "rewards/rejected": -15.920833587646484, "step": 17160 }, { "epoch": 2.67, "learning_rate": 1.560761450282691e-06, "logits/chosen": -1.761162281036377, "logits/rejected": -2.6266367435455322, "logps/chosen": -100.84268188476562, "logps/rejected": -385.45013427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.224843978881836, "rewards/margins": 11.668680191040039, "rewards/rejected": -16.893524169921875, "step": 17161 }, { "epoch": 2.67, "learning_rate": 1.560028009751543e-06, "logits/chosen": -2.5285096168518066, "logits/rejected": -2.9237265586853027, "logps/chosen": -1123.630126953125, "logps/rejected": -1118.447265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.951611518859863, "rewards/margins": 13.80505657196045, "rewards/rejected": -19.756668090820312, "step": 17162 }, { "epoch": 2.67, "learning_rate": 1.5592945692203952e-06, "logits/chosen": -2.975552797317505, "logits/rejected": -1.688556432723999, "logps/chosen": -193.1986846923828, "logps/rejected": -85.10831451416016, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -3.9477744102478027, "rewards/margins": 3.1583595275878906, "rewards/rejected": -7.106133937835693, "step": 17163 }, { "epoch": 2.67, "learning_rate": 1.5585611286892475e-06, "logits/chosen": -2.7802319526672363, "logits/rejected": -3.112630605697632, "logps/chosen": -168.05923461914062, "logps/rejected": -363.2619934082031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.596733093261719, "rewards/margins": 9.258249282836914, "rewards/rejected": -17.854982376098633, "step": 17164 }, { "epoch": 2.67, "learning_rate": 1.5578276881580995e-06, "logits/chosen": -2.701026678085327, "logits/rejected": -2.607442855834961, "logps/chosen": -398.750732421875, "logps/rejected": -445.6476135253906, "loss": 0.124, "rewards/accuracies": 1.0, "rewards/chosen": -9.320535659790039, "rewards/margins": 6.588118076324463, "rewards/rejected": -15.908653259277344, "step": 17165 }, { "epoch": 2.67, "learning_rate": 1.5570942476269516e-06, "logits/chosen": -2.516221046447754, "logits/rejected": -2.6258816719055176, "logps/chosen": -225.88233947753906, "logps/rejected": -420.0387268066406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.9356770515441895, "rewards/margins": 8.93545150756836, "rewards/rejected": -16.87112808227539, "step": 17166 }, { "epoch": 2.67, "learning_rate": 1.5563608070958037e-06, "logits/chosen": -1.8123737573623657, "logits/rejected": -2.6422760486602783, "logps/chosen": -252.96054077148438, "logps/rejected": -481.70867919921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.436143398284912, "rewards/margins": 9.672895431518555, "rewards/rejected": -14.109039306640625, "step": 17167 }, { "epoch": 2.67, "learning_rate": 1.555627366564656e-06, "logits/chosen": -1.4935564994812012, "logits/rejected": -2.6013686656951904, "logps/chosen": -110.54769134521484, "logps/rejected": -373.9638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.446976661682129, "rewards/margins": 11.735713958740234, "rewards/rejected": -17.182689666748047, "step": 17168 }, { "epoch": 2.67, "learning_rate": 1.554893926033508e-06, "logits/chosen": -2.9275832176208496, "logits/rejected": -2.037168264389038, "logps/chosen": -278.31793212890625, "logps/rejected": -257.89971923828125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.408095836639404, "rewards/margins": 7.91533899307251, "rewards/rejected": -14.323434829711914, "step": 17169 }, { "epoch": 2.67, "learning_rate": 1.55416048550236e-06, "logits/chosen": -0.7038978338241577, "logits/rejected": -1.385452389717102, "logps/chosen": -165.25433349609375, "logps/rejected": -454.5308837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.948139667510986, "rewards/margins": 13.966094970703125, "rewards/rejected": -18.914234161376953, "step": 17170 }, { "epoch": 2.67, "learning_rate": 1.553427044971212e-06, "logits/chosen": -3.043940544128418, "logits/rejected": -3.152669668197632, "logps/chosen": -100.39094543457031, "logps/rejected": -262.2785949707031, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.329957008361816, "rewards/margins": 8.309305191040039, "rewards/rejected": -12.639262199401855, "step": 17171 }, { "epoch": 2.67, "learning_rate": 1.5526936044400644e-06, "logits/chosen": -2.3641746044158936, "logits/rejected": -2.4651124477386475, "logps/chosen": -603.58203125, "logps/rejected": -633.319091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.501796007156372, "rewards/margins": 11.512130737304688, "rewards/rejected": -15.01392650604248, "step": 17172 }, { "epoch": 2.67, "learning_rate": 1.5519601639089165e-06, "logits/chosen": -2.4902119636535645, "logits/rejected": -1.3214691877365112, "logps/chosen": -300.6251220703125, "logps/rejected": -225.24969482421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9829649925231934, "rewards/margins": 8.665644645690918, "rewards/rejected": -12.648609161376953, "step": 17173 }, { "epoch": 2.67, "learning_rate": 1.5512267233777686e-06, "logits/chosen": -2.628154993057251, "logits/rejected": -2.8676059246063232, "logps/chosen": -129.4054412841797, "logps/rejected": -285.60302734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.6231584548950195, "rewards/margins": 10.430230140686035, "rewards/rejected": -17.053388595581055, "step": 17174 }, { "epoch": 2.67, "learning_rate": 1.5504932828466207e-06, "logits/chosen": -1.8880287408828735, "logits/rejected": -2.6938862800598145, "logps/chosen": -105.72887420654297, "logps/rejected": -378.24310302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.691714286804199, "rewards/margins": 10.705442428588867, "rewards/rejected": -16.39715576171875, "step": 17175 }, { "epoch": 2.67, "learning_rate": 1.549759842315473e-06, "logits/chosen": -1.594114899635315, "logits/rejected": -2.6211507320404053, "logps/chosen": -203.57882690429688, "logps/rejected": -388.9692687988281, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.712839603424072, "rewards/margins": 8.686214447021484, "rewards/rejected": -16.39905548095703, "step": 17176 }, { "epoch": 2.67, "learning_rate": 1.549026401784325e-06, "logits/chosen": -2.3831725120544434, "logits/rejected": -1.6643176078796387, "logps/chosen": -267.10552978515625, "logps/rejected": -437.6347351074219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.73386287689209, "rewards/margins": 13.749876022338867, "rewards/rejected": -20.48373794555664, "step": 17177 }, { "epoch": 2.67, "learning_rate": 1.5482929612531771e-06, "logits/chosen": -3.1096272468566895, "logits/rejected": -2.9644906520843506, "logps/chosen": -861.550048828125, "logps/rejected": -599.3063354492188, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -7.242319107055664, "rewards/margins": 5.798127174377441, "rewards/rejected": -13.040446281433105, "step": 17178 }, { "epoch": 2.67, "learning_rate": 1.547559520722029e-06, "logits/chosen": -1.6528518199920654, "logits/rejected": -2.6714868545532227, "logps/chosen": -162.35186767578125, "logps/rejected": -371.2072448730469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.494416236877441, "rewards/margins": 8.318021774291992, "rewards/rejected": -16.81243896484375, "step": 17179 }, { "epoch": 2.67, "learning_rate": 1.5468260801908813e-06, "logits/chosen": -2.2824578285217285, "logits/rejected": -2.2349252700805664, "logps/chosen": -382.521240234375, "logps/rejected": -431.3145751953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.719417572021484, "rewards/margins": 9.266678810119629, "rewards/rejected": -16.98609733581543, "step": 17180 }, { "epoch": 2.67, "learning_rate": 1.5460926396597334e-06, "logits/chosen": -2.6035799980163574, "logits/rejected": -2.4991109371185303, "logps/chosen": -673.9026489257812, "logps/rejected": -703.053466796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.073139190673828, "rewards/margins": 9.522664070129395, "rewards/rejected": -16.59580421447754, "step": 17181 }, { "epoch": 2.67, "learning_rate": 1.5453591991285855e-06, "logits/chosen": -2.5184977054595947, "logits/rejected": -2.7670772075653076, "logps/chosen": -185.1546630859375, "logps/rejected": -295.7214050292969, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -6.567169666290283, "rewards/margins": 7.120739936828613, "rewards/rejected": -13.687909126281738, "step": 17182 }, { "epoch": 2.67, "learning_rate": 1.5446257585974376e-06, "logits/chosen": -2.492722511291504, "logits/rejected": -2.8284482955932617, "logps/chosen": -79.25865173339844, "logps/rejected": -240.15728759765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.466551303863525, "rewards/margins": 7.764682769775391, "rewards/rejected": -14.231233596801758, "step": 17183 }, { "epoch": 2.67, "learning_rate": 1.5438923180662897e-06, "logits/chosen": -1.5355106592178345, "logits/rejected": -2.4553263187408447, "logps/chosen": -104.12161254882812, "logps/rejected": -261.20721435546875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -7.632743835449219, "rewards/margins": 6.2367048263549805, "rewards/rejected": -13.869449615478516, "step": 17184 }, { "epoch": 2.67, "learning_rate": 1.543158877535142e-06, "logits/chosen": -2.6844470500946045, "logits/rejected": -1.7874171733856201, "logps/chosen": -303.7064514160156, "logps/rejected": -232.26092529296875, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -4.6540117263793945, "rewards/margins": 7.330417156219482, "rewards/rejected": -11.984428405761719, "step": 17185 }, { "epoch": 2.67, "learning_rate": 1.542425437003994e-06, "logits/chosen": -2.209787607192993, "logits/rejected": -2.700406074523926, "logps/chosen": -516.2166748046875, "logps/rejected": -730.8670654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.750071048736572, "rewards/margins": 10.551227569580078, "rewards/rejected": -17.301300048828125, "step": 17186 }, { "epoch": 2.67, "learning_rate": 1.5416919964728462e-06, "logits/chosen": -1.9010488986968994, "logits/rejected": -2.6652863025665283, "logps/chosen": -174.3724365234375, "logps/rejected": -494.16973876953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.558033466339111, "rewards/margins": 8.559118270874023, "rewards/rejected": -16.117151260375977, "step": 17187 }, { "epoch": 2.67, "learning_rate": 1.540958555941698e-06, "logits/chosen": -2.978665590286255, "logits/rejected": -2.5013813972473145, "logps/chosen": -945.5977783203125, "logps/rejected": -878.800048828125, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -8.880340576171875, "rewards/margins": 7.787170886993408, "rewards/rejected": -16.667510986328125, "step": 17188 }, { "epoch": 2.67, "learning_rate": 1.5402251154105503e-06, "logits/chosen": -2.223909616470337, "logits/rejected": -2.6664886474609375, "logps/chosen": -137.122314453125, "logps/rejected": -278.1182861328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.545388221740723, "rewards/margins": 7.505882740020752, "rewards/rejected": -15.051271438598633, "step": 17189 }, { "epoch": 2.67, "learning_rate": 1.5394916748794024e-06, "logits/chosen": -1.7166551351547241, "logits/rejected": -2.5171704292297363, "logps/chosen": -155.96434020996094, "logps/rejected": -327.3538513183594, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -9.257417678833008, "rewards/margins": 7.71437931060791, "rewards/rejected": -16.971797943115234, "step": 17190 }, { "epoch": 2.67, "learning_rate": 1.5387582343482545e-06, "logits/chosen": -2.1268742084503174, "logits/rejected": -2.486818790435791, "logps/chosen": -79.38855743408203, "logps/rejected": -384.603271484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.430834770202637, "rewards/margins": 10.163019180297852, "rewards/rejected": -15.593854904174805, "step": 17191 }, { "epoch": 2.67, "learning_rate": 1.5380247938171066e-06, "logits/chosen": -1.4131708145141602, "logits/rejected": -2.565363645553589, "logps/chosen": -118.96852111816406, "logps/rejected": -355.1983642578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.088298797607422, "rewards/margins": 10.296266555786133, "rewards/rejected": -15.384565353393555, "step": 17192 }, { "epoch": 2.67, "learning_rate": 1.537291353285959e-06, "logits/chosen": -2.4967496395111084, "logits/rejected": -2.705326795578003, "logps/chosen": -171.11175537109375, "logps/rejected": -345.08416748046875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -7.76965856552124, "rewards/margins": 8.860904693603516, "rewards/rejected": -16.63056182861328, "step": 17193 }, { "epoch": 2.67, "learning_rate": 1.536557912754811e-06, "logits/chosen": -2.8051090240478516, "logits/rejected": -1.6556833982467651, "logps/chosen": -567.0296630859375, "logps/rejected": -536.86572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.427995681762695, "rewards/margins": 12.238121032714844, "rewards/rejected": -19.66611671447754, "step": 17194 }, { "epoch": 2.67, "learning_rate": 1.535824472223663e-06, "logits/chosen": -2.6658763885498047, "logits/rejected": -1.618591547012329, "logps/chosen": -290.6292724609375, "logps/rejected": -160.25161743164062, "loss": 0.5869, "rewards/accuracies": 0.5, "rewards/chosen": -5.523441314697266, "rewards/margins": 3.5636231899261475, "rewards/rejected": -9.087064743041992, "step": 17195 }, { "epoch": 2.67, "learning_rate": 1.5350910316925152e-06, "logits/chosen": -2.450026750564575, "logits/rejected": -1.80909264087677, "logps/chosen": -234.9620361328125, "logps/rejected": -263.3727722167969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.986834526062012, "rewards/margins": 10.190884590148926, "rewards/rejected": -17.177719116210938, "step": 17196 }, { "epoch": 2.67, "learning_rate": 1.5343575911613675e-06, "logits/chosen": -2.2825276851654053, "logits/rejected": -2.733509063720703, "logps/chosen": -261.4804992675781, "logps/rejected": -491.5775146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8471784591674805, "rewards/margins": 13.356498718261719, "rewards/rejected": -20.203676223754883, "step": 17197 }, { "epoch": 2.67, "learning_rate": 1.5336241506302194e-06, "logits/chosen": -2.831415891647339, "logits/rejected": -2.3095273971557617, "logps/chosen": -201.48159790039062, "logps/rejected": -197.38961791992188, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.308563232421875, "rewards/margins": 7.7927398681640625, "rewards/rejected": -12.101303100585938, "step": 17198 }, { "epoch": 2.67, "learning_rate": 1.5328907100990715e-06, "logits/chosen": -1.816686749458313, "logits/rejected": -2.3585762977600098, "logps/chosen": -190.69224548339844, "logps/rejected": -415.68243408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.719590663909912, "rewards/margins": 15.095211029052734, "rewards/rejected": -19.814802169799805, "step": 17199 }, { "epoch": 2.67, "learning_rate": 1.5321572695679235e-06, "logits/chosen": -2.591679334640503, "logits/rejected": -2.808345317840576, "logps/chosen": -134.40216064453125, "logps/rejected": -352.6925964355469, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -9.40921401977539, "rewards/margins": 8.623716354370117, "rewards/rejected": -18.032930374145508, "step": 17200 }, { "epoch": 2.68, "learning_rate": 1.5314238290367758e-06, "logits/chosen": -2.584376573562622, "logits/rejected": -2.656825065612793, "logps/chosen": -533.8893432617188, "logps/rejected": -558.5624389648438, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.439854621887207, "rewards/margins": 12.375581741333008, "rewards/rejected": -20.81543731689453, "step": 17201 }, { "epoch": 2.68, "learning_rate": 1.530690388505628e-06, "logits/chosen": -2.6102330684661865, "logits/rejected": -1.6359351873397827, "logps/chosen": -273.9289245605469, "logps/rejected": -295.1690673828125, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -3.6050102710723877, "rewards/margins": 6.580954551696777, "rewards/rejected": -10.185964584350586, "step": 17202 }, { "epoch": 2.68, "learning_rate": 1.52995694797448e-06, "logits/chosen": -2.3537588119506836, "logits/rejected": -1.6900554895401, "logps/chosen": -250.76663208007812, "logps/rejected": -370.2984313964844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.558871269226074, "rewards/margins": 10.102632522583008, "rewards/rejected": -14.661502838134766, "step": 17203 }, { "epoch": 2.68, "learning_rate": 1.5292235074433321e-06, "logits/chosen": -1.3383444547653198, "logits/rejected": -2.374614953994751, "logps/chosen": -226.85389709472656, "logps/rejected": -354.3197021484375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -7.34105110168457, "rewards/margins": 8.482845306396484, "rewards/rejected": -15.823896408081055, "step": 17204 }, { "epoch": 2.68, "learning_rate": 1.5284900669121842e-06, "logits/chosen": -2.20424747467041, "logits/rejected": -2.528341770172119, "logps/chosen": -275.4420471191406, "logps/rejected": -374.237060546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.663742065429688, "rewards/margins": 8.315614700317383, "rewards/rejected": -16.97935676574707, "step": 17205 }, { "epoch": 2.68, "learning_rate": 1.5277566263810365e-06, "logits/chosen": -2.177294969558716, "logits/rejected": -2.6066718101501465, "logps/chosen": -171.5892791748047, "logps/rejected": -584.737548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.067226409912109, "rewards/margins": 14.249078750610352, "rewards/rejected": -20.31630516052246, "step": 17206 }, { "epoch": 2.68, "learning_rate": 1.5270231858498886e-06, "logits/chosen": -1.8617990016937256, "logits/rejected": -1.125606656074524, "logps/chosen": -930.089599609375, "logps/rejected": -452.6383056640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.8534626960754395, "rewards/margins": 14.306817054748535, "rewards/rejected": -19.160280227661133, "step": 17207 }, { "epoch": 2.68, "learning_rate": 1.5262897453187405e-06, "logits/chosen": -2.1882548332214355, "logits/rejected": -2.858819007873535, "logps/chosen": -336.63079833984375, "logps/rejected": -527.8052368164062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.567028999328613, "rewards/margins": 11.771885871887207, "rewards/rejected": -17.33891487121582, "step": 17208 }, { "epoch": 2.68, "learning_rate": 1.5255563047875926e-06, "logits/chosen": -2.777662754058838, "logits/rejected": -1.8668044805526733, "logps/chosen": -212.54043579101562, "logps/rejected": -225.11334228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.910299062728882, "rewards/margins": 9.97870922088623, "rewards/rejected": -12.889008522033691, "step": 17209 }, { "epoch": 2.68, "learning_rate": 1.5248228642564449e-06, "logits/chosen": -2.414803981781006, "logits/rejected": -1.8047243356704712, "logps/chosen": -317.6561279296875, "logps/rejected": -455.6049499511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.057613372802734, "rewards/margins": 12.676508903503418, "rewards/rejected": -17.73412322998047, "step": 17210 }, { "epoch": 2.68, "learning_rate": 1.524089423725297e-06, "logits/chosen": -2.643988609313965, "logits/rejected": -2.842377185821533, "logps/chosen": -285.44293212890625, "logps/rejected": -400.3900146484375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.309150695800781, "rewards/margins": 8.6135892868042, "rewards/rejected": -13.92273998260498, "step": 17211 }, { "epoch": 2.68, "learning_rate": 1.523355983194149e-06, "logits/chosen": -2.227860450744629, "logits/rejected": -2.5332279205322266, "logps/chosen": -371.05255126953125, "logps/rejected": -488.00872802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.122044563293457, "rewards/margins": 15.648612022399902, "rewards/rejected": -20.77065658569336, "step": 17212 }, { "epoch": 2.68, "learning_rate": 1.5226225426630011e-06, "logits/chosen": -2.1958301067352295, "logits/rejected": -2.7608890533447266, "logps/chosen": -266.968505859375, "logps/rejected": -471.7651672363281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.82371711730957, "rewards/margins": 10.841808319091797, "rewards/rejected": -20.665525436401367, "step": 17213 }, { "epoch": 2.68, "learning_rate": 1.5218891021318534e-06, "logits/chosen": -1.4431201219558716, "logits/rejected": -2.4526050090789795, "logps/chosen": -117.1048812866211, "logps/rejected": -252.1143341064453, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -7.414482116699219, "rewards/margins": 7.15486478805542, "rewards/rejected": -14.569347381591797, "step": 17214 }, { "epoch": 2.68, "learning_rate": 1.5211556616007055e-06, "logits/chosen": -2.8526875972747803, "logits/rejected": -3.0260846614837646, "logps/chosen": -101.43394470214844, "logps/rejected": -373.5164794921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.0650787353515625, "rewards/margins": 11.300762176513672, "rewards/rejected": -17.365840911865234, "step": 17215 }, { "epoch": 2.68, "learning_rate": 1.5204222210695576e-06, "logits/chosen": -2.785353183746338, "logits/rejected": -2.552647113800049, "logps/chosen": -235.55284118652344, "logps/rejected": -297.9297180175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.368729591369629, "rewards/margins": 9.767082214355469, "rewards/rejected": -17.135812759399414, "step": 17216 }, { "epoch": 2.68, "learning_rate": 1.5196887805384097e-06, "logits/chosen": -1.434393286705017, "logits/rejected": -2.3670732975006104, "logps/chosen": -126.18232727050781, "logps/rejected": -279.302734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.05308198928833, "rewards/margins": 9.419707298278809, "rewards/rejected": -16.472789764404297, "step": 17217 }, { "epoch": 2.68, "learning_rate": 1.5189553400072618e-06, "logits/chosen": -1.9751871824264526, "logits/rejected": -2.4624030590057373, "logps/chosen": -344.4765625, "logps/rejected": -315.70281982421875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.668917655944824, "rewards/margins": 6.2109375, "rewards/rejected": -11.87985610961914, "step": 17218 }, { "epoch": 2.68, "learning_rate": 1.5182218994761139e-06, "logits/chosen": -2.5101521015167236, "logits/rejected": -2.69020676612854, "logps/chosen": -143.76870727539062, "logps/rejected": -225.15379333496094, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -4.91060209274292, "rewards/margins": 5.6588568687438965, "rewards/rejected": -10.569458961486816, "step": 17219 }, { "epoch": 2.68, "learning_rate": 1.517488458944966e-06, "logits/chosen": -2.6792702674865723, "logits/rejected": -2.739212989807129, "logps/chosen": -108.703369140625, "logps/rejected": -203.87818908691406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.241143226623535, "rewards/margins": 11.085103988647461, "rewards/rejected": -16.326248168945312, "step": 17220 }, { "epoch": 2.68, "learning_rate": 1.516755018413818e-06, "logits/chosen": -2.789245367050171, "logits/rejected": -3.1309285163879395, "logps/chosen": -241.9066619873047, "logps/rejected": -362.8739318847656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.945945739746094, "rewards/margins": 9.517483711242676, "rewards/rejected": -17.463430404663086, "step": 17221 }, { "epoch": 2.68, "learning_rate": 1.5160215778826704e-06, "logits/chosen": -2.8025524616241455, "logits/rejected": -2.756037473678589, "logps/chosen": -329.7491149902344, "logps/rejected": -390.54400634765625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.115531921386719, "rewards/margins": 10.353407859802246, "rewards/rejected": -17.46894073486328, "step": 17222 }, { "epoch": 2.68, "learning_rate": 1.5152881373515225e-06, "logits/chosen": -2.361107110977173, "logits/rejected": -2.5686655044555664, "logps/chosen": -142.46690368652344, "logps/rejected": -368.02801513671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.270529747009277, "rewards/margins": 9.383941650390625, "rewards/rejected": -17.65447235107422, "step": 17223 }, { "epoch": 2.68, "learning_rate": 1.5145546968203746e-06, "logits/chosen": -2.1597743034362793, "logits/rejected": -2.571584939956665, "logps/chosen": -712.1215209960938, "logps/rejected": -630.598876953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.225044250488281, "rewards/margins": 8.213237762451172, "rewards/rejected": -16.438282012939453, "step": 17224 }, { "epoch": 2.68, "learning_rate": 1.5138212562892266e-06, "logits/chosen": -2.563493013381958, "logits/rejected": -2.520250082015991, "logps/chosen": -243.5611572265625, "logps/rejected": -458.57391357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3645997047424316, "rewards/margins": 11.586360931396484, "rewards/rejected": -14.950960159301758, "step": 17225 }, { "epoch": 2.68, "learning_rate": 1.5130878157580787e-06, "logits/chosen": -2.4072554111480713, "logits/rejected": -1.438956618309021, "logps/chosen": -501.8514404296875, "logps/rejected": -362.67938232421875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -5.956310272216797, "rewards/margins": 9.257699012756348, "rewards/rejected": -15.214010238647461, "step": 17226 }, { "epoch": 2.68, "learning_rate": 1.512354375226931e-06, "logits/chosen": -2.216362953186035, "logits/rejected": -2.9310381412506104, "logps/chosen": -76.13741302490234, "logps/rejected": -315.22760009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.541732311248779, "rewards/margins": 12.041330337524414, "rewards/rejected": -17.58306312561035, "step": 17227 }, { "epoch": 2.68, "learning_rate": 1.511620934695783e-06, "logits/chosen": -2.9671406745910645, "logits/rejected": -3.0849826335906982, "logps/chosen": -86.52057647705078, "logps/rejected": -190.37591552734375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.172927379608154, "rewards/margins": 7.2243547439575195, "rewards/rejected": -14.397281646728516, "step": 17228 }, { "epoch": 2.68, "learning_rate": 1.510887494164635e-06, "logits/chosen": -2.392329454421997, "logits/rejected": -3.0318379402160645, "logps/chosen": -118.57977294921875, "logps/rejected": -398.78338623046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.288492679595947, "rewards/margins": 9.820699691772461, "rewards/rejected": -15.10919189453125, "step": 17229 }, { "epoch": 2.68, "learning_rate": 1.510154053633487e-06, "logits/chosen": -1.5356831550598145, "logits/rejected": -2.6730687618255615, "logps/chosen": -242.70346069335938, "logps/rejected": -527.5679321289062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.166323184967041, "rewards/margins": 9.668986320495605, "rewards/rejected": -15.835309982299805, "step": 17230 }, { "epoch": 2.68, "learning_rate": 1.5094206131023394e-06, "logits/chosen": -2.789449453353882, "logits/rejected": -3.0730929374694824, "logps/chosen": -132.98883056640625, "logps/rejected": -271.6431884765625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.901538848876953, "rewards/margins": 8.50777816772461, "rewards/rejected": -14.409317016601562, "step": 17231 }, { "epoch": 2.68, "learning_rate": 1.5086871725711915e-06, "logits/chosen": -1.8557647466659546, "logits/rejected": -2.9425406455993652, "logps/chosen": -156.79989624023438, "logps/rejected": -473.6592102050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.1442818641662598, "rewards/margins": 13.289751052856445, "rewards/rejected": -16.434032440185547, "step": 17232 }, { "epoch": 2.68, "learning_rate": 1.5079537320400436e-06, "logits/chosen": -2.8293635845184326, "logits/rejected": -2.2756645679473877, "logps/chosen": -283.289306640625, "logps/rejected": -200.80941772460938, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.099244117736816, "rewards/margins": 6.7403950691223145, "rewards/rejected": -12.839639663696289, "step": 17233 }, { "epoch": 2.68, "learning_rate": 1.5072202915088957e-06, "logits/chosen": -2.690034866333008, "logits/rejected": -2.573735237121582, "logps/chosen": -490.2044982910156, "logps/rejected": -486.0834655761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.28614616394043, "rewards/margins": 10.49970817565918, "rewards/rejected": -19.78585433959961, "step": 17234 }, { "epoch": 2.68, "learning_rate": 1.506486850977748e-06, "logits/chosen": -2.674372673034668, "logits/rejected": -2.593073606491089, "logps/chosen": -310.0707702636719, "logps/rejected": -280.1068115234375, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -8.115926742553711, "rewards/margins": 5.179068088531494, "rewards/rejected": -13.294995307922363, "step": 17235 }, { "epoch": 2.68, "learning_rate": 1.5057534104466e-06, "logits/chosen": -1.5819077491760254, "logits/rejected": -2.6634342670440674, "logps/chosen": -248.40835571289062, "logps/rejected": -542.4967041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.284638404846191, "rewards/margins": 12.518031120300293, "rewards/rejected": -18.802669525146484, "step": 17236 }, { "epoch": 2.68, "learning_rate": 1.5050199699154521e-06, "logits/chosen": -1.9028154611587524, "logits/rejected": -2.9561166763305664, "logps/chosen": -184.83212280273438, "logps/rejected": -498.488037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.729201316833496, "rewards/margins": 9.386219024658203, "rewards/rejected": -20.115421295166016, "step": 17237 }, { "epoch": 2.68, "learning_rate": 1.504286529384304e-06, "logits/chosen": -2.504056453704834, "logits/rejected": -2.952162027359009, "logps/chosen": -155.84593200683594, "logps/rejected": -291.47161865234375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -7.797961235046387, "rewards/margins": 6.38564395904541, "rewards/rejected": -14.183605194091797, "step": 17238 }, { "epoch": 2.68, "learning_rate": 1.5035530888531563e-06, "logits/chosen": -1.4645580053329468, "logits/rejected": -2.4437458515167236, "logps/chosen": -363.08489990234375, "logps/rejected": -717.5191650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.753863334655762, "rewards/margins": 13.527914047241211, "rewards/rejected": -22.281776428222656, "step": 17239 }, { "epoch": 2.68, "learning_rate": 1.5028196483220084e-06, "logits/chosen": -2.448636293411255, "logits/rejected": -2.2259938716888428, "logps/chosen": -151.3173828125, "logps/rejected": -402.8699951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.355697154998779, "rewards/margins": 15.765953063964844, "rewards/rejected": -20.12165069580078, "step": 17240 }, { "epoch": 2.68, "learning_rate": 1.5020862077908605e-06, "logits/chosen": -1.484583854675293, "logits/rejected": -2.413489818572998, "logps/chosen": -288.11590576171875, "logps/rejected": -586.881103515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -9.988540649414062, "rewards/margins": 9.355487823486328, "rewards/rejected": -19.34402847290039, "step": 17241 }, { "epoch": 2.68, "learning_rate": 1.5013527672597126e-06, "logits/chosen": -2.6871726512908936, "logits/rejected": -1.3432276248931885, "logps/chosen": -578.0477294921875, "logps/rejected": -274.3410339355469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6071426868438721, "rewards/margins": 12.198501586914062, "rewards/rejected": -12.805643081665039, "step": 17242 }, { "epoch": 2.68, "learning_rate": 1.500619326728565e-06, "logits/chosen": -0.8815553188323975, "logits/rejected": -1.614292860031128, "logps/chosen": -136.05581665039062, "logps/rejected": -389.2972717285156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.485681056976318, "rewards/margins": 9.508023262023926, "rewards/rejected": -14.993703842163086, "step": 17243 }, { "epoch": 2.68, "learning_rate": 1.499885886197417e-06, "logits/chosen": -1.3662341833114624, "logits/rejected": -2.6172916889190674, "logps/chosen": -164.3162841796875, "logps/rejected": -595.503662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.473767280578613, "rewards/margins": 17.118305206298828, "rewards/rejected": -24.592073440551758, "step": 17244 }, { "epoch": 2.68, "learning_rate": 1.499152445666269e-06, "logits/chosen": -2.5829062461853027, "logits/rejected": -1.6422152519226074, "logps/chosen": -246.81893920898438, "logps/rejected": -220.87876892089844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.85575008392334, "rewards/margins": 9.262779235839844, "rewards/rejected": -15.118528366088867, "step": 17245 }, { "epoch": 2.68, "learning_rate": 1.4984190051351212e-06, "logits/chosen": -2.6106934547424316, "logits/rejected": -1.9804986715316772, "logps/chosen": -303.48651123046875, "logps/rejected": -329.62091064453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.553002834320068, "rewards/margins": 10.387918472290039, "rewards/rejected": -15.940921783447266, "step": 17246 }, { "epoch": 2.68, "learning_rate": 1.4976855646039733e-06, "logits/chosen": -2.5254650115966797, "logits/rejected": -3.2011334896087646, "logps/chosen": -96.5245132446289, "logps/rejected": -309.80755615234375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -7.512282371520996, "rewards/margins": 7.475808620452881, "rewards/rejected": -14.988090515136719, "step": 17247 }, { "epoch": 2.68, "learning_rate": 1.4969521240728253e-06, "logits/chosen": -2.6940500736236572, "logits/rejected": -2.7074310779571533, "logps/chosen": -144.98886108398438, "logps/rejected": -201.84539794921875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -7.087348461151123, "rewards/margins": 7.720174789428711, "rewards/rejected": -14.807523727416992, "step": 17248 }, { "epoch": 2.68, "learning_rate": 1.4962186835416774e-06, "logits/chosen": -2.548595666885376, "logits/rejected": -3.168548107147217, "logps/chosen": -186.0821533203125, "logps/rejected": -478.0086669921875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.063902854919434, "rewards/margins": 10.49048137664795, "rewards/rejected": -18.554384231567383, "step": 17249 }, { "epoch": 2.68, "learning_rate": 1.4954852430105295e-06, "logits/chosen": -0.8728668093681335, "logits/rejected": -2.3495447635650635, "logps/chosen": -132.36795043945312, "logps/rejected": -439.1151123046875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -7.522671699523926, "rewards/margins": 8.83677864074707, "rewards/rejected": -16.35944938659668, "step": 17250 }, { "epoch": 2.68, "learning_rate": 1.4947518024793816e-06, "logits/chosen": -2.3901588916778564, "logits/rejected": -2.4631617069244385, "logps/chosen": -211.9679718017578, "logps/rejected": -371.8854675292969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.145739555358887, "rewards/margins": 10.058860778808594, "rewards/rejected": -15.204599380493164, "step": 17251 }, { "epoch": 2.68, "learning_rate": 1.494018361948234e-06, "logits/chosen": -1.9054969549179077, "logits/rejected": -2.5252673625946045, "logps/chosen": -246.3085174560547, "logps/rejected": -356.7697448730469, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -10.479731559753418, "rewards/margins": 8.579544067382812, "rewards/rejected": -19.059276580810547, "step": 17252 }, { "epoch": 2.68, "learning_rate": 1.493284921417086e-06, "logits/chosen": -2.4217886924743652, "logits/rejected": -2.6359140872955322, "logps/chosen": -111.08834838867188, "logps/rejected": -438.2820739746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.732187271118164, "rewards/margins": 15.247822761535645, "rewards/rejected": -23.980009078979492, "step": 17253 }, { "epoch": 2.68, "learning_rate": 1.492551480885938e-06, "logits/chosen": -1.1820265054702759, "logits/rejected": -2.4818716049194336, "logps/chosen": -118.3084487915039, "logps/rejected": -386.92193603515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.060344696044922, "rewards/margins": 8.139387130737305, "rewards/rejected": -15.199731826782227, "step": 17254 }, { "epoch": 2.68, "learning_rate": 1.4918180403547902e-06, "logits/chosen": -1.6957647800445557, "logits/rejected": -2.6456797122955322, "logps/chosen": -153.86074829101562, "logps/rejected": -511.7414855957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.462911605834961, "rewards/margins": 13.116828918457031, "rewards/rejected": -19.579740524291992, "step": 17255 }, { "epoch": 2.68, "learning_rate": 1.4910845998236425e-06, "logits/chosen": -2.3269827365875244, "logits/rejected": -1.7441096305847168, "logps/chosen": -199.99659729003906, "logps/rejected": -329.6568603515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.159393310546875, "rewards/margins": 8.883246421813965, "rewards/rejected": -18.042640686035156, "step": 17256 }, { "epoch": 2.68, "learning_rate": 1.4903511592924944e-06, "logits/chosen": -1.85908043384552, "logits/rejected": -2.5579488277435303, "logps/chosen": -148.10890197753906, "logps/rejected": -318.2611389160156, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.113450050354004, "rewards/margins": 10.067099571228027, "rewards/rejected": -16.18054962158203, "step": 17257 }, { "epoch": 2.68, "learning_rate": 1.4896177187613465e-06, "logits/chosen": -2.6824147701263428, "logits/rejected": -2.924060344696045, "logps/chosen": -225.1711883544922, "logps/rejected": -413.28363037109375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -9.112756729125977, "rewards/margins": 8.267375946044922, "rewards/rejected": -17.380130767822266, "step": 17258 }, { "epoch": 2.68, "learning_rate": 1.4888842782301986e-06, "logits/chosen": -2.8370473384857178, "logits/rejected": -2.900668144226074, "logps/chosen": -160.71600341796875, "logps/rejected": -231.88186645507812, "loss": 0.0246, "rewards/accuracies": 1.0, "rewards/chosen": -5.197381019592285, "rewards/margins": 9.281450271606445, "rewards/rejected": -14.47883129119873, "step": 17259 }, { "epoch": 2.68, "learning_rate": 1.4881508376990509e-06, "logits/chosen": -2.6396100521087646, "logits/rejected": -2.670172691345215, "logps/chosen": -231.1935272216797, "logps/rejected": -326.0989685058594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.983865261077881, "rewards/margins": 8.930150985717773, "rewards/rejected": -15.914016723632812, "step": 17260 }, { "epoch": 2.68, "learning_rate": 1.487417397167903e-06, "logits/chosen": -1.9492417573928833, "logits/rejected": -2.8529486656188965, "logps/chosen": -96.83380126953125, "logps/rejected": -329.601806640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.419276237487793, "rewards/margins": 9.88325309753418, "rewards/rejected": -16.30253028869629, "step": 17261 }, { "epoch": 2.68, "learning_rate": 1.486683956636755e-06, "logits/chosen": -2.30102276802063, "logits/rejected": -2.6400833129882812, "logps/chosen": -810.3609008789062, "logps/rejected": -821.8956298828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.390522003173828, "rewards/margins": 12.356209754943848, "rewards/rejected": -17.746732711791992, "step": 17262 }, { "epoch": 2.68, "learning_rate": 1.4859505161056071e-06, "logits/chosen": -1.4637439250946045, "logits/rejected": -2.7908849716186523, "logps/chosen": -89.23513793945312, "logps/rejected": -335.842041015625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -5.093262672424316, "rewards/margins": 9.253543853759766, "rewards/rejected": -14.346806526184082, "step": 17263 }, { "epoch": 2.68, "learning_rate": 1.4852170755744592e-06, "logits/chosen": -2.2361578941345215, "logits/rejected": -2.676515817642212, "logps/chosen": -93.1411361694336, "logps/rejected": -279.15948486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.361630439758301, "rewards/margins": 10.37687873840332, "rewards/rejected": -13.738508224487305, "step": 17264 }, { "epoch": 2.69, "learning_rate": 1.4844836350433115e-06, "logits/chosen": -2.149279832839966, "logits/rejected": -2.6030242443084717, "logps/chosen": -129.83189392089844, "logps/rejected": -330.6534729003906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.506515026092529, "rewards/margins": 6.997356414794922, "rewards/rejected": -13.50387191772461, "step": 17265 }, { "epoch": 2.69, "learning_rate": 1.4837501945121636e-06, "logits/chosen": -2.6342153549194336, "logits/rejected": -2.904168128967285, "logps/chosen": -148.20382690429688, "logps/rejected": -332.75189208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.157357215881348, "rewards/margins": 10.270901679992676, "rewards/rejected": -15.428258895874023, "step": 17266 }, { "epoch": 2.69, "learning_rate": 1.4830167539810155e-06, "logits/chosen": -2.622112512588501, "logits/rejected": -2.572763681411743, "logps/chosen": -187.35255432128906, "logps/rejected": -226.023681640625, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -6.093410491943359, "rewards/margins": 7.046929359436035, "rewards/rejected": -13.140340805053711, "step": 17267 }, { "epoch": 2.69, "learning_rate": 1.4822833134498676e-06, "logits/chosen": -1.4101409912109375, "logits/rejected": -2.5069448947906494, "logps/chosen": -263.7814025878906, "logps/rejected": -389.7337951660156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.677316665649414, "rewards/margins": 8.159229278564453, "rewards/rejected": -16.836545944213867, "step": 17268 }, { "epoch": 2.69, "learning_rate": 1.4815498729187199e-06, "logits/chosen": -2.682068109512329, "logits/rejected": -2.9354782104492188, "logps/chosen": -257.2190246582031, "logps/rejected": -471.7021484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.105766296386719, "rewards/margins": 9.887933731079102, "rewards/rejected": -17.99370002746582, "step": 17269 }, { "epoch": 2.69, "learning_rate": 1.480816432387572e-06, "logits/chosen": -2.6806418895721436, "logits/rejected": -2.7759928703308105, "logps/chosen": -186.0533905029297, "logps/rejected": -336.968017578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.210408687591553, "rewards/margins": 9.79855728149414, "rewards/rejected": -16.00896644592285, "step": 17270 }, { "epoch": 2.69, "learning_rate": 1.480082991856424e-06, "logits/chosen": -2.661242723464966, "logits/rejected": -2.554161310195923, "logps/chosen": -435.3404541015625, "logps/rejected": -672.42822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8994646072387695, "rewards/margins": 12.234273910522461, "rewards/rejected": -19.133737564086914, "step": 17271 }, { "epoch": 2.69, "learning_rate": 1.4793495513252761e-06, "logits/chosen": -1.127447485923767, "logits/rejected": -2.475717306137085, "logps/chosen": -140.50860595703125, "logps/rejected": -369.262939453125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -7.435161113739014, "rewards/margins": 10.808767318725586, "rewards/rejected": -18.243928909301758, "step": 17272 }, { "epoch": 2.69, "learning_rate": 1.4786161107941284e-06, "logits/chosen": -2.3194234371185303, "logits/rejected": -3.0757105350494385, "logps/chosen": -101.62926483154297, "logps/rejected": -426.350830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.015188217163086, "rewards/margins": 13.702544212341309, "rewards/rejected": -18.717731475830078, "step": 17273 }, { "epoch": 2.69, "learning_rate": 1.4778826702629805e-06, "logits/chosen": -2.4126758575439453, "logits/rejected": -2.9453248977661133, "logps/chosen": -150.9363555908203, "logps/rejected": -458.0299072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.764679908752441, "rewards/margins": 12.151227951049805, "rewards/rejected": -19.915908813476562, "step": 17274 }, { "epoch": 2.69, "learning_rate": 1.4771492297318326e-06, "logits/chosen": -2.708526134490967, "logits/rejected": -2.3558735847473145, "logps/chosen": -568.2835693359375, "logps/rejected": -394.9180908203125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -9.54095458984375, "rewards/margins": 7.552716255187988, "rewards/rejected": -17.093671798706055, "step": 17275 }, { "epoch": 2.69, "learning_rate": 1.4764157892006847e-06, "logits/chosen": -2.318793773651123, "logits/rejected": -2.6100575923919678, "logps/chosen": -145.66465759277344, "logps/rejected": -368.068359375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -7.440687656402588, "rewards/margins": 7.909511089324951, "rewards/rejected": -15.350198745727539, "step": 17276 }, { "epoch": 2.69, "learning_rate": 1.4756823486695368e-06, "logits/chosen": -1.7558469772338867, "logits/rejected": -2.7526798248291016, "logps/chosen": -129.19241333007812, "logps/rejected": -228.60780334472656, "loss": 0.4324, "rewards/accuracies": 0.5, "rewards/chosen": -7.072885990142822, "rewards/margins": 5.1762166023254395, "rewards/rejected": -12.249102592468262, "step": 17277 }, { "epoch": 2.69, "learning_rate": 1.474948908138389e-06, "logits/chosen": -2.488619089126587, "logits/rejected": -2.64981746673584, "logps/chosen": -115.52102661132812, "logps/rejected": -283.1259765625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -4.715514183044434, "rewards/margins": 8.033796310424805, "rewards/rejected": -12.749311447143555, "step": 17278 }, { "epoch": 2.69, "learning_rate": 1.474215467607241e-06, "logits/chosen": -2.9592034816741943, "logits/rejected": -2.058504104614258, "logps/chosen": -272.21295166015625, "logps/rejected": -192.36846923828125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.050841808319092, "rewards/margins": 7.896401405334473, "rewards/rejected": -11.947242736816406, "step": 17279 }, { "epoch": 2.69, "learning_rate": 1.473482027076093e-06, "logits/chosen": -1.522637128829956, "logits/rejected": -2.3217992782592773, "logps/chosen": -159.16842651367188, "logps/rejected": -601.4266357421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.753776550292969, "rewards/margins": 12.14697265625, "rewards/rejected": -19.90074920654297, "step": 17280 }, { "epoch": 2.69, "learning_rate": 1.4727485865449454e-06, "logits/chosen": -2.712716817855835, "logits/rejected": -2.6143698692321777, "logps/chosen": -185.4732666015625, "logps/rejected": -282.6259765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -4.8991899490356445, "rewards/margins": 7.980018615722656, "rewards/rejected": -12.8792085647583, "step": 17281 }, { "epoch": 2.69, "learning_rate": 1.4720151460137975e-06, "logits/chosen": -1.1071070432662964, "logits/rejected": -2.3246753215789795, "logps/chosen": -137.85560607910156, "logps/rejected": -427.853759765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.111837387084961, "rewards/margins": 9.424844741821289, "rewards/rejected": -15.53668212890625, "step": 17282 }, { "epoch": 2.69, "learning_rate": 1.4712817054826496e-06, "logits/chosen": -1.7917531728744507, "logits/rejected": -2.3294947147369385, "logps/chosen": -197.40516662597656, "logps/rejected": -312.79815673828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.826497077941895, "rewards/margins": 8.597525596618652, "rewards/rejected": -17.424022674560547, "step": 17283 }, { "epoch": 2.69, "learning_rate": 1.4705482649515016e-06, "logits/chosen": -2.626804828643799, "logits/rejected": -2.6549949645996094, "logps/chosen": -789.4776000976562, "logps/rejected": -623.6962890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.16102933883667, "rewards/margins": 9.678556442260742, "rewards/rejected": -16.839584350585938, "step": 17284 }, { "epoch": 2.69, "learning_rate": 1.4698148244203537e-06, "logits/chosen": -2.6832754611968994, "logits/rejected": -3.0242345333099365, "logps/chosen": -71.592041015625, "logps/rejected": -248.124267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.084171772003174, "rewards/margins": 10.728130340576172, "rewards/rejected": -15.812301635742188, "step": 17285 }, { "epoch": 2.69, "learning_rate": 1.469081383889206e-06, "logits/chosen": -1.0740474462509155, "logits/rejected": -1.5712004899978638, "logps/chosen": -281.722412109375, "logps/rejected": -332.01654052734375, "loss": 0.6016, "rewards/accuracies": 0.5, "rewards/chosen": -8.72834587097168, "rewards/margins": 7.17448091506958, "rewards/rejected": -15.902826309204102, "step": 17286 }, { "epoch": 2.69, "learning_rate": 1.468347943358058e-06, "logits/chosen": -2.517883777618408, "logits/rejected": -2.1838955879211426, "logps/chosen": -245.9713134765625, "logps/rejected": -316.2611389160156, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -6.513944625854492, "rewards/margins": 7.405684947967529, "rewards/rejected": -13.91963005065918, "step": 17287 }, { "epoch": 2.69, "learning_rate": 1.46761450282691e-06, "logits/chosen": -2.131453275680542, "logits/rejected": -2.6777260303497314, "logps/chosen": -465.8509521484375, "logps/rejected": -515.8904418945312, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.059408664703369, "rewards/margins": 7.747367858886719, "rewards/rejected": -12.80677604675293, "step": 17288 }, { "epoch": 2.69, "learning_rate": 1.466881062295762e-06, "logits/chosen": -1.9149614572525024, "logits/rejected": -2.772528886795044, "logps/chosen": -248.9271240234375, "logps/rejected": -473.8630065917969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.034672737121582, "rewards/margins": 8.427471160888672, "rewards/rejected": -17.462142944335938, "step": 17289 }, { "epoch": 2.69, "learning_rate": 1.4661476217646144e-06, "logits/chosen": -1.3608733415603638, "logits/rejected": -2.392681121826172, "logps/chosen": -77.97279357910156, "logps/rejected": -313.0543212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.548392295837402, "rewards/margins": 9.359851837158203, "rewards/rejected": -14.908244132995605, "step": 17290 }, { "epoch": 2.69, "learning_rate": 1.4654141812334665e-06, "logits/chosen": -2.2940356731414795, "logits/rejected": -2.7741048336029053, "logps/chosen": -203.67752075195312, "logps/rejected": -524.1991577148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.360914707183838, "rewards/margins": 14.820234298706055, "rewards/rejected": -22.181148529052734, "step": 17291 }, { "epoch": 2.69, "learning_rate": 1.4646807407023186e-06, "logits/chosen": -1.920097827911377, "logits/rejected": -2.841620922088623, "logps/chosen": -248.283935546875, "logps/rejected": -446.49755859375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -8.285757064819336, "rewards/margins": 8.240226745605469, "rewards/rejected": -16.525981903076172, "step": 17292 }, { "epoch": 2.69, "learning_rate": 1.4639473001711707e-06, "logits/chosen": -1.6074519157409668, "logits/rejected": -2.630373477935791, "logps/chosen": -163.6212158203125, "logps/rejected": -389.8916015625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.5531463623046875, "rewards/margins": 7.287872314453125, "rewards/rejected": -14.841018676757812, "step": 17293 }, { "epoch": 2.69, "learning_rate": 1.463213859640023e-06, "logits/chosen": -2.6826868057250977, "logits/rejected": -2.0733022689819336, "logps/chosen": -224.08132934570312, "logps/rejected": -370.39678955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.631516456604004, "rewards/margins": 14.53345775604248, "rewards/rejected": -19.164974212646484, "step": 17294 }, { "epoch": 2.69, "learning_rate": 1.462480419108875e-06, "logits/chosen": -2.193875789642334, "logits/rejected": -2.950615406036377, "logps/chosen": -101.21229553222656, "logps/rejected": -355.57427978515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.564640998840332, "rewards/margins": 11.54306697845459, "rewards/rejected": -17.107707977294922, "step": 17295 }, { "epoch": 2.69, "learning_rate": 1.4617469785777272e-06, "logits/chosen": -1.30209481716156, "logits/rejected": -2.1534340381622314, "logps/chosen": -191.81021118164062, "logps/rejected": -461.65380859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.938813209533691, "rewards/margins": 10.864185333251953, "rewards/rejected": -18.802997589111328, "step": 17296 }, { "epoch": 2.69, "learning_rate": 1.461013538046579e-06, "logits/chosen": -2.4316153526306152, "logits/rejected": -1.6275752782821655, "logps/chosen": -502.87322998046875, "logps/rejected": -358.3099365234375, "loss": 0.4826, "rewards/accuracies": 0.5, "rewards/chosen": -8.97344970703125, "rewards/margins": 6.3786139488220215, "rewards/rejected": -15.352063179016113, "step": 17297 }, { "epoch": 2.69, "learning_rate": 1.4602800975154313e-06, "logits/chosen": -2.5068986415863037, "logits/rejected": -2.1568706035614014, "logps/chosen": -949.6796264648438, "logps/rejected": -910.3341674804688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.930253982543945, "rewards/margins": 12.249570846557617, "rewards/rejected": -22.179824829101562, "step": 17298 }, { "epoch": 2.69, "learning_rate": 1.4595466569842834e-06, "logits/chosen": -2.0387964248657227, "logits/rejected": -2.215877056121826, "logps/chosen": -174.96368408203125, "logps/rejected": -243.1876220703125, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -4.29750919342041, "rewards/margins": 6.072822093963623, "rewards/rejected": -10.370331764221191, "step": 17299 }, { "epoch": 2.69, "learning_rate": 1.4588132164531355e-06, "logits/chosen": -2.12443470954895, "logits/rejected": -2.3547801971435547, "logps/chosen": -202.92530822753906, "logps/rejected": -220.68605041503906, "loss": 1.3042, "rewards/accuracies": 0.5, "rewards/chosen": -11.15152359008789, "rewards/margins": 3.0010390281677246, "rewards/rejected": -14.152562141418457, "step": 17300 }, { "epoch": 2.69, "learning_rate": 1.4580797759219876e-06, "logits/chosen": -2.628420114517212, "logits/rejected": -1.7456005811691284, "logps/chosen": -288.70196533203125, "logps/rejected": -198.52940368652344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.663552284240723, "rewards/margins": 8.199485778808594, "rewards/rejected": -12.863038063049316, "step": 17301 }, { "epoch": 2.69, "learning_rate": 1.45734633539084e-06, "logits/chosen": -2.785020589828491, "logits/rejected": -1.639024019241333, "logps/chosen": -533.547119140625, "logps/rejected": -349.74346923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3110687732696533, "rewards/margins": 11.705216407775879, "rewards/rejected": -14.016284942626953, "step": 17302 }, { "epoch": 2.69, "learning_rate": 1.456612894859692e-06, "logits/chosen": -2.4678664207458496, "logits/rejected": -2.7825987339019775, "logps/chosen": -551.6218872070312, "logps/rejected": -694.3765869140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.457937240600586, "rewards/margins": 9.200521469116211, "rewards/rejected": -15.658458709716797, "step": 17303 }, { "epoch": 2.69, "learning_rate": 1.455879454328544e-06, "logits/chosen": -2.538728952407837, "logits/rejected": -2.8616116046905518, "logps/chosen": -63.016441345214844, "logps/rejected": -179.4328155517578, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -3.966794967651367, "rewards/margins": 5.6077752113342285, "rewards/rejected": -9.574569702148438, "step": 17304 }, { "epoch": 2.69, "learning_rate": 1.4551460137973962e-06, "logits/chosen": -2.8128862380981445, "logits/rejected": -2.6387646198272705, "logps/chosen": -189.2395782470703, "logps/rejected": -201.14637756347656, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.000740051269531, "rewards/margins": 7.730436325073242, "rewards/rejected": -15.731176376342773, "step": 17305 }, { "epoch": 2.69, "learning_rate": 1.4544125732662483e-06, "logits/chosen": -2.6837124824523926, "logits/rejected": -2.227550506591797, "logps/chosen": -340.9692077636719, "logps/rejected": -398.5087585449219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.525685787200928, "rewards/margins": 9.079830169677734, "rewards/rejected": -13.60551643371582, "step": 17306 }, { "epoch": 2.69, "learning_rate": 1.4536791327351004e-06, "logits/chosen": -1.7875648736953735, "logits/rejected": -2.13984751701355, "logps/chosen": -142.0301513671875, "logps/rejected": -408.31304931640625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.298377513885498, "rewards/margins": 12.420365333557129, "rewards/rejected": -18.71874237060547, "step": 17307 }, { "epoch": 2.69, "learning_rate": 1.4529456922039524e-06, "logits/chosen": -2.639723539352417, "logits/rejected": -2.942253351211548, "logps/chosen": -420.18017578125, "logps/rejected": -568.2315673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9734086990356445, "rewards/margins": 13.382121086120605, "rewards/rejected": -17.35552978515625, "step": 17308 }, { "epoch": 2.69, "learning_rate": 1.4522122516728045e-06, "logits/chosen": -2.821570634841919, "logits/rejected": -1.2844181060791016, "logps/chosen": -918.31494140625, "logps/rejected": -475.36541748046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.599673748016357, "rewards/margins": 12.395196914672852, "rewards/rejected": -19.994871139526367, "step": 17309 }, { "epoch": 2.69, "learning_rate": 1.4514788111416566e-06, "logits/chosen": -2.3243720531463623, "logits/rejected": -2.878613233566284, "logps/chosen": -85.58261108398438, "logps/rejected": -233.5229949951172, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.04913854598999, "rewards/margins": 7.157449245452881, "rewards/rejected": -13.206587791442871, "step": 17310 }, { "epoch": 2.69, "learning_rate": 1.450745370610509e-06, "logits/chosen": -2.789400577545166, "logits/rejected": -2.871872663497925, "logps/chosen": -221.13900756835938, "logps/rejected": -312.944580078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.473238945007324, "rewards/margins": 8.095539093017578, "rewards/rejected": -14.568777084350586, "step": 17311 }, { "epoch": 2.69, "learning_rate": 1.450011930079361e-06, "logits/chosen": -2.662585973739624, "logits/rejected": -2.3436081409454346, "logps/chosen": -588.2446899414062, "logps/rejected": -493.5994567871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.404054641723633, "rewards/margins": 12.850879669189453, "rewards/rejected": -15.254934310913086, "step": 17312 }, { "epoch": 2.69, "learning_rate": 1.4492784895482131e-06, "logits/chosen": -2.48856258392334, "logits/rejected": -2.7614283561706543, "logps/chosen": -161.531982421875, "logps/rejected": -341.926025390625, "loss": 0.1141, "rewards/accuracies": 1.0, "rewards/chosen": -7.7644853591918945, "rewards/margins": 8.903251647949219, "rewards/rejected": -16.66773796081543, "step": 17313 }, { "epoch": 2.69, "learning_rate": 1.4485450490170652e-06, "logits/chosen": -2.310267210006714, "logits/rejected": -2.781193494796753, "logps/chosen": -436.29046630859375, "logps/rejected": -817.8780517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.556949138641357, "rewards/margins": 18.064861297607422, "rewards/rejected": -24.621810913085938, "step": 17314 }, { "epoch": 2.69, "learning_rate": 1.4478116084859175e-06, "logits/chosen": -1.737664818763733, "logits/rejected": -2.954509735107422, "logps/chosen": -254.93075561523438, "logps/rejected": -455.91552734375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -7.915655612945557, "rewards/margins": 7.5822906494140625, "rewards/rejected": -15.497945785522461, "step": 17315 }, { "epoch": 2.69, "learning_rate": 1.4470781679547694e-06, "logits/chosen": -1.8689918518066406, "logits/rejected": -2.9516453742980957, "logps/chosen": -160.2848358154297, "logps/rejected": -428.2242431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.911567687988281, "rewards/margins": 11.792041778564453, "rewards/rejected": -18.703609466552734, "step": 17316 }, { "epoch": 2.69, "learning_rate": 1.4463447274236215e-06, "logits/chosen": -2.437143325805664, "logits/rejected": -2.6399946212768555, "logps/chosen": -635.36669921875, "logps/rejected": -682.1568603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.82819938659668, "rewards/margins": 11.997909545898438, "rewards/rejected": -18.826108932495117, "step": 17317 }, { "epoch": 2.69, "learning_rate": 1.4456112868924736e-06, "logits/chosen": -1.6314371824264526, "logits/rejected": -2.6123456954956055, "logps/chosen": -235.81910705566406, "logps/rejected": -445.1460266113281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.79526424407959, "rewards/margins": 8.439138412475586, "rewards/rejected": -15.234402656555176, "step": 17318 }, { "epoch": 2.69, "learning_rate": 1.4448778463613259e-06, "logits/chosen": -2.697150468826294, "logits/rejected": -2.7785561084747314, "logps/chosen": -171.05918884277344, "logps/rejected": -300.3553466796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.891439437866211, "rewards/margins": 11.977883338928223, "rewards/rejected": -17.86932373046875, "step": 17319 }, { "epoch": 2.69, "learning_rate": 1.444144405830178e-06, "logits/chosen": -2.503800630569458, "logits/rejected": -1.5627599954605103, "logps/chosen": -268.15435791015625, "logps/rejected": -296.364501953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.832869529724121, "rewards/margins": 8.535783767700195, "rewards/rejected": -16.368654251098633, "step": 17320 }, { "epoch": 2.69, "learning_rate": 1.44341096529903e-06, "logits/chosen": -1.6019153594970703, "logits/rejected": -2.6755247116088867, "logps/chosen": -207.71583557128906, "logps/rejected": -348.0118408203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.762988567352295, "rewards/margins": 9.846565246582031, "rewards/rejected": -14.609553337097168, "step": 17321 }, { "epoch": 2.69, "learning_rate": 1.4426775247678821e-06, "logits/chosen": -2.3095178604125977, "logits/rejected": -2.807096481323242, "logps/chosen": -223.91943359375, "logps/rejected": -499.344482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.859064102172852, "rewards/margins": 14.494203567504883, "rewards/rejected": -19.353267669677734, "step": 17322 }, { "epoch": 2.69, "learning_rate": 1.4419440842367344e-06, "logits/chosen": -2.3983371257781982, "logits/rejected": -2.726388454437256, "logps/chosen": -198.57432556152344, "logps/rejected": -363.1451721191406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.267448425292969, "rewards/margins": 8.928287506103516, "rewards/rejected": -15.195735931396484, "step": 17323 }, { "epoch": 2.69, "learning_rate": 1.4412106437055865e-06, "logits/chosen": -2.6618471145629883, "logits/rejected": -2.606043577194214, "logps/chosen": -279.96728515625, "logps/rejected": -251.51942443847656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5093326568603516, "rewards/margins": 10.560604095458984, "rewards/rejected": -14.069936752319336, "step": 17324 }, { "epoch": 2.69, "learning_rate": 1.4404772031744386e-06, "logits/chosen": -2.0847134590148926, "logits/rejected": -2.548072099685669, "logps/chosen": -235.845947265625, "logps/rejected": -318.89825439453125, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -8.627138137817383, "rewards/margins": 8.139552116394043, "rewards/rejected": -16.76668930053711, "step": 17325 }, { "epoch": 2.69, "learning_rate": 1.4397437626432905e-06, "logits/chosen": -2.937986135482788, "logits/rejected": -2.7170515060424805, "logps/chosen": -222.86770629882812, "logps/rejected": -333.2826232910156, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -6.067147254943848, "rewards/margins": 9.182408332824707, "rewards/rejected": -15.249555587768555, "step": 17326 }, { "epoch": 2.69, "learning_rate": 1.4390103221121428e-06, "logits/chosen": -2.3733694553375244, "logits/rejected": -2.2946014404296875, "logps/chosen": -522.801513671875, "logps/rejected": -517.6630859375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -6.955256462097168, "rewards/margins": 8.694649696350098, "rewards/rejected": -15.649906158447266, "step": 17327 }, { "epoch": 2.69, "learning_rate": 1.4382768815809949e-06, "logits/chosen": -2.3465559482574463, "logits/rejected": -2.902796506881714, "logps/chosen": -413.3585205078125, "logps/rejected": -646.6809692382812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.928264617919922, "rewards/margins": 14.609095573425293, "rewards/rejected": -21.53736114501953, "step": 17328 }, { "epoch": 2.7, "learning_rate": 1.437543441049847e-06, "logits/chosen": -2.4244160652160645, "logits/rejected": -1.896681785583496, "logps/chosen": -434.5245056152344, "logps/rejected": -519.573486328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.694388389587402, "rewards/margins": 11.381227493286133, "rewards/rejected": -18.07561492919922, "step": 17329 }, { "epoch": 2.7, "learning_rate": 1.436810000518699e-06, "logits/chosen": -2.358142375946045, "logits/rejected": -2.401843547821045, "logps/chosen": -143.877685546875, "logps/rejected": -212.86497497558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.303593635559082, "rewards/margins": 10.287720680236816, "rewards/rejected": -16.5913143157959, "step": 17330 }, { "epoch": 2.7, "learning_rate": 1.4360765599875512e-06, "logits/chosen": -2.4912562370300293, "logits/rejected": -2.577751636505127, "logps/chosen": -354.49212646484375, "logps/rejected": -479.785888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.632721900939941, "rewards/margins": 12.963918685913086, "rewards/rejected": -20.596641540527344, "step": 17331 }, { "epoch": 2.7, "learning_rate": 1.4353431194564035e-06, "logits/chosen": -1.5963252782821655, "logits/rejected": -2.6380200386047363, "logps/chosen": -223.2006072998047, "logps/rejected": -571.17626953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.853833198547363, "rewards/margins": 13.365690231323242, "rewards/rejected": -20.219524383544922, "step": 17332 }, { "epoch": 2.7, "learning_rate": 1.4346096789252555e-06, "logits/chosen": -2.4922714233398438, "logits/rejected": -1.063578724861145, "logps/chosen": -177.00865173339844, "logps/rejected": -143.56060791015625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -1.5354712009429932, "rewards/margins": 8.345928192138672, "rewards/rejected": -9.881399154663086, "step": 17333 }, { "epoch": 2.7, "learning_rate": 1.4338762383941076e-06, "logits/chosen": -2.5263428688049316, "logits/rejected": -1.9706437587738037, "logps/chosen": -230.5348358154297, "logps/rejected": -306.11907958984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.21290397644043, "rewards/margins": 11.173547744750977, "rewards/rejected": -20.386451721191406, "step": 17334 }, { "epoch": 2.7, "learning_rate": 1.4331427978629597e-06, "logits/chosen": -2.2242648601531982, "logits/rejected": -2.8545374870300293, "logps/chosen": -221.82342529296875, "logps/rejected": -381.5244140625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.555028915405273, "rewards/margins": 7.032955169677734, "rewards/rejected": -13.587984085083008, "step": 17335 }, { "epoch": 2.7, "learning_rate": 1.4324093573318118e-06, "logits/chosen": -2.3777387142181396, "logits/rejected": -2.990490436553955, "logps/chosen": -100.00648498535156, "logps/rejected": -290.4560546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.40999698638916, "rewards/margins": 9.456114768981934, "rewards/rejected": -15.866111755371094, "step": 17336 }, { "epoch": 2.7, "learning_rate": 1.431675916800664e-06, "logits/chosen": -1.188421368598938, "logits/rejected": -2.6164286136627197, "logps/chosen": -167.4965362548828, "logps/rejected": -385.3170471191406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.0410261154174805, "rewards/margins": 8.899076461791992, "rewards/rejected": -14.940103530883789, "step": 17337 }, { "epoch": 2.7, "learning_rate": 1.430942476269516e-06, "logits/chosen": -2.8566033840179443, "logits/rejected": -2.938324213027954, "logps/chosen": -467.7668762207031, "logps/rejected": -518.5546875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -6.905517101287842, "rewards/margins": 8.743258476257324, "rewards/rejected": -15.648775100708008, "step": 17338 }, { "epoch": 2.7, "learning_rate": 1.430209035738368e-06, "logits/chosen": -2.676971673965454, "logits/rejected": -2.7575416564941406, "logps/chosen": -338.7918701171875, "logps/rejected": -520.531494140625, "loss": 0.0224, "rewards/accuracies": 1.0, "rewards/chosen": -8.293925285339355, "rewards/margins": 4.88559627532959, "rewards/rejected": -13.179521560668945, "step": 17339 }, { "epoch": 2.7, "learning_rate": 1.4294755952072204e-06, "logits/chosen": -0.6768119931221008, "logits/rejected": -1.980554223060608, "logps/chosen": -101.58159637451172, "logps/rejected": -633.26171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.12739372253418, "rewards/margins": 18.31378936767578, "rewards/rejected": -23.441181182861328, "step": 17340 }, { "epoch": 2.7, "learning_rate": 1.4287421546760725e-06, "logits/chosen": -1.7011210918426514, "logits/rejected": -2.7033517360687256, "logps/chosen": -379.3934631347656, "logps/rejected": -450.4534912109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.187357425689697, "rewards/margins": 9.131839752197266, "rewards/rejected": -15.319197654724121, "step": 17341 }, { "epoch": 2.7, "learning_rate": 1.4280087141449246e-06, "logits/chosen": -2.604602575302124, "logits/rejected": -2.4751861095428467, "logps/chosen": -207.2469482421875, "logps/rejected": -218.02529907226562, "loss": 0.216, "rewards/accuracies": 1.0, "rewards/chosen": -8.163572311401367, "rewards/margins": 3.3790619373321533, "rewards/rejected": -11.542634010314941, "step": 17342 }, { "epoch": 2.7, "learning_rate": 1.4272752736137767e-06, "logits/chosen": -1.5341428518295288, "logits/rejected": -2.61088228225708, "logps/chosen": -155.2298583984375, "logps/rejected": -414.6650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.680633068084717, "rewards/margins": 13.095035552978516, "rewards/rejected": -18.77566909790039, "step": 17343 }, { "epoch": 2.7, "learning_rate": 1.426541833082629e-06, "logits/chosen": -2.648622512817383, "logits/rejected": -2.6692111492156982, "logps/chosen": -270.5332336425781, "logps/rejected": -280.1052551269531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.313778877258301, "rewards/margins": 10.487274169921875, "rewards/rejected": -14.801053047180176, "step": 17344 }, { "epoch": 2.7, "learning_rate": 1.425808392551481e-06, "logits/chosen": -1.4551959037780762, "logits/rejected": -2.343271017074585, "logps/chosen": -154.5988311767578, "logps/rejected": -377.3078308105469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.176286697387695, "rewards/margins": 16.185420989990234, "rewards/rejected": -20.361705780029297, "step": 17345 }, { "epoch": 2.7, "learning_rate": 1.425074952020333e-06, "logits/chosen": -2.1855170726776123, "logits/rejected": -2.7032628059387207, "logps/chosen": -301.592529296875, "logps/rejected": -427.8946533203125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.431521415710449, "rewards/margins": 8.70699405670166, "rewards/rejected": -15.13851547241211, "step": 17346 }, { "epoch": 2.7, "learning_rate": 1.424341511489185e-06, "logits/chosen": -2.706752300262451, "logits/rejected": -2.8715012073516846, "logps/chosen": -770.9258422851562, "logps/rejected": -1103.396240234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.88923454284668, "rewards/margins": 11.237241744995117, "rewards/rejected": -22.126476287841797, "step": 17347 }, { "epoch": 2.7, "learning_rate": 1.4236080709580373e-06, "logits/chosen": -1.956944465637207, "logits/rejected": -2.4048383235931396, "logps/chosen": -323.8612060546875, "logps/rejected": -499.7369079589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.835128784179688, "rewards/margins": 10.865629196166992, "rewards/rejected": -19.70075798034668, "step": 17348 }, { "epoch": 2.7, "learning_rate": 1.4228746304268894e-06, "logits/chosen": -1.830060601234436, "logits/rejected": -2.9006879329681396, "logps/chosen": -649.0951538085938, "logps/rejected": -549.6573486328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.846731185913086, "rewards/margins": 8.62096881866455, "rewards/rejected": -17.467700958251953, "step": 17349 }, { "epoch": 2.7, "learning_rate": 1.4221411898957415e-06, "logits/chosen": -2.640458822250366, "logits/rejected": -2.301586627960205, "logps/chosen": -466.1154479980469, "logps/rejected": -577.932373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.448284149169922, "rewards/margins": 10.60423469543457, "rewards/rejected": -19.052518844604492, "step": 17350 }, { "epoch": 2.7, "learning_rate": 1.4214077493645936e-06, "logits/chosen": -2.732008695602417, "logits/rejected": -2.892817497253418, "logps/chosen": -148.57984924316406, "logps/rejected": -304.5687255859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -6.267205715179443, "rewards/margins": 9.333552360534668, "rewards/rejected": -15.600757598876953, "step": 17351 }, { "epoch": 2.7, "learning_rate": 1.4206743088334457e-06, "logits/chosen": -2.5882151126861572, "logits/rejected": -2.280606746673584, "logps/chosen": -403.8157958984375, "logps/rejected": -375.3598327636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.067446708679199, "rewards/margins": 11.314468383789062, "rewards/rejected": -17.381916046142578, "step": 17352 }, { "epoch": 2.7, "learning_rate": 1.419940868302298e-06, "logits/chosen": -2.7941644191741943, "logits/rejected": -2.3002188205718994, "logps/chosen": -593.0343627929688, "logps/rejected": -462.1596984863281, "loss": 0.5654, "rewards/accuracies": 0.5, "rewards/chosen": -5.540198802947998, "rewards/margins": 6.0191826820373535, "rewards/rejected": -11.559381484985352, "step": 17353 }, { "epoch": 2.7, "learning_rate": 1.41920742777115e-06, "logits/chosen": -2.5602805614471436, "logits/rejected": -2.696176767349243, "logps/chosen": -215.3865966796875, "logps/rejected": -355.4787902832031, "loss": 0.4663, "rewards/accuracies": 0.5, "rewards/chosen": -11.804643630981445, "rewards/margins": 6.968184947967529, "rewards/rejected": -18.772829055786133, "step": 17354 }, { "epoch": 2.7, "learning_rate": 1.4184739872400022e-06, "logits/chosen": -2.7924208641052246, "logits/rejected": -2.218423366546631, "logps/chosen": -183.12838745117188, "logps/rejected": -204.92831420898438, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/chosen": -7.5920891761779785, "rewards/margins": 5.075273036956787, "rewards/rejected": -12.667362213134766, "step": 17355 }, { "epoch": 2.7, "learning_rate": 1.417740546708854e-06, "logits/chosen": -1.6966034173965454, "logits/rejected": -2.832021713256836, "logps/chosen": -153.500732421875, "logps/rejected": -672.4110107421875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -11.69678020477295, "rewards/margins": 7.837353706359863, "rewards/rejected": -19.534133911132812, "step": 17356 }, { "epoch": 2.7, "learning_rate": 1.4170071061777063e-06, "logits/chosen": -2.6945338249206543, "logits/rejected": -2.1561853885650635, "logps/chosen": -466.10369873046875, "logps/rejected": -435.00970458984375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.603708267211914, "rewards/margins": 11.59195613861084, "rewards/rejected": -18.195663452148438, "step": 17357 }, { "epoch": 2.7, "learning_rate": 1.4162736656465584e-06, "logits/chosen": -2.586449146270752, "logits/rejected": -2.9134860038757324, "logps/chosen": -184.80191040039062, "logps/rejected": -343.7403869628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.755399703979492, "rewards/margins": 12.009685516357422, "rewards/rejected": -17.765085220336914, "step": 17358 }, { "epoch": 2.7, "learning_rate": 1.4155402251154105e-06, "logits/chosen": -2.6166603565216064, "logits/rejected": -2.462385892868042, "logps/chosen": -614.0276489257812, "logps/rejected": -614.6273193359375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -8.986245155334473, "rewards/margins": 9.024105072021484, "rewards/rejected": -18.010351181030273, "step": 17359 }, { "epoch": 2.7, "learning_rate": 1.4148067845842626e-06, "logits/chosen": -1.8455445766448975, "logits/rejected": -2.5750837326049805, "logps/chosen": -234.1715850830078, "logps/rejected": -357.86712646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.0126161575317383, "rewards/margins": 11.101348876953125, "rewards/rejected": -14.113964080810547, "step": 17360 }, { "epoch": 2.7, "learning_rate": 1.414073344053115e-06, "logits/chosen": -2.660318374633789, "logits/rejected": -2.483006477355957, "logps/chosen": -197.55499267578125, "logps/rejected": -195.45359802246094, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/chosen": -11.32199478149414, "rewards/margins": 2.9258947372436523, "rewards/rejected": -14.247889518737793, "step": 17361 }, { "epoch": 2.7, "learning_rate": 1.413339903521967e-06, "logits/chosen": -2.4331939220428467, "logits/rejected": -2.381094455718994, "logps/chosen": -288.967529296875, "logps/rejected": -418.03521728515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.758464813232422, "rewards/margins": 12.560922622680664, "rewards/rejected": -17.319387435913086, "step": 17362 }, { "epoch": 2.7, "learning_rate": 1.412606462990819e-06, "logits/chosen": -2.5901496410369873, "logits/rejected": -2.5838093757629395, "logps/chosen": -301.0404357910156, "logps/rejected": -397.7129821777344, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -5.1640825271606445, "rewards/margins": 8.922746658325195, "rewards/rejected": -14.086828231811523, "step": 17363 }, { "epoch": 2.7, "learning_rate": 1.4118730224596712e-06, "logits/chosen": -2.076423406600952, "logits/rejected": -2.3679630756378174, "logps/chosen": -118.31756591796875, "logps/rejected": -361.1510009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.340986251831055, "rewards/margins": 14.685832023620605, "rewards/rejected": -19.026817321777344, "step": 17364 }, { "epoch": 2.7, "learning_rate": 1.4111395819285233e-06, "logits/chosen": -2.6555967330932617, "logits/rejected": -2.833933115005493, "logps/chosen": -515.4842529296875, "logps/rejected": -665.8436889648438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.453390598297119, "rewards/margins": 9.579585075378418, "rewards/rejected": -13.032976150512695, "step": 17365 }, { "epoch": 2.7, "learning_rate": 1.4104061413973754e-06, "logits/chosen": -2.7072019577026367, "logits/rejected": -3.0247793197631836, "logps/chosen": -329.3687744140625, "logps/rejected": -764.3492431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.184371948242188, "rewards/margins": 14.2965669631958, "rewards/rejected": -24.480937957763672, "step": 17366 }, { "epoch": 2.7, "learning_rate": 1.4096727008662275e-06, "logits/chosen": -2.1634132862091064, "logits/rejected": -2.8143489360809326, "logps/chosen": -203.80636596679688, "logps/rejected": -480.4283447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2555928230285645, "rewards/margins": 11.830887794494629, "rewards/rejected": -19.08648109436035, "step": 17367 }, { "epoch": 2.7, "learning_rate": 1.4089392603350795e-06, "logits/chosen": -1.7598607540130615, "logits/rejected": -2.3517377376556396, "logps/chosen": -887.36181640625, "logps/rejected": -620.8062744140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.251589298248291, "rewards/margins": 13.921060562133789, "rewards/rejected": -19.172649383544922, "step": 17368 }, { "epoch": 2.7, "learning_rate": 1.4082058198039316e-06, "logits/chosen": -2.2044615745544434, "logits/rejected": -2.6896376609802246, "logps/chosen": -102.17362976074219, "logps/rejected": -299.2209777832031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.000163078308105, "rewards/margins": 9.744029998779297, "rewards/rejected": -17.744192123413086, "step": 17369 }, { "epoch": 2.7, "learning_rate": 1.407472379272784e-06, "logits/chosen": -2.2320337295532227, "logits/rejected": -2.6290857791900635, "logps/chosen": -186.91201782226562, "logps/rejected": -392.86126708984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.95675802230835, "rewards/margins": 12.572113037109375, "rewards/rejected": -20.528871536254883, "step": 17370 }, { "epoch": 2.7, "learning_rate": 1.406738938741636e-06, "logits/chosen": -2.2942159175872803, "logits/rejected": -2.9730262756347656, "logps/chosen": -162.45272827148438, "logps/rejected": -419.4584655761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.694921493530273, "rewards/margins": 12.602272033691406, "rewards/rejected": -17.29719352722168, "step": 17371 }, { "epoch": 2.7, "learning_rate": 1.4060054982104881e-06, "logits/chosen": -2.689880132675171, "logits/rejected": -2.1353793144226074, "logps/chosen": -313.5931396484375, "logps/rejected": -276.9749755859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.135753631591797, "rewards/margins": 9.259685516357422, "rewards/rejected": -15.395439147949219, "step": 17372 }, { "epoch": 2.7, "learning_rate": 1.4052720576793402e-06, "logits/chosen": -2.434553384780884, "logits/rejected": -2.0181756019592285, "logps/chosen": -195.70687866210938, "logps/rejected": -343.077880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.424514293670654, "rewards/margins": 11.39892864227295, "rewards/rejected": -17.823444366455078, "step": 17373 }, { "epoch": 2.7, "learning_rate": 1.4045386171481925e-06, "logits/chosen": -2.5524981021881104, "logits/rejected": -2.0834929943084717, "logps/chosen": -463.64959716796875, "logps/rejected": -494.94476318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.14269733428955, "rewards/margins": 13.873321533203125, "rewards/rejected": -23.01601791381836, "step": 17374 }, { "epoch": 2.7, "learning_rate": 1.4038051766170444e-06, "logits/chosen": -2.510728120803833, "logits/rejected": -2.319298267364502, "logps/chosen": -281.3653564453125, "logps/rejected": -410.22900390625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -11.395162582397461, "rewards/margins": 8.480427742004395, "rewards/rejected": -19.875591278076172, "step": 17375 }, { "epoch": 2.7, "learning_rate": 1.4030717360858965e-06, "logits/chosen": -2.869875431060791, "logits/rejected": -2.57200288772583, "logps/chosen": -466.8160705566406, "logps/rejected": -585.38330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7769083976745605, "rewards/margins": 12.993795394897461, "rewards/rejected": -18.77070426940918, "step": 17376 }, { "epoch": 2.7, "learning_rate": 1.4023382955547486e-06, "logits/chosen": -2.3412516117095947, "logits/rejected": -2.669626474380493, "logps/chosen": -241.81488037109375, "logps/rejected": -350.6763916015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.1767683029174805, "rewards/margins": 7.658908367156982, "rewards/rejected": -13.835677146911621, "step": 17377 }, { "epoch": 2.7, "learning_rate": 1.4016048550236009e-06, "logits/chosen": -2.560370445251465, "logits/rejected": -1.9377211332321167, "logps/chosen": -497.59228515625, "logps/rejected": -526.05224609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.348567962646484, "rewards/margins": 11.484151840209961, "rewards/rejected": -17.832719802856445, "step": 17378 }, { "epoch": 2.7, "learning_rate": 1.400871414492453e-06, "logits/chosen": -1.3734861612319946, "logits/rejected": -2.3006436824798584, "logps/chosen": -105.81655883789062, "logps/rejected": -496.1865234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.2961273193359375, "rewards/margins": 12.871221542358398, "rewards/rejected": -20.167348861694336, "step": 17379 }, { "epoch": 2.7, "learning_rate": 1.400137973961305e-06, "logits/chosen": -2.675185441970825, "logits/rejected": -2.7871711254119873, "logps/chosen": -465.81365966796875, "logps/rejected": -540.7408447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.302760124206543, "rewards/margins": 10.060976028442383, "rewards/rejected": -16.36373519897461, "step": 17380 }, { "epoch": 2.7, "learning_rate": 1.3994045334301571e-06, "logits/chosen": -2.6328201293945312, "logits/rejected": -2.9929563999176025, "logps/chosen": -351.4929504394531, "logps/rejected": -437.17144775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1229119300842285, "rewards/margins": 11.059457778930664, "rewards/rejected": -16.182369232177734, "step": 17381 }, { "epoch": 2.7, "learning_rate": 1.3986710928990094e-06, "logits/chosen": -2.9525434970855713, "logits/rejected": -3.176865577697754, "logps/chosen": -95.05009460449219, "logps/rejected": -216.70309448242188, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": -7.631068229675293, "rewards/margins": 6.675889492034912, "rewards/rejected": -14.306957244873047, "step": 17382 }, { "epoch": 2.7, "learning_rate": 1.3979376523678615e-06, "logits/chosen": -2.334240674972534, "logits/rejected": -1.6993833780288696, "logps/chosen": -191.83758544921875, "logps/rejected": -297.0494384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.6439056396484375, "rewards/margins": 9.754692077636719, "rewards/rejected": -14.398598670959473, "step": 17383 }, { "epoch": 2.7, "learning_rate": 1.3972042118367136e-06, "logits/chosen": -1.3394931554794312, "logits/rejected": -2.5713043212890625, "logps/chosen": -92.31351470947266, "logps/rejected": -454.39141845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.820736408233643, "rewards/margins": 9.290910720825195, "rewards/rejected": -16.111648559570312, "step": 17384 }, { "epoch": 2.7, "learning_rate": 1.3964707713055655e-06, "logits/chosen": -2.280869722366333, "logits/rejected": -1.715395212173462, "logps/chosen": -227.47244262695312, "logps/rejected": -290.4118347167969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.643625259399414, "rewards/margins": 8.234302520751953, "rewards/rejected": -15.877927780151367, "step": 17385 }, { "epoch": 2.7, "learning_rate": 1.3957373307744178e-06, "logits/chosen": -2.544829845428467, "logits/rejected": -1.0719586610794067, "logps/chosen": -551.0624389648438, "logps/rejected": -297.4757385253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.003718614578247, "rewards/margins": 11.619211196899414, "rewards/rejected": -14.622930526733398, "step": 17386 }, { "epoch": 2.7, "learning_rate": 1.3950038902432699e-06, "logits/chosen": -1.8973398208618164, "logits/rejected": -2.790619373321533, "logps/chosen": -269.876953125, "logps/rejected": -600.1055908203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.338220596313477, "rewards/margins": 9.364864349365234, "rewards/rejected": -18.703086853027344, "step": 17387 }, { "epoch": 2.7, "learning_rate": 1.394270449712122e-06, "logits/chosen": -1.1864519119262695, "logits/rejected": -2.8205976486206055, "logps/chosen": -136.19039916992188, "logps/rejected": -525.7571411132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.442375183105469, "rewards/margins": 10.386232376098633, "rewards/rejected": -19.8286075592041, "step": 17388 }, { "epoch": 2.7, "learning_rate": 1.393537009180974e-06, "logits/chosen": -1.156935214996338, "logits/rejected": -2.8715152740478516, "logps/chosen": -307.8624267578125, "logps/rejected": -750.9400024414062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.768999099731445, "rewards/margins": 9.922513961791992, "rewards/rejected": -18.691513061523438, "step": 17389 }, { "epoch": 2.7, "learning_rate": 1.3928035686498262e-06, "logits/chosen": -2.2392349243164062, "logits/rejected": -2.781419038772583, "logps/chosen": -153.1348114013672, "logps/rejected": -349.3232727050781, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.6985273361206055, "rewards/margins": 8.876272201538086, "rewards/rejected": -16.574798583984375, "step": 17390 }, { "epoch": 2.7, "learning_rate": 1.3920701281186785e-06, "logits/chosen": -1.9230949878692627, "logits/rejected": -2.3426644802093506, "logps/chosen": -365.72808837890625, "logps/rejected": -405.15557861328125, "loss": 0.0533, "rewards/accuracies": 1.0, "rewards/chosen": -9.314470291137695, "rewards/margins": 6.234251499176025, "rewards/rejected": -15.548721313476562, "step": 17391 }, { "epoch": 2.7, "learning_rate": 1.3913366875875305e-06, "logits/chosen": -2.261057138442993, "logits/rejected": -2.455636978149414, "logps/chosen": -397.1805419921875, "logps/rejected": -509.13641357421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.739149570465088, "rewards/margins": 10.011592864990234, "rewards/rejected": -15.750741958618164, "step": 17392 }, { "epoch": 2.7, "learning_rate": 1.3906032470563826e-06, "logits/chosen": -2.643822431564331, "logits/rejected": -1.9964834451675415, "logps/chosen": -259.5660400390625, "logps/rejected": -248.759033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.923213243484497, "rewards/margins": 9.364702224731445, "rewards/rejected": -12.287915229797363, "step": 17393 }, { "epoch": 2.71, "learning_rate": 1.3898698065252347e-06, "logits/chosen": -2.242415428161621, "logits/rejected": -2.781672954559326, "logps/chosen": -179.3905029296875, "logps/rejected": -502.3712158203125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -9.059591293334961, "rewards/margins": 11.021040916442871, "rewards/rejected": -20.080631256103516, "step": 17394 }, { "epoch": 2.71, "learning_rate": 1.3891363659940868e-06, "logits/chosen": -2.8060617446899414, "logits/rejected": -2.26094388961792, "logps/chosen": -283.34991455078125, "logps/rejected": -231.7782440185547, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.3289566040039062, "rewards/margins": 9.312494277954102, "rewards/rejected": -12.641450881958008, "step": 17395 }, { "epoch": 2.71, "learning_rate": 1.388402925462939e-06, "logits/chosen": -2.7294421195983887, "logits/rejected": -2.918248176574707, "logps/chosen": -586.2137451171875, "logps/rejected": -589.0564575195312, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -7.665231227874756, "rewards/margins": 7.554661750793457, "rewards/rejected": -15.219892501831055, "step": 17396 }, { "epoch": 2.71, "learning_rate": 1.387669484931791e-06, "logits/chosen": -2.737349510192871, "logits/rejected": -2.3652701377868652, "logps/chosen": -630.4557495117188, "logps/rejected": -546.250732421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.813447952270508, "rewards/margins": 9.321887016296387, "rewards/rejected": -18.135334014892578, "step": 17397 }, { "epoch": 2.71, "learning_rate": 1.386936044400643e-06, "logits/chosen": -2.4536736011505127, "logits/rejected": -1.3882997035980225, "logps/chosen": -218.34628295898438, "logps/rejected": -211.6519317626953, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": -7.262125492095947, "rewards/margins": 5.127027988433838, "rewards/rejected": -12.389153480529785, "step": 17398 }, { "epoch": 2.71, "learning_rate": 1.3862026038694954e-06, "logits/chosen": -2.440870761871338, "logits/rejected": -2.70078706741333, "logps/chosen": -277.3148498535156, "logps/rejected": -468.72216796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.872721672058105, "rewards/margins": 14.025750160217285, "rewards/rejected": -22.89847183227539, "step": 17399 }, { "epoch": 2.71, "learning_rate": 1.3854691633383475e-06, "logits/chosen": -2.265622854232788, "logits/rejected": -2.493227958679199, "logps/chosen": -250.29971313476562, "logps/rejected": -430.093017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.052828788757324, "rewards/margins": 13.367247581481934, "rewards/rejected": -20.420076370239258, "step": 17400 }, { "epoch": 2.71, "learning_rate": 1.3847357228071996e-06, "logits/chosen": -1.4291043281555176, "logits/rejected": -2.305746078491211, "logps/chosen": -221.10671997070312, "logps/rejected": -396.1815185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.805007457733154, "rewards/margins": 10.882013320922852, "rewards/rejected": -15.687021255493164, "step": 17401 }, { "epoch": 2.71, "learning_rate": 1.3840022822760517e-06, "logits/chosen": -2.6658425331115723, "logits/rejected": -3.0668282508850098, "logps/chosen": -640.0604248046875, "logps/rejected": -695.8775634765625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.698915958404541, "rewards/margins": 8.350645065307617, "rewards/rejected": -15.049560546875, "step": 17402 }, { "epoch": 2.71, "learning_rate": 1.383268841744904e-06, "logits/chosen": -1.8686705827713013, "logits/rejected": -2.894235849380493, "logps/chosen": -156.7093505859375, "logps/rejected": -315.99127197265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.85726261138916, "rewards/margins": 7.146961688995361, "rewards/rejected": -14.00422477722168, "step": 17403 }, { "epoch": 2.71, "learning_rate": 1.382535401213756e-06, "logits/chosen": -2.0179221630096436, "logits/rejected": -2.4548535346984863, "logps/chosen": -237.24502563476562, "logps/rejected": -446.9508056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.9010419845581055, "rewards/margins": 10.738518714904785, "rewards/rejected": -17.63956069946289, "step": 17404 }, { "epoch": 2.71, "learning_rate": 1.381801960682608e-06, "logits/chosen": -2.6525111198425293, "logits/rejected": -2.719675302505493, "logps/chosen": -182.360595703125, "logps/rejected": -421.3083801269531, "loss": 0.2111, "rewards/accuracies": 1.0, "rewards/chosen": -10.18425178527832, "rewards/margins": 7.593834400177002, "rewards/rejected": -17.778087615966797, "step": 17405 }, { "epoch": 2.71, "learning_rate": 1.38106852015146e-06, "logits/chosen": -2.6114420890808105, "logits/rejected": -2.871495485305786, "logps/chosen": -120.20976257324219, "logps/rejected": -466.7156677246094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1544508934021, "rewards/margins": 14.024517059326172, "rewards/rejected": -20.17896842956543, "step": 17406 }, { "epoch": 2.71, "learning_rate": 1.3803350796203123e-06, "logits/chosen": -1.6086995601654053, "logits/rejected": -2.3867363929748535, "logps/chosen": -192.69158935546875, "logps/rejected": -398.52984619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.786133766174316, "rewards/margins": 11.039056777954102, "rewards/rejected": -19.825191497802734, "step": 17407 }, { "epoch": 2.71, "learning_rate": 1.3796016390891644e-06, "logits/chosen": -2.6703102588653564, "logits/rejected": -2.547351598739624, "logps/chosen": -255.13980102539062, "logps/rejected": -306.66119384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.757062911987305, "rewards/margins": 8.885461807250977, "rewards/rejected": -17.64252471923828, "step": 17408 }, { "epoch": 2.71, "learning_rate": 1.3788681985580165e-06, "logits/chosen": -2.0550577640533447, "logits/rejected": -2.769559621810913, "logps/chosen": -359.09521484375, "logps/rejected": -625.0850830078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.317198753356934, "rewards/margins": 9.52735424041748, "rewards/rejected": -18.844552993774414, "step": 17409 }, { "epoch": 2.71, "learning_rate": 1.3781347580268686e-06, "logits/chosen": -1.635362148284912, "logits/rejected": -2.5424907207489014, "logps/chosen": -227.41952514648438, "logps/rejected": -563.5009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.387360572814941, "rewards/margins": 12.75124454498291, "rewards/rejected": -22.13860511779785, "step": 17410 }, { "epoch": 2.71, "learning_rate": 1.3774013174957207e-06, "logits/chosen": -2.2244067192077637, "logits/rejected": -2.7547011375427246, "logps/chosen": -219.2947998046875, "logps/rejected": -426.32275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.623597145080566, "rewards/margins": 9.275293350219727, "rewards/rejected": -16.89889144897461, "step": 17411 }, { "epoch": 2.71, "learning_rate": 1.376667876964573e-06, "logits/chosen": -2.076251983642578, "logits/rejected": -2.5041751861572266, "logps/chosen": -202.28977966308594, "logps/rejected": -341.66015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -9.624393463134766, "rewards/margins": 10.460609436035156, "rewards/rejected": -20.085002899169922, "step": 17412 }, { "epoch": 2.71, "learning_rate": 1.375934436433425e-06, "logits/chosen": -2.020920753479004, "logits/rejected": -2.4627745151519775, "logps/chosen": -170.59701538085938, "logps/rejected": -230.7567901611328, "loss": 0.1751, "rewards/accuracies": 1.0, "rewards/chosen": -6.613718032836914, "rewards/margins": 6.439549446105957, "rewards/rejected": -13.053266525268555, "step": 17413 }, { "epoch": 2.71, "learning_rate": 1.3752009959022772e-06, "logits/chosen": -2.5057153701782227, "logits/rejected": -1.1055527925491333, "logps/chosen": -229.930908203125, "logps/rejected": -164.71533203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.9343018531799316, "rewards/margins": 8.340862274169922, "rewards/rejected": -12.275164604187012, "step": 17414 }, { "epoch": 2.71, "learning_rate": 1.374467555371129e-06, "logits/chosen": -2.650235176086426, "logits/rejected": -2.285081624984741, "logps/chosen": -257.6270751953125, "logps/rejected": -386.6995849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.829109191894531, "rewards/margins": 12.113901138305664, "rewards/rejected": -19.943012237548828, "step": 17415 }, { "epoch": 2.71, "learning_rate": 1.3737341148399813e-06, "logits/chosen": -2.176602602005005, "logits/rejected": -2.62435245513916, "logps/chosen": -86.99127197265625, "logps/rejected": -412.02105712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.959417343139648, "rewards/margins": 14.22452163696289, "rewards/rejected": -21.183940887451172, "step": 17416 }, { "epoch": 2.71, "learning_rate": 1.3730006743088334e-06, "logits/chosen": -2.420292854309082, "logits/rejected": -1.6080760955810547, "logps/chosen": -419.61395263671875, "logps/rejected": -367.21240234375, "loss": 0.0474, "rewards/accuracies": 1.0, "rewards/chosen": -9.41211223602295, "rewards/margins": 5.991483688354492, "rewards/rejected": -15.403595924377441, "step": 17417 }, { "epoch": 2.71, "learning_rate": 1.3722672337776855e-06, "logits/chosen": -2.0914855003356934, "logits/rejected": -2.586479425430298, "logps/chosen": -240.64479064941406, "logps/rejected": -489.1260070800781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.308553695678711, "rewards/margins": 14.061544418334961, "rewards/rejected": -21.370098114013672, "step": 17418 }, { "epoch": 2.71, "learning_rate": 1.3715337932465376e-06, "logits/chosen": -2.7059342861175537, "logits/rejected": -2.765760660171509, "logps/chosen": -221.36849975585938, "logps/rejected": -434.1083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.954842567443848, "rewards/margins": 10.692848205566406, "rewards/rejected": -17.647689819335938, "step": 17419 }, { "epoch": 2.71, "learning_rate": 1.37080035271539e-06, "logits/chosen": -1.9259350299835205, "logits/rejected": -3.0264790058135986, "logps/chosen": -163.54083251953125, "logps/rejected": -556.0986328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.730477333068848, "rewards/margins": 8.886375427246094, "rewards/rejected": -16.616853713989258, "step": 17420 }, { "epoch": 2.71, "learning_rate": 1.370066912184242e-06, "logits/chosen": -2.685547113418579, "logits/rejected": -1.6141130924224854, "logps/chosen": -318.78857421875, "logps/rejected": -126.69935607910156, "loss": 0.5493, "rewards/accuracies": 0.5, "rewards/chosen": -4.017958164215088, "rewards/margins": 6.5923895835876465, "rewards/rejected": -10.610347747802734, "step": 17421 }, { "epoch": 2.71, "learning_rate": 1.369333471653094e-06, "logits/chosen": -1.4131232500076294, "logits/rejected": -2.561297655105591, "logps/chosen": -241.47048950195312, "logps/rejected": -490.60186767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.8156983852386475, "rewards/margins": 10.074329376220703, "rewards/rejected": -13.890027046203613, "step": 17422 }, { "epoch": 2.71, "learning_rate": 1.3686000311219462e-06, "logits/chosen": -2.312718391418457, "logits/rejected": -2.877499580383301, "logps/chosen": -200.22659301757812, "logps/rejected": -358.5157470703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.439722061157227, "rewards/margins": 10.24428939819336, "rewards/rejected": -16.684011459350586, "step": 17423 }, { "epoch": 2.71, "learning_rate": 1.3678665905907985e-06, "logits/chosen": -1.747672200202942, "logits/rejected": -2.6405069828033447, "logps/chosen": -127.68699645996094, "logps/rejected": -477.42877197265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.074682235717773, "rewards/margins": 9.64163589477539, "rewards/rejected": -17.716318130493164, "step": 17424 }, { "epoch": 2.71, "learning_rate": 1.3671331500596504e-06, "logits/chosen": -2.535403251647949, "logits/rejected": -1.9051305055618286, "logps/chosen": -173.82156372070312, "logps/rejected": -262.2574462890625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.222463607788086, "rewards/margins": 9.191251754760742, "rewards/rejected": -16.413715362548828, "step": 17425 }, { "epoch": 2.71, "learning_rate": 1.3663997095285025e-06, "logits/chosen": -2.698725700378418, "logits/rejected": -3.1753628253936768, "logps/chosen": -143.95657348632812, "logps/rejected": -414.1737365722656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.234501361846924, "rewards/margins": 12.036070823669434, "rewards/rejected": -18.270572662353516, "step": 17426 }, { "epoch": 2.71, "learning_rate": 1.3656662689973545e-06, "logits/chosen": -2.6388261318206787, "logits/rejected": -2.808133363723755, "logps/chosen": -120.02375793457031, "logps/rejected": -271.16314697265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.5253705978393555, "rewards/margins": 7.57858419418335, "rewards/rejected": -14.103954315185547, "step": 17427 }, { "epoch": 2.71, "learning_rate": 1.3649328284662069e-06, "logits/chosen": -2.185938596725464, "logits/rejected": -3.003530979156494, "logps/chosen": -216.3026580810547, "logps/rejected": -430.8230895996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.863376140594482, "rewards/margins": 10.421293258666992, "rewards/rejected": -15.284669876098633, "step": 17428 }, { "epoch": 2.71, "learning_rate": 1.364199387935059e-06, "logits/chosen": -1.6622177362442017, "logits/rejected": -2.5358951091766357, "logps/chosen": -340.937744140625, "logps/rejected": -521.829833984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.430215835571289, "rewards/margins": 9.6031494140625, "rewards/rejected": -16.03336524963379, "step": 17429 }, { "epoch": 2.71, "learning_rate": 1.363465947403911e-06, "logits/chosen": -1.6410795450210571, "logits/rejected": -2.531796932220459, "logps/chosen": -180.93667602539062, "logps/rejected": -482.63232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.662652015686035, "rewards/margins": 12.802848815917969, "rewards/rejected": -19.465499877929688, "step": 17430 }, { "epoch": 2.71, "learning_rate": 1.3627325068727631e-06, "logits/chosen": -2.3604609966278076, "logits/rejected": -2.1219236850738525, "logps/chosen": -547.385986328125, "logps/rejected": -515.9052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.788368225097656, "rewards/margins": 18.3670597076416, "rewards/rejected": -25.155427932739258, "step": 17431 }, { "epoch": 2.71, "learning_rate": 1.3619990663416152e-06, "logits/chosen": -1.5351048707962036, "logits/rejected": -1.9463402032852173, "logps/chosen": -314.9840087890625, "logps/rejected": -639.5228881835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.351980209350586, "rewards/margins": 13.326746940612793, "rewards/rejected": -22.678726196289062, "step": 17432 }, { "epoch": 2.71, "learning_rate": 1.3612656258104675e-06, "logits/chosen": -1.964322566986084, "logits/rejected": -2.4469640254974365, "logps/chosen": -101.15292358398438, "logps/rejected": -223.13279724121094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.074434280395508, "rewards/margins": 7.898120403289795, "rewards/rejected": -14.972555160522461, "step": 17433 }, { "epoch": 2.71, "learning_rate": 1.3605321852793194e-06, "logits/chosen": -1.4349671602249146, "logits/rejected": -2.246811866760254, "logps/chosen": -188.84567260742188, "logps/rejected": -345.3271484375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -9.190790176391602, "rewards/margins": 6.828310012817383, "rewards/rejected": -16.019100189208984, "step": 17434 }, { "epoch": 2.71, "learning_rate": 1.3597987447481715e-06, "logits/chosen": -2.8460588455200195, "logits/rejected": -2.7363381385803223, "logps/chosen": -362.08544921875, "logps/rejected": -505.86114501953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.01535177230835, "rewards/margins": 13.510250091552734, "rewards/rejected": -18.525602340698242, "step": 17435 }, { "epoch": 2.71, "learning_rate": 1.3590653042170236e-06, "logits/chosen": -2.5860445499420166, "logits/rejected": -2.637528419494629, "logps/chosen": -126.72703552246094, "logps/rejected": -156.63394165039062, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -5.464382171630859, "rewards/margins": 6.096400737762451, "rewards/rejected": -11.560783386230469, "step": 17436 }, { "epoch": 2.71, "learning_rate": 1.3583318636858759e-06, "logits/chosen": -2.307826280593872, "logits/rejected": -2.6495096683502197, "logps/chosen": -85.04987335205078, "logps/rejected": -363.0316162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.637855529785156, "rewards/margins": 10.955155372619629, "rewards/rejected": -16.59300994873047, "step": 17437 }, { "epoch": 2.71, "learning_rate": 1.357598423154728e-06, "logits/chosen": -2.5680253505706787, "logits/rejected": -2.470886707305908, "logps/chosen": -489.91583251953125, "logps/rejected": -743.850830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.145509719848633, "rewards/margins": 13.901583671569824, "rewards/rejected": -21.04709243774414, "step": 17438 }, { "epoch": 2.71, "learning_rate": 1.35686498262358e-06, "logits/chosen": -2.5592241287231445, "logits/rejected": -2.8926877975463867, "logps/chosen": -195.85769653320312, "logps/rejected": -477.4678955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.209897041320801, "rewards/margins": 12.811729431152344, "rewards/rejected": -19.021625518798828, "step": 17439 }, { "epoch": 2.71, "learning_rate": 1.3561315420924321e-06, "logits/chosen": -1.5381776094436646, "logits/rejected": -2.6371734142303467, "logps/chosen": -298.2568359375, "logps/rejected": -522.7916259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.972354888916016, "rewards/margins": 11.356603622436523, "rewards/rejected": -18.32895851135254, "step": 17440 }, { "epoch": 2.71, "learning_rate": 1.3553981015612844e-06, "logits/chosen": -1.4661117792129517, "logits/rejected": -2.71101450920105, "logps/chosen": -123.82036590576172, "logps/rejected": -635.79443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.320461750030518, "rewards/margins": 20.555404663085938, "rewards/rejected": -26.87586784362793, "step": 17441 }, { "epoch": 2.71, "learning_rate": 1.3546646610301365e-06, "logits/chosen": -2.2411324977874756, "logits/rejected": -2.4959988594055176, "logps/chosen": -451.09765625, "logps/rejected": -591.341552734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.805377006530762, "rewards/margins": 8.874835968017578, "rewards/rejected": -16.680212020874023, "step": 17442 }, { "epoch": 2.71, "learning_rate": 1.3539312204989886e-06, "logits/chosen": -1.5611432790756226, "logits/rejected": -2.581238031387329, "logps/chosen": -107.17637634277344, "logps/rejected": -458.37115478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.255655288696289, "rewards/margins": 13.466012954711914, "rewards/rejected": -20.721668243408203, "step": 17443 }, { "epoch": 2.71, "learning_rate": 1.3531977799678405e-06, "logits/chosen": -2.48545241355896, "logits/rejected": -1.2827595472335815, "logps/chosen": -276.0219421386719, "logps/rejected": -254.451416015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.191728591918945, "rewards/margins": 8.870481491088867, "rewards/rejected": -15.062210083007812, "step": 17444 }, { "epoch": 2.71, "learning_rate": 1.3524643394366928e-06, "logits/chosen": -2.506118059158325, "logits/rejected": -2.6404542922973633, "logps/chosen": -141.06875610351562, "logps/rejected": -302.4801025390625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.081831455230713, "rewards/margins": 6.975614547729492, "rewards/rejected": -13.057445526123047, "step": 17445 }, { "epoch": 2.71, "learning_rate": 1.351730898905545e-06, "logits/chosen": -2.3923001289367676, "logits/rejected": -2.1218502521514893, "logps/chosen": -206.95004272460938, "logps/rejected": -264.3763732910156, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.230128288269043, "rewards/margins": 7.227072238922119, "rewards/rejected": -13.45720100402832, "step": 17446 }, { "epoch": 2.71, "learning_rate": 1.350997458374397e-06, "logits/chosen": -2.5938847064971924, "logits/rejected": -2.4962894916534424, "logps/chosen": -400.90692138671875, "logps/rejected": -573.6837158203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.462522506713867, "rewards/margins": 11.154677391052246, "rewards/rejected": -20.617198944091797, "step": 17447 }, { "epoch": 2.71, "learning_rate": 1.350264017843249e-06, "logits/chosen": -2.272976875305176, "logits/rejected": -2.548020124435425, "logps/chosen": -260.65594482421875, "logps/rejected": -370.4224853515625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.822563171386719, "rewards/margins": 9.434331893920898, "rewards/rejected": -17.256895065307617, "step": 17448 }, { "epoch": 2.71, "learning_rate": 1.3495305773121012e-06, "logits/chosen": -1.7575057744979858, "logits/rejected": -2.237964630126953, "logps/chosen": -152.16986083984375, "logps/rejected": -226.22027587890625, "loss": 0.4445, "rewards/accuracies": 0.5, "rewards/chosen": -10.652641296386719, "rewards/margins": 4.728747844696045, "rewards/rejected": -15.381389617919922, "step": 17449 }, { "epoch": 2.71, "learning_rate": 1.3487971367809535e-06, "logits/chosen": -1.101069688796997, "logits/rejected": -1.9790773391723633, "logps/chosen": -117.62132263183594, "logps/rejected": -513.6925048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.81208610534668, "rewards/margins": 14.129836082458496, "rewards/rejected": -19.94192123413086, "step": 17450 }, { "epoch": 2.71, "learning_rate": 1.3480636962498056e-06, "logits/chosen": -1.5854575634002686, "logits/rejected": -2.6435115337371826, "logps/chosen": -178.5580596923828, "logps/rejected": -478.20770263671875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.851858139038086, "rewards/margins": 9.42416763305664, "rewards/rejected": -17.27602767944336, "step": 17451 }, { "epoch": 2.71, "learning_rate": 1.3473302557186576e-06, "logits/chosen": -2.6029069423675537, "logits/rejected": -2.5606236457824707, "logps/chosen": -341.8822326660156, "logps/rejected": -287.0279846191406, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.063041687011719, "rewards/margins": 8.21813678741455, "rewards/rejected": -14.281179428100586, "step": 17452 }, { "epoch": 2.71, "learning_rate": 1.3465968151875097e-06, "logits/chosen": -0.9997462630271912, "logits/rejected": -2.30609130859375, "logps/chosen": -116.39280700683594, "logps/rejected": -493.07684326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.512489318847656, "rewards/margins": 17.380603790283203, "rewards/rejected": -23.89309310913086, "step": 17453 }, { "epoch": 2.71, "learning_rate": 1.3458633746563618e-06, "logits/chosen": -2.3816678524017334, "logits/rejected": -2.764364719390869, "logps/chosen": -145.28244018554688, "logps/rejected": -336.9860534667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.276752471923828, "rewards/margins": 11.677593231201172, "rewards/rejected": -19.954345703125, "step": 17454 }, { "epoch": 2.71, "learning_rate": 1.345129934125214e-06, "logits/chosen": -2.612412452697754, "logits/rejected": -2.2799320220947266, "logps/chosen": -477.1609191894531, "logps/rejected": -591.3048095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.515441417694092, "rewards/margins": 13.717994689941406, "rewards/rejected": -21.233436584472656, "step": 17455 }, { "epoch": 2.71, "learning_rate": 1.344396493594066e-06, "logits/chosen": -2.587282180786133, "logits/rejected": -2.8125104904174805, "logps/chosen": -225.5654296875, "logps/rejected": -455.17694091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0097174644470215, "rewards/margins": 16.936113357543945, "rewards/rejected": -20.945831298828125, "step": 17456 }, { "epoch": 2.71, "learning_rate": 1.343663053062918e-06, "logits/chosen": -2.3440053462982178, "logits/rejected": -2.8134095668792725, "logps/chosen": -118.21770477294922, "logps/rejected": -311.0293273925781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.273322105407715, "rewards/margins": 9.711933135986328, "rewards/rejected": -16.985254287719727, "step": 17457 }, { "epoch": 2.72, "learning_rate": 1.3429296125317704e-06, "logits/chosen": -1.7873330116271973, "logits/rejected": -2.1004247665405273, "logps/chosen": -227.31723022460938, "logps/rejected": -429.4052734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.346800804138184, "rewards/margins": 12.419547080993652, "rewards/rejected": -20.766347885131836, "step": 17458 }, { "epoch": 2.72, "learning_rate": 1.3421961720006225e-06, "logits/chosen": -2.844801664352417, "logits/rejected": -3.0215625762939453, "logps/chosen": -193.2738800048828, "logps/rejected": -471.06024169921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.313739776611328, "rewards/margins": 9.671577453613281, "rewards/rejected": -20.98531723022461, "step": 17459 }, { "epoch": 2.72, "learning_rate": 1.3414627314694746e-06, "logits/chosen": -2.058014154434204, "logits/rejected": -2.6030073165893555, "logps/chosen": -298.9725036621094, "logps/rejected": -493.7918395996094, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -9.113574981689453, "rewards/margins": 7.047142028808594, "rewards/rejected": -16.160717010498047, "step": 17460 }, { "epoch": 2.72, "learning_rate": 1.3407292909383267e-06, "logits/chosen": -2.567774772644043, "logits/rejected": -2.6806857585906982, "logps/chosen": -452.4272155761719, "logps/rejected": -576.1608276367188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.098957061767578, "rewards/margins": 8.908231735229492, "rewards/rejected": -16.00718879699707, "step": 17461 }, { "epoch": 2.72, "learning_rate": 1.339995850407179e-06, "logits/chosen": -2.4550106525421143, "logits/rejected": -2.5623886585235596, "logps/chosen": -260.80889892578125, "logps/rejected": -247.09767150878906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.425915718078613, "rewards/margins": 7.727234840393066, "rewards/rejected": -15.15315055847168, "step": 17462 }, { "epoch": 2.72, "learning_rate": 1.339262409876031e-06, "logits/chosen": -1.4806064367294312, "logits/rejected": -2.502772569656372, "logps/chosen": -143.13906860351562, "logps/rejected": -376.4782409667969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.026342868804932, "rewards/margins": 8.394979476928711, "rewards/rejected": -15.421321868896484, "step": 17463 }, { "epoch": 2.72, "learning_rate": 1.338528969344883e-06, "logits/chosen": -1.4997154474258423, "logits/rejected": -2.437883138656616, "logps/chosen": -240.62945556640625, "logps/rejected": -407.9089050292969, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -5.888855457305908, "rewards/margins": 7.57168436050415, "rewards/rejected": -13.460539817810059, "step": 17464 }, { "epoch": 2.72, "learning_rate": 1.337795528813735e-06, "logits/chosen": -2.2878503799438477, "logits/rejected": -1.9418435096740723, "logps/chosen": -205.97723388671875, "logps/rejected": -459.071044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.452713966369629, "rewards/margins": 14.092850685119629, "rewards/rejected": -21.545564651489258, "step": 17465 }, { "epoch": 2.72, "learning_rate": 1.3370620882825873e-06, "logits/chosen": -2.061659574508667, "logits/rejected": -2.976638078689575, "logps/chosen": -325.38897705078125, "logps/rejected": -775.343994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.050350189208984, "rewards/margins": 13.503069877624512, "rewards/rejected": -19.553421020507812, "step": 17466 }, { "epoch": 2.72, "learning_rate": 1.3363286477514394e-06, "logits/chosen": -2.0224478244781494, "logits/rejected": -2.7071774005889893, "logps/chosen": -156.98728942871094, "logps/rejected": -543.328857421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.849397659301758, "rewards/margins": 9.995574951171875, "rewards/rejected": -14.84497356414795, "step": 17467 }, { "epoch": 2.72, "learning_rate": 1.3355952072202915e-06, "logits/chosen": -1.626002311706543, "logits/rejected": -2.783759832382202, "logps/chosen": -137.00082397460938, "logps/rejected": -524.433349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.274862766265869, "rewards/margins": 13.365585327148438, "rewards/rejected": -20.64044952392578, "step": 17468 }, { "epoch": 2.72, "learning_rate": 1.3348617666891436e-06, "logits/chosen": -1.9786781072616577, "logits/rejected": -2.6675097942352295, "logps/chosen": -154.97171020507812, "logps/rejected": -431.38531494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.318489074707031, "rewards/margins": 11.51506233215332, "rewards/rejected": -19.83355140686035, "step": 17469 }, { "epoch": 2.72, "learning_rate": 1.3341283261579957e-06, "logits/chosen": -2.813422203063965, "logits/rejected": -1.8086603879928589, "logps/chosen": -407.4924621582031, "logps/rejected": -134.15963745117188, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/chosen": -5.869807720184326, "rewards/margins": 4.171515941619873, "rewards/rejected": -10.0413236618042, "step": 17470 }, { "epoch": 2.72, "learning_rate": 1.333394885626848e-06, "logits/chosen": -1.615535020828247, "logits/rejected": -2.2578659057617188, "logps/chosen": -223.86187744140625, "logps/rejected": -397.29888916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.036686897277832, "rewards/margins": 10.161012649536133, "rewards/rejected": -16.19770050048828, "step": 17471 }, { "epoch": 2.72, "learning_rate": 1.3326614450957e-06, "logits/chosen": -2.636190176010132, "logits/rejected": -1.7674202919006348, "logps/chosen": -446.0463562011719, "logps/rejected": -337.0248107910156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.361856937408447, "rewards/margins": 12.371916770935059, "rewards/rejected": -17.733774185180664, "step": 17472 }, { "epoch": 2.72, "learning_rate": 1.3319280045645522e-06, "logits/chosen": -2.37245512008667, "logits/rejected": -2.736844778060913, "logps/chosen": -122.89700317382812, "logps/rejected": -290.1785888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.770043849945068, "rewards/margins": 10.272321701049805, "rewards/rejected": -18.04236602783203, "step": 17473 }, { "epoch": 2.72, "learning_rate": 1.331194564033404e-06, "logits/chosen": -2.7938320636749268, "logits/rejected": -1.6792047023773193, "logps/chosen": -420.8677978515625, "logps/rejected": -353.3902587890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.811544895172119, "rewards/margins": 9.40652847290039, "rewards/rejected": -12.218072891235352, "step": 17474 }, { "epoch": 2.72, "learning_rate": 1.3304611235022564e-06, "logits/chosen": -2.5309555530548096, "logits/rejected": -2.524946451187134, "logps/chosen": -573.7147827148438, "logps/rejected": -499.33770751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6813764572143555, "rewards/margins": 12.739358901977539, "rewards/rejected": -19.420734405517578, "step": 17475 }, { "epoch": 2.72, "learning_rate": 1.3297276829711084e-06, "logits/chosen": -2.509753704071045, "logits/rejected": -2.2506704330444336, "logps/chosen": -1032.6915283203125, "logps/rejected": -827.0219116210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.614944458007812, "rewards/margins": 11.809679985046387, "rewards/rejected": -20.424625396728516, "step": 17476 }, { "epoch": 2.72, "learning_rate": 1.3289942424399605e-06, "logits/chosen": -2.586986541748047, "logits/rejected": -2.8290202617645264, "logps/chosen": -447.7655944824219, "logps/rejected": -577.2068481445312, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -7.268455505371094, "rewards/margins": 6.5424394607543945, "rewards/rejected": -13.810894966125488, "step": 17477 }, { "epoch": 2.72, "learning_rate": 1.3282608019088126e-06, "logits/chosen": -1.3940062522888184, "logits/rejected": -2.562011241912842, "logps/chosen": -276.07086181640625, "logps/rejected": -559.7393798828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.240180015563965, "rewards/margins": 13.342308044433594, "rewards/rejected": -20.582489013671875, "step": 17478 }, { "epoch": 2.72, "learning_rate": 1.327527361377665e-06, "logits/chosen": -2.429558753967285, "logits/rejected": -2.681360960006714, "logps/chosen": -323.28106689453125, "logps/rejected": -399.75140380859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -8.70904541015625, "rewards/margins": 7.467784404754639, "rewards/rejected": -16.176830291748047, "step": 17479 }, { "epoch": 2.72, "learning_rate": 1.326793920846517e-06, "logits/chosen": -1.9038602113723755, "logits/rejected": -2.6433022022247314, "logps/chosen": -92.15911102294922, "logps/rejected": -449.69659423828125, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -7.195664882659912, "rewards/margins": 10.630212783813477, "rewards/rejected": -17.825878143310547, "step": 17480 }, { "epoch": 2.72, "learning_rate": 1.326060480315369e-06, "logits/chosen": -1.6239662170410156, "logits/rejected": -1.8502599000930786, "logps/chosen": -291.9233703613281, "logps/rejected": -353.033203125, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/chosen": -6.804193496704102, "rewards/margins": 8.140613555908203, "rewards/rejected": -14.944807052612305, "step": 17481 }, { "epoch": 2.72, "learning_rate": 1.3253270397842212e-06, "logits/chosen": -2.44299054145813, "logits/rejected": -2.792863607406616, "logps/chosen": -93.43692779541016, "logps/rejected": -255.58346557617188, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.5976881980896, "rewards/margins": 8.438892364501953, "rewards/rejected": -15.036580085754395, "step": 17482 }, { "epoch": 2.72, "learning_rate": 1.3245935992530735e-06, "logits/chosen": -2.0114333629608154, "logits/rejected": -2.989579200744629, "logps/chosen": -262.8736877441406, "logps/rejected": -446.8813781738281, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.501049041748047, "rewards/margins": 6.95947265625, "rewards/rejected": -14.460521697998047, "step": 17483 }, { "epoch": 2.72, "learning_rate": 1.3238601587219254e-06, "logits/chosen": -2.6156866550445557, "logits/rejected": -1.5929813385009766, "logps/chosen": -369.6912841796875, "logps/rejected": -378.07147216796875, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/chosen": -11.36463737487793, "rewards/margins": 8.490362167358398, "rewards/rejected": -19.854999542236328, "step": 17484 }, { "epoch": 2.72, "learning_rate": 1.3231267181907775e-06, "logits/chosen": -1.3560237884521484, "logits/rejected": -2.673375129699707, "logps/chosen": -139.63339233398438, "logps/rejected": -528.8941040039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5997796058654785, "rewards/margins": 14.902551651000977, "rewards/rejected": -20.502330780029297, "step": 17485 }, { "epoch": 2.72, "learning_rate": 1.3223932776596296e-06, "logits/chosen": -2.1870338916778564, "logits/rejected": -2.770125150680542, "logps/chosen": -511.26953125, "logps/rejected": -832.3012084960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.79924201965332, "rewards/margins": 12.903099060058594, "rewards/rejected": -21.702341079711914, "step": 17486 }, { "epoch": 2.72, "learning_rate": 1.3216598371284819e-06, "logits/chosen": -1.8528779745101929, "logits/rejected": -2.413693428039551, "logps/chosen": -220.62953186035156, "logps/rejected": -315.29925537109375, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": -7.576727867126465, "rewards/margins": 6.588765621185303, "rewards/rejected": -14.165493965148926, "step": 17487 }, { "epoch": 2.72, "learning_rate": 1.320926396597334e-06, "logits/chosen": -2.8644495010375977, "logits/rejected": -2.664260149002075, "logps/chosen": -436.4666442871094, "logps/rejected": -647.6502685546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.365588188171387, "rewards/margins": 10.882229804992676, "rewards/rejected": -18.247817993164062, "step": 17488 }, { "epoch": 2.72, "learning_rate": 1.320192956066186e-06, "logits/chosen": -1.5408358573913574, "logits/rejected": -2.422405958175659, "logps/chosen": -239.22509765625, "logps/rejected": -483.71142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.24099063873291, "rewards/margins": 13.36966609954834, "rewards/rejected": -18.61065673828125, "step": 17489 }, { "epoch": 2.72, "learning_rate": 1.3194595155350381e-06, "logits/chosen": -2.282940149307251, "logits/rejected": -2.5346877574920654, "logps/chosen": -172.73492431640625, "logps/rejected": -261.9736633300781, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -7.3788909912109375, "rewards/margins": 7.376075744628906, "rewards/rejected": -14.754966735839844, "step": 17490 }, { "epoch": 2.72, "learning_rate": 1.3187260750038902e-06, "logits/chosen": -2.353410482406616, "logits/rejected": -2.594965696334839, "logps/chosen": -362.3074951171875, "logps/rejected": -406.5223693847656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.453251838684082, "rewards/margins": 9.03034496307373, "rewards/rejected": -14.483596801757812, "step": 17491 }, { "epoch": 2.72, "learning_rate": 1.3179926344727425e-06, "logits/chosen": -0.8733673691749573, "logits/rejected": -2.288078546524048, "logps/chosen": -318.3839111328125, "logps/rejected": -717.2567138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.953973770141602, "rewards/margins": 11.212221145629883, "rewards/rejected": -21.166194915771484, "step": 17492 }, { "epoch": 2.72, "learning_rate": 1.3172591939415944e-06, "logits/chosen": -2.2329015731811523, "logits/rejected": -2.526751756668091, "logps/chosen": -202.4052734375, "logps/rejected": -394.29168701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.897186279296875, "rewards/margins": 12.53148078918457, "rewards/rejected": -19.428668975830078, "step": 17493 }, { "epoch": 2.72, "learning_rate": 1.3165257534104465e-06, "logits/chosen": -1.9067689180374146, "logits/rejected": -2.792339563369751, "logps/chosen": -107.09066009521484, "logps/rejected": -527.2274169921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.028472900390625, "rewards/margins": 8.707660675048828, "rewards/rejected": -17.736133575439453, "step": 17494 }, { "epoch": 2.72, "learning_rate": 1.3157923128792986e-06, "logits/chosen": -1.8490452766418457, "logits/rejected": -2.8663454055786133, "logps/chosen": -128.98487854003906, "logps/rejected": -321.0869445800781, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -9.462785720825195, "rewards/margins": 7.079473972320557, "rewards/rejected": -16.542259216308594, "step": 17495 }, { "epoch": 2.72, "learning_rate": 1.3150588723481509e-06, "logits/chosen": -2.3886780738830566, "logits/rejected": -1.6451977491378784, "logps/chosen": -531.088623046875, "logps/rejected": -488.8044128417969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.91328239440918, "rewards/margins": 12.767000198364258, "rewards/rejected": -20.680282592773438, "step": 17496 }, { "epoch": 2.72, "learning_rate": 1.314325431817003e-06, "logits/chosen": -2.791295289993286, "logits/rejected": -2.7452404499053955, "logps/chosen": -237.8710479736328, "logps/rejected": -326.2267150878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.028779029846191, "rewards/margins": 10.270843505859375, "rewards/rejected": -16.29962158203125, "step": 17497 }, { "epoch": 2.72, "learning_rate": 1.313591991285855e-06, "logits/chosen": -1.7898765802383423, "logits/rejected": -2.7894136905670166, "logps/chosen": -212.97125244140625, "logps/rejected": -464.2117919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5172343254089355, "rewards/margins": 14.44548225402832, "rewards/rejected": -19.962717056274414, "step": 17498 }, { "epoch": 2.72, "learning_rate": 1.3128585507547071e-06, "logits/chosen": -1.9775538444519043, "logits/rejected": -2.450908899307251, "logps/chosen": -198.87408447265625, "logps/rejected": -387.0383605957031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.10261344909668, "rewards/margins": 9.383962631225586, "rewards/rejected": -18.486576080322266, "step": 17499 }, { "epoch": 2.72, "learning_rate": 1.3121251102235595e-06, "logits/chosen": -2.0983211994171143, "logits/rejected": -2.843353509902954, "logps/chosen": -163.6514892578125, "logps/rejected": -461.04473876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.963198661804199, "rewards/margins": 11.431528091430664, "rewards/rejected": -17.394725799560547, "step": 17500 }, { "epoch": 2.72, "learning_rate": 1.3113916696924115e-06, "logits/chosen": -2.81506609916687, "logits/rejected": -2.9173431396484375, "logps/chosen": -888.0119018554688, "logps/rejected": -655.9798583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.359004020690918, "rewards/margins": 10.241293907165527, "rewards/rejected": -17.600297927856445, "step": 17501 }, { "epoch": 2.72, "learning_rate": 1.3106582291612636e-06, "logits/chosen": -1.5134085416793823, "logits/rejected": -2.427586078643799, "logps/chosen": -231.12265014648438, "logps/rejected": -477.7578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.0062837600708, "rewards/margins": 11.432425498962402, "rewards/rejected": -20.438709259033203, "step": 17502 }, { "epoch": 2.72, "learning_rate": 1.3099247886301155e-06, "logits/chosen": -1.118924617767334, "logits/rejected": -2.506971597671509, "logps/chosen": -237.40011596679688, "logps/rejected": -516.9371948242188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.149103164672852, "rewards/margins": 9.921148300170898, "rewards/rejected": -19.07025146484375, "step": 17503 }, { "epoch": 2.72, "learning_rate": 1.3091913480989678e-06, "logits/chosen": -2.4579946994781494, "logits/rejected": -2.3875386714935303, "logps/chosen": -166.73573303222656, "logps/rejected": -305.36016845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.090535640716553, "rewards/margins": 9.75476360321045, "rewards/rejected": -15.845298767089844, "step": 17504 }, { "epoch": 2.72, "learning_rate": 1.30845790756782e-06, "logits/chosen": -1.926357626914978, "logits/rejected": -1.6203203201293945, "logps/chosen": -483.08697509765625, "logps/rejected": -444.9598083496094, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -9.256900787353516, "rewards/margins": 10.318793296813965, "rewards/rejected": -19.575695037841797, "step": 17505 }, { "epoch": 2.72, "learning_rate": 1.307724467036672e-06, "logits/chosen": -2.1902406215667725, "logits/rejected": -2.806873321533203, "logps/chosen": -110.92654418945312, "logps/rejected": -572.5553588867188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.213046073913574, "rewards/margins": 11.31375503540039, "rewards/rejected": -19.52680206298828, "step": 17506 }, { "epoch": 2.72, "learning_rate": 1.306991026505524e-06, "logits/chosen": -2.7936418056488037, "logits/rejected": -2.6771600246429443, "logps/chosen": -172.99478149414062, "logps/rejected": -248.14031982421875, "loss": 1.0067, "rewards/accuracies": 0.5, "rewards/chosen": -11.163515090942383, "rewards/margins": 5.312848091125488, "rewards/rejected": -16.476364135742188, "step": 17507 }, { "epoch": 2.72, "learning_rate": 1.3062575859743764e-06, "logits/chosen": -2.324751853942871, "logits/rejected": -1.6314536333084106, "logps/chosen": -301.1034851074219, "logps/rejected": -373.38470458984375, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": -10.398887634277344, "rewards/margins": 6.374542236328125, "rewards/rejected": -16.77342987060547, "step": 17508 }, { "epoch": 2.72, "learning_rate": 1.3055241454432285e-06, "logits/chosen": -2.277500867843628, "logits/rejected": -2.2911040782928467, "logps/chosen": -400.0105285644531, "logps/rejected": -656.720947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.149310111999512, "rewards/margins": 14.664609909057617, "rewards/rejected": -23.813920974731445, "step": 17509 }, { "epoch": 2.72, "learning_rate": 1.3047907049120806e-06, "logits/chosen": -2.9197514057159424, "logits/rejected": -3.0664103031158447, "logps/chosen": -128.6747283935547, "logps/rejected": -241.43418884277344, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -4.907162666320801, "rewards/margins": 9.047731399536133, "rewards/rejected": -13.954894065856934, "step": 17510 }, { "epoch": 2.72, "learning_rate": 1.3040572643809327e-06, "logits/chosen": -2.265813112258911, "logits/rejected": -2.7748539447784424, "logps/chosen": -185.5895538330078, "logps/rejected": -495.0542907714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.851591110229492, "rewards/margins": 11.305450439453125, "rewards/rejected": -21.157041549682617, "step": 17511 }, { "epoch": 2.72, "learning_rate": 1.3033238238497847e-06, "logits/chosen": -2.6606619358062744, "logits/rejected": -2.1188080310821533, "logps/chosen": -321.126708984375, "logps/rejected": -382.3443603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.978599548339844, "rewards/margins": 12.060152053833008, "rewards/rejected": -22.03875160217285, "step": 17512 }, { "epoch": 2.72, "learning_rate": 1.3025903833186368e-06, "logits/chosen": -1.9364198446273804, "logits/rejected": -2.451632499694824, "logps/chosen": -117.60263061523438, "logps/rejected": -249.16175842285156, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.905115604400635, "rewards/margins": 7.11912727355957, "rewards/rejected": -14.024242401123047, "step": 17513 }, { "epoch": 2.72, "learning_rate": 1.301856942787489e-06, "logits/chosen": -2.6365878582000732, "logits/rejected": -1.7439627647399902, "logps/chosen": -664.5736694335938, "logps/rejected": -533.7060546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.44887113571167, "rewards/margins": 13.123558044433594, "rewards/rejected": -19.572429656982422, "step": 17514 }, { "epoch": 2.72, "learning_rate": 1.301123502256341e-06, "logits/chosen": -2.704005718231201, "logits/rejected": -2.819075345993042, "logps/chosen": -321.0862121582031, "logps/rejected": -343.03497314453125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -8.324601173400879, "rewards/margins": 9.174446105957031, "rewards/rejected": -17.499046325683594, "step": 17515 }, { "epoch": 2.72, "learning_rate": 1.300390061725193e-06, "logits/chosen": -1.6929694414138794, "logits/rejected": -2.6199522018432617, "logps/chosen": -382.7823486328125, "logps/rejected": -501.5428771972656, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.364008903503418, "rewards/margins": 7.366292476654053, "rewards/rejected": -12.730300903320312, "step": 17516 }, { "epoch": 2.72, "learning_rate": 1.2996566211940454e-06, "logits/chosen": -2.2277567386627197, "logits/rejected": -2.683105945587158, "logps/chosen": -293.451416015625, "logps/rejected": -484.22900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.03046178817749, "rewards/margins": 13.315938949584961, "rewards/rejected": -20.34640121459961, "step": 17517 }, { "epoch": 2.72, "learning_rate": 1.2989231806628975e-06, "logits/chosen": -2.266909122467041, "logits/rejected": -2.271589756011963, "logps/chosen": -183.84808349609375, "logps/rejected": -238.4401092529297, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.116826057434082, "rewards/margins": 7.8197479248046875, "rewards/rejected": -14.93657398223877, "step": 17518 }, { "epoch": 2.72, "learning_rate": 1.2981897401317496e-06, "logits/chosen": -2.619842290878296, "logits/rejected": -1.6420543193817139, "logps/chosen": -452.20440673828125, "logps/rejected": -380.65289306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.276849269866943, "rewards/margins": 9.931961059570312, "rewards/rejected": -14.208810806274414, "step": 17519 }, { "epoch": 2.72, "learning_rate": 1.2974562996006017e-06, "logits/chosen": -1.7375770807266235, "logits/rejected": -2.5153424739837646, "logps/chosen": -336.26422119140625, "logps/rejected": -627.3596801757812, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.3868842124938965, "rewards/margins": 8.62661361694336, "rewards/rejected": -15.013498306274414, "step": 17520 }, { "epoch": 2.72, "learning_rate": 1.296722859069454e-06, "logits/chosen": -2.8017091751098633, "logits/rejected": -2.628514051437378, "logps/chosen": -237.95156860351562, "logps/rejected": -236.58990478515625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.5096940994262695, "rewards/margins": 7.233450889587402, "rewards/rejected": -11.743144989013672, "step": 17521 }, { "epoch": 2.73, "learning_rate": 1.295989418538306e-06, "logits/chosen": -1.9746828079223633, "logits/rejected": -2.296853542327881, "logps/chosen": -192.3926544189453, "logps/rejected": -377.34381103515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.616258144378662, "rewards/margins": 9.16836929321289, "rewards/rejected": -15.784627914428711, "step": 17522 }, { "epoch": 2.73, "learning_rate": 1.295255978007158e-06, "logits/chosen": -1.5454325675964355, "logits/rejected": -2.319814443588257, "logps/chosen": -187.56573486328125, "logps/rejected": -290.0936584472656, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.4696550369262695, "rewards/margins": 7.107588768005371, "rewards/rejected": -12.57724380493164, "step": 17523 }, { "epoch": 2.73, "learning_rate": 1.29452253747601e-06, "logits/chosen": -2.313495397567749, "logits/rejected": -2.537005662918091, "logps/chosen": -136.40440368652344, "logps/rejected": -211.98095703125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -5.875954627990723, "rewards/margins": 6.068750381469727, "rewards/rejected": -11.944705963134766, "step": 17524 }, { "epoch": 2.73, "learning_rate": 1.2937890969448623e-06, "logits/chosen": -0.4350292384624481, "logits/rejected": -2.1402337551116943, "logps/chosen": -146.6239471435547, "logps/rejected": -625.2154541015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.16407299041748, "rewards/margins": 14.459221839904785, "rewards/rejected": -23.623294830322266, "step": 17525 }, { "epoch": 2.73, "learning_rate": 1.2930556564137144e-06, "logits/chosen": -2.617008686065674, "logits/rejected": -2.758237600326538, "logps/chosen": -144.3426513671875, "logps/rejected": -383.2578125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -7.795595169067383, "rewards/margins": 5.901378631591797, "rewards/rejected": -13.69697380065918, "step": 17526 }, { "epoch": 2.73, "learning_rate": 1.2923222158825665e-06, "logits/chosen": -2.528021812438965, "logits/rejected": -2.594264507293701, "logps/chosen": -122.3258056640625, "logps/rejected": -199.056396484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.790737628936768, "rewards/margins": 10.533473014831543, "rewards/rejected": -16.32421112060547, "step": 17527 }, { "epoch": 2.73, "learning_rate": 1.2915887753514186e-06, "logits/chosen": -2.589186191558838, "logits/rejected": -2.6655471324920654, "logps/chosen": -306.21405029296875, "logps/rejected": -419.30303955078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.204061985015869, "rewards/margins": 9.576813697814941, "rewards/rejected": -16.78087615966797, "step": 17528 }, { "epoch": 2.73, "learning_rate": 1.290855334820271e-06, "logits/chosen": -2.735184907913208, "logits/rejected": -2.8475303649902344, "logps/chosen": -122.45222473144531, "logps/rejected": -228.38168334960938, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -8.206204414367676, "rewards/margins": 9.261336326599121, "rewards/rejected": -17.467540740966797, "step": 17529 }, { "epoch": 2.73, "learning_rate": 1.290121894289123e-06, "logits/chosen": -2.727818489074707, "logits/rejected": -2.8671936988830566, "logps/chosen": -514.82861328125, "logps/rejected": -567.6325073242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.811178684234619, "rewards/margins": 11.813016891479492, "rewards/rejected": -16.624195098876953, "step": 17530 }, { "epoch": 2.73, "learning_rate": 1.289388453757975e-06, "logits/chosen": -2.3070647716522217, "logits/rejected": -2.046475648880005, "logps/chosen": -208.83258056640625, "logps/rejected": -245.54441833496094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.76017951965332, "rewards/margins": 8.225458145141602, "rewards/rejected": -15.985637664794922, "step": 17531 }, { "epoch": 2.73, "learning_rate": 1.2886550132268272e-06, "logits/chosen": -2.7037079334259033, "logits/rejected": -2.860499620437622, "logps/chosen": -103.42962646484375, "logps/rejected": -219.84420776367188, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -7.173251628875732, "rewards/margins": 9.258138656616211, "rewards/rejected": -16.4313907623291, "step": 17532 }, { "epoch": 2.73, "learning_rate": 1.2879215726956793e-06, "logits/chosen": -1.7819766998291016, "logits/rejected": -2.6089348793029785, "logps/chosen": -144.4321746826172, "logps/rejected": -255.86900329589844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.307261943817139, "rewards/margins": 9.439885139465332, "rewards/rejected": -16.747146606445312, "step": 17533 }, { "epoch": 2.73, "learning_rate": 1.2871881321645314e-06, "logits/chosen": -2.4229073524475098, "logits/rejected": -2.249565601348877, "logps/chosen": -161.4781494140625, "logps/rejected": -298.5491027832031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.729608058929443, "rewards/margins": 14.573965072631836, "rewards/rejected": -20.303573608398438, "step": 17534 }, { "epoch": 2.73, "learning_rate": 1.2864546916333834e-06, "logits/chosen": -3.0377860069274902, "logits/rejected": -2.805382013320923, "logps/chosen": -433.0355224609375, "logps/rejected": -663.4345703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.928006172180176, "rewards/margins": 9.449268341064453, "rewards/rejected": -16.377273559570312, "step": 17535 }, { "epoch": 2.73, "learning_rate": 1.2857212511022355e-06, "logits/chosen": -2.4818501472473145, "logits/rejected": -2.573701858520508, "logps/chosen": -258.3958435058594, "logps/rejected": -334.39068603515625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.402902603149414, "rewards/margins": 7.415128707885742, "rewards/rejected": -15.818031311035156, "step": 17536 }, { "epoch": 2.73, "learning_rate": 1.2849878105710876e-06, "logits/chosen": -2.326681613922119, "logits/rejected": -2.7159292697906494, "logps/chosen": -204.5939178466797, "logps/rejected": -319.73614501953125, "loss": 1.97, "rewards/accuracies": 0.5, "rewards/chosen": -11.77461051940918, "rewards/margins": 4.327798366546631, "rewards/rejected": -16.10240936279297, "step": 17537 }, { "epoch": 2.73, "learning_rate": 1.28425437003994e-06, "logits/chosen": -2.575714588165283, "logits/rejected": -2.0593416690826416, "logps/chosen": -157.89382934570312, "logps/rejected": -329.14520263671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.089102745056152, "rewards/margins": 11.456523895263672, "rewards/rejected": -18.54562759399414, "step": 17538 }, { "epoch": 2.73, "learning_rate": 1.283520929508792e-06, "logits/chosen": -2.667393207550049, "logits/rejected": -1.9440760612487793, "logps/chosen": -732.8145141601562, "logps/rejected": -472.4458312988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -0.6585464477539062, "rewards/margins": 12.91998291015625, "rewards/rejected": -13.578529357910156, "step": 17539 }, { "epoch": 2.73, "learning_rate": 1.2827874889776441e-06, "logits/chosen": -2.1278343200683594, "logits/rejected": -2.8036866188049316, "logps/chosen": -252.42095947265625, "logps/rejected": -305.2449645996094, "loss": 0.1585, "rewards/accuracies": 1.0, "rewards/chosen": -4.634382247924805, "rewards/margins": 6.121583938598633, "rewards/rejected": -10.755966186523438, "step": 17540 }, { "epoch": 2.73, "learning_rate": 1.2820540484464962e-06, "logits/chosen": -2.558742046356201, "logits/rejected": -2.447706937789917, "logps/chosen": -478.3070068359375, "logps/rejected": -522.173583984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.66594409942627, "rewards/margins": 12.003015518188477, "rewards/rejected": -20.668960571289062, "step": 17541 }, { "epoch": 2.73, "learning_rate": 1.2813206079153485e-06, "logits/chosen": -1.8930811882019043, "logits/rejected": -2.497925281524658, "logps/chosen": -139.81854248046875, "logps/rejected": -204.0589141845703, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.775320053100586, "rewards/margins": 8.904647827148438, "rewards/rejected": -13.679967880249023, "step": 17542 }, { "epoch": 2.73, "learning_rate": 1.2805871673842004e-06, "logits/chosen": -2.3949742317199707, "logits/rejected": -2.5170373916625977, "logps/chosen": -120.3391342163086, "logps/rejected": -469.1824951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.6010260581970215, "rewards/margins": 19.39767837524414, "rewards/rejected": -24.998706817626953, "step": 17543 }, { "epoch": 2.73, "learning_rate": 1.2798537268530525e-06, "logits/chosen": -1.7293930053710938, "logits/rejected": -2.7288737297058105, "logps/chosen": -175.35382080078125, "logps/rejected": -686.4774169921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.840921401977539, "rewards/margins": 14.182373046875, "rewards/rejected": -20.02329444885254, "step": 17544 }, { "epoch": 2.73, "learning_rate": 1.2791202863219046e-06, "logits/chosen": -2.740797519683838, "logits/rejected": -2.203686237335205, "logps/chosen": -389.46124267578125, "logps/rejected": -335.0038757324219, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.6976518630981445, "rewards/margins": 7.956478118896484, "rewards/rejected": -12.654129981994629, "step": 17545 }, { "epoch": 2.73, "learning_rate": 1.2783868457907569e-06, "logits/chosen": -2.137253522872925, "logits/rejected": -2.770252227783203, "logps/chosen": -154.6983642578125, "logps/rejected": -552.3866577148438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.847462177276611, "rewards/margins": 11.133544921875, "rewards/rejected": -16.981006622314453, "step": 17546 }, { "epoch": 2.73, "learning_rate": 1.277653405259609e-06, "logits/chosen": -1.7943633794784546, "logits/rejected": -2.512861490249634, "logps/chosen": -198.7418975830078, "logps/rejected": -455.15264892578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.996543884277344, "rewards/margins": 12.329262733459473, "rewards/rejected": -20.325807571411133, "step": 17547 }, { "epoch": 2.73, "learning_rate": 1.276919964728461e-06, "logits/chosen": -2.721529006958008, "logits/rejected": -2.7360548973083496, "logps/chosen": -171.37258911132812, "logps/rejected": -180.76100158691406, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -4.761773109436035, "rewards/margins": 8.904748916625977, "rewards/rejected": -13.666522979736328, "step": 17548 }, { "epoch": 2.73, "learning_rate": 1.2761865241973131e-06, "logits/chosen": -2.74214506149292, "logits/rejected": -1.8918594121932983, "logps/chosen": -377.0780944824219, "logps/rejected": -369.5351257324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.1897239685058594, "rewards/margins": 13.076730728149414, "rewards/rejected": -15.266454696655273, "step": 17549 }, { "epoch": 2.73, "learning_rate": 1.2754530836661652e-06, "logits/chosen": -2.908139944076538, "logits/rejected": -2.740677833557129, "logps/chosen": -290.72064208984375, "logps/rejected": -452.9422302246094, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -9.035158157348633, "rewards/margins": 10.985638618469238, "rewards/rejected": -20.020797729492188, "step": 17550 }, { "epoch": 2.73, "learning_rate": 1.2747196431350175e-06, "logits/chosen": -2.6659011840820312, "logits/rejected": -3.1067230701446533, "logps/chosen": -108.69223022460938, "logps/rejected": -288.2784729003906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.07600212097168, "rewards/margins": 7.128119468688965, "rewards/rejected": -15.204120635986328, "step": 17551 }, { "epoch": 2.73, "learning_rate": 1.2739862026038696e-06, "logits/chosen": -2.4565415382385254, "logits/rejected": -2.764596462249756, "logps/chosen": -254.56597900390625, "logps/rejected": -532.4677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.793050765991211, "rewards/margins": 10.2391939163208, "rewards/rejected": -21.032243728637695, "step": 17552 }, { "epoch": 2.73, "learning_rate": 1.2732527620727215e-06, "logits/chosen": -2.5313186645507812, "logits/rejected": -2.2903950214385986, "logps/chosen": -247.35147094726562, "logps/rejected": -417.3592834472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.132833957672119, "rewards/margins": 14.210368156433105, "rewards/rejected": -20.34320068359375, "step": 17553 }, { "epoch": 2.73, "learning_rate": 1.2725193215415736e-06, "logits/chosen": -1.342376947402954, "logits/rejected": -2.333672523498535, "logps/chosen": -211.63153076171875, "logps/rejected": -460.0367431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.895418167114258, "rewards/margins": 9.922357559204102, "rewards/rejected": -16.81777572631836, "step": 17554 }, { "epoch": 2.73, "learning_rate": 1.2717858810104259e-06, "logits/chosen": -0.36928725242614746, "logits/rejected": -1.9779821634292603, "logps/chosen": -165.2032470703125, "logps/rejected": -835.7923583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.438226699829102, "rewards/margins": 18.129220962524414, "rewards/rejected": -24.567447662353516, "step": 17555 }, { "epoch": 2.73, "learning_rate": 1.271052440479278e-06, "logits/chosen": -1.907269835472107, "logits/rejected": -2.743471145629883, "logps/chosen": -459.9834289550781, "logps/rejected": -650.529052734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.829089164733887, "rewards/margins": 13.229622840881348, "rewards/rejected": -20.058712005615234, "step": 17556 }, { "epoch": 2.73, "learning_rate": 1.27031899994813e-06, "logits/chosen": -2.3322207927703857, "logits/rejected": -2.5220766067504883, "logps/chosen": -302.6830139160156, "logps/rejected": -348.6448974609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.16006851196289, "rewards/margins": 9.235106468200684, "rewards/rejected": -17.395174026489258, "step": 17557 }, { "epoch": 2.73, "learning_rate": 1.2695855594169822e-06, "logits/chosen": -2.5773112773895264, "logits/rejected": -2.6435158252716064, "logps/chosen": -109.63491821289062, "logps/rejected": -266.498779296875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -7.061739921569824, "rewards/margins": 7.595085144042969, "rewards/rejected": -14.656825065612793, "step": 17558 }, { "epoch": 2.73, "learning_rate": 1.2688521188858345e-06, "logits/chosen": -1.7059412002563477, "logits/rejected": -2.696601152420044, "logps/chosen": -323.20880126953125, "logps/rejected": -571.8814697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.199860095977783, "rewards/margins": 13.886848449707031, "rewards/rejected": -20.08670997619629, "step": 17559 }, { "epoch": 2.73, "learning_rate": 1.2681186783546865e-06, "logits/chosen": -2.677877426147461, "logits/rejected": -2.8417892456054688, "logps/chosen": -616.0734252929688, "logps/rejected": -521.3341064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.389636278152466, "rewards/margins": 10.753211975097656, "rewards/rejected": -13.142847061157227, "step": 17560 }, { "epoch": 2.73, "learning_rate": 1.2673852378235386e-06, "logits/chosen": -2.446775197982788, "logits/rejected": -2.0470776557922363, "logps/chosen": -315.7281799316406, "logps/rejected": -369.80419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.790759086608887, "rewards/margins": 10.279399871826172, "rewards/rejected": -18.070158004760742, "step": 17561 }, { "epoch": 2.73, "learning_rate": 1.2666517972923905e-06, "logits/chosen": -2.331874132156372, "logits/rejected": -2.7930004596710205, "logps/chosen": -721.3690185546875, "logps/rejected": -848.9462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.724882125854492, "rewards/margins": 13.761754989624023, "rewards/rejected": -22.486637115478516, "step": 17562 }, { "epoch": 2.73, "learning_rate": 1.2659183567612428e-06, "logits/chosen": -1.7622119188308716, "logits/rejected": -3.0278007984161377, "logps/chosen": -152.43930053710938, "logps/rejected": -459.8720703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9732513427734375, "rewards/margins": 12.21921157836914, "rewards/rejected": -17.192462921142578, "step": 17563 }, { "epoch": 2.73, "learning_rate": 1.265184916230095e-06, "logits/chosen": -2.3609251976013184, "logits/rejected": -2.215179204940796, "logps/chosen": -193.1466827392578, "logps/rejected": -342.9941101074219, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -10.342867851257324, "rewards/margins": 9.729201316833496, "rewards/rejected": -20.07206916809082, "step": 17564 }, { "epoch": 2.73, "learning_rate": 1.264451475698947e-06, "logits/chosen": -2.3280391693115234, "logits/rejected": -2.759828805923462, "logps/chosen": -493.57000732421875, "logps/rejected": -474.2118225097656, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.956421852111816, "rewards/margins": 7.358958721160889, "rewards/rejected": -16.315380096435547, "step": 17565 }, { "epoch": 2.73, "learning_rate": 1.263718035167799e-06, "logits/chosen": -2.150926113128662, "logits/rejected": -2.6234359741210938, "logps/chosen": -216.67507934570312, "logps/rejected": -380.40216064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.60300064086914, "rewards/margins": 8.957218170166016, "rewards/rejected": -17.560218811035156, "step": 17566 }, { "epoch": 2.73, "learning_rate": 1.2629845946366514e-06, "logits/chosen": -2.3856797218322754, "logits/rejected": -2.683847427368164, "logps/chosen": -135.1837921142578, "logps/rejected": -323.58203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.283092975616455, "rewards/margins": 12.421525955200195, "rewards/rejected": -16.704618453979492, "step": 17567 }, { "epoch": 2.73, "learning_rate": 1.2622511541055035e-06, "logits/chosen": -1.4544967412948608, "logits/rejected": -2.213001251220703, "logps/chosen": -389.5769348144531, "logps/rejected": -498.7650146484375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -7.206973552703857, "rewards/margins": 6.815238952636719, "rewards/rejected": -14.022212982177734, "step": 17568 }, { "epoch": 2.73, "learning_rate": 1.2615177135743556e-06, "logits/chosen": -2.199528694152832, "logits/rejected": -2.517747640609741, "logps/chosen": -347.034423828125, "logps/rejected": -418.66107177734375, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/chosen": -6.026576519012451, "rewards/margins": 7.223951816558838, "rewards/rejected": -13.250528335571289, "step": 17569 }, { "epoch": 2.73, "learning_rate": 1.2607842730432077e-06, "logits/chosen": -2.7731168270111084, "logits/rejected": -2.2870893478393555, "logps/chosen": -207.0793914794922, "logps/rejected": -209.27743530273438, "loss": 0.2065, "rewards/accuracies": 1.0, "rewards/chosen": -7.443043231964111, "rewards/margins": 4.038877964019775, "rewards/rejected": -11.481921195983887, "step": 17570 }, { "epoch": 2.73, "learning_rate": 1.2600508325120598e-06, "logits/chosen": -2.532135248184204, "logits/rejected": -2.8684465885162354, "logps/chosen": -262.55712890625, "logps/rejected": -389.65777587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.713630437850952, "rewards/margins": 13.327922821044922, "rewards/rejected": -16.041553497314453, "step": 17571 }, { "epoch": 2.73, "learning_rate": 1.2593173919809118e-06, "logits/chosen": -2.9620893001556396, "logits/rejected": -2.852864980697632, "logps/chosen": -782.740966796875, "logps/rejected": -577.32958984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.822454929351807, "rewards/margins": 8.616663932800293, "rewards/rejected": -15.439119338989258, "step": 17572 }, { "epoch": 2.73, "learning_rate": 1.258583951449764e-06, "logits/chosen": -2.6935389041900635, "logits/rejected": -2.603266954421997, "logps/chosen": -418.22357177734375, "logps/rejected": -459.7681884765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.851871490478516, "rewards/margins": 10.936056137084961, "rewards/rejected": -16.787927627563477, "step": 17573 }, { "epoch": 2.73, "learning_rate": 1.257850510918616e-06, "logits/chosen": -2.5039451122283936, "logits/rejected": -2.111464262008667, "logps/chosen": -288.9367980957031, "logps/rejected": -358.70599365234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.491090774536133, "rewards/margins": 7.990160942077637, "rewards/rejected": -14.481250762939453, "step": 17574 }, { "epoch": 2.73, "learning_rate": 1.2571170703874681e-06, "logits/chosen": -2.519942045211792, "logits/rejected": -2.954312801361084, "logps/chosen": -173.56101989746094, "logps/rejected": -473.09881591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.799959659576416, "rewards/margins": 11.044920921325684, "rewards/rejected": -17.844881057739258, "step": 17575 }, { "epoch": 2.73, "learning_rate": 1.2563836298563204e-06, "logits/chosen": -2.848290205001831, "logits/rejected": -2.2271852493286133, "logps/chosen": -378.7177429199219, "logps/rejected": -385.27362060546875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -12.344279289245605, "rewards/margins": 5.2323455810546875, "rewards/rejected": -17.57662582397461, "step": 17576 }, { "epoch": 2.73, "learning_rate": 1.2556501893251725e-06, "logits/chosen": -1.639453649520874, "logits/rejected": -2.6502573490142822, "logps/chosen": -177.73245239257812, "logps/rejected": -366.6787109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.723650932312012, "rewards/margins": 7.734596252441406, "rewards/rejected": -14.458247184753418, "step": 17577 }, { "epoch": 2.73, "learning_rate": 1.2549167487940246e-06, "logits/chosen": -2.3843467235565186, "logits/rejected": -2.850142002105713, "logps/chosen": -479.380859375, "logps/rejected": -654.6854858398438, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.783576965332031, "rewards/margins": 6.914997100830078, "rewards/rejected": -14.69857406616211, "step": 17578 }, { "epoch": 2.73, "learning_rate": 1.2541833082628767e-06, "logits/chosen": -2.4060895442962646, "logits/rejected": -2.5841612815856934, "logps/chosen": -201.95108032226562, "logps/rejected": -358.41485595703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.357375621795654, "rewards/margins": 9.488945007324219, "rewards/rejected": -16.84632110595703, "step": 17579 }, { "epoch": 2.73, "learning_rate": 1.253449867731729e-06, "logits/chosen": -2.4693338871002197, "logits/rejected": -1.3029203414916992, "logps/chosen": -278.20135498046875, "logps/rejected": -293.5380859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.067913055419922, "rewards/margins": 9.717780113220215, "rewards/rejected": -14.785694122314453, "step": 17580 }, { "epoch": 2.73, "learning_rate": 1.252716427200581e-06, "logits/chosen": -1.871933937072754, "logits/rejected": -2.3642830848693848, "logps/chosen": -257.9143371582031, "logps/rejected": -421.6826477050781, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -7.681670665740967, "rewards/margins": 6.704598903656006, "rewards/rejected": -14.386269569396973, "step": 17581 }, { "epoch": 2.73, "learning_rate": 1.251982986669433e-06, "logits/chosen": -2.3525397777557373, "logits/rejected": -2.5472629070281982, "logps/chosen": -483.1592712402344, "logps/rejected": -388.4500732421875, "loss": 0.0354, "rewards/accuracies": 1.0, "rewards/chosen": -9.764178276062012, "rewards/margins": 6.407546043395996, "rewards/rejected": -16.171724319458008, "step": 17582 }, { "epoch": 2.73, "learning_rate": 1.251249546138285e-06, "logits/chosen": -2.474475622177124, "logits/rejected": -2.826460123062134, "logps/chosen": -176.87918090820312, "logps/rejected": -361.3863830566406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.769460678100586, "rewards/margins": 10.141727447509766, "rewards/rejected": -15.911188125610352, "step": 17583 }, { "epoch": 2.73, "learning_rate": 1.2505161056071373e-06, "logits/chosen": -2.707382917404175, "logits/rejected": -2.985854387283325, "logps/chosen": -110.35836029052734, "logps/rejected": -279.12640380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.694515228271484, "rewards/margins": 10.168015480041504, "rewards/rejected": -14.862530708312988, "step": 17584 }, { "epoch": 2.73, "learning_rate": 1.2497826650759894e-06, "logits/chosen": -1.3441739082336426, "logits/rejected": -2.267615556716919, "logps/chosen": -313.7491455078125, "logps/rejected": -624.8232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.458669662475586, "rewards/margins": 12.054969787597656, "rewards/rejected": -23.513641357421875, "step": 17585 }, { "epoch": 2.73, "learning_rate": 1.2490492245448415e-06, "logits/chosen": -3.1044209003448486, "logits/rejected": -2.7564353942871094, "logps/chosen": -249.07969665527344, "logps/rejected": -243.97650146484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.360222578048706, "rewards/margins": 12.78673267364502, "rewards/rejected": -16.146955490112305, "step": 17586 }, { "epoch": 2.74, "learning_rate": 1.2483157840136936e-06, "logits/chosen": -2.385468006134033, "logits/rejected": -2.532243251800537, "logps/chosen": -132.43994140625, "logps/rejected": -267.5111083984375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -9.16081428527832, "rewards/margins": 9.602479934692383, "rewards/rejected": -18.763294219970703, "step": 17587 }, { "epoch": 2.74, "learning_rate": 1.247582343482546e-06, "logits/chosen": -2.3581223487854004, "logits/rejected": -2.8178765773773193, "logps/chosen": -76.55824279785156, "logps/rejected": -370.17108154296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.947422504425049, "rewards/margins": 12.77358341217041, "rewards/rejected": -18.721004486083984, "step": 17588 }, { "epoch": 2.74, "learning_rate": 1.246848902951398e-06, "logits/chosen": -1.8739991188049316, "logits/rejected": -2.518986701965332, "logps/chosen": -699.9733276367188, "logps/rejected": -734.0728149414062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.668606758117676, "rewards/margins": 8.420473098754883, "rewards/rejected": -17.089080810546875, "step": 17589 }, { "epoch": 2.74, "learning_rate": 1.24611546242025e-06, "logits/chosen": -2.736989974975586, "logits/rejected": -2.3053810596466064, "logps/chosen": -719.1017456054688, "logps/rejected": -725.3899536132812, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -5.0127105712890625, "rewards/margins": 11.373095512390137, "rewards/rejected": -16.385807037353516, "step": 17590 }, { "epoch": 2.74, "learning_rate": 1.2453820218891022e-06, "logits/chosen": -1.8516370058059692, "logits/rejected": -2.315487861633301, "logps/chosen": -281.7159423828125, "logps/rejected": -512.9962768554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.60162878036499, "rewards/margins": 16.74629783630371, "rewards/rejected": -24.34792709350586, "step": 17591 }, { "epoch": 2.74, "learning_rate": 1.2446485813579543e-06, "logits/chosen": -2.5834991931915283, "logits/rejected": -2.2236037254333496, "logps/chosen": -467.3951416015625, "logps/rejected": -466.6423645019531, "loss": 1.2254, "rewards/accuracies": 0.5, "rewards/chosen": -13.048601150512695, "rewards/margins": 4.3338847160339355, "rewards/rejected": -17.38248634338379, "step": 17592 }, { "epoch": 2.74, "learning_rate": 1.2439151408268064e-06, "logits/chosen": -2.6157028675079346, "logits/rejected": -2.413142204284668, "logps/chosen": -240.2723388671875, "logps/rejected": -405.864501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.108988285064697, "rewards/margins": 11.74218463897705, "rewards/rejected": -16.851173400878906, "step": 17593 }, { "epoch": 2.74, "learning_rate": 1.2431817002956585e-06, "logits/chosen": -2.4894790649414062, "logits/rejected": -2.3460323810577393, "logps/chosen": -337.7298583984375, "logps/rejected": -410.44091796875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -8.11259937286377, "rewards/margins": 10.884879112243652, "rewards/rejected": -18.997478485107422, "step": 17594 }, { "epoch": 2.74, "learning_rate": 1.2424482597645105e-06, "logits/chosen": -2.946122884750366, "logits/rejected": -2.339488983154297, "logps/chosen": -209.29177856445312, "logps/rejected": -204.48538208007812, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -4.635615348815918, "rewards/margins": 7.321219444274902, "rewards/rejected": -11.95683479309082, "step": 17595 }, { "epoch": 2.74, "learning_rate": 1.2417148192333626e-06, "logits/chosen": -2.5459790229797363, "logits/rejected": -2.6965906620025635, "logps/chosen": -196.02316284179688, "logps/rejected": -515.6109619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.848093509674072, "rewards/margins": 10.738800048828125, "rewards/rejected": -17.58689308166504, "step": 17596 }, { "epoch": 2.74, "learning_rate": 1.240981378702215e-06, "logits/chosen": -2.675459146499634, "logits/rejected": -1.3961131572723389, "logps/chosen": -717.1481323242188, "logps/rejected": -571.0771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.8161301612854, "rewards/margins": 15.13190746307373, "rewards/rejected": -21.94803810119629, "step": 17597 }, { "epoch": 2.74, "learning_rate": 1.240247938171067e-06, "logits/chosen": -2.7371785640716553, "logits/rejected": -2.190553903579712, "logps/chosen": -207.418212890625, "logps/rejected": -216.18203735351562, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -6.356502532958984, "rewards/margins": 8.100578308105469, "rewards/rejected": -14.457080841064453, "step": 17598 }, { "epoch": 2.74, "learning_rate": 1.2395144976399191e-06, "logits/chosen": -1.454026699066162, "logits/rejected": -2.3718299865722656, "logps/chosen": -362.6213073730469, "logps/rejected": -458.3917541503906, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.939301490783691, "rewards/margins": 9.389867782592773, "rewards/rejected": -16.32917022705078, "step": 17599 }, { "epoch": 2.74, "learning_rate": 1.2387810571087712e-06, "logits/chosen": -2.5406999588012695, "logits/rejected": -0.9415284991264343, "logps/chosen": -511.65960693359375, "logps/rejected": -333.0371398925781, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -8.168377876281738, "rewards/margins": 8.727703094482422, "rewards/rejected": -16.896080017089844, "step": 17600 }, { "epoch": 2.74, "learning_rate": 1.2380476165776235e-06, "logits/chosen": -2.7044551372528076, "logits/rejected": -2.252980947494507, "logps/chosen": -438.7832946777344, "logps/rejected": -435.62908935546875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -3.8411684036254883, "rewards/margins": 7.527345657348633, "rewards/rejected": -11.368514060974121, "step": 17601 }, { "epoch": 2.74, "learning_rate": 1.2373141760464754e-06, "logits/chosen": -2.5019774436950684, "logits/rejected": -2.2642579078674316, "logps/chosen": -138.8353271484375, "logps/rejected": -322.5504150390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.770520210266113, "rewards/margins": 8.355018615722656, "rewards/rejected": -14.12553882598877, "step": 17602 }, { "epoch": 2.74, "learning_rate": 1.2365807355153275e-06, "logits/chosen": -2.7775919437408447, "logits/rejected": -3.0243539810180664, "logps/chosen": -153.08627319335938, "logps/rejected": -234.86605834960938, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -6.101101398468018, "rewards/margins": 4.488461494445801, "rewards/rejected": -10.589563369750977, "step": 17603 }, { "epoch": 2.74, "learning_rate": 1.2358472949841796e-06, "logits/chosen": -2.675809383392334, "logits/rejected": -2.829918384552002, "logps/chosen": -173.3983612060547, "logps/rejected": -279.5069274902344, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -7.148299217224121, "rewards/margins": 7.43737268447876, "rewards/rejected": -14.585672378540039, "step": 17604 }, { "epoch": 2.74, "learning_rate": 1.2351138544530319e-06, "logits/chosen": -2.493532419204712, "logits/rejected": -2.6366384029388428, "logps/chosen": -428.04852294921875, "logps/rejected": -695.73291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.031157493591309, "rewards/margins": 10.862388610839844, "rewards/rejected": -19.89354705810547, "step": 17605 }, { "epoch": 2.74, "learning_rate": 1.234380413921884e-06, "logits/chosen": -2.7563488483428955, "logits/rejected": -2.8154666423797607, "logps/chosen": -119.15690612792969, "logps/rejected": -218.83804321289062, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.2019944190979, "rewards/margins": 8.512285232543945, "rewards/rejected": -13.714279174804688, "step": 17606 }, { "epoch": 2.74, "learning_rate": 1.233646973390736e-06, "logits/chosen": -2.3909456729888916, "logits/rejected": -2.0766451358795166, "logps/chosen": -399.7012939453125, "logps/rejected": -406.44903564453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7218620777130127, "rewards/margins": 11.191225051879883, "rewards/rejected": -14.913087844848633, "step": 17607 }, { "epoch": 2.74, "learning_rate": 1.2329135328595881e-06, "logits/chosen": -1.4376798868179321, "logits/rejected": -2.5252466201782227, "logps/chosen": -175.45907592773438, "logps/rejected": -404.24713134765625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -3.5963454246520996, "rewards/margins": 8.813592910766602, "rewards/rejected": -12.409937858581543, "step": 17608 }, { "epoch": 2.74, "learning_rate": 1.2321800923284404e-06, "logits/chosen": -2.3411099910736084, "logits/rejected": -2.641157865524292, "logps/chosen": -167.21163940429688, "logps/rejected": -413.0861511230469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.058560848236084, "rewards/margins": 16.77051544189453, "rewards/rejected": -21.829078674316406, "step": 17609 }, { "epoch": 2.74, "learning_rate": 1.2314466517972925e-06, "logits/chosen": -1.5379935503005981, "logits/rejected": -2.535215139389038, "logps/chosen": -123.40392303466797, "logps/rejected": -451.34039306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.10257625579834, "rewards/margins": 13.0405912399292, "rewards/rejected": -19.14316749572754, "step": 17610 }, { "epoch": 2.74, "learning_rate": 1.2307132112661446e-06, "logits/chosen": -2.5746185779571533, "logits/rejected": -2.4429986476898193, "logps/chosen": -838.779541015625, "logps/rejected": -757.8373413085938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.49106502532959, "rewards/margins": 9.76018238067627, "rewards/rejected": -19.25124740600586, "step": 17611 }, { "epoch": 2.74, "learning_rate": 1.2299797707349965e-06, "logits/chosen": -1.1121467351913452, "logits/rejected": -2.9314324855804443, "logps/chosen": -183.314208984375, "logps/rejected": -556.8541259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.517741680145264, "rewards/margins": 11.326314926147461, "rewards/rejected": -17.84405517578125, "step": 17612 }, { "epoch": 2.74, "learning_rate": 1.2292463302038488e-06, "logits/chosen": -2.3147473335266113, "logits/rejected": -2.8997855186462402, "logps/chosen": -161.94009399414062, "logps/rejected": -411.1539306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.302779674530029, "rewards/margins": 13.357254028320312, "rewards/rejected": -19.6600341796875, "step": 17613 }, { "epoch": 2.74, "learning_rate": 1.2285128896727009e-06, "logits/chosen": -2.289219617843628, "logits/rejected": -2.2892065048217773, "logps/chosen": -193.16238403320312, "logps/rejected": -258.6380310058594, "loss": 1.7567, "rewards/accuracies": 0.5, "rewards/chosen": -7.7757062911987305, "rewards/margins": 7.257009506225586, "rewards/rejected": -15.032715797424316, "step": 17614 }, { "epoch": 2.74, "learning_rate": 1.227779449141553e-06, "logits/chosen": -1.6924986839294434, "logits/rejected": -2.211777925491333, "logps/chosen": -241.16220092773438, "logps/rejected": -304.77349853515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.101551532745361, "rewards/margins": 9.181083679199219, "rewards/rejected": -16.282634735107422, "step": 17615 }, { "epoch": 2.74, "learning_rate": 1.227046008610405e-06, "logits/chosen": -3.062861204147339, "logits/rejected": -2.550710678100586, "logps/chosen": -287.32574462890625, "logps/rejected": -162.38629150390625, "loss": 1.9014, "rewards/accuracies": 0.5, "rewards/chosen": -9.23354721069336, "rewards/margins": 0.5489547252655029, "rewards/rejected": -9.782502174377441, "step": 17616 }, { "epoch": 2.74, "learning_rate": 1.2263125680792572e-06, "logits/chosen": -2.45859956741333, "logits/rejected": -2.7265686988830566, "logps/chosen": -168.7923583984375, "logps/rejected": -357.6373596191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.468988418579102, "rewards/margins": 12.450162887573242, "rewards/rejected": -20.919151306152344, "step": 17617 }, { "epoch": 2.74, "learning_rate": 1.2255791275481095e-06, "logits/chosen": -1.1743583679199219, "logits/rejected": -1.9288241863250732, "logps/chosen": -114.88223266601562, "logps/rejected": -515.482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4370880126953125, "rewards/margins": 17.432451248168945, "rewards/rejected": -22.869539260864258, "step": 17618 }, { "epoch": 2.74, "learning_rate": 1.2248456870169616e-06, "logits/chosen": -1.7579431533813477, "logits/rejected": -2.7903287410736084, "logps/chosen": -176.30990600585938, "logps/rejected": -510.03094482421875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.59266471862793, "rewards/margins": 8.488041877746582, "rewards/rejected": -17.080707550048828, "step": 17619 }, { "epoch": 2.74, "learning_rate": 1.2241122464858136e-06, "logits/chosen": -2.2004969120025635, "logits/rejected": -2.6524617671966553, "logps/chosen": -285.5875549316406, "logps/rejected": -471.76263427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.961923599243164, "rewards/margins": 10.54571533203125, "rewards/rejected": -16.507638931274414, "step": 17620 }, { "epoch": 2.74, "learning_rate": 1.2233788059546655e-06, "logits/chosen": -1.5781207084655762, "logits/rejected": -2.6192750930786133, "logps/chosen": -236.6622314453125, "logps/rejected": -516.3627319335938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.815483093261719, "rewards/margins": 16.755014419555664, "rewards/rejected": -22.570497512817383, "step": 17621 }, { "epoch": 2.74, "learning_rate": 1.2226453654235178e-06, "logits/chosen": -2.8579447269439697, "logits/rejected": -2.858320713043213, "logps/chosen": -196.47219848632812, "logps/rejected": -369.9628601074219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.437846660614014, "rewards/margins": 8.277071952819824, "rewards/rejected": -15.71491813659668, "step": 17622 }, { "epoch": 2.74, "learning_rate": 1.22191192489237e-06, "logits/chosen": -1.5969945192337036, "logits/rejected": -2.284776210784912, "logps/chosen": -153.26922607421875, "logps/rejected": -367.86993408203125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -10.779525756835938, "rewards/margins": 7.387353420257568, "rewards/rejected": -18.166879653930664, "step": 17623 }, { "epoch": 2.74, "learning_rate": 1.221178484361222e-06, "logits/chosen": -2.064694404602051, "logits/rejected": -2.7091946601867676, "logps/chosen": -183.3564910888672, "logps/rejected": -450.55426025390625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -7.298754692077637, "rewards/margins": 8.602331161499023, "rewards/rejected": -15.90108585357666, "step": 17624 }, { "epoch": 2.74, "learning_rate": 1.220445043830074e-06, "logits/chosen": -3.077509641647339, "logits/rejected": -3.1041486263275146, "logps/chosen": -185.0870361328125, "logps/rejected": -247.5460205078125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -10.857433319091797, "rewards/margins": 6.623699188232422, "rewards/rejected": -17.48113250732422, "step": 17625 }, { "epoch": 2.74, "learning_rate": 1.2197116032989264e-06, "logits/chosen": -1.7469843626022339, "logits/rejected": -1.693268060684204, "logps/chosen": -408.34820556640625, "logps/rejected": -530.275390625, "loss": 0.0239, "rewards/accuracies": 1.0, "rewards/chosen": -14.103979110717773, "rewards/margins": 3.7600154876708984, "rewards/rejected": -17.863994598388672, "step": 17626 }, { "epoch": 2.74, "learning_rate": 1.2189781627677785e-06, "logits/chosen": -2.8073034286499023, "logits/rejected": -2.373684883117676, "logps/chosen": -238.4761505126953, "logps/rejected": -334.96795654296875, "loss": 0.0247, "rewards/accuracies": 1.0, "rewards/chosen": -9.477029800415039, "rewards/margins": 4.180577278137207, "rewards/rejected": -13.657607078552246, "step": 17627 }, { "epoch": 2.74, "learning_rate": 1.2182447222366306e-06, "logits/chosen": -2.175189733505249, "logits/rejected": -2.6409637928009033, "logps/chosen": -179.66758728027344, "logps/rejected": -415.4436950683594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -8.309450149536133, "rewards/margins": 8.438995361328125, "rewards/rejected": -16.748445510864258, "step": 17628 }, { "epoch": 2.74, "learning_rate": 1.2175112817054827e-06, "logits/chosen": -2.154229164123535, "logits/rejected": -2.760305166244507, "logps/chosen": -109.22760772705078, "logps/rejected": -256.6726989746094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.224949836730957, "rewards/margins": 10.311939239501953, "rewards/rejected": -15.536890029907227, "step": 17629 }, { "epoch": 2.74, "learning_rate": 1.216777841174335e-06, "logits/chosen": -1.942234992980957, "logits/rejected": -2.6695101261138916, "logps/chosen": -312.4104309082031, "logps/rejected": -433.4914245605469, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -9.108929634094238, "rewards/margins": 9.463303565979004, "rewards/rejected": -18.572233200073242, "step": 17630 }, { "epoch": 2.74, "learning_rate": 1.2160444006431868e-06, "logits/chosen": -2.022674798965454, "logits/rejected": -2.0604288578033447, "logps/chosen": -262.0852966308594, "logps/rejected": -293.71002197265625, "loss": 0.0209, "rewards/accuracies": 1.0, "rewards/chosen": -6.9272356033325195, "rewards/margins": 7.550006866455078, "rewards/rejected": -14.477243423461914, "step": 17631 }, { "epoch": 2.74, "learning_rate": 1.215310960112039e-06, "logits/chosen": -2.244598627090454, "logits/rejected": -2.7280688285827637, "logps/chosen": -106.5849838256836, "logps/rejected": -293.35711669921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.720903396606445, "rewards/margins": 9.971388816833496, "rewards/rejected": -17.692291259765625, "step": 17632 }, { "epoch": 2.74, "learning_rate": 1.214577519580891e-06, "logits/chosen": -2.5739364624023438, "logits/rejected": -1.4549520015716553, "logps/chosen": -300.0423278808594, "logps/rejected": -205.58522033691406, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/chosen": -4.1600446701049805, "rewards/margins": 3.9982995986938477, "rewards/rejected": -8.158344268798828, "step": 17633 }, { "epoch": 2.74, "learning_rate": 1.2138440790497433e-06, "logits/chosen": -2.6080737113952637, "logits/rejected": -2.6034538745880127, "logps/chosen": -253.74693298339844, "logps/rejected": -451.98681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.684849739074707, "rewards/margins": 11.512687683105469, "rewards/rejected": -20.19753646850586, "step": 17634 }, { "epoch": 2.74, "learning_rate": 1.2131106385185954e-06, "logits/chosen": -2.21026611328125, "logits/rejected": -2.534156322479248, "logps/chosen": -796.8032836914062, "logps/rejected": -693.9317626953125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -10.22702693939209, "rewards/margins": 7.90897798538208, "rewards/rejected": -18.136005401611328, "step": 17635 }, { "epoch": 2.74, "learning_rate": 1.2123771979874475e-06, "logits/chosen": -2.653669834136963, "logits/rejected": -2.8367931842803955, "logps/chosen": -376.03887939453125, "logps/rejected": -390.0053405761719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.868294715881348, "rewards/margins": 8.333048820495605, "rewards/rejected": -16.201343536376953, "step": 17636 }, { "epoch": 2.74, "learning_rate": 1.2116437574562996e-06, "logits/chosen": -2.681824207305908, "logits/rejected": -2.3278443813323975, "logps/chosen": -308.85589599609375, "logps/rejected": -468.9544982910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.825333118438721, "rewards/margins": 13.376083374023438, "rewards/rejected": -21.201416015625, "step": 17637 }, { "epoch": 2.74, "learning_rate": 1.2109103169251517e-06, "logits/chosen": -2.5965540409088135, "logits/rejected": -2.9903223514556885, "logps/chosen": -351.53619384765625, "logps/rejected": -495.3934631347656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.200843334197998, "rewards/margins": 10.525640487670898, "rewards/rejected": -16.726484298706055, "step": 17638 }, { "epoch": 2.74, "learning_rate": 1.210176876394004e-06, "logits/chosen": -1.6982667446136475, "logits/rejected": -2.5823872089385986, "logps/chosen": -357.24072265625, "logps/rejected": -625.3057861328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.686777114868164, "rewards/margins": 14.777206420898438, "rewards/rejected": -21.4639835357666, "step": 17639 }, { "epoch": 2.74, "learning_rate": 1.209443435862856e-06, "logits/chosen": -2.466153621673584, "logits/rejected": -2.7797129154205322, "logps/chosen": -191.5167236328125, "logps/rejected": -389.7365417480469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.318678379058838, "rewards/margins": 14.49032974243164, "rewards/rejected": -17.809009552001953, "step": 17640 }, { "epoch": 2.74, "learning_rate": 1.208709995331708e-06, "logits/chosen": -2.7588610649108887, "logits/rejected": -2.681063652038574, "logps/chosen": -425.02001953125, "logps/rejected": -359.352783203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.309464931488037, "rewards/margins": 9.953194618225098, "rewards/rejected": -15.262660026550293, "step": 17641 }, { "epoch": 2.74, "learning_rate": 1.20797655480056e-06, "logits/chosen": -2.6624104976654053, "logits/rejected": -1.9929746389389038, "logps/chosen": -335.21685791015625, "logps/rejected": -273.7677917480469, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.073590278625488, "rewards/margins": 7.690479278564453, "rewards/rejected": -14.764068603515625, "step": 17642 }, { "epoch": 2.74, "learning_rate": 1.2072431142694124e-06, "logits/chosen": -2.4972140789031982, "logits/rejected": -2.898533582687378, "logps/chosen": -137.86248779296875, "logps/rejected": -309.2247314453125, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -4.21657133102417, "rewards/margins": 6.290441036224365, "rewards/rejected": -10.507012367248535, "step": 17643 }, { "epoch": 2.74, "learning_rate": 1.2065096737382644e-06, "logits/chosen": -0.9453955888748169, "logits/rejected": -2.3025476932525635, "logps/chosen": -133.83705139160156, "logps/rejected": -586.6737670898438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.49170446395874, "rewards/margins": 14.233097076416016, "rewards/rejected": -21.72480010986328, "step": 17644 }, { "epoch": 2.74, "learning_rate": 1.2057762332071165e-06, "logits/chosen": -2.939145565032959, "logits/rejected": -2.8585762977600098, "logps/chosen": -184.049560546875, "logps/rejected": -193.1391143798828, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -4.500894069671631, "rewards/margins": 6.9123029708862305, "rewards/rejected": -11.41319751739502, "step": 17645 }, { "epoch": 2.74, "learning_rate": 1.2050427926759686e-06, "logits/chosen": -2.555115222930908, "logits/rejected": -2.8237698078155518, "logps/chosen": -130.12591552734375, "logps/rejected": -352.2735900878906, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -8.272013664245605, "rewards/margins": 11.725707054138184, "rewards/rejected": -19.99772071838379, "step": 17646 }, { "epoch": 2.74, "learning_rate": 1.204309352144821e-06, "logits/chosen": -2.3362693786621094, "logits/rejected": -2.555041790008545, "logps/chosen": -511.5991516113281, "logps/rejected": -574.3323364257812, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -10.000368118286133, "rewards/margins": 7.72339391708374, "rewards/rejected": -17.72376251220703, "step": 17647 }, { "epoch": 2.74, "learning_rate": 1.203575911613673e-06, "logits/chosen": -2.438516616821289, "logits/rejected": -2.3639373779296875, "logps/chosen": -329.658935546875, "logps/rejected": -472.03448486328125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.80886173248291, "rewards/margins": 7.516039848327637, "rewards/rejected": -14.324901580810547, "step": 17648 }, { "epoch": 2.74, "learning_rate": 1.202842471082525e-06, "logits/chosen": -2.5850064754486084, "logits/rejected": -2.11820387840271, "logps/chosen": -288.65093994140625, "logps/rejected": -312.25836181640625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -4.911030292510986, "rewards/margins": 6.533288955688477, "rewards/rejected": -11.444318771362305, "step": 17649 }, { "epoch": 2.74, "learning_rate": 1.2021090305513772e-06, "logits/chosen": -2.743849515914917, "logits/rejected": -1.8937880992889404, "logps/chosen": -344.319091796875, "logps/rejected": -377.1340026855469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.33416748046875, "rewards/margins": 9.237489700317383, "rewards/rejected": -17.571657180786133, "step": 17650 }, { "epoch": 2.75, "learning_rate": 1.2013755900202293e-06, "logits/chosen": -1.679322600364685, "logits/rejected": -2.599388599395752, "logps/chosen": -130.0762176513672, "logps/rejected": -310.6650390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.851264476776123, "rewards/margins": 10.065004348754883, "rewards/rejected": -14.916269302368164, "step": 17651 }, { "epoch": 2.75, "learning_rate": 1.2006421494890814e-06, "logits/chosen": -1.5368815660476685, "logits/rejected": -2.4021198749542236, "logps/chosen": -178.3088836669922, "logps/rejected": -373.06689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.994351387023926, "rewards/margins": 11.424589157104492, "rewards/rejected": -18.418941497802734, "step": 17652 }, { "epoch": 2.75, "learning_rate": 1.1999087089579335e-06, "logits/chosen": -2.7199699878692627, "logits/rejected": -2.612063407897949, "logps/chosen": -151.9625244140625, "logps/rejected": -166.88165283203125, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -7.504830360412598, "rewards/margins": 6.600264549255371, "rewards/rejected": -14.105094909667969, "step": 17653 }, { "epoch": 2.75, "learning_rate": 1.1991752684267856e-06, "logits/chosen": -2.6487913131713867, "logits/rejected": -2.7834205627441406, "logps/chosen": -388.82037353515625, "logps/rejected": -329.1993103027344, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -11.601187705993652, "rewards/margins": 5.431585311889648, "rewards/rejected": -17.032772064208984, "step": 17654 }, { "epoch": 2.75, "learning_rate": 1.1984418278956376e-06, "logits/chosen": -2.677028179168701, "logits/rejected": -2.7474961280822754, "logps/chosen": -273.6223449707031, "logps/rejected": -523.0020141601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.69052505493164, "rewards/margins": 13.992807388305664, "rewards/rejected": -22.683334350585938, "step": 17655 }, { "epoch": 2.75, "learning_rate": 1.19770838736449e-06, "logits/chosen": -2.819321393966675, "logits/rejected": -2.2916054725646973, "logps/chosen": -189.7821502685547, "logps/rejected": -201.84893798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.216720104217529, "rewards/margins": 10.502572059631348, "rewards/rejected": -15.719291687011719, "step": 17656 }, { "epoch": 2.75, "learning_rate": 1.196974946833342e-06, "logits/chosen": -2.3761978149414062, "logits/rejected": -2.5364136695861816, "logps/chosen": -408.77728271484375, "logps/rejected": -378.73297119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7591705322265625, "rewards/margins": 9.200384140014648, "rewards/rejected": -12.959554672241211, "step": 17657 }, { "epoch": 2.75, "learning_rate": 1.1962415063021941e-06, "logits/chosen": -2.4761600494384766, "logits/rejected": -2.70378041267395, "logps/chosen": -144.02084350585938, "logps/rejected": -268.3721923828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.225929260253906, "rewards/margins": 8.929985046386719, "rewards/rejected": -15.155914306640625, "step": 17658 }, { "epoch": 2.75, "learning_rate": 1.1955080657710462e-06, "logits/chosen": -2.8631298542022705, "logits/rejected": -2.0247881412506104, "logps/chosen": -611.2130126953125, "logps/rejected": -354.40423583984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.6839356422424316, "rewards/margins": 11.553784370422363, "rewards/rejected": -15.237720489501953, "step": 17659 }, { "epoch": 2.75, "learning_rate": 1.1947746252398985e-06, "logits/chosen": -2.5949764251708984, "logits/rejected": -2.830547571182251, "logps/chosen": -104.07661437988281, "logps/rejected": -377.58209228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.335841178894043, "rewards/margins": 11.619719505310059, "rewards/rejected": -17.9555606842041, "step": 17660 }, { "epoch": 2.75, "learning_rate": 1.1940411847087504e-06, "logits/chosen": -2.216627836227417, "logits/rejected": -2.8141419887542725, "logps/chosen": -272.72491455078125, "logps/rejected": -303.5125732421875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -7.3963165283203125, "rewards/margins": 7.908682823181152, "rewards/rejected": -15.304999351501465, "step": 17661 }, { "epoch": 2.75, "learning_rate": 1.1933077441776025e-06, "logits/chosen": -2.4981725215911865, "logits/rejected": -1.5704666376113892, "logps/chosen": -209.55239868164062, "logps/rejected": -268.8041076660156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.343669891357422, "rewards/margins": 8.419820785522461, "rewards/rejected": -14.763490676879883, "step": 17662 }, { "epoch": 2.75, "learning_rate": 1.1925743036464546e-06, "logits/chosen": -2.5379297733306885, "logits/rejected": -1.5705900192260742, "logps/chosen": -251.66790771484375, "logps/rejected": -163.39068603515625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -1.1306710243225098, "rewards/margins": 12.544600486755371, "rewards/rejected": -13.675271987915039, "step": 17663 }, { "epoch": 2.75, "learning_rate": 1.1918408631153069e-06, "logits/chosen": -2.269094705581665, "logits/rejected": -2.460206985473633, "logps/chosen": -99.57289123535156, "logps/rejected": -349.50982666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.928683280944824, "rewards/margins": 13.217097282409668, "rewards/rejected": -19.145780563354492, "step": 17664 }, { "epoch": 2.75, "learning_rate": 1.191107422584159e-06, "logits/chosen": -1.9973244667053223, "logits/rejected": -2.718798875808716, "logps/chosen": -237.04220581054688, "logps/rejected": -396.94873046875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.755023002624512, "rewards/margins": 8.2647066116333, "rewards/rejected": -14.019729614257812, "step": 17665 }, { "epoch": 2.75, "learning_rate": 1.190373982053011e-06, "logits/chosen": -2.595890522003174, "logits/rejected": -1.9335017204284668, "logps/chosen": -349.7377014160156, "logps/rejected": -485.19268798828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.906264305114746, "rewards/margins": 7.645473957061768, "rewards/rejected": -11.551738739013672, "step": 17666 }, { "epoch": 2.75, "learning_rate": 1.1896405415218631e-06, "logits/chosen": -1.8579245805740356, "logits/rejected": -2.899862051010132, "logps/chosen": -190.40957641601562, "logps/rejected": -410.397216796875, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -6.001791000366211, "rewards/margins": 6.616238594055176, "rewards/rejected": -12.618029594421387, "step": 17667 }, { "epoch": 2.75, "learning_rate": 1.1889071009907154e-06, "logits/chosen": -2.553349018096924, "logits/rejected": -2.7185652256011963, "logps/chosen": -127.3470458984375, "logps/rejected": -263.425537109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.592390537261963, "rewards/margins": 11.884683609008789, "rewards/rejected": -15.477073669433594, "step": 17668 }, { "epoch": 2.75, "learning_rate": 1.1881736604595675e-06, "logits/chosen": -1.5942702293395996, "logits/rejected": -2.7338225841522217, "logps/chosen": -203.1884765625, "logps/rejected": -424.34796142578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.707511901855469, "rewards/margins": 9.417531967163086, "rewards/rejected": -18.125043869018555, "step": 17669 }, { "epoch": 2.75, "learning_rate": 1.1874402199284196e-06, "logits/chosen": -2.600374460220337, "logits/rejected": -2.075824022293091, "logps/chosen": -394.7774353027344, "logps/rejected": -324.6251220703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.709066390991211, "rewards/margins": 6.852169036865234, "rewards/rejected": -14.561235427856445, "step": 17670 }, { "epoch": 2.75, "learning_rate": 1.1867067793972715e-06, "logits/chosen": -2.642582893371582, "logits/rejected": -1.8234081268310547, "logps/chosen": -854.3438720703125, "logps/rejected": -516.5587158203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.2088847160339355, "rewards/margins": 9.361355781555176, "rewards/rejected": -14.570240020751953, "step": 17671 }, { "epoch": 2.75, "learning_rate": 1.1859733388661238e-06, "logits/chosen": -1.295594573020935, "logits/rejected": -2.6302294731140137, "logps/chosen": -167.3024444580078, "logps/rejected": -588.412353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.77992057800293, "rewards/margins": 15.065671920776367, "rewards/rejected": -20.845592498779297, "step": 17672 }, { "epoch": 2.75, "learning_rate": 1.185239898334976e-06, "logits/chosen": -1.433699369430542, "logits/rejected": -2.5123252868652344, "logps/chosen": -142.697509765625, "logps/rejected": -427.1795959472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.774102687835693, "rewards/margins": 13.75130844116211, "rewards/rejected": -21.525409698486328, "step": 17673 }, { "epoch": 2.75, "learning_rate": 1.184506457803828e-06, "logits/chosen": -3.0280604362487793, "logits/rejected": -2.6692147254943848, "logps/chosen": -552.7919921875, "logps/rejected": -812.4971923828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.518153667449951, "rewards/margins": 13.413871765136719, "rewards/rejected": -20.932025909423828, "step": 17674 }, { "epoch": 2.75, "learning_rate": 1.18377301727268e-06, "logits/chosen": -1.2265980243682861, "logits/rejected": -2.5099167823791504, "logps/chosen": -152.00601196289062, "logps/rejected": -510.0887145996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.222189903259277, "rewards/margins": 11.46171760559082, "rewards/rejected": -21.68390655517578, "step": 17675 }, { "epoch": 2.75, "learning_rate": 1.1830395767415322e-06, "logits/chosen": -1.4536573886871338, "logits/rejected": -2.768120527267456, "logps/chosen": -162.20773315429688, "logps/rejected": -427.988037109375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.43954849243164, "rewards/margins": 9.015115737915039, "rewards/rejected": -17.45466423034668, "step": 17676 }, { "epoch": 2.75, "learning_rate": 1.1823061362103845e-06, "logits/chosen": -2.64566707611084, "logits/rejected": -2.7311530113220215, "logps/chosen": -92.94210815429688, "logps/rejected": -261.2038879394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.408886909484863, "rewards/margins": 10.668871879577637, "rewards/rejected": -17.0777587890625, "step": 17677 }, { "epoch": 2.75, "learning_rate": 1.1815726956792366e-06, "logits/chosen": -1.9912773370742798, "logits/rejected": -1.8545176982879639, "logps/chosen": -277.7281188964844, "logps/rejected": -593.07763671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.654674530029297, "rewards/margins": 10.134683609008789, "rewards/rejected": -18.789356231689453, "step": 17678 }, { "epoch": 2.75, "learning_rate": 1.1808392551480887e-06, "logits/chosen": -2.489670753479004, "logits/rejected": -2.170358896255493, "logps/chosen": -571.2694091796875, "logps/rejected": -524.9840087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.359127044677734, "rewards/margins": 11.14094066619873, "rewards/rejected": -17.50006866455078, "step": 17679 }, { "epoch": 2.75, "learning_rate": 1.1801058146169405e-06, "logits/chosen": -1.7220338582992554, "logits/rejected": -2.4083025455474854, "logps/chosen": -249.21365356445312, "logps/rejected": -411.41497802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.328778266906738, "rewards/margins": 10.810552597045898, "rewards/rejected": -18.139331817626953, "step": 17680 }, { "epoch": 2.75, "learning_rate": 1.1793723740857928e-06, "logits/chosen": -1.3241748809814453, "logits/rejected": -2.6515655517578125, "logps/chosen": -283.7444152832031, "logps/rejected": -372.16192626953125, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/chosen": -5.678296089172363, "rewards/margins": 6.659701347351074, "rewards/rejected": -12.337997436523438, "step": 17681 }, { "epoch": 2.75, "learning_rate": 1.178638933554645e-06, "logits/chosen": -2.561535358428955, "logits/rejected": -2.9203360080718994, "logps/chosen": -462.536376953125, "logps/rejected": -560.900634765625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -11.178050994873047, "rewards/margins": 9.611871719360352, "rewards/rejected": -20.789920806884766, "step": 17682 }, { "epoch": 2.75, "learning_rate": 1.177905493023497e-06, "logits/chosen": -1.368786096572876, "logits/rejected": -1.7792222499847412, "logps/chosen": -304.006103515625, "logps/rejected": -727.9989013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.729100227355957, "rewards/margins": 15.068527221679688, "rewards/rejected": -22.797626495361328, "step": 17683 }, { "epoch": 2.75, "learning_rate": 1.177172052492349e-06, "logits/chosen": -0.9507012367248535, "logits/rejected": -2.530306100845337, "logps/chosen": -193.34181213378906, "logps/rejected": -377.31842041015625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -8.023691177368164, "rewards/margins": 6.069849967956543, "rewards/rejected": -14.093541145324707, "step": 17684 }, { "epoch": 2.75, "learning_rate": 1.1764386119612014e-06, "logits/chosen": -2.6800332069396973, "logits/rejected": -1.7710050344467163, "logps/chosen": -630.0354614257812, "logps/rejected": -422.13275146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.160785675048828, "rewards/margins": 11.421390533447266, "rewards/rejected": -16.582176208496094, "step": 17685 }, { "epoch": 2.75, "learning_rate": 1.1757051714300535e-06, "logits/chosen": -2.9011082649230957, "logits/rejected": -2.963615894317627, "logps/chosen": -427.9212341308594, "logps/rejected": -424.2115783691406, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.1980085372924805, "rewards/margins": 10.90444564819336, "rewards/rejected": -16.102455139160156, "step": 17686 }, { "epoch": 2.75, "learning_rate": 1.1749717308989056e-06, "logits/chosen": -2.654994010925293, "logits/rejected": -2.3544037342071533, "logps/chosen": -514.5579223632812, "logps/rejected": -419.2408447265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -3.1397430896759033, "rewards/margins": 8.651815414428711, "rewards/rejected": -11.791558265686035, "step": 17687 }, { "epoch": 2.75, "learning_rate": 1.1742382903677577e-06, "logits/chosen": -1.1378486156463623, "logits/rejected": -2.078310012817383, "logps/chosen": -116.468017578125, "logps/rejected": -296.5653991699219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.9504714012146, "rewards/margins": 9.35824203491211, "rewards/rejected": -16.308713912963867, "step": 17688 }, { "epoch": 2.75, "learning_rate": 1.17350484983661e-06, "logits/chosen": -2.696748971939087, "logits/rejected": -1.532535195350647, "logps/chosen": -418.9977722167969, "logps/rejected": -319.796630859375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/chosen": -6.981746196746826, "rewards/margins": 5.206183910369873, "rewards/rejected": -12.1879301071167, "step": 17689 }, { "epoch": 2.75, "learning_rate": 1.1727714093054619e-06, "logits/chosen": -2.6265361309051514, "logits/rejected": -2.4687659740448, "logps/chosen": -464.49267578125, "logps/rejected": -332.7223815917969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.564169883728027, "rewards/margins": 10.240264892578125, "rewards/rejected": -15.804433822631836, "step": 17690 }, { "epoch": 2.75, "learning_rate": 1.172037968774314e-06, "logits/chosen": -1.1105636358261108, "logits/rejected": -2.2221908569335938, "logps/chosen": -251.14437866210938, "logps/rejected": -451.6985778808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.279512405395508, "rewards/margins": 12.18313217163086, "rewards/rejected": -19.462644577026367, "step": 17691 }, { "epoch": 2.75, "learning_rate": 1.171304528243166e-06, "logits/chosen": -1.374602198600769, "logits/rejected": -2.3994603157043457, "logps/chosen": -95.39358520507812, "logps/rejected": -322.4692077636719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.862306594848633, "rewards/margins": 10.110421180725098, "rewards/rejected": -15.972726821899414, "step": 17692 }, { "epoch": 2.75, "learning_rate": 1.1705710877120183e-06, "logits/chosen": -2.3025333881378174, "logits/rejected": -2.729612350463867, "logps/chosen": -136.7662353515625, "logps/rejected": -380.05242919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.333754062652588, "rewards/margins": 12.63023567199707, "rewards/rejected": -19.9639892578125, "step": 17693 }, { "epoch": 2.75, "learning_rate": 1.1698376471808704e-06, "logits/chosen": -2.5497384071350098, "logits/rejected": -2.399707794189453, "logps/chosen": -161.20623779296875, "logps/rejected": -253.83544921875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.835869789123535, "rewards/margins": 9.571249008178711, "rewards/rejected": -15.40711784362793, "step": 17694 }, { "epoch": 2.75, "learning_rate": 1.1691042066497225e-06, "logits/chosen": -2.0944340229034424, "logits/rejected": -1.1466096639633179, "logps/chosen": -312.624267578125, "logps/rejected": -257.4683837890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.23357629776001, "rewards/margins": 9.715325355529785, "rewards/rejected": -15.948902130126953, "step": 17695 }, { "epoch": 2.75, "learning_rate": 1.1683707661185746e-06, "logits/chosen": -2.1531927585601807, "logits/rejected": -2.849933385848999, "logps/chosen": -140.96827697753906, "logps/rejected": -376.16717529296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.230584144592285, "rewards/margins": 8.671895027160645, "rewards/rejected": -15.90247917175293, "step": 17696 }, { "epoch": 2.75, "learning_rate": 1.1676373255874267e-06, "logits/chosen": -1.509375810623169, "logits/rejected": -2.513310194015503, "logps/chosen": -330.9593505859375, "logps/rejected": -662.0928344726562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.0370893478393555, "rewards/margins": 10.832503318786621, "rewards/rejected": -17.869592666625977, "step": 17697 }, { "epoch": 2.75, "learning_rate": 1.166903885056279e-06, "logits/chosen": -1.519856333732605, "logits/rejected": -2.503293991088867, "logps/chosen": -159.75205993652344, "logps/rejected": -414.62200927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.862634658813477, "rewards/margins": 11.794318199157715, "rewards/rejected": -18.656951904296875, "step": 17698 }, { "epoch": 2.75, "learning_rate": 1.166170444525131e-06, "logits/chosen": -2.129896879196167, "logits/rejected": -2.357114553451538, "logps/chosen": -215.71881103515625, "logps/rejected": -229.63511657714844, "loss": 0.1042, "rewards/accuracies": 1.0, "rewards/chosen": -7.820000648498535, "rewards/margins": 4.233858108520508, "rewards/rejected": -12.053857803344727, "step": 17699 }, { "epoch": 2.75, "learning_rate": 1.165437003993983e-06, "logits/chosen": -1.869404673576355, "logits/rejected": -3.0450985431671143, "logps/chosen": -128.7294464111328, "logps/rejected": -405.20904541015625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -8.233720779418945, "rewards/margins": 7.549159049987793, "rewards/rejected": -15.782879829406738, "step": 17700 }, { "epoch": 2.75, "learning_rate": 1.164703563462835e-06, "logits/chosen": -2.6204497814178467, "logits/rejected": -2.465437889099121, "logps/chosen": -359.4417724609375, "logps/rejected": -494.3843994140625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -9.722790718078613, "rewards/margins": 8.214788436889648, "rewards/rejected": -17.937578201293945, "step": 17701 }, { "epoch": 2.75, "learning_rate": 1.1639701229316874e-06, "logits/chosen": -1.561241865158081, "logits/rejected": -1.9777350425720215, "logps/chosen": -361.2017822265625, "logps/rejected": -504.22674560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.2843098640441895, "rewards/margins": 10.50507640838623, "rewards/rejected": -16.789386749267578, "step": 17702 }, { "epoch": 2.75, "learning_rate": 1.1632366824005394e-06, "logits/chosen": -2.1455159187316895, "logits/rejected": -2.5548722743988037, "logps/chosen": -495.1921691894531, "logps/rejected": -626.8571166992188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -12.458702087402344, "rewards/margins": 10.426172256469727, "rewards/rejected": -22.884876251220703, "step": 17703 }, { "epoch": 2.75, "learning_rate": 1.1625032418693915e-06, "logits/chosen": -2.5790469646453857, "logits/rejected": -1.7791001796722412, "logps/chosen": -345.5220947265625, "logps/rejected": -325.2261657714844, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.298311710357666, "rewards/margins": 9.437522888183594, "rewards/rejected": -15.735834121704102, "step": 17704 }, { "epoch": 2.75, "learning_rate": 1.1617698013382436e-06, "logits/chosen": -1.8702242374420166, "logits/rejected": -2.4663376808166504, "logps/chosen": -222.1097869873047, "logps/rejected": -587.125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.9335527420043945, "rewards/margins": 7.384861946105957, "rewards/rejected": -15.318414688110352, "step": 17705 }, { "epoch": 2.75, "learning_rate": 1.161036360807096e-06, "logits/chosen": -2.388606548309326, "logits/rejected": -2.5553953647613525, "logps/chosen": -322.6997375488281, "logps/rejected": -384.2965087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.905899524688721, "rewards/margins": 10.56881046295166, "rewards/rejected": -15.474710464477539, "step": 17706 }, { "epoch": 2.75, "learning_rate": 1.160302920275948e-06, "logits/chosen": -2.086604118347168, "logits/rejected": -2.8315725326538086, "logps/chosen": -85.8390121459961, "logps/rejected": -382.35211181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.289780139923096, "rewards/margins": 13.469932556152344, "rewards/rejected": -17.75971221923828, "step": 17707 }, { "epoch": 2.75, "learning_rate": 1.1595694797448001e-06, "logits/chosen": -2.6415693759918213, "logits/rejected": -2.5953621864318848, "logps/chosen": -392.7434387207031, "logps/rejected": -480.1423034667969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.068650722503662, "rewards/margins": 14.97221565246582, "rewards/rejected": -21.04086685180664, "step": 17708 }, { "epoch": 2.75, "learning_rate": 1.1588360392136522e-06, "logits/chosen": -2.5643699169158936, "logits/rejected": -2.5839219093322754, "logps/chosen": -733.7732543945312, "logps/rejected": -845.3468017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.218107223510742, "rewards/margins": 13.00124740600586, "rewards/rejected": -22.2193546295166, "step": 17709 }, { "epoch": 2.75, "learning_rate": 1.1581025986825043e-06, "logits/chosen": -1.8154971599578857, "logits/rejected": -2.333655595779419, "logps/chosen": -162.9022979736328, "logps/rejected": -304.5257263183594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.148301124572754, "rewards/margins": 9.354150772094727, "rewards/rejected": -17.502452850341797, "step": 17710 }, { "epoch": 2.75, "learning_rate": 1.1573691581513564e-06, "logits/chosen": -2.6037204265594482, "logits/rejected": -2.3001179695129395, "logps/chosen": -401.65179443359375, "logps/rejected": -364.6218566894531, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -7.187855243682861, "rewards/margins": 6.840095520019531, "rewards/rejected": -14.027950286865234, "step": 17711 }, { "epoch": 2.75, "learning_rate": 1.1566357176202085e-06, "logits/chosen": -2.1957647800445557, "logits/rejected": -2.51699161529541, "logps/chosen": -168.4892578125, "logps/rejected": -306.2969665527344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.1342010498046875, "rewards/margins": 10.478659629821777, "rewards/rejected": -16.61285972595215, "step": 17712 }, { "epoch": 2.75, "learning_rate": 1.1559022770890606e-06, "logits/chosen": -1.4697904586791992, "logits/rejected": -2.621011972427368, "logps/chosen": -165.02798461914062, "logps/rejected": -379.1830139160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.906092643737793, "rewards/margins": 12.876672744750977, "rewards/rejected": -20.782764434814453, "step": 17713 }, { "epoch": 2.75, "learning_rate": 1.1551688365579129e-06, "logits/chosen": -2.6212780475616455, "logits/rejected": -2.5218794345855713, "logps/chosen": -253.464599609375, "logps/rejected": -277.02374267578125, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/chosen": -11.49646282196045, "rewards/margins": 3.396484851837158, "rewards/rejected": -14.892948150634766, "step": 17714 }, { "epoch": 2.76, "learning_rate": 1.154435396026765e-06, "logits/chosen": -2.7572951316833496, "logits/rejected": -1.805936574935913, "logps/chosen": -801.607177734375, "logps/rejected": -373.48675537109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.823910713195801, "rewards/margins": 8.739906311035156, "rewards/rejected": -15.563817024230957, "step": 17715 }, { "epoch": 2.76, "learning_rate": 1.153701955495617e-06, "logits/chosen": -2.604067325592041, "logits/rejected": -2.529053211212158, "logps/chosen": -229.16592407226562, "logps/rejected": -556.8655395507812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.522324085235596, "rewards/margins": 12.79494857788086, "rewards/rejected": -17.317272186279297, "step": 17716 }, { "epoch": 2.76, "learning_rate": 1.1529685149644691e-06, "logits/chosen": -2.0082271099090576, "logits/rejected": -2.635613203048706, "logps/chosen": -695.943603515625, "logps/rejected": -776.8936767578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.455789566040039, "rewards/margins": 10.078060150146484, "rewards/rejected": -17.533851623535156, "step": 17717 }, { "epoch": 2.76, "learning_rate": 1.1522350744333212e-06, "logits/chosen": -2.4044103622436523, "logits/rejected": -2.6737558841705322, "logps/chosen": -95.72328186035156, "logps/rejected": -209.8227081298828, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.525148868560791, "rewards/margins": 7.68308162689209, "rewards/rejected": -14.208230972290039, "step": 17718 }, { "epoch": 2.76, "learning_rate": 1.1515016339021735e-06, "logits/chosen": -2.2566003799438477, "logits/rejected": -2.181260347366333, "logps/chosen": -514.0399169921875, "logps/rejected": -457.98126220703125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.921298980712891, "rewards/margins": 7.593391418457031, "rewards/rejected": -15.514690399169922, "step": 17719 }, { "epoch": 2.76, "learning_rate": 1.1507681933710254e-06, "logits/chosen": -2.3985824584960938, "logits/rejected": -2.6937718391418457, "logps/chosen": -162.91001892089844, "logps/rejected": -317.44232177734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.532314300537109, "rewards/margins": 9.292963981628418, "rewards/rejected": -13.825278282165527, "step": 17720 }, { "epoch": 2.76, "learning_rate": 1.1500347528398775e-06, "logits/chosen": -1.8858810663223267, "logits/rejected": -2.635432481765747, "logps/chosen": -208.17874145507812, "logps/rejected": -444.1733703613281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.272594451904297, "rewards/margins": 12.096928596496582, "rewards/rejected": -20.369522094726562, "step": 17721 }, { "epoch": 2.76, "learning_rate": 1.1493013123087296e-06, "logits/chosen": -1.5831471681594849, "logits/rejected": -2.7231850624084473, "logps/chosen": -298.3484802246094, "logps/rejected": -564.5889892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.700785160064697, "rewards/margins": 9.688056945800781, "rewards/rejected": -17.38884162902832, "step": 17722 }, { "epoch": 2.76, "learning_rate": 1.1485678717775819e-06, "logits/chosen": -2.523256778717041, "logits/rejected": -1.960835337638855, "logps/chosen": -209.05670166015625, "logps/rejected": -222.34088134765625, "loss": 0.1881, "rewards/accuracies": 1.0, "rewards/chosen": -8.60506820678711, "rewards/margins": 1.7135379314422607, "rewards/rejected": -10.318605422973633, "step": 17723 }, { "epoch": 2.76, "learning_rate": 1.147834431246434e-06, "logits/chosen": -2.712083578109741, "logits/rejected": -1.737438440322876, "logps/chosen": -492.2031555175781, "logps/rejected": -547.8135986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.557868957519531, "rewards/margins": 13.40542984008789, "rewards/rejected": -21.963298797607422, "step": 17724 }, { "epoch": 2.76, "learning_rate": 1.147100990715286e-06, "logits/chosen": -2.430725336074829, "logits/rejected": -2.8052682876586914, "logps/chosen": -489.4853210449219, "logps/rejected": -693.3263549804688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.444713592529297, "rewards/margins": 8.964943885803223, "rewards/rejected": -19.409656524658203, "step": 17725 }, { "epoch": 2.76, "learning_rate": 1.1463675501841382e-06, "logits/chosen": -0.722767174243927, "logits/rejected": -2.2656829357147217, "logps/chosen": -112.88626861572266, "logps/rejected": -421.576416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.545546531677246, "rewards/margins": 12.694769859313965, "rewards/rejected": -20.24031639099121, "step": 17726 }, { "epoch": 2.76, "learning_rate": 1.1456341096529905e-06, "logits/chosen": -2.847019672393799, "logits/rejected": -2.565455675125122, "logps/chosen": -268.3663635253906, "logps/rejected": -358.6645202636719, "loss": 0.0857, "rewards/accuracies": 1.0, "rewards/chosen": -6.143679618835449, "rewards/margins": 6.265552520751953, "rewards/rejected": -12.409232139587402, "step": 17727 }, { "epoch": 2.76, "learning_rate": 1.1449006691218425e-06, "logits/chosen": -1.3210721015930176, "logits/rejected": -2.5531466007232666, "logps/chosen": -131.2548828125, "logps/rejected": -460.98779296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.7570881843566895, "rewards/margins": 10.29263687133789, "rewards/rejected": -16.049724578857422, "step": 17728 }, { "epoch": 2.76, "learning_rate": 1.1441672285906946e-06, "logits/chosen": -2.7384848594665527, "logits/rejected": -2.9223451614379883, "logps/chosen": -249.67657470703125, "logps/rejected": -485.3380126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.933406829833984, "rewards/margins": 14.535116195678711, "rewards/rejected": -21.468523025512695, "step": 17729 }, { "epoch": 2.76, "learning_rate": 1.1434337880595465e-06, "logits/chosen": -2.683162212371826, "logits/rejected": -2.4775376319885254, "logps/chosen": -600.0496215820312, "logps/rejected": -548.09375, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -10.073552131652832, "rewards/margins": 5.282413482666016, "rewards/rejected": -15.355966567993164, "step": 17730 }, { "epoch": 2.76, "learning_rate": 1.1427003475283988e-06, "logits/chosen": -2.581840991973877, "logits/rejected": -2.502920389175415, "logps/chosen": -233.03427124023438, "logps/rejected": -142.23300170898438, "loss": 0.2396, "rewards/accuracies": 1.0, "rewards/chosen": -6.51038932800293, "rewards/margins": 3.0991549491882324, "rewards/rejected": -9.60954475402832, "step": 17731 }, { "epoch": 2.76, "learning_rate": 1.141966906997251e-06, "logits/chosen": -2.502047300338745, "logits/rejected": -2.2367875576019287, "logps/chosen": -173.90402221679688, "logps/rejected": -240.65786743164062, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.447622299194336, "rewards/margins": 9.310188293457031, "rewards/rejected": -16.757810592651367, "step": 17732 }, { "epoch": 2.76, "learning_rate": 1.141233466466103e-06, "logits/chosen": -2.7921688556671143, "logits/rejected": -2.655980110168457, "logps/chosen": -872.2346801757812, "logps/rejected": -1206.146728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.950345039367676, "rewards/margins": 10.761541366577148, "rewards/rejected": -16.71188735961914, "step": 17733 }, { "epoch": 2.76, "learning_rate": 1.140500025934955e-06, "logits/chosen": -2.7493841648101807, "logits/rejected": -3.0868422985076904, "logps/chosen": -123.53479766845703, "logps/rejected": -197.48074340820312, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": -7.019025802612305, "rewards/margins": 4.937367916107178, "rewards/rejected": -11.95639419555664, "step": 17734 }, { "epoch": 2.76, "learning_rate": 1.1397665854038072e-06, "logits/chosen": -2.807612895965576, "logits/rejected": -2.4086482524871826, "logps/chosen": -306.0606384277344, "logps/rejected": -298.28094482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.050538063049316, "rewards/margins": 11.716676712036133, "rewards/rejected": -17.767213821411133, "step": 17735 }, { "epoch": 2.76, "learning_rate": 1.1390331448726595e-06, "logits/chosen": -2.8567638397216797, "logits/rejected": -2.4325428009033203, "logps/chosen": -204.53924560546875, "logps/rejected": -245.4748077392578, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.041057586669922, "rewards/margins": 11.968328475952148, "rewards/rejected": -15.00938606262207, "step": 17736 }, { "epoch": 2.76, "learning_rate": 1.1382997043415116e-06, "logits/chosen": -1.868299126625061, "logits/rejected": -2.7323646545410156, "logps/chosen": -218.3748779296875, "logps/rejected": -411.52435302734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.011011123657227, "rewards/margins": 11.444470405578613, "rewards/rejected": -15.45548152923584, "step": 17737 }, { "epoch": 2.76, "learning_rate": 1.1375662638103637e-06, "logits/chosen": -2.611024856567383, "logits/rejected": -2.815979242324829, "logps/chosen": -169.74705505371094, "logps/rejected": -387.95025634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.504112243652344, "rewards/margins": 10.63077449798584, "rewards/rejected": -17.1348876953125, "step": 17738 }, { "epoch": 2.76, "learning_rate": 1.1368328232792157e-06, "logits/chosen": -2.663686513900757, "logits/rejected": -3.0618183612823486, "logps/chosen": -140.86080932617188, "logps/rejected": -444.0267333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.784313201904297, "rewards/margins": 12.549713134765625, "rewards/rejected": -16.334026336669922, "step": 17739 }, { "epoch": 2.76, "learning_rate": 1.1360993827480678e-06, "logits/chosen": -2.691521167755127, "logits/rejected": -1.3413034677505493, "logps/chosen": -570.4989013671875, "logps/rejected": -180.83030700683594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.228924751281738, "rewards/margins": 9.031573295593262, "rewards/rejected": -13.260498046875, "step": 17740 }, { "epoch": 2.76, "learning_rate": 1.13536594221692e-06, "logits/chosen": -2.5850937366485596, "logits/rejected": -2.5509872436523438, "logps/chosen": -499.66558837890625, "logps/rejected": -569.2576293945312, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -10.24126148223877, "rewards/margins": 11.065953254699707, "rewards/rejected": -21.307214736938477, "step": 17741 }, { "epoch": 2.76, "learning_rate": 1.134632501685772e-06, "logits/chosen": -2.671926736831665, "logits/rejected": -2.2416157722473145, "logps/chosen": -208.92050170898438, "logps/rejected": -395.010986328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.154492139816284, "rewards/margins": 11.324378967285156, "rewards/rejected": -14.478870391845703, "step": 17742 }, { "epoch": 2.76, "learning_rate": 1.1338990611546241e-06, "logits/chosen": -2.20851731300354, "logits/rejected": -2.159459352493286, "logps/chosen": -530.8948364257812, "logps/rejected": -777.411865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.332979202270508, "rewards/margins": 12.102222442626953, "rewards/rejected": -22.435199737548828, "step": 17743 }, { "epoch": 2.76, "learning_rate": 1.1331656206234764e-06, "logits/chosen": -2.4532525539398193, "logits/rejected": -2.845479726791382, "logps/chosen": -153.92550659179688, "logps/rejected": -470.56182861328125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -8.889932632446289, "rewards/margins": 6.766002655029297, "rewards/rejected": -15.655935287475586, "step": 17744 }, { "epoch": 2.76, "learning_rate": 1.1324321800923285e-06, "logits/chosen": -2.377140522003174, "logits/rejected": -2.852205514907837, "logps/chosen": -256.98822021484375, "logps/rejected": -441.5259704589844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.60499382019043, "rewards/margins": 9.82994270324707, "rewards/rejected": -18.4349365234375, "step": 17745 }, { "epoch": 2.76, "learning_rate": 1.1316987395611806e-06, "logits/chosen": -1.8361990451812744, "logits/rejected": -2.518062114715576, "logps/chosen": -625.43994140625, "logps/rejected": -758.014404296875, "loss": 0.4751, "rewards/accuracies": 0.5, "rewards/chosen": -10.405198097229004, "rewards/margins": 13.334966659545898, "rewards/rejected": -23.74016571044922, "step": 17746 }, { "epoch": 2.76, "learning_rate": 1.1309652990300327e-06, "logits/chosen": -2.8742244243621826, "logits/rejected": -2.1659088134765625, "logps/chosen": -290.1240234375, "logps/rejected": -253.7648162841797, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.968438148498535, "rewards/margins": 10.59288215637207, "rewards/rejected": -16.561321258544922, "step": 17747 }, { "epoch": 2.76, "learning_rate": 1.130231858498885e-06, "logits/chosen": -1.9183579683303833, "logits/rejected": -2.580237865447998, "logps/chosen": -114.63414001464844, "logps/rejected": -298.40057373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.310529708862305, "rewards/margins": 10.597671508789062, "rewards/rejected": -15.908201217651367, "step": 17748 }, { "epoch": 2.76, "learning_rate": 1.1294984179677369e-06, "logits/chosen": -2.008856773376465, "logits/rejected": -2.8162591457366943, "logps/chosen": -160.075439453125, "logps/rejected": -344.01611328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.769681930541992, "rewards/margins": 9.002806663513184, "rewards/rejected": -14.772488594055176, "step": 17749 }, { "epoch": 2.76, "learning_rate": 1.128764977436589e-06, "logits/chosen": -1.9258579015731812, "logits/rejected": -2.460324764251709, "logps/chosen": -176.6101837158203, "logps/rejected": -394.52606201171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.44130802154541, "rewards/margins": 8.918689727783203, "rewards/rejected": -18.359996795654297, "step": 17750 }, { "epoch": 2.76, "learning_rate": 1.128031536905441e-06, "logits/chosen": -2.110658645629883, "logits/rejected": -2.2597618103027344, "logps/chosen": -189.74057006835938, "logps/rejected": -513.3063354492188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.631878852844238, "rewards/margins": 12.332574844360352, "rewards/rejected": -16.964454650878906, "step": 17751 }, { "epoch": 2.76, "learning_rate": 1.1272980963742933e-06, "logits/chosen": -2.643533229827881, "logits/rejected": -2.535628318786621, "logps/chosen": -263.64849853515625, "logps/rejected": -310.723876953125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -10.214090347290039, "rewards/margins": 6.874555587768555, "rewards/rejected": -17.088645935058594, "step": 17752 }, { "epoch": 2.76, "learning_rate": 1.1265646558431454e-06, "logits/chosen": -1.9258469343185425, "logits/rejected": -2.279573440551758, "logps/chosen": -238.09158325195312, "logps/rejected": -554.5885009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.9976806640625, "rewards/margins": 15.029611587524414, "rewards/rejected": -24.027292251586914, "step": 17753 }, { "epoch": 2.76, "learning_rate": 1.1258312153119975e-06, "logits/chosen": -1.4262598752975464, "logits/rejected": -1.8569214344024658, "logps/chosen": -172.4198760986328, "logps/rejected": -486.40069580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.831094741821289, "rewards/margins": 12.704316139221191, "rewards/rejected": -22.535411834716797, "step": 17754 }, { "epoch": 2.76, "learning_rate": 1.1250977747808496e-06, "logits/chosen": -2.532639503479004, "logits/rejected": -2.930638074874878, "logps/chosen": -149.7474365234375, "logps/rejected": -393.4122314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.819120407104492, "rewards/margins": 12.184088706970215, "rewards/rejected": -18.003210067749023, "step": 17755 }, { "epoch": 2.76, "learning_rate": 1.1243643342497017e-06, "logits/chosen": -2.5288257598876953, "logits/rejected": -2.191472291946411, "logps/chosen": -536.0279541015625, "logps/rejected": -561.5079956054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.958818435668945, "rewards/margins": 15.96418571472168, "rewards/rejected": -20.923004150390625, "step": 17756 }, { "epoch": 2.76, "learning_rate": 1.123630893718554e-06, "logits/chosen": -2.213277816772461, "logits/rejected": -2.6147096157073975, "logps/chosen": -159.5432891845703, "logps/rejected": -406.9223327636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.147249698638916, "rewards/margins": 14.24106216430664, "rewards/rejected": -20.38831329345703, "step": 17757 }, { "epoch": 2.76, "learning_rate": 1.122897453187406e-06, "logits/chosen": -2.5224838256835938, "logits/rejected": -2.7229700088500977, "logps/chosen": -637.108154296875, "logps/rejected": -604.6292724609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.937559604644775, "rewards/margins": 12.561138153076172, "rewards/rejected": -17.49869728088379, "step": 17758 }, { "epoch": 2.76, "learning_rate": 1.122164012656258e-06, "logits/chosen": -1.6459277868270874, "logits/rejected": -2.7791149616241455, "logps/chosen": -217.8397216796875, "logps/rejected": -347.69464111328125, "loss": 0.9155, "rewards/accuracies": 0.5, "rewards/chosen": -12.61085319519043, "rewards/margins": 5.040080547332764, "rewards/rejected": -17.65093421936035, "step": 17759 }, { "epoch": 2.76, "learning_rate": 1.12143057212511e-06, "logits/chosen": -2.0541982650756836, "logits/rejected": -2.489241123199463, "logps/chosen": -277.9635314941406, "logps/rejected": -322.352783203125, "loss": 0.0233, "rewards/accuracies": 1.0, "rewards/chosen": -7.688065528869629, "rewards/margins": 5.783823013305664, "rewards/rejected": -13.471888542175293, "step": 17760 }, { "epoch": 2.76, "learning_rate": 1.1206971315939624e-06, "logits/chosen": -1.514344573020935, "logits/rejected": -1.9516884088516235, "logps/chosen": -219.54991149902344, "logps/rejected": -431.2547912597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.915345191955566, "rewards/margins": 13.69400405883789, "rewards/rejected": -20.60934829711914, "step": 17761 }, { "epoch": 2.76, "learning_rate": 1.1199636910628145e-06, "logits/chosen": -2.7181267738342285, "logits/rejected": -2.6986401081085205, "logps/chosen": -169.93255615234375, "logps/rejected": -359.9127502441406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.209633827209473, "rewards/margins": 10.031641006469727, "rewards/rejected": -19.241275787353516, "step": 17762 }, { "epoch": 2.76, "learning_rate": 1.1192302505316665e-06, "logits/chosen": -2.1143455505371094, "logits/rejected": -2.7238268852233887, "logps/chosen": -176.8813018798828, "logps/rejected": -276.6170349121094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.568004608154297, "rewards/margins": 8.316559791564941, "rewards/rejected": -14.884563446044922, "step": 17763 }, { "epoch": 2.76, "learning_rate": 1.1184968100005186e-06, "logits/chosen": -2.5741348266601562, "logits/rejected": -2.0189404487609863, "logps/chosen": -181.5525360107422, "logps/rejected": -251.19625854492188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.049711227416992, "rewards/margins": 9.826837539672852, "rewards/rejected": -15.876547813415527, "step": 17764 }, { "epoch": 2.76, "learning_rate": 1.117763369469371e-06, "logits/chosen": -1.2935125827789307, "logits/rejected": -1.6450613737106323, "logps/chosen": -185.75845336914062, "logps/rejected": -339.7303466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.197822570800781, "rewards/margins": 10.21330738067627, "rewards/rejected": -15.41112995147705, "step": 17765 }, { "epoch": 2.76, "learning_rate": 1.117029928938223e-06, "logits/chosen": -1.8023654222488403, "logits/rejected": -2.599870443344116, "logps/chosen": -261.3233947753906, "logps/rejected": -452.57623291015625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.681133270263672, "rewards/margins": 7.603198051452637, "rewards/rejected": -16.284332275390625, "step": 17766 }, { "epoch": 2.76, "learning_rate": 1.1162964884070751e-06, "logits/chosen": -2.036022186279297, "logits/rejected": -2.619283437728882, "logps/chosen": -437.6758117675781, "logps/rejected": -506.85662841796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.567700386047363, "rewards/margins": 8.595833778381348, "rewards/rejected": -19.16353416442871, "step": 17767 }, { "epoch": 2.76, "learning_rate": 1.1155630478759272e-06, "logits/chosen": -0.6007954478263855, "logits/rejected": -1.6220425367355347, "logps/chosen": -186.55772399902344, "logps/rejected": -368.5577392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.3244237899780273, "rewards/margins": 16.12457275390625, "rewards/rejected": -18.448997497558594, "step": 17768 }, { "epoch": 2.76, "learning_rate": 1.1148296073447793e-06, "logits/chosen": -2.4958090782165527, "logits/rejected": -1.73544180393219, "logps/chosen": -572.8972778320312, "logps/rejected": -305.99298095703125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.740212440490723, "rewards/margins": 9.32962417602539, "rewards/rejected": -16.06983757019043, "step": 17769 }, { "epoch": 2.76, "learning_rate": 1.1140961668136314e-06, "logits/chosen": -2.374005079269409, "logits/rejected": -2.684412956237793, "logps/chosen": -355.9641418457031, "logps/rejected": -342.6492614746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.65426778793335, "rewards/margins": 13.810868263244629, "rewards/rejected": -19.46513557434082, "step": 17770 }, { "epoch": 2.76, "learning_rate": 1.1133627262824835e-06, "logits/chosen": -1.6515212059020996, "logits/rejected": -2.2173728942871094, "logps/chosen": -210.3280029296875, "logps/rejected": -476.37542724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.235560417175293, "rewards/margins": 14.558140754699707, "rewards/rejected": -20.793701171875, "step": 17771 }, { "epoch": 2.76, "learning_rate": 1.1126292857513356e-06, "logits/chosen": -2.312962293624878, "logits/rejected": -2.998776912689209, "logps/chosen": -150.2059326171875, "logps/rejected": -313.78314208984375, "loss": 0.048, "rewards/accuracies": 1.0, "rewards/chosen": -9.408358573913574, "rewards/margins": 5.3892292976379395, "rewards/rejected": -14.797588348388672, "step": 17772 }, { "epoch": 2.76, "learning_rate": 1.1118958452201879e-06, "logits/chosen": -2.3828279972076416, "logits/rejected": -2.8256733417510986, "logps/chosen": -186.9647979736328, "logps/rejected": -388.1592712402344, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -5.909290790557861, "rewards/margins": 9.060285568237305, "rewards/rejected": -14.969575881958008, "step": 17773 }, { "epoch": 2.76, "learning_rate": 1.11116240468904e-06, "logits/chosen": -1.1395606994628906, "logits/rejected": -2.419771671295166, "logps/chosen": -200.14739990234375, "logps/rejected": -488.5263366699219, "loss": 0.261, "rewards/accuracies": 1.0, "rewards/chosen": -9.175520896911621, "rewards/margins": 9.125385284423828, "rewards/rejected": -18.300907135009766, "step": 17774 }, { "epoch": 2.76, "learning_rate": 1.110428964157892e-06, "logits/chosen": -2.496089220046997, "logits/rejected": -1.8895909786224365, "logps/chosen": -302.894287109375, "logps/rejected": -292.4923095703125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -7.023899078369141, "rewards/margins": 6.994536399841309, "rewards/rejected": -14.01843547821045, "step": 17775 }, { "epoch": 2.76, "learning_rate": 1.1096955236267441e-06, "logits/chosen": -2.539881706237793, "logits/rejected": -2.7299256324768066, "logps/chosen": -192.078125, "logps/rejected": -352.8853759765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -9.186803817749023, "rewards/margins": 10.085243225097656, "rewards/rejected": -19.27204704284668, "step": 17776 }, { "epoch": 2.76, "learning_rate": 1.1089620830955962e-06, "logits/chosen": -2.9234087467193604, "logits/rejected": -3.0689573287963867, "logps/chosen": -96.16131591796875, "logps/rejected": -337.696044921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.041204452514648, "rewards/margins": 9.23124885559082, "rewards/rejected": -15.272453308105469, "step": 17777 }, { "epoch": 2.76, "learning_rate": 1.1082286425644485e-06, "logits/chosen": -2.761122941970825, "logits/rejected": -1.3657337427139282, "logps/chosen": -571.8805541992188, "logps/rejected": -469.4339294433594, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -6.4723663330078125, "rewards/margins": 6.458691596984863, "rewards/rejected": -12.931057929992676, "step": 17778 }, { "epoch": 2.77, "learning_rate": 1.1074952020333004e-06, "logits/chosen": -1.9990688562393188, "logits/rejected": -2.659313201904297, "logps/chosen": -167.42227172851562, "logps/rejected": -300.36248779296875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -8.376927375793457, "rewards/margins": 7.646804332733154, "rewards/rejected": -16.023731231689453, "step": 17779 }, { "epoch": 2.77, "learning_rate": 1.1067617615021525e-06, "logits/chosen": -2.6823959350585938, "logits/rejected": -2.9253838062286377, "logps/chosen": -174.14959716796875, "logps/rejected": -307.4318542480469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.712502479553223, "rewards/margins": 9.474191665649414, "rewards/rejected": -17.186695098876953, "step": 17780 }, { "epoch": 2.77, "learning_rate": 1.1060283209710046e-06, "logits/chosen": -1.2530444860458374, "logits/rejected": -2.397188663482666, "logps/chosen": -148.1453857421875, "logps/rejected": -362.4241943359375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.257911205291748, "rewards/margins": 11.218037605285645, "rewards/rejected": -18.475948333740234, "step": 17781 }, { "epoch": 2.77, "learning_rate": 1.1052948804398569e-06, "logits/chosen": -2.596508502960205, "logits/rejected": -2.295541286468506, "logps/chosen": -456.1301574707031, "logps/rejected": -418.52093505859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.840032577514648, "rewards/margins": 9.976710319519043, "rewards/rejected": -18.816741943359375, "step": 17782 }, { "epoch": 2.77, "learning_rate": 1.104561439908709e-06, "logits/chosen": -2.7372395992279053, "logits/rejected": -2.6205649375915527, "logps/chosen": -706.5050659179688, "logps/rejected": -596.6984252929688, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.391188144683838, "rewards/margins": 7.448099136352539, "rewards/rejected": -14.839286804199219, "step": 17783 }, { "epoch": 2.77, "learning_rate": 1.103827999377561e-06, "logits/chosen": -2.6171798706054688, "logits/rejected": -2.9034430980682373, "logps/chosen": -493.8447265625, "logps/rejected": -576.82177734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.593435764312744, "rewards/margins": 9.550538063049316, "rewards/rejected": -15.143974304199219, "step": 17784 }, { "epoch": 2.77, "learning_rate": 1.1030945588464132e-06, "logits/chosen": -1.7002657651901245, "logits/rejected": -2.5639500617980957, "logps/chosen": -125.763916015625, "logps/rejected": -354.8267822265625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -7.747264385223389, "rewards/margins": 6.468243598937988, "rewards/rejected": -14.215507507324219, "step": 17785 }, { "epoch": 2.77, "learning_rate": 1.1023611183152655e-06, "logits/chosen": -2.267298460006714, "logits/rejected": -2.0003585815429688, "logps/chosen": -587.66943359375, "logps/rejected": -647.6775512695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.946178436279297, "rewards/margins": 12.717824935913086, "rewards/rejected": -24.66400146484375, "step": 17786 }, { "epoch": 2.77, "learning_rate": 1.1016276777841176e-06, "logits/chosen": -2.913802146911621, "logits/rejected": -2.5113534927368164, "logps/chosen": -539.08837890625, "logps/rejected": -230.86944580078125, "loss": 0.0295, "rewards/accuracies": 1.0, "rewards/chosen": -13.12198257446289, "rewards/margins": 3.519111156463623, "rewards/rejected": -16.641094207763672, "step": 17787 }, { "epoch": 2.77, "learning_rate": 1.1008942372529696e-06, "logits/chosen": -2.7208592891693115, "logits/rejected": -2.886000871658325, "logps/chosen": -171.37132263183594, "logps/rejected": -287.6089782714844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.791630268096924, "rewards/margins": 7.461468696594238, "rewards/rejected": -13.25309944152832, "step": 17788 }, { "epoch": 2.77, "learning_rate": 1.1001607967218215e-06, "logits/chosen": -2.3653059005737305, "logits/rejected": -2.6232504844665527, "logps/chosen": -369.16583251953125, "logps/rejected": -591.943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.798795223236084, "rewards/margins": 14.988186836242676, "rewards/rejected": -18.786983489990234, "step": 17789 }, { "epoch": 2.77, "learning_rate": 1.0994273561906738e-06, "logits/chosen": -2.991776943206787, "logits/rejected": -3.0641090869903564, "logps/chosen": -119.81034851074219, "logps/rejected": -269.15618896484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.678448677062988, "rewards/margins": 8.817615509033203, "rewards/rejected": -17.496065139770508, "step": 17790 }, { "epoch": 2.77, "learning_rate": 1.098693915659526e-06, "logits/chosen": -2.633193254470825, "logits/rejected": -2.508809804916382, "logps/chosen": -638.5748901367188, "logps/rejected": -532.2803955078125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -11.61490249633789, "rewards/margins": 4.820558071136475, "rewards/rejected": -16.43545913696289, "step": 17791 }, { "epoch": 2.77, "learning_rate": 1.097960475128378e-06, "logits/chosen": -2.7845160961151123, "logits/rejected": -2.719202756881714, "logps/chosen": -567.018310546875, "logps/rejected": -997.4161376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3571062088012695, "rewards/margins": 14.991206169128418, "rewards/rejected": -19.348312377929688, "step": 17792 }, { "epoch": 2.77, "learning_rate": 1.09722703459723e-06, "logits/chosen": -1.9870303869247437, "logits/rejected": -2.6671302318573, "logps/chosen": -288.3858337402344, "logps/rejected": -456.7664794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.772412300109863, "rewards/margins": 9.715921401977539, "rewards/rejected": -18.48833465576172, "step": 17793 }, { "epoch": 2.77, "learning_rate": 1.0964935940660824e-06, "logits/chosen": -2.736158847808838, "logits/rejected": -2.8089776039123535, "logps/chosen": -103.48408508300781, "logps/rejected": -207.74191284179688, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.66206169128418, "rewards/margins": 7.0008225440979, "rewards/rejected": -13.662883758544922, "step": 17794 }, { "epoch": 2.77, "learning_rate": 1.0957601535349345e-06, "logits/chosen": -1.297347903251648, "logits/rejected": -2.491806983947754, "logps/chosen": -226.61135864257812, "logps/rejected": -757.1446533203125, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -8.648895263671875, "rewards/margins": 14.935035705566406, "rewards/rejected": -23.58393096923828, "step": 17795 }, { "epoch": 2.77, "learning_rate": 1.0950267130037866e-06, "logits/chosen": -2.271970272064209, "logits/rejected": -2.7382123470306396, "logps/chosen": -92.22125244140625, "logps/rejected": -489.15533447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.278124809265137, "rewards/margins": 15.229480743408203, "rewards/rejected": -21.507606506347656, "step": 17796 }, { "epoch": 2.77, "learning_rate": 1.0942932724726387e-06, "logits/chosen": -1.2853162288665771, "logits/rejected": -2.127854824066162, "logps/chosen": -258.06756591796875, "logps/rejected": -493.94842529296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.445688247680664, "rewards/margins": 9.669988632202148, "rewards/rejected": -17.115676879882812, "step": 17797 }, { "epoch": 2.77, "learning_rate": 1.0935598319414908e-06, "logits/chosen": -1.515406608581543, "logits/rejected": -2.757669448852539, "logps/chosen": -360.60784912109375, "logps/rejected": -541.2939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3101487159729, "rewards/margins": 12.495670318603516, "rewards/rejected": -16.805818557739258, "step": 17798 }, { "epoch": 2.77, "learning_rate": 1.0928263914103428e-06, "logits/chosen": -2.692805290222168, "logits/rejected": -2.853412628173828, "logps/chosen": -473.3709411621094, "logps/rejected": -721.5494384765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.093414306640625, "rewards/margins": 10.872390747070312, "rewards/rejected": -15.965805053710938, "step": 17799 }, { "epoch": 2.77, "learning_rate": 1.092092950879195e-06, "logits/chosen": -2.6122331619262695, "logits/rejected": -2.882580280303955, "logps/chosen": -88.5889892578125, "logps/rejected": -291.76226806640625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -3.884866237640381, "rewards/margins": 10.868780136108398, "rewards/rejected": -14.753646850585938, "step": 17800 }, { "epoch": 2.77, "learning_rate": 1.091359510348047e-06, "logits/chosen": -1.5576777458190918, "logits/rejected": -2.6118524074554443, "logps/chosen": -192.84909057617188, "logps/rejected": -450.32373046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.643825531005859, "rewards/margins": 13.023908615112305, "rewards/rejected": -17.667734146118164, "step": 17801 }, { "epoch": 2.77, "learning_rate": 1.0906260698168991e-06, "logits/chosen": -2.227652072906494, "logits/rejected": -2.1670539379119873, "logps/chosen": -127.31977081298828, "logps/rejected": -292.3636474609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.532040119171143, "rewards/margins": 9.315840721130371, "rewards/rejected": -13.847880363464355, "step": 17802 }, { "epoch": 2.77, "learning_rate": 1.0898926292857514e-06, "logits/chosen": -1.1269034147262573, "logits/rejected": -2.651646137237549, "logps/chosen": -147.53773498535156, "logps/rejected": -522.6339111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.956279754638672, "rewards/margins": 11.459531784057617, "rewards/rejected": -20.415813446044922, "step": 17803 }, { "epoch": 2.77, "learning_rate": 1.0891591887546035e-06, "logits/chosen": -1.4006118774414062, "logits/rejected": -2.7596688270568848, "logps/chosen": -222.27825927734375, "logps/rejected": -434.61151123046875, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -6.353050231933594, "rewards/margins": 8.025557518005371, "rewards/rejected": -14.378606796264648, "step": 17804 }, { "epoch": 2.77, "learning_rate": 1.0884257482234556e-06, "logits/chosen": -2.883619785308838, "logits/rejected": -3.0396742820739746, "logps/chosen": -100.71623992919922, "logps/rejected": -208.65736389160156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.117486000061035, "rewards/margins": 10.233030319213867, "rewards/rejected": -14.350517272949219, "step": 17805 }, { "epoch": 2.77, "learning_rate": 1.0876923076923077e-06, "logits/chosen": -2.2795028686523438, "logits/rejected": -1.7476435899734497, "logps/chosen": -328.25970458984375, "logps/rejected": -325.59423828125, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -7.305431365966797, "rewards/margins": 6.8026862144470215, "rewards/rejected": -14.108118057250977, "step": 17806 }, { "epoch": 2.77, "learning_rate": 1.08695886716116e-06, "logits/chosen": -2.012756824493408, "logits/rejected": -2.480595111846924, "logps/chosen": -198.78343200683594, "logps/rejected": -213.1060791015625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.9734601974487305, "rewards/margins": 5.570925712585449, "rewards/rejected": -11.54438591003418, "step": 17807 }, { "epoch": 2.77, "learning_rate": 1.0862254266300119e-06, "logits/chosen": -2.6413018703460693, "logits/rejected": -2.9312381744384766, "logps/chosen": -81.70494079589844, "logps/rejected": -305.52398681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.73176383972168, "rewards/margins": 10.917125701904297, "rewards/rejected": -17.64889144897461, "step": 17808 }, { "epoch": 2.77, "learning_rate": 1.085491986098864e-06, "logits/chosen": -2.829742193222046, "logits/rejected": -2.760420560836792, "logps/chosen": -193.31626892089844, "logps/rejected": -305.1956481933594, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.312521934509277, "rewards/margins": 9.81136703491211, "rewards/rejected": -16.123889923095703, "step": 17809 }, { "epoch": 2.77, "learning_rate": 1.084758545567716e-06, "logits/chosen": -2.393854856491089, "logits/rejected": -3.147014856338501, "logps/chosen": -146.80686950683594, "logps/rejected": -455.3389892578125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -5.806024074554443, "rewards/margins": 9.819452285766602, "rewards/rejected": -15.625476837158203, "step": 17810 }, { "epoch": 2.77, "learning_rate": 1.0840251050365683e-06, "logits/chosen": -2.2484757900238037, "logits/rejected": -2.5095643997192383, "logps/chosen": -236.19920349121094, "logps/rejected": -305.6247863769531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.281529426574707, "rewards/margins": 8.285151481628418, "rewards/rejected": -13.566680908203125, "step": 17811 }, { "epoch": 2.77, "learning_rate": 1.0832916645054204e-06, "logits/chosen": -1.507987380027771, "logits/rejected": -2.518967390060425, "logps/chosen": -289.8927001953125, "logps/rejected": -631.2147216796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.256932258605957, "rewards/margins": 13.384489059448242, "rewards/rejected": -20.641422271728516, "step": 17812 }, { "epoch": 2.77, "learning_rate": 1.0825582239742725e-06, "logits/chosen": -2.6473171710968018, "logits/rejected": -2.4601101875305176, "logps/chosen": -844.3768920898438, "logps/rejected": -658.4616088867188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.509729385375977, "rewards/margins": 9.706209182739258, "rewards/rejected": -16.215938568115234, "step": 17813 }, { "epoch": 2.77, "learning_rate": 1.0818247834431246e-06, "logits/chosen": -2.7296924591064453, "logits/rejected": -2.19679856300354, "logps/chosen": -545.509033203125, "logps/rejected": -518.955810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.779432773590088, "rewards/margins": 12.57208251953125, "rewards/rejected": -18.35151481628418, "step": 17814 }, { "epoch": 2.77, "learning_rate": 1.081091342911977e-06, "logits/chosen": -2.35178804397583, "logits/rejected": -2.487816095352173, "logps/chosen": -318.4554748535156, "logps/rejected": -342.32550048828125, "loss": 0.0261, "rewards/accuracies": 1.0, "rewards/chosen": -9.68338680267334, "rewards/margins": 5.987920761108398, "rewards/rejected": -15.671307563781738, "step": 17815 }, { "epoch": 2.77, "learning_rate": 1.080357902380829e-06, "logits/chosen": -1.3764398097991943, "logits/rejected": -2.257063150405884, "logps/chosen": -191.32254028320312, "logps/rejected": -481.11102294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.232851028442383, "rewards/margins": 12.130443572998047, "rewards/rejected": -19.36329460144043, "step": 17816 }, { "epoch": 2.77, "learning_rate": 1.079624461849681e-06, "logits/chosen": -2.771156072616577, "logits/rejected": -2.4702000617980957, "logps/chosen": -145.73182678222656, "logps/rejected": -183.16033935546875, "loss": 1.6001, "rewards/accuracies": 0.5, "rewards/chosen": -8.545768737792969, "rewards/margins": 2.57099986076355, "rewards/rejected": -11.116768836975098, "step": 17817 }, { "epoch": 2.77, "learning_rate": 1.078891021318533e-06, "logits/chosen": -1.2846043109893799, "logits/rejected": -2.5222322940826416, "logps/chosen": -144.5887451171875, "logps/rejected": -561.2125244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.617116928100586, "rewards/margins": 12.508708953857422, "rewards/rejected": -19.125825881958008, "step": 17818 }, { "epoch": 2.77, "learning_rate": 1.0781575807873853e-06, "logits/chosen": -2.8965368270874023, "logits/rejected": -3.10990834236145, "logps/chosen": -154.27069091796875, "logps/rejected": -353.0859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.3763909339904785, "rewards/margins": 9.947763442993164, "rewards/rejected": -14.324153900146484, "step": 17819 }, { "epoch": 2.77, "learning_rate": 1.0774241402562374e-06, "logits/chosen": -1.5634092092514038, "logits/rejected": -2.4996843338012695, "logps/chosen": -248.06643676757812, "logps/rejected": -559.0585327148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.574617385864258, "rewards/margins": 14.877839088439941, "rewards/rejected": -22.452457427978516, "step": 17820 }, { "epoch": 2.77, "learning_rate": 1.0766906997250895e-06, "logits/chosen": -1.4664106369018555, "logits/rejected": -2.5087544918060303, "logps/chosen": -149.68460083007812, "logps/rejected": -383.5815124511719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.210222244262695, "rewards/margins": 10.683767318725586, "rewards/rejected": -17.89398956298828, "step": 17821 }, { "epoch": 2.77, "learning_rate": 1.0759572591939416e-06, "logits/chosen": -1.6345919370651245, "logits/rejected": -2.39966082572937, "logps/chosen": -182.38453674316406, "logps/rejected": -348.8648376464844, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.430695533752441, "rewards/margins": 8.192296028137207, "rewards/rejected": -16.62299156188965, "step": 17822 }, { "epoch": 2.77, "learning_rate": 1.0752238186627936e-06, "logits/chosen": -1.4603506326675415, "logits/rejected": -2.3720173835754395, "logps/chosen": -106.86273193359375, "logps/rejected": -399.05279541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.537844657897949, "rewards/margins": 17.542863845825195, "rewards/rejected": -23.080707550048828, "step": 17823 }, { "epoch": 2.77, "learning_rate": 1.074490378131646e-06, "logits/chosen": -2.445537805557251, "logits/rejected": -2.9558498859405518, "logps/chosen": -126.7806625366211, "logps/rejected": -377.5435791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.6148481369018555, "rewards/margins": 11.582237243652344, "rewards/rejected": -19.197086334228516, "step": 17824 }, { "epoch": 2.77, "learning_rate": 1.073756937600498e-06, "logits/chosen": -2.7278482913970947, "logits/rejected": -1.8510595560073853, "logps/chosen": -797.6453247070312, "logps/rejected": -562.6080932617188, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.885242462158203, "rewards/margins": 9.710604667663574, "rewards/rejected": -17.595848083496094, "step": 17825 }, { "epoch": 2.77, "learning_rate": 1.0730234970693501e-06, "logits/chosen": -2.711294651031494, "logits/rejected": -2.4245994091033936, "logps/chosen": -230.123291015625, "logps/rejected": -187.2261505126953, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -5.041301727294922, "rewards/margins": 5.053441047668457, "rewards/rejected": -10.094742774963379, "step": 17826 }, { "epoch": 2.77, "learning_rate": 1.0722900565382022e-06, "logits/chosen": -2.68670654296875, "logits/rejected": -2.765273332595825, "logps/chosen": -140.10205078125, "logps/rejected": -295.89923095703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.443840026855469, "rewards/margins": 10.189993858337402, "rewards/rejected": -18.633834838867188, "step": 17827 }, { "epoch": 2.77, "learning_rate": 1.0715566160070543e-06, "logits/chosen": -2.6227545738220215, "logits/rejected": -2.750703811645508, "logps/chosen": -142.2655792236328, "logps/rejected": -313.716064453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.498148441314697, "rewards/margins": 10.181068420410156, "rewards/rejected": -17.679218292236328, "step": 17828 }, { "epoch": 2.77, "learning_rate": 1.0708231754759064e-06, "logits/chosen": -2.419267177581787, "logits/rejected": -2.7183425426483154, "logps/chosen": -158.23387145996094, "logps/rejected": -452.9669494628906, "loss": 0.077, "rewards/accuracies": 1.0, "rewards/chosen": -6.622110366821289, "rewards/margins": 6.461933135986328, "rewards/rejected": -13.084043502807617, "step": 17829 }, { "epoch": 2.77, "learning_rate": 1.0700897349447585e-06, "logits/chosen": -2.857145071029663, "logits/rejected": -2.7732555866241455, "logps/chosen": -119.63227844238281, "logps/rejected": -283.4742431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.493290424346924, "rewards/margins": 13.131914138793945, "rewards/rejected": -15.625204086303711, "step": 17830 }, { "epoch": 2.77, "learning_rate": 1.0693562944136106e-06, "logits/chosen": -2.143883466720581, "logits/rejected": -2.7957122325897217, "logps/chosen": -719.3010864257812, "logps/rejected": -894.1947021484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.979015350341797, "rewards/margins": 11.799598693847656, "rewards/rejected": -22.778614044189453, "step": 17831 }, { "epoch": 2.77, "learning_rate": 1.0686228538824629e-06, "logits/chosen": -2.609107732772827, "logits/rejected": -2.5422539710998535, "logps/chosen": -194.38427734375, "logps/rejected": -245.40170288085938, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.573078155517578, "rewards/margins": 7.235293388366699, "rewards/rejected": -14.808370590209961, "step": 17832 }, { "epoch": 2.77, "learning_rate": 1.067889413351315e-06, "logits/chosen": -2.4492127895355225, "logits/rejected": -1.4712885618209839, "logps/chosen": -386.3926696777344, "logps/rejected": -296.3905334472656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.99185037612915, "rewards/margins": 8.839771270751953, "rewards/rejected": -13.831621170043945, "step": 17833 }, { "epoch": 2.77, "learning_rate": 1.067155972820167e-06, "logits/chosen": -2.7049214839935303, "logits/rejected": -2.618471145629883, "logps/chosen": -154.70367431640625, "logps/rejected": -303.37591552734375, "loss": 0.8247, "rewards/accuracies": 0.5, "rewards/chosen": -10.790210723876953, "rewards/margins": 7.705681324005127, "rewards/rejected": -18.495891571044922, "step": 17834 }, { "epoch": 2.77, "learning_rate": 1.0664225322890191e-06, "logits/chosen": -2.899993658065796, "logits/rejected": -2.6134440898895264, "logps/chosen": -227.7210235595703, "logps/rejected": -312.1959228515625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.99553108215332, "rewards/margins": 7.970890522003174, "rewards/rejected": -14.966421127319336, "step": 17835 }, { "epoch": 2.77, "learning_rate": 1.0656890917578712e-06, "logits/chosen": -2.3148555755615234, "logits/rejected": -2.483250856399536, "logps/chosen": -526.4752197265625, "logps/rejected": -424.70953369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.654092788696289, "rewards/margins": 11.650884628295898, "rewards/rejected": -17.304977416992188, "step": 17836 }, { "epoch": 2.77, "learning_rate": 1.0649556512267235e-06, "logits/chosen": -2.7807552814483643, "logits/rejected": -2.510685443878174, "logps/chosen": -183.6026153564453, "logps/rejected": -154.77401733398438, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": -6.548536777496338, "rewards/margins": 4.287212371826172, "rewards/rejected": -10.835748672485352, "step": 17837 }, { "epoch": 2.77, "learning_rate": 1.0642222106955754e-06, "logits/chosen": -2.5495846271514893, "logits/rejected": -2.793954372406006, "logps/chosen": -390.49688720703125, "logps/rejected": -467.2615966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.2102251052856445, "rewards/margins": 10.905389785766602, "rewards/rejected": -16.115615844726562, "step": 17838 }, { "epoch": 2.77, "learning_rate": 1.0634887701644275e-06, "logits/chosen": -2.3064823150634766, "logits/rejected": -2.9418447017669678, "logps/chosen": -131.6022186279297, "logps/rejected": -338.54974365234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.045127868652344, "rewards/margins": 10.668676376342773, "rewards/rejected": -20.713804244995117, "step": 17839 }, { "epoch": 2.77, "learning_rate": 1.0627553296332796e-06, "logits/chosen": -2.5288453102111816, "logits/rejected": -1.9190123081207275, "logps/chosen": -200.6899871826172, "logps/rejected": -270.814697265625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.854153633117676, "rewards/margins": 9.453760147094727, "rewards/rejected": -16.30791473388672, "step": 17840 }, { "epoch": 2.77, "learning_rate": 1.062021889102132e-06, "logits/chosen": -2.0577993392944336, "logits/rejected": -2.90053391456604, "logps/chosen": -191.94149780273438, "logps/rejected": -478.19989013671875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.6499810218811035, "rewards/margins": 7.98541784286499, "rewards/rejected": -13.635398864746094, "step": 17841 }, { "epoch": 2.77, "learning_rate": 1.061288448570984e-06, "logits/chosen": -2.6194710731506348, "logits/rejected": -2.855341911315918, "logps/chosen": -245.04129028320312, "logps/rejected": -452.563720703125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -9.947793960571289, "rewards/margins": 7.156034469604492, "rewards/rejected": -17.10382843017578, "step": 17842 }, { "epoch": 2.77, "learning_rate": 1.060555008039836e-06, "logits/chosen": -1.9720580577850342, "logits/rejected": -2.7763803005218506, "logps/chosen": -136.57745361328125, "logps/rejected": -404.0829772949219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.47389030456543, "rewards/margins": 11.277758598327637, "rewards/rejected": -16.75164794921875, "step": 17843 }, { "epoch": 2.78, "learning_rate": 1.0598215675086882e-06, "logits/chosen": -2.5303070545196533, "logits/rejected": -1.2776720523834229, "logps/chosen": -559.558837890625, "logps/rejected": -486.5610656738281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.509409427642822, "rewards/margins": 11.99061393737793, "rewards/rejected": -18.500022888183594, "step": 17844 }, { "epoch": 2.78, "learning_rate": 1.0590881269775405e-06, "logits/chosen": -2.7452683448791504, "logits/rejected": -2.7026593685150146, "logps/chosen": -148.55870056152344, "logps/rejected": -240.0269775390625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -9.983654022216797, "rewards/margins": 7.218904495239258, "rewards/rejected": -17.202558517456055, "step": 17845 }, { "epoch": 2.78, "learning_rate": 1.0583546864463926e-06, "logits/chosen": -1.4008115530014038, "logits/rejected": -2.4514191150665283, "logps/chosen": -210.00222778320312, "logps/rejected": -396.094482421875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.710048675537109, "rewards/margins": 10.584907531738281, "rewards/rejected": -17.29495620727539, "step": 17846 }, { "epoch": 2.78, "learning_rate": 1.0576212459152446e-06, "logits/chosen": -2.4159133434295654, "logits/rejected": -2.1401174068450928, "logps/chosen": -211.16624450683594, "logps/rejected": -326.95452880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.261785507202148, "rewards/margins": 13.071893692016602, "rewards/rejected": -18.33367919921875, "step": 17847 }, { "epoch": 2.78, "learning_rate": 1.0568878053840965e-06, "logits/chosen": -1.8366601467132568, "logits/rejected": -2.621039867401123, "logps/chosen": -159.10784912109375, "logps/rejected": -345.6485290527344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.845331192016602, "rewards/margins": 9.056707382202148, "rewards/rejected": -15.90203857421875, "step": 17848 }, { "epoch": 2.78, "learning_rate": 1.0561543648529488e-06, "logits/chosen": -1.5342907905578613, "logits/rejected": -2.505260705947876, "logps/chosen": -246.3314666748047, "logps/rejected": -481.3081359863281, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.962103366851807, "rewards/margins": 7.210043907165527, "rewards/rejected": -14.172147750854492, "step": 17849 }, { "epoch": 2.78, "learning_rate": 1.055420924321801e-06, "logits/chosen": -2.7032268047332764, "logits/rejected": -2.255769968032837, "logps/chosen": -372.6885986328125, "logps/rejected": -333.76373291015625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -7.152961730957031, "rewards/margins": 5.946662425994873, "rewards/rejected": -13.099624633789062, "step": 17850 }, { "epoch": 2.78, "learning_rate": 1.054687483790653e-06, "logits/chosen": -2.708111047744751, "logits/rejected": -2.5069665908813477, "logps/chosen": -341.2181396484375, "logps/rejected": -451.5949401855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.214859962463379, "rewards/margins": 14.626686096191406, "rewards/rejected": -24.84154510498047, "step": 17851 }, { "epoch": 2.78, "learning_rate": 1.053954043259505e-06, "logits/chosen": -1.5263365507125854, "logits/rejected": -2.302647590637207, "logps/chosen": -435.1752014160156, "logps/rejected": -659.658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.002815246582031, "rewards/margins": 15.805582046508789, "rewards/rejected": -25.80839729309082, "step": 17852 }, { "epoch": 2.78, "learning_rate": 1.0532206027283574e-06, "logits/chosen": -2.475285768508911, "logits/rejected": -1.320775032043457, "logps/chosen": -165.15423583984375, "logps/rejected": -198.99740600585938, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.706042289733887, "rewards/margins": 8.363511085510254, "rewards/rejected": -15.06955337524414, "step": 17853 }, { "epoch": 2.78, "learning_rate": 1.0524871621972095e-06, "logits/chosen": -2.0034050941467285, "logits/rejected": -2.4055352210998535, "logps/chosen": -239.62416076660156, "logps/rejected": -330.4390563964844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.863029956817627, "rewards/margins": 7.888396263122559, "rewards/rejected": -13.751425743103027, "step": 17854 }, { "epoch": 2.78, "learning_rate": 1.0517537216660616e-06, "logits/chosen": -1.8043572902679443, "logits/rejected": -2.612851142883301, "logps/chosen": -454.2325744628906, "logps/rejected": -425.32684326171875, "loss": 0.9102, "rewards/accuracies": 0.5, "rewards/chosen": -10.740396499633789, "rewards/margins": 3.093473434448242, "rewards/rejected": -13.833869934082031, "step": 17855 }, { "epoch": 2.78, "learning_rate": 1.0510202811349137e-06, "logits/chosen": -2.0576136112213135, "logits/rejected": -2.5131900310516357, "logps/chosen": -262.51593017578125, "logps/rejected": -340.2153015136719, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -6.794329643249512, "rewards/margins": 7.005489349365234, "rewards/rejected": -13.79981803894043, "step": 17856 }, { "epoch": 2.78, "learning_rate": 1.0502868406037658e-06, "logits/chosen": -2.4114625453948975, "logits/rejected": -2.4066812992095947, "logps/chosen": -114.69969177246094, "logps/rejected": -218.4550323486328, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.662075042724609, "rewards/margins": 7.321212291717529, "rewards/rejected": -14.983287811279297, "step": 17857 }, { "epoch": 2.78, "learning_rate": 1.0495534000726179e-06, "logits/chosen": -2.2854723930358887, "logits/rejected": -2.633049726486206, "logps/chosen": -189.65663146972656, "logps/rejected": -383.4517822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.311795711517334, "rewards/margins": 16.42246437072754, "rewards/rejected": -21.73426055908203, "step": 17858 }, { "epoch": 2.78, "learning_rate": 1.04881995954147e-06, "logits/chosen": -1.0551220178604126, "logits/rejected": -2.8382914066314697, "logps/chosen": -158.94017028808594, "logps/rejected": -729.9730224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.490971565246582, "rewards/margins": 16.104400634765625, "rewards/rejected": -22.59537124633789, "step": 17859 }, { "epoch": 2.78, "learning_rate": 1.048086519010322e-06, "logits/chosen": -3.0878734588623047, "logits/rejected": -3.1593780517578125, "logps/chosen": -111.57379150390625, "logps/rejected": -239.10589599609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.631851196289062, "rewards/margins": 8.260354042053223, "rewards/rejected": -16.89220428466797, "step": 17860 }, { "epoch": 2.78, "learning_rate": 1.0473530784791741e-06, "logits/chosen": -2.5306613445281982, "logits/rejected": -1.8960806131362915, "logps/chosen": -209.95216369628906, "logps/rejected": -177.84555053710938, "loss": 0.0225, "rewards/accuracies": 1.0, "rewards/chosen": -7.127065658569336, "rewards/margins": 5.347568511962891, "rewards/rejected": -12.474634170532227, "step": 17861 }, { "epoch": 2.78, "learning_rate": 1.0466196379480264e-06, "logits/chosen": -2.7039670944213867, "logits/rejected": -3.0344862937927246, "logps/chosen": -92.82887268066406, "logps/rejected": -254.0113983154297, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.307615280151367, "rewards/margins": 11.351954460144043, "rewards/rejected": -15.659568786621094, "step": 17862 }, { "epoch": 2.78, "learning_rate": 1.0458861974168785e-06, "logits/chosen": -2.261052370071411, "logits/rejected": -2.8321633338928223, "logps/chosen": -336.7246398925781, "logps/rejected": -566.3631591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.317278861999512, "rewards/margins": 12.567145347595215, "rewards/rejected": -23.884424209594727, "step": 17863 }, { "epoch": 2.78, "learning_rate": 1.0451527568857306e-06, "logits/chosen": -2.61195969581604, "logits/rejected": -2.990581512451172, "logps/chosen": -117.4125747680664, "logps/rejected": -312.15350341796875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.590105056762695, "rewards/margins": 10.269423484802246, "rewards/rejected": -15.859528541564941, "step": 17864 }, { "epoch": 2.78, "learning_rate": 1.0444193163545827e-06, "logits/chosen": -1.656461477279663, "logits/rejected": -2.4379453659057617, "logps/chosen": -312.00018310546875, "logps/rejected": -368.23577880859375, "loss": 0.1296, "rewards/accuracies": 1.0, "rewards/chosen": -7.229403495788574, "rewards/margins": 7.191256999969482, "rewards/rejected": -14.420660972595215, "step": 17865 }, { "epoch": 2.78, "learning_rate": 1.043685875823435e-06, "logits/chosen": -2.6337015628814697, "logits/rejected": -2.1249663829803467, "logps/chosen": -393.2547912597656, "logps/rejected": -310.99114990234375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -9.475236892700195, "rewards/margins": 5.408429145812988, "rewards/rejected": -14.883665084838867, "step": 17866 }, { "epoch": 2.78, "learning_rate": 1.0429524352922869e-06, "logits/chosen": -2.5218112468719482, "logits/rejected": -2.4993746280670166, "logps/chosen": -226.7451171875, "logps/rejected": -445.782470703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.542651176452637, "rewards/margins": 14.211968421936035, "rewards/rejected": -19.754619598388672, "step": 17867 }, { "epoch": 2.78, "learning_rate": 1.042218994761139e-06, "logits/chosen": -0.9635367393493652, "logits/rejected": -2.540705442428589, "logps/chosen": -300.6205139160156, "logps/rejected": -573.2999267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.853633880615234, "rewards/margins": 10.782975196838379, "rewards/rejected": -18.636608123779297, "step": 17868 }, { "epoch": 2.78, "learning_rate": 1.041485554229991e-06, "logits/chosen": -2.5472869873046875, "logits/rejected": -2.3898844718933105, "logps/chosen": -480.578857421875, "logps/rejected": -509.97210693359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.082866668701172, "rewards/margins": 10.607587814331055, "rewards/rejected": -18.690454483032227, "step": 17869 }, { "epoch": 2.78, "learning_rate": 1.0407521136988434e-06, "logits/chosen": -1.2836745977401733, "logits/rejected": -2.85145902633667, "logps/chosen": -258.100341796875, "logps/rejected": -652.3568115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.3503642082214355, "rewards/margins": 11.795828819274902, "rewards/rejected": -16.14619255065918, "step": 17870 }, { "epoch": 2.78, "learning_rate": 1.0400186731676954e-06, "logits/chosen": -3.1018829345703125, "logits/rejected": -2.0617480278015137, "logps/chosen": -604.2778930664062, "logps/rejected": -580.4827270507812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.178906440734863, "rewards/margins": 8.072176933288574, "rewards/rejected": -17.251083374023438, "step": 17871 }, { "epoch": 2.78, "learning_rate": 1.0392852326365475e-06, "logits/chosen": -2.5229923725128174, "logits/rejected": -2.5726358890533447, "logps/chosen": -608.3086547851562, "logps/rejected": -838.6370239257812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.169172286987305, "rewards/margins": 15.185665130615234, "rewards/rejected": -21.35483741760254, "step": 17872 }, { "epoch": 2.78, "learning_rate": 1.0385517921053996e-06, "logits/chosen": -2.544705390930176, "logits/rejected": -2.8037562370300293, "logps/chosen": -88.20942687988281, "logps/rejected": -195.85731506347656, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.53887939453125, "rewards/margins": 7.1327714920043945, "rewards/rejected": -12.671650886535645, "step": 17873 }, { "epoch": 2.78, "learning_rate": 1.037818351574252e-06, "logits/chosen": -2.232736110687256, "logits/rejected": -2.7961835861206055, "logps/chosen": -149.0634307861328, "logps/rejected": -350.5867614746094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.470566749572754, "rewards/margins": 11.574244499206543, "rewards/rejected": -17.044811248779297, "step": 17874 }, { "epoch": 2.78, "learning_rate": 1.037084911043104e-06, "logits/chosen": -2.4890382289886475, "logits/rejected": -2.6288905143737793, "logps/chosen": -418.0066223144531, "logps/rejected": -345.8082580566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.224005222320557, "rewards/margins": 10.698607444763184, "rewards/rejected": -15.922612190246582, "step": 17875 }, { "epoch": 2.78, "learning_rate": 1.0363514705119561e-06, "logits/chosen": -2.830045223236084, "logits/rejected": -1.7659765481948853, "logps/chosen": -923.202392578125, "logps/rejected": -377.9841613769531, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.322278022766113, "rewards/margins": 6.973881721496582, "rewards/rejected": -14.296159744262695, "step": 17876 }, { "epoch": 2.78, "learning_rate": 1.035618029980808e-06, "logits/chosen": -2.7734203338623047, "logits/rejected": -3.040698528289795, "logps/chosen": -367.7006530761719, "logps/rejected": -682.8969116210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.300232887268066, "rewards/margins": 15.615349769592285, "rewards/rejected": -23.91558265686035, "step": 17877 }, { "epoch": 2.78, "learning_rate": 1.0348845894496603e-06, "logits/chosen": -2.76875638961792, "logits/rejected": -2.8869826793670654, "logps/chosen": -201.72320556640625, "logps/rejected": -372.32623291015625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -5.367945671081543, "rewards/margins": 7.434121608734131, "rewards/rejected": -12.802067756652832, "step": 17878 }, { "epoch": 2.78, "learning_rate": 1.0341511489185124e-06, "logits/chosen": -2.464122772216797, "logits/rejected": -1.5007802248001099, "logps/chosen": -178.4208221435547, "logps/rejected": -221.01254272460938, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.087408542633057, "rewards/margins": 8.960707664489746, "rewards/rejected": -15.048116683959961, "step": 17879 }, { "epoch": 2.78, "learning_rate": 1.0334177083873645e-06, "logits/chosen": -3.175635814666748, "logits/rejected": -2.703660011291504, "logps/chosen": -400.390869140625, "logps/rejected": -277.7690124511719, "loss": 0.1278, "rewards/accuracies": 1.0, "rewards/chosen": -9.67758560180664, "rewards/margins": 7.324143409729004, "rewards/rejected": -17.001728057861328, "step": 17880 }, { "epoch": 2.78, "learning_rate": 1.0326842678562166e-06, "logits/chosen": -2.206853151321411, "logits/rejected": -2.5576040744781494, "logps/chosen": -141.6239776611328, "logps/rejected": -326.102783203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.23490571975708, "rewards/margins": 9.230058670043945, "rewards/rejected": -16.464963912963867, "step": 17881 }, { "epoch": 2.78, "learning_rate": 1.0319508273250686e-06, "logits/chosen": -1.897217869758606, "logits/rejected": -1.722289800643921, "logps/chosen": -220.77313232421875, "logps/rejected": -346.05279541015625, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -7.422773361206055, "rewards/margins": 7.8436102867126465, "rewards/rejected": -15.26638412475586, "step": 17882 }, { "epoch": 2.78, "learning_rate": 1.031217386793921e-06, "logits/chosen": -2.4357385635375977, "logits/rejected": -1.8138928413391113, "logps/chosen": -283.75555419921875, "logps/rejected": -371.5687561035156, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -5.571481704711914, "rewards/margins": 6.55687141418457, "rewards/rejected": -12.128353118896484, "step": 17883 }, { "epoch": 2.78, "learning_rate": 1.030483946262773e-06, "logits/chosen": -2.50388503074646, "logits/rejected": -2.9398956298828125, "logps/chosen": -239.50901794433594, "logps/rejected": -407.952880859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.793669700622559, "rewards/margins": 8.107861518859863, "rewards/rejected": -17.901531219482422, "step": 17884 }, { "epoch": 2.78, "learning_rate": 1.0297505057316251e-06, "logits/chosen": -1.1317342519760132, "logits/rejected": -2.4636576175689697, "logps/chosen": -132.0815887451172, "logps/rejected": -525.1634521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.4647932052612305, "rewards/margins": 14.460039138793945, "rewards/rejected": -20.924833297729492, "step": 17885 }, { "epoch": 2.78, "learning_rate": 1.0290170652004772e-06, "logits/chosen": -1.460740566253662, "logits/rejected": -2.548431873321533, "logps/chosen": -211.29478454589844, "logps/rejected": -639.90185546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.0639848709106445, "rewards/margins": 11.501374244689941, "rewards/rejected": -17.565359115600586, "step": 17886 }, { "epoch": 2.78, "learning_rate": 1.0282836246693293e-06, "logits/chosen": -2.4550118446350098, "logits/rejected": -2.548527479171753, "logps/chosen": -100.40865325927734, "logps/rejected": -379.5765380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.748471260070801, "rewards/margins": 13.102360725402832, "rewards/rejected": -18.850831985473633, "step": 17887 }, { "epoch": 2.78, "learning_rate": 1.0275501841381814e-06, "logits/chosen": -2.6722075939178467, "logits/rejected": -2.4162635803222656, "logps/chosen": -386.3642578125, "logps/rejected": -307.66058349609375, "loss": 0.1792, "rewards/accuracies": 1.0, "rewards/chosen": -8.814480781555176, "rewards/margins": 4.528855323791504, "rewards/rejected": -13.34333610534668, "step": 17888 }, { "epoch": 2.78, "learning_rate": 1.0268167436070335e-06, "logits/chosen": -2.2352681159973145, "logits/rejected": -2.6106367111206055, "logps/chosen": -235.1342315673828, "logps/rejected": -657.6449584960938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.659663200378418, "rewards/margins": 14.054388046264648, "rewards/rejected": -19.714052200317383, "step": 17889 }, { "epoch": 2.78, "learning_rate": 1.0260833030758856e-06, "logits/chosen": -1.8347715139389038, "logits/rejected": -2.770587921142578, "logps/chosen": -136.3646240234375, "logps/rejected": -376.4918212890625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.764049053192139, "rewards/margins": 8.520502090454102, "rewards/rejected": -15.284551620483398, "step": 17890 }, { "epoch": 2.78, "learning_rate": 1.0253498625447379e-06, "logits/chosen": -2.176492214202881, "logits/rejected": -2.992385149002075, "logps/chosen": -165.971435546875, "logps/rejected": -360.8074035644531, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.971295356750488, "rewards/margins": 11.58807373046875, "rewards/rejected": -17.559370040893555, "step": 17891 }, { "epoch": 2.78, "learning_rate": 1.02461642201359e-06, "logits/chosen": -2.4480550289154053, "logits/rejected": -1.7246246337890625, "logps/chosen": -260.1173400878906, "logps/rejected": -374.2774353027344, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -8.905170440673828, "rewards/margins": 6.792084217071533, "rewards/rejected": -15.69725513458252, "step": 17892 }, { "epoch": 2.78, "learning_rate": 1.023882981482442e-06, "logits/chosen": -1.3555902242660522, "logits/rejected": -2.7827417850494385, "logps/chosen": -508.0301513671875, "logps/rejected": -725.2242431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.616467475891113, "rewards/margins": 14.993917465209961, "rewards/rejected": -20.610383987426758, "step": 17893 }, { "epoch": 2.78, "learning_rate": 1.0231495409512942e-06, "logits/chosen": -2.216534376144409, "logits/rejected": -2.874218225479126, "logps/chosen": -285.9498596191406, "logps/rejected": -491.757568359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.475246429443359, "rewards/margins": 10.598289489746094, "rewards/rejected": -15.073535919189453, "step": 17894 }, { "epoch": 2.78, "learning_rate": 1.0224161004201465e-06, "logits/chosen": -0.9722884893417358, "logits/rejected": -2.079660415649414, "logps/chosen": -145.03851318359375, "logps/rejected": -294.5755920410156, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.589024543762207, "rewards/margins": 7.608988285064697, "rewards/rejected": -14.198013305664062, "step": 17895 }, { "epoch": 2.78, "learning_rate": 1.0216826598889985e-06, "logits/chosen": -2.689821720123291, "logits/rejected": -2.712786912918091, "logps/chosen": -372.13232421875, "logps/rejected": -498.411865234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.867020606994629, "rewards/margins": 10.354342460632324, "rewards/rejected": -17.221363067626953, "step": 17896 }, { "epoch": 2.78, "learning_rate": 1.0209492193578504e-06, "logits/chosen": -2.187041759490967, "logits/rejected": -2.8594970703125, "logps/chosen": -150.91510009765625, "logps/rejected": -310.731689453125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.631200790405273, "rewards/margins": 7.895455360412598, "rewards/rejected": -15.526656150817871, "step": 17897 }, { "epoch": 2.78, "learning_rate": 1.0202157788267025e-06, "logits/chosen": -1.7449942827224731, "logits/rejected": -2.771188974380493, "logps/chosen": -219.0088653564453, "logps/rejected": -454.69140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.4666547775268555, "rewards/margins": 11.767745018005371, "rewards/rejected": -16.234399795532227, "step": 17898 }, { "epoch": 2.78, "learning_rate": 1.0194823382955548e-06, "logits/chosen": -2.342738628387451, "logits/rejected": -2.500319719314575, "logps/chosen": -107.19495391845703, "logps/rejected": -361.69329833984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.352095603942871, "rewards/margins": 12.272933959960938, "rewards/rejected": -18.625030517578125, "step": 17899 }, { "epoch": 2.78, "learning_rate": 1.018748897764407e-06, "logits/chosen": -1.8891569375991821, "logits/rejected": -2.4306297302246094, "logps/chosen": -463.5523986816406, "logps/rejected": -503.75604248046875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -9.53941535949707, "rewards/margins": 8.372054100036621, "rewards/rejected": -17.911468505859375, "step": 17900 }, { "epoch": 2.78, "learning_rate": 1.018015457233259e-06, "logits/chosen": -1.5760793685913086, "logits/rejected": -2.6883249282836914, "logps/chosen": -328.7381591796875, "logps/rejected": -743.0322875976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.854205131530762, "rewards/margins": 11.369491577148438, "rewards/rejected": -17.223697662353516, "step": 17901 }, { "epoch": 2.78, "learning_rate": 1.017282016702111e-06, "logits/chosen": -2.441908121109009, "logits/rejected": -2.9565274715423584, "logps/chosen": -364.355712890625, "logps/rejected": -312.3375244140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.8675537109375, "rewards/margins": 7.590161323547363, "rewards/rejected": -14.457715034484863, "step": 17902 }, { "epoch": 2.78, "learning_rate": 1.0165485761709632e-06, "logits/chosen": -2.1768851280212402, "logits/rejected": -2.495893716812134, "logps/chosen": -334.9612121582031, "logps/rejected": -458.4844055175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.9758148193359375, "rewards/margins": 9.369383811950684, "rewards/rejected": -16.345199584960938, "step": 17903 }, { "epoch": 2.78, "learning_rate": 1.0158151356398155e-06, "logits/chosen": -2.858781576156616, "logits/rejected": -2.3412609100341797, "logps/chosen": -198.11622619628906, "logps/rejected": -156.32015991210938, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": -4.837848663330078, "rewards/margins": 6.719145774841309, "rewards/rejected": -11.556994438171387, "step": 17904 }, { "epoch": 2.78, "learning_rate": 1.0150816951086676e-06, "logits/chosen": -2.2308952808380127, "logits/rejected": -2.725724697113037, "logps/chosen": -253.4160919189453, "logps/rejected": -450.2898254394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.487185955047607, "rewards/margins": 11.433652877807617, "rewards/rejected": -18.920839309692383, "step": 17905 }, { "epoch": 2.78, "learning_rate": 1.0143482545775197e-06, "logits/chosen": -1.6944687366485596, "logits/rejected": -2.2899651527404785, "logps/chosen": -316.6037292480469, "logps/rejected": -390.17559814453125, "loss": 0.0506, "rewards/accuracies": 1.0, "rewards/chosen": -6.89188814163208, "rewards/margins": 5.011015892028809, "rewards/rejected": -11.902904510498047, "step": 17906 }, { "epoch": 2.78, "learning_rate": 1.0136148140463715e-06, "logits/chosen": -2.9439520835876465, "logits/rejected": -2.0163369178771973, "logps/chosen": -431.04132080078125, "logps/rejected": -304.61669921875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -0.06558376550674438, "rewards/margins": 9.439483642578125, "rewards/rejected": -9.505067825317383, "step": 17907 }, { "epoch": 2.79, "learning_rate": 1.0128813735152238e-06, "logits/chosen": -1.884259819984436, "logits/rejected": -2.428964853286743, "logps/chosen": -284.2829284667969, "logps/rejected": -389.361572265625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.646535873413086, "rewards/margins": 7.989166736602783, "rewards/rejected": -13.635702133178711, "step": 17908 }, { "epoch": 2.79, "learning_rate": 1.012147932984076e-06, "logits/chosen": -2.4785659313201904, "logits/rejected": -1.4428151845932007, "logps/chosen": -516.341796875, "logps/rejected": -441.36956787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.507260322570801, "rewards/margins": 16.456092834472656, "rewards/rejected": -23.96335220336914, "step": 17909 }, { "epoch": 2.79, "learning_rate": 1.011414492452928e-06, "logits/chosen": -2.6801106929779053, "logits/rejected": -2.6746692657470703, "logps/chosen": -164.39610290527344, "logps/rejected": -285.0938415527344, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -9.120497703552246, "rewards/margins": 6.598278045654297, "rewards/rejected": -15.718774795532227, "step": 17910 }, { "epoch": 2.79, "learning_rate": 1.0106810519217801e-06, "logits/chosen": -2.624206066131592, "logits/rejected": -2.5910041332244873, "logps/chosen": -474.2160339355469, "logps/rejected": -413.0995178222656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.03802490234375, "rewards/margins": 9.614233016967773, "rewards/rejected": -17.652257919311523, "step": 17911 }, { "epoch": 2.79, "learning_rate": 1.0099476113906324e-06, "logits/chosen": -3.225741147994995, "logits/rejected": -3.1524200439453125, "logps/chosen": -360.3956298828125, "logps/rejected": -386.5339660644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.486544609069824, "rewards/margins": 9.829538345336914, "rewards/rejected": -15.316082954406738, "step": 17912 }, { "epoch": 2.79, "learning_rate": 1.0092141708594845e-06, "logits/chosen": -2.2573254108428955, "logits/rejected": -2.6947741508483887, "logps/chosen": -269.394287109375, "logps/rejected": -426.1163330078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.032621383666992, "rewards/margins": 9.72408390045166, "rewards/rejected": -17.75670623779297, "step": 17913 }, { "epoch": 2.79, "learning_rate": 1.0084807303283366e-06, "logits/chosen": -2.534877300262451, "logits/rejected": -3.0944976806640625, "logps/chosen": -403.1748352050781, "logps/rejected": -473.1275634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.008440017700195, "rewards/margins": 13.894302368164062, "rewards/rejected": -22.902742385864258, "step": 17914 }, { "epoch": 2.79, "learning_rate": 1.0077472897971887e-06, "logits/chosen": -2.5695502758026123, "logits/rejected": -2.218384027481079, "logps/chosen": -254.34869384765625, "logps/rejected": -267.3930969238281, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.841081380844116, "rewards/margins": 6.423084259033203, "rewards/rejected": -10.264165878295898, "step": 17915 }, { "epoch": 2.79, "learning_rate": 1.007013849266041e-06, "logits/chosen": -2.5797536373138428, "logits/rejected": -2.500288248062134, "logps/chosen": -206.0128631591797, "logps/rejected": -218.67950439453125, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -6.966327667236328, "rewards/margins": 5.479575157165527, "rewards/rejected": -12.445902824401855, "step": 17916 }, { "epoch": 2.79, "learning_rate": 1.0062804087348929e-06, "logits/chosen": -1.2579652070999146, "logits/rejected": -2.6136679649353027, "logps/chosen": -119.845947265625, "logps/rejected": -338.3780212402344, "loss": 0.0222, "rewards/accuracies": 1.0, "rewards/chosen": -6.8482818603515625, "rewards/margins": 8.11154556274414, "rewards/rejected": -14.959826469421387, "step": 17917 }, { "epoch": 2.79, "learning_rate": 1.005546968203745e-06, "logits/chosen": -2.5252699851989746, "logits/rejected": -1.6248401403427124, "logps/chosen": -252.18246459960938, "logps/rejected": -288.99114990234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.4272918701171875, "rewards/margins": 8.432870864868164, "rewards/rejected": -15.860163688659668, "step": 17918 }, { "epoch": 2.79, "learning_rate": 1.004813527672597e-06, "logits/chosen": -1.5477343797683716, "logits/rejected": -2.5109012126922607, "logps/chosen": -180.91143798828125, "logps/rejected": -292.5489196777344, "loss": 0.7371, "rewards/accuracies": 0.5, "rewards/chosen": -9.45775032043457, "rewards/margins": 6.885860443115234, "rewards/rejected": -16.343610763549805, "step": 17919 }, { "epoch": 2.79, "learning_rate": 1.0040800871414491e-06, "logits/chosen": -2.7766380310058594, "logits/rejected": -1.333745002746582, "logps/chosen": -627.876220703125, "logps/rejected": -422.9945983886719, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.385597229003906, "rewards/margins": 8.604042053222656, "rewards/rejected": -13.989639282226562, "step": 17920 }, { "epoch": 2.79, "learning_rate": 1.0033466466103014e-06, "logits/chosen": -2.563791513442993, "logits/rejected": -2.161827802658081, "logps/chosen": -206.14613342285156, "logps/rejected": -322.053466796875, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": -9.955670356750488, "rewards/margins": 2.8663055896759033, "rewards/rejected": -12.821975708007812, "step": 17921 }, { "epoch": 2.79, "learning_rate": 1.0026132060791535e-06, "logits/chosen": -1.8520631790161133, "logits/rejected": -2.5950794219970703, "logps/chosen": -540.8877563476562, "logps/rejected": -788.3450927734375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -7.972774982452393, "rewards/margins": 12.695280075073242, "rewards/rejected": -20.66805648803711, "step": 17922 }, { "epoch": 2.79, "learning_rate": 1.0018797655480056e-06, "logits/chosen": -2.3741583824157715, "logits/rejected": -2.7344253063201904, "logps/chosen": -186.44825744628906, "logps/rejected": -477.56500244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.985651016235352, "rewards/margins": 12.231796264648438, "rewards/rejected": -21.21744728088379, "step": 17923 }, { "epoch": 2.79, "learning_rate": 1.0011463250168577e-06, "logits/chosen": -1.716568946838379, "logits/rejected": -2.8300366401672363, "logps/chosen": -346.6250915527344, "logps/rejected": -647.976318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.029329299926758, "rewards/margins": 13.503501892089844, "rewards/rejected": -19.5328311920166, "step": 17924 }, { "epoch": 2.79, "learning_rate": 1.00041288448571e-06, "logits/chosen": -2.319307327270508, "logits/rejected": -2.137155771255493, "logps/chosen": -277.4083251953125, "logps/rejected": -428.81903076171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.791337013244629, "rewards/margins": 9.496879577636719, "rewards/rejected": -16.28821563720703, "step": 17925 }, { "epoch": 2.79, "learning_rate": 9.996794439545619e-07, "logits/chosen": -1.2880709171295166, "logits/rejected": -2.6387763023376465, "logps/chosen": -155.81393432617188, "logps/rejected": -472.54736328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.617115497589111, "rewards/margins": 9.417411804199219, "rewards/rejected": -17.034526824951172, "step": 17926 }, { "epoch": 2.79, "learning_rate": 9.98946003423414e-07, "logits/chosen": -2.488842487335205, "logits/rejected": -2.338528871536255, "logps/chosen": -590.3218383789062, "logps/rejected": -691.591796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.5399322509765625, "rewards/margins": 13.732491493225098, "rewards/rejected": -20.272422790527344, "step": 17927 }, { "epoch": 2.79, "learning_rate": 9.98212562892266e-07, "logits/chosen": -2.4916634559631348, "logits/rejected": -2.5457675457000732, "logps/chosen": -339.1124572753906, "logps/rejected": -279.54730224609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.968996524810791, "rewards/margins": 11.1690034866333, "rewards/rejected": -17.13800048828125, "step": 17928 }, { "epoch": 2.79, "learning_rate": 9.974791223611184e-07, "logits/chosen": -2.7882962226867676, "logits/rejected": -3.1594648361206055, "logps/chosen": -357.8817138671875, "logps/rejected": -498.8524169921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.9021472930908203, "rewards/margins": 9.618678092956543, "rewards/rejected": -13.520825386047363, "step": 17929 }, { "epoch": 2.79, "learning_rate": 9.967456818299705e-07, "logits/chosen": -2.7881741523742676, "logits/rejected": -2.2956738471984863, "logps/chosen": -244.35723876953125, "logps/rejected": -314.93707275390625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.41367769241333, "rewards/margins": 12.133317947387695, "rewards/rejected": -16.5469970703125, "step": 17930 }, { "epoch": 2.79, "learning_rate": 9.960122412988225e-07, "logits/chosen": -2.8531885147094727, "logits/rejected": -2.6404480934143066, "logps/chosen": -243.1700897216797, "logps/rejected": -642.2462158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.9698591232299805, "rewards/margins": 13.863210678100586, "rewards/rejected": -18.83306884765625, "step": 17931 }, { "epoch": 2.79, "learning_rate": 9.952788007676746e-07, "logits/chosen": -2.48008131980896, "logits/rejected": -1.8995716571807861, "logps/chosen": -327.58416748046875, "logps/rejected": -452.13189697265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.674942970275879, "rewards/margins": 12.860523223876953, "rewards/rejected": -19.535465240478516, "step": 17932 }, { "epoch": 2.79, "learning_rate": 9.94545360236527e-07, "logits/chosen": -1.9711639881134033, "logits/rejected": -2.551217794418335, "logps/chosen": -103.06578063964844, "logps/rejected": -240.29498291015625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.195772171020508, "rewards/margins": 6.810947418212891, "rewards/rejected": -14.006719589233398, "step": 17933 }, { "epoch": 2.79, "learning_rate": 9.93811919705379e-07, "logits/chosen": -2.689643383026123, "logits/rejected": -2.4396450519561768, "logps/chosen": -244.5521240234375, "logps/rejected": -258.6549072265625, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.149147033691406, "rewards/margins": 7.435923099517822, "rewards/rejected": -12.58506965637207, "step": 17934 }, { "epoch": 2.79, "learning_rate": 9.930784791742311e-07, "logits/chosen": -2.4374380111694336, "logits/rejected": -2.9733612537384033, "logps/chosen": -169.5256805419922, "logps/rejected": -404.1861572265625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.073126792907715, "rewards/margins": 8.078912734985352, "rewards/rejected": -16.15203857421875, "step": 17935 }, { "epoch": 2.79, "learning_rate": 9.92345038643083e-07, "logits/chosen": -1.8816301822662354, "logits/rejected": -2.706026554107666, "logps/chosen": -185.55287170410156, "logps/rejected": -582.330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.271624565124512, "rewards/margins": 15.76429557800293, "rewards/rejected": -24.035921096801758, "step": 17936 }, { "epoch": 2.79, "learning_rate": 9.916115981119353e-07, "logits/chosen": -2.5978338718414307, "logits/rejected": -2.510159730911255, "logps/chosen": -222.1995849609375, "logps/rejected": -406.82318115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.726169586181641, "rewards/margins": 10.940892219543457, "rewards/rejected": -16.66706085205078, "step": 17937 }, { "epoch": 2.79, "learning_rate": 9.908781575807874e-07, "logits/chosen": -2.738218069076538, "logits/rejected": -2.977797508239746, "logps/chosen": -401.0152587890625, "logps/rejected": -705.7940673828125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -11.244009017944336, "rewards/margins": 8.952384948730469, "rewards/rejected": -20.196393966674805, "step": 17938 }, { "epoch": 2.79, "learning_rate": 9.901447170496395e-07, "logits/chosen": -2.7231478691101074, "logits/rejected": -2.820143222808838, "logps/chosen": -288.4730529785156, "logps/rejected": -422.0107116699219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.598487854003906, "rewards/margins": 9.226806640625, "rewards/rejected": -16.825294494628906, "step": 17939 }, { "epoch": 2.79, "learning_rate": 9.894112765184916e-07, "logits/chosen": -1.4491726160049438, "logits/rejected": -2.5897603034973145, "logps/chosen": -211.033447265625, "logps/rejected": -374.3010559082031, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.413142204284668, "rewards/margins": 8.486444473266602, "rewards/rejected": -13.899587631225586, "step": 17940 }, { "epoch": 2.79, "learning_rate": 9.886778359873437e-07, "logits/chosen": -2.3957676887512207, "logits/rejected": -2.423480987548828, "logps/chosen": -406.09759521484375, "logps/rejected": -385.5887756347656, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.961947441101074, "rewards/margins": 7.0255537033081055, "rewards/rejected": -14.98750114440918, "step": 17941 }, { "epoch": 2.79, "learning_rate": 9.87944395456196e-07, "logits/chosen": -1.215739369392395, "logits/rejected": -2.110031843185425, "logps/chosen": -227.4190216064453, "logps/rejected": -544.7246704101562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.103936195373535, "rewards/margins": 14.822626113891602, "rewards/rejected": -20.926563262939453, "step": 17942 }, { "epoch": 2.79, "learning_rate": 9.87210954925048e-07, "logits/chosen": -1.7709912061691284, "logits/rejected": -2.688525438308716, "logps/chosen": -368.02105712890625, "logps/rejected": -483.5438537597656, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.735097885131836, "rewards/margins": 11.95596694946289, "rewards/rejected": -18.691064834594727, "step": 17943 }, { "epoch": 2.79, "learning_rate": 9.864775143939001e-07, "logits/chosen": -2.8571934700012207, "logits/rejected": -2.5965003967285156, "logps/chosen": -225.38226318359375, "logps/rejected": -431.7571105957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.028280735015869, "rewards/margins": 12.025256156921387, "rewards/rejected": -19.053537368774414, "step": 17944 }, { "epoch": 2.79, "learning_rate": 9.857440738627522e-07, "logits/chosen": -2.2413389682769775, "logits/rejected": -2.749572277069092, "logps/chosen": -120.54702758789062, "logps/rejected": -317.62518310546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.449336051940918, "rewards/margins": 8.881964683532715, "rewards/rejected": -15.331300735473633, "step": 17945 }, { "epoch": 2.79, "learning_rate": 9.850106333316043e-07, "logits/chosen": -2.526547431945801, "logits/rejected": -2.6238551139831543, "logps/chosen": -290.9320373535156, "logps/rejected": -387.57354736328125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -7.941311836242676, "rewards/margins": 6.166074752807617, "rewards/rejected": -14.10738754272461, "step": 17946 }, { "epoch": 2.79, "learning_rate": 9.842771928004564e-07, "logits/chosen": -2.8108303546905518, "logits/rejected": -2.1851284503936768, "logps/chosen": -329.2672119140625, "logps/rejected": -403.6620178222656, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.972342491149902, "rewards/margins": 7.264954566955566, "rewards/rejected": -16.23729705810547, "step": 17947 }, { "epoch": 2.79, "learning_rate": 9.835437522693085e-07, "logits/chosen": -2.337989091873169, "logits/rejected": -2.741628408432007, "logps/chosen": -191.33944702148438, "logps/rejected": -368.6905517578125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.1651506423950195, "rewards/margins": 7.5568952560424805, "rewards/rejected": -13.7220458984375, "step": 17948 }, { "epoch": 2.79, "learning_rate": 9.828103117381606e-07, "logits/chosen": -2.0650103092193604, "logits/rejected": -2.8951098918914795, "logps/chosen": -321.090576171875, "logps/rejected": -573.3287353515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.924713134765625, "rewards/margins": 11.234583854675293, "rewards/rejected": -16.159297943115234, "step": 17949 }, { "epoch": 2.79, "learning_rate": 9.820768712070129e-07, "logits/chosen": -2.4409372806549072, "logits/rejected": -1.9276208877563477, "logps/chosen": -261.3332824707031, "logps/rejected": -366.3372802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.768613815307617, "rewards/margins": 10.970166206359863, "rewards/rejected": -19.738780975341797, "step": 17950 }, { "epoch": 2.79, "learning_rate": 9.81343430675865e-07, "logits/chosen": -2.2905430793762207, "logits/rejected": -2.169358015060425, "logps/chosen": -134.65370178222656, "logps/rejected": -273.6886291503906, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.7997636795043945, "rewards/margins": 11.261922836303711, "rewards/rejected": -17.061687469482422, "step": 17951 }, { "epoch": 2.79, "learning_rate": 9.80609990144717e-07, "logits/chosen": -2.9462103843688965, "logits/rejected": -2.155132532119751, "logps/chosen": -377.7113037109375, "logps/rejected": -228.41506958007812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.8466973304748535, "rewards/margins": 10.955595970153809, "rewards/rejected": -15.802292823791504, "step": 17952 }, { "epoch": 2.79, "learning_rate": 9.798765496135692e-07, "logits/chosen": -2.346188545227051, "logits/rejected": -1.7381536960601807, "logps/chosen": -204.9840545654297, "logps/rejected": -368.4967956542969, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/chosen": -6.978363990783691, "rewards/margins": 7.86931037902832, "rewards/rejected": -14.847674369812012, "step": 17953 }, { "epoch": 2.79, "learning_rate": 9.791431090824215e-07, "logits/chosen": -2.0901880264282227, "logits/rejected": -2.6094956398010254, "logps/chosen": -227.2398681640625, "logps/rejected": -469.9611511230469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.106791496276855, "rewards/margins": 10.099769592285156, "rewards/rejected": -19.206562042236328, "step": 17954 }, { "epoch": 2.79, "learning_rate": 9.784096685512736e-07, "logits/chosen": -2.5612668991088867, "logits/rejected": -2.627764940261841, "logps/chosen": -149.3872528076172, "logps/rejected": -310.2835693359375, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -10.500223159790039, "rewards/margins": 6.317359447479248, "rewards/rejected": -16.817583084106445, "step": 17955 }, { "epoch": 2.79, "learning_rate": 9.776762280201254e-07, "logits/chosen": -2.4287872314453125, "logits/rejected": -2.5039589405059814, "logps/chosen": -323.0800476074219, "logps/rejected": -539.5645751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.756364822387695, "rewards/margins": 13.490942001342773, "rewards/rejected": -21.24730682373047, "step": 17956 }, { "epoch": 2.79, "learning_rate": 9.769427874889775e-07, "logits/chosen": -2.1884312629699707, "logits/rejected": -1.9189987182617188, "logps/chosen": -180.7860870361328, "logps/rejected": -371.7414245605469, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.067634582519531, "rewards/margins": 11.273747444152832, "rewards/rejected": -19.341381072998047, "step": 17957 }, { "epoch": 2.79, "learning_rate": 9.762093469578298e-07, "logits/chosen": -2.196911334991455, "logits/rejected": -2.6767184734344482, "logps/chosen": -168.3306427001953, "logps/rejected": -436.2327880859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.113000392913818, "rewards/margins": 8.6708402633667, "rewards/rejected": -15.78384017944336, "step": 17958 }, { "epoch": 2.79, "learning_rate": 9.75475906426682e-07, "logits/chosen": -2.6716980934143066, "logits/rejected": -2.6051480770111084, "logps/chosen": -579.8477783203125, "logps/rejected": -579.46240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.923814535140991, "rewards/margins": 12.56905746459961, "rewards/rejected": -16.49287223815918, "step": 17959 }, { "epoch": 2.79, "learning_rate": 9.74742465895534e-07, "logits/chosen": -2.29474139213562, "logits/rejected": -2.6345744132995605, "logps/chosen": -560.8494262695312, "logps/rejected": -685.454833984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.564016342163086, "rewards/margins": 8.44833755493164, "rewards/rejected": -17.012353897094727, "step": 17960 }, { "epoch": 2.79, "learning_rate": 9.74009025364386e-07, "logits/chosen": -2.6848673820495605, "logits/rejected": -2.6101317405700684, "logps/chosen": -139.436279296875, "logps/rejected": -221.39541625976562, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.871336460113525, "rewards/margins": 7.054717063903809, "rewards/rejected": -12.926054000854492, "step": 17961 }, { "epoch": 2.79, "learning_rate": 9.732755848332382e-07, "logits/chosen": -1.6677703857421875, "logits/rejected": -2.299633741378784, "logps/chosen": -472.04864501953125, "logps/rejected": -975.10888671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.127012729644775, "rewards/margins": 17.51894187927246, "rewards/rejected": -24.645954132080078, "step": 17962 }, { "epoch": 2.79, "learning_rate": 9.725421443020905e-07, "logits/chosen": -2.5911290645599365, "logits/rejected": -2.8021016120910645, "logps/chosen": -192.8115692138672, "logps/rejected": -351.8126220703125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.468648433685303, "rewards/margins": 8.207223892211914, "rewards/rejected": -14.675871849060059, "step": 17963 }, { "epoch": 2.79, "learning_rate": 9.718087037709426e-07, "logits/chosen": -2.0991742610931396, "logits/rejected": -2.4855780601501465, "logps/chosen": -820.6375122070312, "logps/rejected": -747.0489501953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.286852836608887, "rewards/margins": 8.930602073669434, "rewards/rejected": -19.21745491027832, "step": 17964 }, { "epoch": 2.79, "learning_rate": 9.710752632397947e-07, "logits/chosen": -2.4312188625335693, "logits/rejected": -2.4395596981048584, "logps/chosen": -187.24879455566406, "logps/rejected": -376.7295227050781, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -7.2752251625061035, "rewards/margins": 6.872778415679932, "rewards/rejected": -14.148003578186035, "step": 17965 }, { "epoch": 2.79, "learning_rate": 9.703418227086465e-07, "logits/chosen": -2.361412286758423, "logits/rejected": -2.1234591007232666, "logps/chosen": -164.73291015625, "logps/rejected": -252.98448181152344, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -10.944828033447266, "rewards/margins": 7.57512903213501, "rewards/rejected": -18.519956588745117, "step": 17966 }, { "epoch": 2.79, "learning_rate": 9.696083821774988e-07, "logits/chosen": -2.660061836242676, "logits/rejected": -3.1304287910461426, "logps/chosen": -137.22540283203125, "logps/rejected": -303.6346130371094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.290449142456055, "rewards/margins": 8.831291198730469, "rewards/rejected": -17.121740341186523, "step": 17967 }, { "epoch": 2.79, "learning_rate": 9.68874941646351e-07, "logits/chosen": -2.647000551223755, "logits/rejected": -2.5050830841064453, "logps/chosen": -234.44403076171875, "logps/rejected": -315.223876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.845425605773926, "rewards/margins": 10.674656867980957, "rewards/rejected": -17.520082473754883, "step": 17968 }, { "epoch": 2.79, "learning_rate": 9.68141501115203e-07, "logits/chosen": -2.177469491958618, "logits/rejected": -2.5935068130493164, "logps/chosen": -132.39227294921875, "logps/rejected": -304.40899658203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.6349101066589355, "rewards/margins": 8.455820083618164, "rewards/rejected": -13.090730667114258, "step": 17969 }, { "epoch": 2.79, "learning_rate": 9.674080605840551e-07, "logits/chosen": -1.3421683311462402, "logits/rejected": -2.076763868331909, "logps/chosen": -250.7135009765625, "logps/rejected": -556.6514892578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.300298690795898, "rewards/margins": 9.79831314086914, "rewards/rejected": -18.09861183166504, "step": 17970 }, { "epoch": 2.79, "learning_rate": 9.666746200529074e-07, "logits/chosen": -2.2348713874816895, "logits/rejected": -1.5205470323562622, "logps/chosen": -253.1549530029297, "logps/rejected": -351.8866882324219, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.698119163513184, "rewards/margins": 12.361930847167969, "rewards/rejected": -19.060049057006836, "step": 17971 }, { "epoch": 2.8, "learning_rate": 9.659411795217595e-07, "logits/chosen": -0.9190999865531921, "logits/rejected": -1.950784683227539, "logps/chosen": -275.12689208984375, "logps/rejected": -597.287109375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.693967819213867, "rewards/margins": 14.893088340759277, "rewards/rejected": -24.587055206298828, "step": 17972 }, { "epoch": 2.8, "learning_rate": 9.652077389906116e-07, "logits/chosen": -2.35884690284729, "logits/rejected": -2.656595230102539, "logps/chosen": -229.65789794921875, "logps/rejected": -396.87689208984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.903005599975586, "rewards/margins": 8.904083251953125, "rewards/rejected": -16.80708885192871, "step": 17973 }, { "epoch": 2.8, "learning_rate": 9.644742984594637e-07, "logits/chosen": -2.4595999717712402, "logits/rejected": -1.7120267152786255, "logps/chosen": -587.6720581054688, "logps/rejected": -441.26690673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.785611152648926, "rewards/margins": 13.3079833984375, "rewards/rejected": -19.09359359741211, "step": 17974 }, { "epoch": 2.8, "learning_rate": 9.63740857928316e-07, "logits/chosen": -2.6819372177124023, "logits/rejected": -2.698533773422241, "logps/chosen": -129.4244384765625, "logps/rejected": -272.9919128417969, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.520689487457275, "rewards/margins": 7.040020942687988, "rewards/rejected": -12.560710906982422, "step": 17975 }, { "epoch": 2.8, "learning_rate": 9.630074173971679e-07, "logits/chosen": -2.3507184982299805, "logits/rejected": -2.4901981353759766, "logps/chosen": -294.5833740234375, "logps/rejected": -476.7153625488281, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.5506391525268555, "rewards/margins": 8.395357131958008, "rewards/rejected": -14.945995330810547, "step": 17976 }, { "epoch": 2.8, "learning_rate": 9.6227397686602e-07, "logits/chosen": -1.617537498474121, "logits/rejected": -2.018214464187622, "logps/chosen": -163.47802734375, "logps/rejected": -391.69696044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.035152435302734, "rewards/margins": 12.72287654876709, "rewards/rejected": -21.75802993774414, "step": 17977 }, { "epoch": 2.8, "learning_rate": 9.61540536334872e-07, "logits/chosen": -2.180920124053955, "logits/rejected": -2.746971845626831, "logps/chosen": -214.524658203125, "logps/rejected": -413.8658142089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.264278411865234, "rewards/margins": 12.55551528930664, "rewards/rejected": -16.819793701171875, "step": 17978 }, { "epoch": 2.8, "learning_rate": 9.608070958037243e-07, "logits/chosen": -2.0145018100738525, "logits/rejected": -2.615053653717041, "logps/chosen": -150.47085571289062, "logps/rejected": -385.03857421875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -7.613591194152832, "rewards/margins": 10.750818252563477, "rewards/rejected": -18.364410400390625, "step": 17979 }, { "epoch": 2.8, "learning_rate": 9.600736552725764e-07, "logits/chosen": -2.3187005519866943, "logits/rejected": -2.768251895904541, "logps/chosen": -207.41363525390625, "logps/rejected": -275.00006103515625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -6.340376853942871, "rewards/margins": 5.421977996826172, "rewards/rejected": -11.76235580444336, "step": 17980 }, { "epoch": 2.8, "learning_rate": 9.593402147414285e-07, "logits/chosen": -1.8053231239318848, "logits/rejected": -2.3254010677337646, "logps/chosen": -175.0016632080078, "logps/rejected": -415.28125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.680275917053223, "rewards/margins": 14.354690551757812, "rewards/rejected": -20.03496551513672, "step": 17981 }, { "epoch": 2.8, "learning_rate": 9.586067742102806e-07, "logits/chosen": -2.4987707138061523, "logits/rejected": -1.5786958932876587, "logps/chosen": -612.0692138671875, "logps/rejected": -690.2041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.957671165466309, "rewards/margins": 17.897600173950195, "rewards/rejected": -26.85527229309082, "step": 17982 }, { "epoch": 2.8, "learning_rate": 9.578733336791327e-07, "logits/chosen": -1.1701005697250366, "logits/rejected": -2.6501312255859375, "logps/chosen": -135.33343505859375, "logps/rejected": -530.5784301757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.078530311584473, "rewards/margins": 13.487968444824219, "rewards/rejected": -18.566497802734375, "step": 17983 }, { "epoch": 2.8, "learning_rate": 9.57139893147985e-07, "logits/chosen": -2.260443925857544, "logits/rejected": -2.2895126342773438, "logps/chosen": -238.32321166992188, "logps/rejected": -312.7892150878906, "loss": 0.4532, "rewards/accuracies": 0.5, "rewards/chosen": -9.488777160644531, "rewards/margins": 4.713596343994141, "rewards/rejected": -14.202373504638672, "step": 17984 }, { "epoch": 2.8, "learning_rate": 9.56406452616837e-07, "logits/chosen": -2.4490647315979004, "logits/rejected": -1.7008603811264038, "logps/chosen": -384.67535400390625, "logps/rejected": -356.748291015625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.4571533203125, "rewards/margins": 7.9896650314331055, "rewards/rejected": -14.446819305419922, "step": 17985 }, { "epoch": 2.8, "learning_rate": 9.55673012085689e-07, "logits/chosen": -1.6229370832443237, "logits/rejected": -2.523402452468872, "logps/chosen": -137.60784912109375, "logps/rejected": -396.92193603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.1049723625183105, "rewards/margins": 13.361300468444824, "rewards/rejected": -18.466272354125977, "step": 17986 }, { "epoch": 2.8, "learning_rate": 9.54939571554541e-07, "logits/chosen": -2.6125099658966064, "logits/rejected": -2.5012130737304688, "logps/chosen": -182.33770751953125, "logps/rejected": -234.95831298828125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -7.956789016723633, "rewards/margins": 5.597538948059082, "rewards/rejected": -13.554328918457031, "step": 17987 }, { "epoch": 2.8, "learning_rate": 9.542061310233934e-07, "logits/chosen": -2.6876184940338135, "logits/rejected": -2.5343544483184814, "logps/chosen": -236.87152099609375, "logps/rejected": -251.89068603515625, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -7.731682777404785, "rewards/margins": 8.713136672973633, "rewards/rejected": -16.444820404052734, "step": 17988 }, { "epoch": 2.8, "learning_rate": 9.534726904922455e-07, "logits/chosen": -0.9598307013511658, "logits/rejected": -2.503357410430908, "logps/chosen": -193.97781372070312, "logps/rejected": -587.6363525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2574381828308105, "rewards/margins": 16.83663558959961, "rewards/rejected": -24.094074249267578, "step": 17989 }, { "epoch": 2.8, "learning_rate": 9.527392499610975e-07, "logits/chosen": -2.5583176612854004, "logits/rejected": -2.7320539951324463, "logps/chosen": -184.5867156982422, "logps/rejected": -379.43505859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.691241264343262, "rewards/margins": 7.416081428527832, "rewards/rejected": -15.107322692871094, "step": 17990 }, { "epoch": 2.8, "learning_rate": 9.520058094299497e-07, "logits/chosen": -2.285188913345337, "logits/rejected": -2.671605110168457, "logps/chosen": -129.97711181640625, "logps/rejected": -414.42431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.293832302093506, "rewards/margins": 16.828819274902344, "rewards/rejected": -22.122652053833008, "step": 17991 }, { "epoch": 2.8, "learning_rate": 9.512723688988018e-07, "logits/chosen": -2.49104642868042, "logits/rejected": -2.7758114337921143, "logps/chosen": -69.05870819091797, "logps/rejected": -269.7684020996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.410649299621582, "rewards/margins": 10.633279800415039, "rewards/rejected": -15.043929100036621, "step": 17992 }, { "epoch": 2.8, "learning_rate": 9.50538928367654e-07, "logits/chosen": -2.0146591663360596, "logits/rejected": -2.8566064834594727, "logps/chosen": -120.85325622558594, "logps/rejected": -298.5695495605469, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -8.686470985412598, "rewards/margins": 6.020247459411621, "rewards/rejected": -14.706718444824219, "step": 17993 }, { "epoch": 2.8, "learning_rate": 9.498054878365061e-07, "logits/chosen": -2.1752264499664307, "logits/rejected": -2.7027394771575928, "logps/chosen": -106.11248779296875, "logps/rejected": -308.2130126953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.918006896972656, "rewards/margins": 8.063129425048828, "rewards/rejected": -16.981136322021484, "step": 17994 }, { "epoch": 2.8, "learning_rate": 9.490720473053581e-07, "logits/chosen": -2.895500659942627, "logits/rejected": -1.5251438617706299, "logps/chosen": -308.40191650390625, "logps/rejected": -199.0809326171875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.414250373840332, "rewards/margins": 7.919173240661621, "rewards/rejected": -12.333423614501953, "step": 17995 }, { "epoch": 2.8, "learning_rate": 9.483386067742102e-07, "logits/chosen": -0.8362202644348145, "logits/rejected": -2.725942373275757, "logps/chosen": -115.85211181640625, "logps/rejected": -553.3414306640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.41539192199707, "rewards/margins": 9.873003959655762, "rewards/rejected": -18.288394927978516, "step": 17996 }, { "epoch": 2.8, "learning_rate": 9.476051662430624e-07, "logits/chosen": -1.77745521068573, "logits/rejected": -2.5271215438842773, "logps/chosen": -270.887451171875, "logps/rejected": -419.89923095703125, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/chosen": -9.165654182434082, "rewards/margins": 7.978328704833984, "rewards/rejected": -17.14398193359375, "step": 17997 }, { "epoch": 2.8, "learning_rate": 9.468717257119145e-07, "logits/chosen": -2.6750810146331787, "logits/rejected": -2.767263412475586, "logps/chosen": -131.2589111328125, "logps/rejected": -293.75762939453125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.571066856384277, "rewards/margins": 9.433798789978027, "rewards/rejected": -16.004865646362305, "step": 17998 }, { "epoch": 2.8, "learning_rate": 9.461382851807667e-07, "logits/chosen": -1.142727255821228, "logits/rejected": -2.4222259521484375, "logps/chosen": -208.85089111328125, "logps/rejected": -628.8892822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.979426383972168, "rewards/margins": 12.508821487426758, "rewards/rejected": -22.48824691772461, "step": 17999 }, { "epoch": 2.8, "learning_rate": 9.454048446496188e-07, "logits/chosen": -2.02335786819458, "logits/rejected": -2.880916118621826, "logps/chosen": -163.23880004882812, "logps/rejected": -523.4158325195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.582937240600586, "rewards/margins": 11.364144325256348, "rewards/rejected": -17.94708251953125, "step": 18000 }, { "epoch": 2.8, "learning_rate": 9.446714041184709e-07, "logits/chosen": -2.72735595703125, "logits/rejected": -2.678218126296997, "logps/chosen": -274.0535583496094, "logps/rejected": -506.27978515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.892936706542969, "rewards/margins": 12.019636154174805, "rewards/rejected": -16.912572860717773, "step": 18001 }, { "epoch": 2.8, "learning_rate": 9.439379635873231e-07, "logits/chosen": -1.6325643062591553, "logits/rejected": -2.4920077323913574, "logps/chosen": -181.70445251464844, "logps/rejected": -367.95477294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.205550193786621, "rewards/margins": 10.42911434173584, "rewards/rejected": -20.63466453552246, "step": 18002 }, { "epoch": 2.8, "learning_rate": 9.432045230561751e-07, "logits/chosen": -2.6049821376800537, "logits/rejected": -2.660299777984619, "logps/chosen": -406.202880859375, "logps/rejected": -727.79345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.420154571533203, "rewards/margins": 12.313128471374512, "rewards/rejected": -20.73328399658203, "step": 18003 }, { "epoch": 2.8, "learning_rate": 9.424710825250273e-07, "logits/chosen": -1.8868656158447266, "logits/rejected": -2.6283652782440186, "logps/chosen": -417.6440124511719, "logps/rejected": -661.5670166015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.584400177001953, "rewards/margins": 10.634078979492188, "rewards/rejected": -16.21847915649414, "step": 18004 }, { "epoch": 2.8, "learning_rate": 9.417376419938792e-07, "logits/chosen": -1.7630027532577515, "logits/rejected": -2.501589298248291, "logps/chosen": -175.64633178710938, "logps/rejected": -265.2694091796875, "loss": 0.1097, "rewards/accuracies": 1.0, "rewards/chosen": -9.182759284973145, "rewards/margins": 4.095538139343262, "rewards/rejected": -13.278297424316406, "step": 18005 }, { "epoch": 2.8, "learning_rate": 9.410042014627314e-07, "logits/chosen": -2.695690393447876, "logits/rejected": -2.755405902862549, "logps/chosen": -90.86000061035156, "logps/rejected": -244.39077758789062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.049469947814941, "rewards/margins": 10.12485408782959, "rewards/rejected": -16.17432403564453, "step": 18006 }, { "epoch": 2.8, "learning_rate": 9.402707609315835e-07, "logits/chosen": -2.470953941345215, "logits/rejected": -2.4322593212127686, "logps/chosen": -412.6505432128906, "logps/rejected": -498.065673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.435413360595703, "rewards/margins": 12.587066650390625, "rewards/rejected": -23.022480010986328, "step": 18007 }, { "epoch": 2.8, "learning_rate": 9.395373204004357e-07, "logits/chosen": -2.676192283630371, "logits/rejected": -2.0222980976104736, "logps/chosen": -461.7640380859375, "logps/rejected": -453.838134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.201542854309082, "rewards/margins": 10.38401985168457, "rewards/rejected": -16.58556365966797, "step": 18008 }, { "epoch": 2.8, "learning_rate": 9.388038798692878e-07, "logits/chosen": -2.5391483306884766, "logits/rejected": -2.7880423069000244, "logps/chosen": -713.6597900390625, "logps/rejected": -593.6798095703125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.642007827758789, "rewards/margins": 9.025142669677734, "rewards/rejected": -17.66714859008789, "step": 18009 }, { "epoch": 2.8, "learning_rate": 9.3807043933814e-07, "logits/chosen": -1.6116770505905151, "logits/rejected": -2.5984585285186768, "logps/chosen": -202.47500610351562, "logps/rejected": -394.77001953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -12.594437599182129, "rewards/margins": 9.364015579223633, "rewards/rejected": -21.958454132080078, "step": 18010 }, { "epoch": 2.8, "learning_rate": 9.373369988069921e-07, "logits/chosen": -1.781042456626892, "logits/rejected": -2.2095224857330322, "logps/chosen": -253.84323120117188, "logps/rejected": -300.3187561035156, "loss": 0.0275, "rewards/accuracies": 1.0, "rewards/chosen": -6.570101737976074, "rewards/margins": 9.617359161376953, "rewards/rejected": -16.187461853027344, "step": 18011 }, { "epoch": 2.8, "learning_rate": 9.366035582758443e-07, "logits/chosen": -2.881112575531006, "logits/rejected": -2.3095755577087402, "logps/chosen": -223.88259887695312, "logps/rejected": -196.274658203125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.000875949859619, "rewards/margins": 8.951850891113281, "rewards/rejected": -14.952726364135742, "step": 18012 }, { "epoch": 2.8, "learning_rate": 9.358701177446964e-07, "logits/chosen": -2.5505475997924805, "logits/rejected": -2.918200731277466, "logps/chosen": -219.06275939941406, "logps/rejected": -339.8543395996094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.883330345153809, "rewards/margins": 9.978155136108398, "rewards/rejected": -16.86148452758789, "step": 18013 }, { "epoch": 2.8, "learning_rate": 9.351366772135486e-07, "logits/chosen": -2.2782950401306152, "logits/rejected": -2.5889341831207275, "logps/chosen": -299.36810302734375, "logps/rejected": -354.2264709472656, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.629502773284912, "rewards/margins": 8.928771018981934, "rewards/rejected": -14.558273315429688, "step": 18014 }, { "epoch": 2.8, "learning_rate": 9.344032366824004e-07, "logits/chosen": -2.361168146133423, "logits/rejected": -2.7465834617614746, "logps/chosen": -216.13079833984375, "logps/rejected": -445.4288635253906, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -8.002002716064453, "rewards/margins": 8.714466094970703, "rewards/rejected": -16.716468811035156, "step": 18015 }, { "epoch": 2.8, "learning_rate": 9.336697961512526e-07, "logits/chosen": -2.6731326580047607, "logits/rejected": -2.610680341720581, "logps/chosen": -176.07577514648438, "logps/rejected": -242.0174560546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -1.6638320684432983, "rewards/margins": 9.802067756652832, "rewards/rejected": -11.465899467468262, "step": 18016 }, { "epoch": 2.8, "learning_rate": 9.329363556201047e-07, "logits/chosen": -2.6128666400909424, "logits/rejected": -2.775578737258911, "logps/chosen": -120.2939453125, "logps/rejected": -200.43798828125, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -5.467049598693848, "rewards/margins": 5.533053398132324, "rewards/rejected": -11.000102996826172, "step": 18017 }, { "epoch": 2.8, "learning_rate": 9.322029150889569e-07, "logits/chosen": -2.512678384780884, "logits/rejected": -2.939147472381592, "logps/chosen": -312.6336364746094, "logps/rejected": -540.9410400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.567295074462891, "rewards/margins": 12.583625793457031, "rewards/rejected": -17.150920867919922, "step": 18018 }, { "epoch": 2.8, "learning_rate": 9.31469474557809e-07, "logits/chosen": -2.770817756652832, "logits/rejected": -2.1302804946899414, "logps/chosen": -340.02972412109375, "logps/rejected": -350.72552490234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.052194118499756, "rewards/margins": 9.508506774902344, "rewards/rejected": -13.560700416564941, "step": 18019 }, { "epoch": 2.8, "learning_rate": 9.307360340266612e-07, "logits/chosen": -2.0720951557159424, "logits/rejected": -2.7396509647369385, "logps/chosen": -342.33258056640625, "logps/rejected": -626.1012573242188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.033220291137695, "rewards/margins": 10.682140350341797, "rewards/rejected": -19.71535873413086, "step": 18020 }, { "epoch": 2.8, "learning_rate": 9.300025934955133e-07, "logits/chosen": -2.5371897220611572, "logits/rejected": -2.0048911571502686, "logps/chosen": -462.0448303222656, "logps/rejected": -416.5718078613281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.988749504089355, "rewards/margins": 13.257705688476562, "rewards/rejected": -22.246456146240234, "step": 18021 }, { "epoch": 2.8, "learning_rate": 9.292691529643654e-07, "logits/chosen": -2.639806032180786, "logits/rejected": -1.4241527318954468, "logps/chosen": -494.540771484375, "logps/rejected": -215.6335906982422, "loss": 0.2759, "rewards/accuracies": 1.0, "rewards/chosen": -10.944194793701172, "rewards/margins": 2.6573293209075928, "rewards/rejected": -13.601524353027344, "step": 18022 }, { "epoch": 2.8, "learning_rate": 9.285357124332176e-07, "logits/chosen": -2.5350613594055176, "logits/rejected": -2.665214776992798, "logps/chosen": -255.53414916992188, "logps/rejected": -486.8577880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.366767883300781, "rewards/margins": 12.69957447052002, "rewards/rejected": -17.066341400146484, "step": 18023 }, { "epoch": 2.8, "learning_rate": 9.278022719020697e-07, "logits/chosen": -2.07904052734375, "logits/rejected": -2.1689083576202393, "logps/chosen": -348.7110595703125, "logps/rejected": -397.0279541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.046356201171875, "rewards/margins": 10.97681999206543, "rewards/rejected": -17.023176193237305, "step": 18024 }, { "epoch": 2.8, "learning_rate": 9.270688313709217e-07, "logits/chosen": -2.497343063354492, "logits/rejected": -2.4945192337036133, "logps/chosen": -111.91326904296875, "logps/rejected": -152.86085510253906, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -6.208007335662842, "rewards/margins": 5.770106315612793, "rewards/rejected": -11.978113174438477, "step": 18025 }, { "epoch": 2.8, "learning_rate": 9.263353908397737e-07, "logits/chosen": -2.9131734371185303, "logits/rejected": -2.5635876655578613, "logps/chosen": -259.5622253417969, "logps/rejected": -423.07568359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.383835792541504, "rewards/margins": 14.209444046020508, "rewards/rejected": -17.593278884887695, "step": 18026 }, { "epoch": 2.8, "learning_rate": 9.256019503086259e-07, "logits/chosen": -2.472121238708496, "logits/rejected": -1.7518419027328491, "logps/chosen": -462.6080627441406, "logps/rejected": -441.0132141113281, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -8.011171340942383, "rewards/margins": 8.848882675170898, "rewards/rejected": -16.86005401611328, "step": 18027 }, { "epoch": 2.8, "learning_rate": 9.24868509777478e-07, "logits/chosen": -1.504080057144165, "logits/rejected": -2.8177080154418945, "logps/chosen": -487.0152893066406, "logps/rejected": -648.15625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.959821701049805, "rewards/margins": 13.413208961486816, "rewards/rejected": -18.373031616210938, "step": 18028 }, { "epoch": 2.8, "learning_rate": 9.241350692463302e-07, "logits/chosen": -1.6113414764404297, "logits/rejected": -2.6433894634246826, "logps/chosen": -139.08486938476562, "logps/rejected": -384.33050537109375, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/chosen": -7.808721542358398, "rewards/margins": 9.813665390014648, "rewards/rejected": -17.622386932373047, "step": 18029 }, { "epoch": 2.8, "learning_rate": 9.234016287151823e-07, "logits/chosen": -2.535306453704834, "logits/rejected": -2.608083486557007, "logps/chosen": -359.1401062011719, "logps/rejected": -462.44775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.264538764953613, "rewards/margins": 11.334338188171387, "rewards/rejected": -17.598876953125, "step": 18030 }, { "epoch": 2.8, "learning_rate": 9.226681881840345e-07, "logits/chosen": -2.6864209175109863, "logits/rejected": -2.7433671951293945, "logps/chosen": -139.0592498779297, "logps/rejected": -231.6770782470703, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -6.027069091796875, "rewards/margins": 7.1577324867248535, "rewards/rejected": -13.18480110168457, "step": 18031 }, { "epoch": 2.8, "learning_rate": 9.219347476528866e-07, "logits/chosen": -2.7031009197235107, "logits/rejected": -2.2315566539764404, "logps/chosen": -409.7369689941406, "logps/rejected": -392.1585693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.494017124176025, "rewards/margins": 12.187331199645996, "rewards/rejected": -17.68134880065918, "step": 18032 }, { "epoch": 2.8, "learning_rate": 9.212013071217388e-07, "logits/chosen": -2.3934476375579834, "logits/rejected": -1.6267091035842896, "logps/chosen": -240.21292114257812, "logps/rejected": -285.5609130859375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.4309797286987305, "rewards/margins": 11.14045238494873, "rewards/rejected": -17.57143211364746, "step": 18033 }, { "epoch": 2.8, "learning_rate": 9.204678665905909e-07, "logits/chosen": -1.598936915397644, "logits/rejected": -2.222105026245117, "logps/chosen": -141.82537841796875, "logps/rejected": -296.418701171875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.622784614562988, "rewards/margins": 7.691161155700684, "rewards/rejected": -14.313945770263672, "step": 18034 }, { "epoch": 2.8, "learning_rate": 9.197344260594429e-07, "logits/chosen": -2.3201236724853516, "logits/rejected": -2.284456968307495, "logps/chosen": -214.49920654296875, "logps/rejected": -272.1061706542969, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.65768051147461, "rewards/margins": 7.970499038696289, "rewards/rejected": -17.6281795501709, "step": 18035 }, { "epoch": 2.8, "learning_rate": 9.19000985528295e-07, "logits/chosen": -2.4150993824005127, "logits/rejected": -2.504608392715454, "logps/chosen": -136.62646484375, "logps/rejected": -241.898193359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.889273166656494, "rewards/margins": 9.225119590759277, "rewards/rejected": -15.114392280578613, "step": 18036 }, { "epoch": 2.81, "learning_rate": 9.182675449971472e-07, "logits/chosen": -2.6239802837371826, "logits/rejected": -2.512357711791992, "logps/chosen": -439.5808410644531, "logps/rejected": -551.4208374023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.1601667404174805, "rewards/margins": 12.800704002380371, "rewards/rejected": -19.96087074279785, "step": 18037 }, { "epoch": 2.81, "learning_rate": 9.175341044659992e-07, "logits/chosen": -2.4766299724578857, "logits/rejected": -2.069197416305542, "logps/chosen": -527.6464233398438, "logps/rejected": -449.6224365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.80873441696167, "rewards/margins": 10.45291519165039, "rewards/rejected": -18.26165008544922, "step": 18038 }, { "epoch": 2.81, "learning_rate": 9.168006639348514e-07, "logits/chosen": -1.2107375860214233, "logits/rejected": -2.168741464614868, "logps/chosen": -127.24388122558594, "logps/rejected": -390.2032470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.743786334991455, "rewards/margins": 12.379478454589844, "rewards/rejected": -20.12326431274414, "step": 18039 }, { "epoch": 2.81, "learning_rate": 9.160672234037035e-07, "logits/chosen": -2.614431142807007, "logits/rejected": -1.46026611328125, "logps/chosen": -411.44134521484375, "logps/rejected": -380.73309326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.287981986999512, "rewards/margins": 12.3045654296875, "rewards/rejected": -17.592548370361328, "step": 18040 }, { "epoch": 2.81, "learning_rate": 9.153337828725556e-07, "logits/chosen": -2.6691935062408447, "logits/rejected": -2.6621665954589844, "logps/chosen": -408.2636413574219, "logps/rejected": -693.1586303710938, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.244443893432617, "rewards/margins": 9.225269317626953, "rewards/rejected": -17.469715118408203, "step": 18041 }, { "epoch": 2.81, "learning_rate": 9.146003423414078e-07, "logits/chosen": -2.4608991146087646, "logits/rejected": -2.540181875228882, "logps/chosen": -269.998779296875, "logps/rejected": -453.2132873535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.655270576477051, "rewards/margins": 14.554643630981445, "rewards/rejected": -20.209915161132812, "step": 18042 }, { "epoch": 2.81, "learning_rate": 9.138669018102599e-07, "logits/chosen": -2.6191155910491943, "logits/rejected": -2.1888017654418945, "logps/chosen": -641.973388671875, "logps/rejected": -555.9288940429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.030122756958008, "rewards/margins": 11.388778686523438, "rewards/rejected": -16.418901443481445, "step": 18043 }, { "epoch": 2.81, "learning_rate": 9.131334612791121e-07, "logits/chosen": -1.545562982559204, "logits/rejected": -2.4515552520751953, "logps/chosen": -217.61468505859375, "logps/rejected": -449.02276611328125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.90893030166626, "rewards/margins": 7.396433353424072, "rewards/rejected": -13.305363655090332, "step": 18044 }, { "epoch": 2.81, "learning_rate": 9.12400020747964e-07, "logits/chosen": -2.4503958225250244, "logits/rejected": -2.7915849685668945, "logps/chosen": -85.8663330078125, "logps/rejected": -376.71746826171875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -6.304793357849121, "rewards/margins": 10.100345611572266, "rewards/rejected": -16.40513801574707, "step": 18045 }, { "epoch": 2.81, "learning_rate": 9.116665802168162e-07, "logits/chosen": -2.190250873565674, "logits/rejected": -2.9091880321502686, "logps/chosen": -223.36085510253906, "logps/rejected": -409.4005126953125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -9.572684288024902, "rewards/margins": 8.49897289276123, "rewards/rejected": -18.071657180786133, "step": 18046 }, { "epoch": 2.81, "learning_rate": 9.109331396856683e-07, "logits/chosen": -2.8594565391540527, "logits/rejected": -2.3400120735168457, "logps/chosen": -634.0543212890625, "logps/rejected": -476.814453125, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -8.47632884979248, "rewards/margins": 5.995198726654053, "rewards/rejected": -14.471527099609375, "step": 18047 }, { "epoch": 2.81, "learning_rate": 9.101996991545205e-07, "logits/chosen": -1.7017858028411865, "logits/rejected": -2.589998960494995, "logps/chosen": -139.53085327148438, "logps/rejected": -612.50439453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.462032318115234, "rewards/margins": 10.309139251708984, "rewards/rejected": -19.77117156982422, "step": 18048 }, { "epoch": 2.81, "learning_rate": 9.094662586233726e-07, "logits/chosen": -2.512871026992798, "logits/rejected": -2.072390556335449, "logps/chosen": -564.1090087890625, "logps/rejected": -649.3768310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.264524459838867, "rewards/margins": 15.084317207336426, "rewards/rejected": -21.34884262084961, "step": 18049 }, { "epoch": 2.81, "learning_rate": 9.087328180922248e-07, "logits/chosen": -2.754549264907837, "logits/rejected": -2.393538236618042, "logps/chosen": -311.67047119140625, "logps/rejected": -295.19940185546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.843198299407959, "rewards/margins": 9.132549285888672, "rewards/rejected": -14.975748062133789, "step": 18050 }, { "epoch": 2.81, "learning_rate": 9.079993775610768e-07, "logits/chosen": -2.52115797996521, "logits/rejected": -2.2454564571380615, "logps/chosen": -277.7474670410156, "logps/rejected": -431.9539794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.568502902984619, "rewards/margins": 13.2522554397583, "rewards/rejected": -19.820758819580078, "step": 18051 }, { "epoch": 2.81, "learning_rate": 9.07265937029929e-07, "logits/chosen": -2.1767539978027344, "logits/rejected": -2.754373073577881, "logps/chosen": -217.93589782714844, "logps/rejected": -506.98431396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.013606071472168, "rewards/margins": 13.805582046508789, "rewards/rejected": -20.819189071655273, "step": 18052 }, { "epoch": 2.81, "learning_rate": 9.065324964987811e-07, "logits/chosen": -1.9586338996887207, "logits/rejected": -2.7254464626312256, "logps/chosen": -419.94012451171875, "logps/rejected": -708.4193725585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -12.210128784179688, "rewards/margins": 12.354635238647461, "rewards/rejected": -24.564762115478516, "step": 18053 }, { "epoch": 2.81, "learning_rate": 9.057990559676331e-07, "logits/chosen": -1.2628583908081055, "logits/rejected": -2.3120715618133545, "logps/chosen": -192.0200653076172, "logps/rejected": -340.376953125, "loss": 0.0499, "rewards/accuracies": 1.0, "rewards/chosen": -12.89480972290039, "rewards/margins": 6.43220853805542, "rewards/rejected": -19.32701873779297, "step": 18054 }, { "epoch": 2.81, "learning_rate": 9.050656154364852e-07, "logits/chosen": -2.729527473449707, "logits/rejected": -2.679806709289551, "logps/chosen": -201.37179565429688, "logps/rejected": -267.7742004394531, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.310425758361816, "rewards/margins": 9.420494079589844, "rewards/rejected": -16.730920791625977, "step": 18055 }, { "epoch": 2.81, "learning_rate": 9.043321749053374e-07, "logits/chosen": -2.5362460613250732, "logits/rejected": -2.6526424884796143, "logps/chosen": -389.51025390625, "logps/rejected": -480.24615478515625, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": -12.526336669921875, "rewards/margins": 6.182443141937256, "rewards/rejected": -18.70878028869629, "step": 18056 }, { "epoch": 2.81, "learning_rate": 9.035987343741895e-07, "logits/chosen": -1.6791733503341675, "logits/rejected": -2.6753528118133545, "logps/chosen": -225.03787231445312, "logps/rejected": -383.2490234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.57674503326416, "rewards/margins": 8.107954025268555, "rewards/rejected": -16.6846981048584, "step": 18057 }, { "epoch": 2.81, "learning_rate": 9.028652938430417e-07, "logits/chosen": -2.6651148796081543, "logits/rejected": -2.0990796089172363, "logps/chosen": -233.3443603515625, "logps/rejected": -314.8329162597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.192888259887695, "rewards/margins": 12.841275215148926, "rewards/rejected": -18.034162521362305, "step": 18058 }, { "epoch": 2.81, "learning_rate": 9.021318533118938e-07, "logits/chosen": -1.1560325622558594, "logits/rejected": -2.0739383697509766, "logps/chosen": -376.2862243652344, "logps/rejected": -409.3387145996094, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -9.053898811340332, "rewards/margins": 7.795407295227051, "rewards/rejected": -16.849306106567383, "step": 18059 }, { "epoch": 2.81, "learning_rate": 9.01398412780746e-07, "logits/chosen": -2.4341983795166016, "logits/rejected": -2.7758564949035645, "logps/chosen": -233.9129638671875, "logps/rejected": -367.34716796875, "loss": 0.0724, "rewards/accuracies": 1.0, "rewards/chosen": -8.519315719604492, "rewards/margins": 4.313416957855225, "rewards/rejected": -12.832733154296875, "step": 18060 }, { "epoch": 2.81, "learning_rate": 9.006649722495981e-07, "logits/chosen": -1.1518899202346802, "logits/rejected": -2.8064053058624268, "logps/chosen": -193.94146728515625, "logps/rejected": -553.0445556640625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -10.857942581176758, "rewards/margins": 8.294120788574219, "rewards/rejected": -19.152063369750977, "step": 18061 }, { "epoch": 2.81, "learning_rate": 8.999315317184502e-07, "logits/chosen": -2.9182257652282715, "logits/rejected": -2.5935378074645996, "logps/chosen": -513.2025756835938, "logps/rejected": -390.1646728515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.518503189086914, "rewards/margins": 8.868729591369629, "rewards/rejected": -17.38723373413086, "step": 18062 }, { "epoch": 2.81, "learning_rate": 8.991980911873023e-07, "logits/chosen": -1.7259966135025024, "logits/rejected": -2.5902953147888184, "logps/chosen": -143.5975341796875, "logps/rejected": -378.9074401855469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.562440872192383, "rewards/margins": 9.5058012008667, "rewards/rejected": -17.068241119384766, "step": 18063 }, { "epoch": 2.81, "learning_rate": 8.984646506561543e-07, "logits/chosen": -1.6679002046585083, "logits/rejected": -2.598794460296631, "logps/chosen": -351.70452880859375, "logps/rejected": -497.4251708984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.987787246704102, "rewards/margins": 11.571829795837402, "rewards/rejected": -17.559616088867188, "step": 18064 }, { "epoch": 2.81, "learning_rate": 8.977312101250064e-07, "logits/chosen": -1.9850600957870483, "logits/rejected": -2.345987558364868, "logps/chosen": -166.41552734375, "logps/rejected": -380.2791748046875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.840075492858887, "rewards/margins": 11.67372989654541, "rewards/rejected": -17.513805389404297, "step": 18065 }, { "epoch": 2.81, "learning_rate": 8.969977695938585e-07, "logits/chosen": -1.3829307556152344, "logits/rejected": -2.418240547180176, "logps/chosen": -172.62094116210938, "logps/rejected": -341.88690185546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.232643127441406, "rewards/margins": 8.2160062789917, "rewards/rejected": -16.448650360107422, "step": 18066 }, { "epoch": 2.81, "learning_rate": 8.962643290627107e-07, "logits/chosen": -2.4506280422210693, "logits/rejected": -1.251665711402893, "logps/chosen": -400.26025390625, "logps/rejected": -321.24456787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.283308029174805, "rewards/margins": 10.737752914428711, "rewards/rejected": -20.021060943603516, "step": 18067 }, { "epoch": 2.81, "learning_rate": 8.955308885315628e-07, "logits/chosen": -2.810077667236328, "logits/rejected": -2.147782802581787, "logps/chosen": -363.25445556640625, "logps/rejected": -351.6101989746094, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.226513862609863, "rewards/margins": 8.315357208251953, "rewards/rejected": -15.541871070861816, "step": 18068 }, { "epoch": 2.81, "learning_rate": 8.94797448000415e-07, "logits/chosen": -2.3739728927612305, "logits/rejected": -2.59220814704895, "logps/chosen": -171.32794189453125, "logps/rejected": -431.1164245605469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.8551788330078125, "rewards/margins": 13.51574420928955, "rewards/rejected": -19.370922088623047, "step": 18069 }, { "epoch": 2.81, "learning_rate": 8.940640074692671e-07, "logits/chosen": -2.7379815578460693, "logits/rejected": -2.5626680850982666, "logps/chosen": -287.7823486328125, "logps/rejected": -229.20376586914062, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": -7.819255352020264, "rewards/margins": 7.000958442687988, "rewards/rejected": -14.820213317871094, "step": 18070 }, { "epoch": 2.81, "learning_rate": 8.933305669381193e-07, "logits/chosen": -2.4800124168395996, "logits/rejected": -2.9352989196777344, "logps/chosen": -114.19392395019531, "logps/rejected": -367.69476318359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.97977352142334, "rewards/margins": 11.171356201171875, "rewards/rejected": -19.15113067626953, "step": 18071 }, { "epoch": 2.81, "learning_rate": 8.925971264069714e-07, "logits/chosen": -2.4784882068634033, "logits/rejected": -2.077500343322754, "logps/chosen": -1299.5888671875, "logps/rejected": -861.62744140625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -13.210198402404785, "rewards/margins": 10.018524169921875, "rewards/rejected": -23.228721618652344, "step": 18072 }, { "epoch": 2.81, "learning_rate": 8.918636858758236e-07, "logits/chosen": -2.8106181621551514, "logits/rejected": -2.820652723312378, "logps/chosen": -337.1322326660156, "logps/rejected": -387.0948791503906, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -8.516111373901367, "rewards/margins": 7.979318618774414, "rewards/rejected": -16.49542999267578, "step": 18073 }, { "epoch": 2.81, "learning_rate": 8.911302453446754e-07, "logits/chosen": -2.601598024368286, "logits/rejected": -2.7131786346435547, "logps/chosen": -512.1907958984375, "logps/rejected": -795.6822509765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.898050308227539, "rewards/margins": 8.967758178710938, "rewards/rejected": -15.865809440612793, "step": 18074 }, { "epoch": 2.81, "learning_rate": 8.903968048135276e-07, "logits/chosen": -2.2219021320343018, "logits/rejected": -2.9517736434936523, "logps/chosen": -149.3330841064453, "logps/rejected": -746.2490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.074825763702393, "rewards/margins": 10.478137969970703, "rewards/rejected": -15.552963256835938, "step": 18075 }, { "epoch": 2.81, "learning_rate": 8.896633642823797e-07, "logits/chosen": -2.8752200603485107, "logits/rejected": -2.8760838508605957, "logps/chosen": -223.02191162109375, "logps/rejected": -323.936767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.063519477844238, "rewards/margins": 9.329126358032227, "rewards/rejected": -17.39264678955078, "step": 18076 }, { "epoch": 2.81, "learning_rate": 8.889299237512319e-07, "logits/chosen": -2.669377088546753, "logits/rejected": -2.614964723587036, "logps/chosen": -213.73776245117188, "logps/rejected": -431.3819580078125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -9.295547485351562, "rewards/margins": 10.75197982788086, "rewards/rejected": -20.047527313232422, "step": 18077 }, { "epoch": 2.81, "learning_rate": 8.88196483220084e-07, "logits/chosen": -2.245509147644043, "logits/rejected": -2.4370100498199463, "logps/chosen": -507.8880615234375, "logps/rejected": -385.9384765625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -8.726157188415527, "rewards/margins": 5.751492023468018, "rewards/rejected": -14.477649688720703, "step": 18078 }, { "epoch": 2.81, "learning_rate": 8.874630426889362e-07, "logits/chosen": -1.7357035875320435, "logits/rejected": -2.202122449874878, "logps/chosen": -232.58091735839844, "logps/rejected": -347.5644836425781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.713895320892334, "rewards/margins": 8.810755729675293, "rewards/rejected": -15.524650573730469, "step": 18079 }, { "epoch": 2.81, "learning_rate": 8.867296021577883e-07, "logits/chosen": -2.6196303367614746, "logits/rejected": -2.204286813735962, "logps/chosen": -467.3669738769531, "logps/rejected": -415.1067810058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.836531639099121, "rewards/margins": 11.805898666381836, "rewards/rejected": -15.64242935180664, "step": 18080 }, { "epoch": 2.81, "learning_rate": 8.859961616266405e-07, "logits/chosen": -1.1670819520950317, "logits/rejected": -2.7567219734191895, "logps/chosen": -154.73770141601562, "logps/rejected": -608.51123046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.483573913574219, "rewards/margins": 8.603334426879883, "rewards/rejected": -16.0869083404541, "step": 18081 }, { "epoch": 2.81, "learning_rate": 8.852627210954926e-07, "logits/chosen": -2.5208775997161865, "logits/rejected": -2.513185501098633, "logps/chosen": -118.07395935058594, "logps/rejected": -272.35101318359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.767752647399902, "rewards/margins": 12.104362487792969, "rewards/rejected": -18.872114181518555, "step": 18082 }, { "epoch": 2.81, "learning_rate": 8.845292805643447e-07, "logits/chosen": -1.292963981628418, "logits/rejected": -2.4588096141815186, "logps/chosen": -140.72515869140625, "logps/rejected": -380.601806640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.779875755310059, "rewards/margins": 9.582014083862305, "rewards/rejected": -17.361888885498047, "step": 18083 }, { "epoch": 2.81, "learning_rate": 8.837958400331967e-07, "logits/chosen": -2.0755481719970703, "logits/rejected": -2.8216631412506104, "logps/chosen": -157.7274169921875, "logps/rejected": -414.1062316894531, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.493597030639648, "rewards/margins": 8.235898971557617, "rewards/rejected": -16.729496002197266, "step": 18084 }, { "epoch": 2.81, "learning_rate": 8.830623995020489e-07, "logits/chosen": -2.6365556716918945, "logits/rejected": -1.013299822807312, "logps/chosen": -243.54624938964844, "logps/rejected": -156.90975952148438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.703919410705566, "rewards/margins": 9.413400650024414, "rewards/rejected": -14.117321014404297, "step": 18085 }, { "epoch": 2.81, "learning_rate": 8.823289589709009e-07, "logits/chosen": -2.0881645679473877, "logits/rejected": -2.9262123107910156, "logps/chosen": -177.33621215820312, "logps/rejected": -509.49249267578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.655464172363281, "rewards/margins": 10.186420440673828, "rewards/rejected": -19.84188461303711, "step": 18086 }, { "epoch": 2.81, "learning_rate": 8.81595518439753e-07, "logits/chosen": -2.3523199558258057, "logits/rejected": -2.2198445796966553, "logps/chosen": -244.83343505859375, "logps/rejected": -311.39227294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.366004943847656, "rewards/margins": 9.972827911376953, "rewards/rejected": -19.33883285522461, "step": 18087 }, { "epoch": 2.81, "learning_rate": 8.808620779086052e-07, "logits/chosen": -1.9932688474655151, "logits/rejected": -2.4439826011657715, "logps/chosen": -394.882080078125, "logps/rejected": -470.899658203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.913228988647461, "rewards/margins": 9.107582092285156, "rewards/rejected": -19.020811080932617, "step": 18088 }, { "epoch": 2.81, "learning_rate": 8.801286373774573e-07, "logits/chosen": -2.535670518875122, "logits/rejected": -1.383708119392395, "logps/chosen": -221.4032745361328, "logps/rejected": -272.5840148925781, "loss": 1.3365, "rewards/accuracies": 0.5, "rewards/chosen": -9.568313598632812, "rewards/margins": 6.874535083770752, "rewards/rejected": -16.442848205566406, "step": 18089 }, { "epoch": 2.81, "learning_rate": 8.793951968463095e-07, "logits/chosen": -2.251194953918457, "logits/rejected": -2.586984157562256, "logps/chosen": -205.05630493164062, "logps/rejected": -437.99664306640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.813901901245117, "rewards/margins": 10.568527221679688, "rewards/rejected": -18.382429122924805, "step": 18090 }, { "epoch": 2.81, "learning_rate": 8.786617563151615e-07, "logits/chosen": -1.3160436153411865, "logits/rejected": -2.535655975341797, "logps/chosen": -393.44091796875, "logps/rejected": -612.396240234375, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -7.950146198272705, "rewards/margins": 6.410828113555908, "rewards/rejected": -14.360974311828613, "step": 18091 }, { "epoch": 2.81, "learning_rate": 8.779283157840137e-07, "logits/chosen": -2.2122976779937744, "logits/rejected": -2.6111338138580322, "logps/chosen": -127.68445587158203, "logps/rejected": -299.5604248046875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -7.276914119720459, "rewards/margins": 7.37270450592041, "rewards/rejected": -14.649618148803711, "step": 18092 }, { "epoch": 2.81, "learning_rate": 8.771948752528658e-07, "logits/chosen": -1.9021806716918945, "logits/rejected": -2.501312494277954, "logps/chosen": -281.7128601074219, "logps/rejected": -519.035888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.474386215209961, "rewards/margins": 10.114124298095703, "rewards/rejected": -16.588510513305664, "step": 18093 }, { "epoch": 2.81, "learning_rate": 8.76461434721718e-07, "logits/chosen": -0.6348708868026733, "logits/rejected": -1.8188823461532593, "logps/chosen": -148.39303588867188, "logps/rejected": -378.97991943359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.76315689086914, "rewards/margins": 10.020515441894531, "rewards/rejected": -18.783672332763672, "step": 18094 }, { "epoch": 2.81, "learning_rate": 8.757279941905701e-07, "logits/chosen": -2.9233975410461426, "logits/rejected": -3.135901927947998, "logps/chosen": -115.01215362548828, "logps/rejected": -233.7209930419922, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.575976371765137, "rewards/margins": 10.184820175170898, "rewards/rejected": -14.760795593261719, "step": 18095 }, { "epoch": 2.81, "learning_rate": 8.749945536594222e-07, "logits/chosen": -2.438988208770752, "logits/rejected": -2.6035308837890625, "logps/chosen": -421.08380126953125, "logps/rejected": -356.7137145996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.606936931610107, "rewards/margins": 11.274660110473633, "rewards/rejected": -16.8815975189209, "step": 18096 }, { "epoch": 2.81, "learning_rate": 8.742611131282743e-07, "logits/chosen": -2.507032871246338, "logits/rejected": -2.5585222244262695, "logps/chosen": -351.7864685058594, "logps/rejected": -422.75982666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.713675498962402, "rewards/margins": 12.544145584106445, "rewards/rejected": -18.257822036743164, "step": 18097 }, { "epoch": 2.81, "learning_rate": 8.735276725971265e-07, "logits/chosen": -2.2860629558563232, "logits/rejected": -2.1660549640655518, "logps/chosen": -193.93130493164062, "logps/rejected": -292.953369140625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.533633708953857, "rewards/margins": 8.731785774230957, "rewards/rejected": -14.265419960021973, "step": 18098 }, { "epoch": 2.81, "learning_rate": 8.727942320659785e-07, "logits/chosen": -1.1409846544265747, "logits/rejected": -2.3901634216308594, "logps/chosen": -291.59197998046875, "logps/rejected": -637.2218017578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.228748321533203, "rewards/margins": 16.803857803344727, "rewards/rejected": -24.03260612487793, "step": 18099 }, { "epoch": 2.81, "learning_rate": 8.720607915348307e-07, "logits/chosen": -1.791717767715454, "logits/rejected": -2.5441720485687256, "logps/chosen": -207.41989135742188, "logps/rejected": -406.9130859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.079702377319336, "rewards/margins": 9.631917953491211, "rewards/rejected": -19.711620330810547, "step": 18100 }, { "epoch": 2.82, "learning_rate": 8.713273510036827e-07, "logits/chosen": -2.430415630340576, "logits/rejected": -3.1091344356536865, "logps/chosen": -141.3546142578125, "logps/rejected": -557.085205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6534423828125, "rewards/margins": 12.788248062133789, "rewards/rejected": -19.44169044494629, "step": 18101 }, { "epoch": 2.82, "learning_rate": 8.705939104725349e-07, "logits/chosen": -1.5132588148117065, "logits/rejected": -2.636598825454712, "logps/chosen": -168.49026489257812, "logps/rejected": -687.459228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.438098907470703, "rewards/margins": 12.763304710388184, "rewards/rejected": -19.201404571533203, "step": 18102 }, { "epoch": 2.82, "learning_rate": 8.69860469941387e-07, "logits/chosen": -2.5424270629882812, "logits/rejected": -2.1407835483551025, "logps/chosen": -541.2483520507812, "logps/rejected": -515.7577514648438, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -8.923358917236328, "rewards/margins": 6.596275806427002, "rewards/rejected": -15.519634246826172, "step": 18103 }, { "epoch": 2.82, "learning_rate": 8.691270294102391e-07, "logits/chosen": -1.5364564657211304, "logits/rejected": -2.5125038623809814, "logps/chosen": -198.76963806152344, "logps/rejected": -594.98095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.555606842041016, "rewards/margins": 14.14786148071289, "rewards/rejected": -22.703468322753906, "step": 18104 }, { "epoch": 2.82, "learning_rate": 8.683935888790913e-07, "logits/chosen": -1.7130451202392578, "logits/rejected": -2.8071835041046143, "logps/chosen": -183.04318237304688, "logps/rejected": -545.0248413085938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.167909622192383, "rewards/margins": 13.81137752532959, "rewards/rejected": -21.979286193847656, "step": 18105 }, { "epoch": 2.82, "learning_rate": 8.676601483479433e-07, "logits/chosen": -2.7966084480285645, "logits/rejected": -2.645869255065918, "logps/chosen": -438.44256591796875, "logps/rejected": -447.6292419433594, "loss": 0.0967, "rewards/accuracies": 1.0, "rewards/chosen": -10.1301851272583, "rewards/margins": 6.206499099731445, "rewards/rejected": -16.336685180664062, "step": 18106 }, { "epoch": 2.82, "learning_rate": 8.669267078167955e-07, "logits/chosen": -1.5315937995910645, "logits/rejected": -2.1206352710723877, "logps/chosen": -165.10093688964844, "logps/rejected": -311.72552490234375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.582928657531738, "rewards/margins": 10.701854705810547, "rewards/rejected": -15.284784317016602, "step": 18107 }, { "epoch": 2.82, "learning_rate": 8.661932672856476e-07, "logits/chosen": -1.8177075386047363, "logits/rejected": -2.8473916053771973, "logps/chosen": -170.63418579101562, "logps/rejected": -485.00921630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.760321617126465, "rewards/margins": 12.041326522827148, "rewards/rejected": -19.801647186279297, "step": 18108 }, { "epoch": 2.82, "learning_rate": 8.654598267544998e-07, "logits/chosen": -1.1235837936401367, "logits/rejected": -2.3650455474853516, "logps/chosen": -131.32508850097656, "logps/rejected": -351.8473815917969, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.915945053100586, "rewards/margins": 9.26024341583252, "rewards/rejected": -14.176189422607422, "step": 18109 }, { "epoch": 2.82, "learning_rate": 8.647263862233518e-07, "logits/chosen": -1.2975095510482788, "logits/rejected": -2.22951078414917, "logps/chosen": -101.67125701904297, "logps/rejected": -284.8478088378906, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -5.042957782745361, "rewards/margins": 9.032681465148926, "rewards/rejected": -14.075638771057129, "step": 18110 }, { "epoch": 2.82, "learning_rate": 8.639929456922039e-07, "logits/chosen": -1.9265276193618774, "logits/rejected": -2.675814390182495, "logps/chosen": -338.2609558105469, "logps/rejected": -450.4906311035156, "loss": 0.0696, "rewards/accuracies": 1.0, "rewards/chosen": -8.981019973754883, "rewards/margins": 11.825653076171875, "rewards/rejected": -20.806673049926758, "step": 18111 }, { "epoch": 2.82, "learning_rate": 8.63259505161056e-07, "logits/chosen": -2.288581609725952, "logits/rejected": -2.7365076541900635, "logps/chosen": -451.26983642578125, "logps/rejected": -504.5145568847656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.975203037261963, "rewards/margins": 9.997081756591797, "rewards/rejected": -14.972284317016602, "step": 18112 }, { "epoch": 2.82, "learning_rate": 8.625260646299082e-07, "logits/chosen": -1.4254528284072876, "logits/rejected": -2.5051376819610596, "logps/chosen": -118.65072631835938, "logps/rejected": -435.03240966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.8869404792785645, "rewards/margins": 14.413553237915039, "rewards/rejected": -19.300495147705078, "step": 18113 }, { "epoch": 2.82, "learning_rate": 8.617926240987603e-07, "logits/chosen": -2.5562000274658203, "logits/rejected": -2.9694457054138184, "logps/chosen": -510.78594970703125, "logps/rejected": -564.330810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.318598747253418, "rewards/margins": 11.13736629486084, "rewards/rejected": -17.455965042114258, "step": 18114 }, { "epoch": 2.82, "learning_rate": 8.610591835676125e-07, "logits/chosen": -0.6806235313415527, "logits/rejected": -2.813307046890259, "logps/chosen": -149.74339294433594, "logps/rejected": -653.8731079101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.814226150512695, "rewards/margins": 9.966928482055664, "rewards/rejected": -18.78115463256836, "step": 18115 }, { "epoch": 2.82, "learning_rate": 8.603257430364645e-07, "logits/chosen": -1.392728567123413, "logits/rejected": -1.9613877534866333, "logps/chosen": -205.87701416015625, "logps/rejected": -371.40325927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.287750720977783, "rewards/margins": 12.425539016723633, "rewards/rejected": -17.71329116821289, "step": 18116 }, { "epoch": 2.82, "learning_rate": 8.595923025053167e-07, "logits/chosen": -2.7933645248413086, "logits/rejected": -2.1305747032165527, "logps/chosen": -942.112060546875, "logps/rejected": -739.57080078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.937857151031494, "rewards/margins": 10.178335189819336, "rewards/rejected": -18.116191864013672, "step": 18117 }, { "epoch": 2.82, "learning_rate": 8.588588619741688e-07, "logits/chosen": -2.8373172283172607, "logits/rejected": -2.8973450660705566, "logps/chosen": -157.06935119628906, "logps/rejected": -279.0146484375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -7.5579681396484375, "rewards/margins": 5.917043685913086, "rewards/rejected": -13.475011825561523, "step": 18118 }, { "epoch": 2.82, "learning_rate": 8.58125421443021e-07, "logits/chosen": -2.7799313068389893, "logits/rejected": -2.6148881912231445, "logps/chosen": -224.5428924560547, "logps/rejected": -296.1602478027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.4677228927612305, "rewards/margins": 10.062812805175781, "rewards/rejected": -17.530536651611328, "step": 18119 }, { "epoch": 2.82, "learning_rate": 8.573919809118731e-07, "logits/chosen": -2.8526272773742676, "logits/rejected": -2.5509588718414307, "logps/chosen": -324.6693420410156, "logps/rejected": -349.80206298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.654894828796387, "rewards/margins": 12.474043846130371, "rewards/rejected": -21.128938674926758, "step": 18120 }, { "epoch": 2.82, "learning_rate": 8.566585403807252e-07, "logits/chosen": -2.0128517150878906, "logits/rejected": -2.877694606781006, "logps/chosen": -378.2160339355469, "logps/rejected": -536.2662963867188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.249517917633057, "rewards/margins": 12.303592681884766, "rewards/rejected": -18.553110122680664, "step": 18121 }, { "epoch": 2.82, "learning_rate": 8.559250998495772e-07, "logits/chosen": -2.8357183933258057, "logits/rejected": -2.8479857444763184, "logps/chosen": -353.55096435546875, "logps/rejected": -408.6748046875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.677485466003418, "rewards/margins": 11.22158432006836, "rewards/rejected": -17.899070739746094, "step": 18122 }, { "epoch": 2.82, "learning_rate": 8.551916593184294e-07, "logits/chosen": -2.023362874984741, "logits/rejected": -2.341421127319336, "logps/chosen": -148.84556579589844, "logps/rejected": -465.5865173339844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.8129377365112305, "rewards/margins": 13.843478202819824, "rewards/rejected": -18.656415939331055, "step": 18123 }, { "epoch": 2.82, "learning_rate": 8.544582187872815e-07, "logits/chosen": -2.146824598312378, "logits/rejected": -2.6870577335357666, "logps/chosen": -263.6545715332031, "logps/rejected": -429.6007995605469, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.80850887298584, "rewards/margins": 10.35472583770752, "rewards/rejected": -16.16323471069336, "step": 18124 }, { "epoch": 2.82, "learning_rate": 8.537247782561336e-07, "logits/chosen": -2.6636862754821777, "logits/rejected": -2.851989507675171, "logps/chosen": -400.97393798828125, "logps/rejected": -393.6210632324219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.172560453414917, "rewards/margins": 9.516733169555664, "rewards/rejected": -12.689292907714844, "step": 18125 }, { "epoch": 2.82, "learning_rate": 8.529913377249857e-07, "logits/chosen": -2.677671432495117, "logits/rejected": -1.6940059661865234, "logps/chosen": -425.3077392578125, "logps/rejected": -263.32513427734375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.056323051452637, "rewards/margins": 9.400863647460938, "rewards/rejected": -16.45718765258789, "step": 18126 }, { "epoch": 2.82, "learning_rate": 8.522578971938378e-07, "logits/chosen": -2.5704262256622314, "logits/rejected": -1.68962824344635, "logps/chosen": -335.04595947265625, "logps/rejected": -206.171875, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -6.188832759857178, "rewards/margins": 5.448748588562012, "rewards/rejected": -11.637580871582031, "step": 18127 }, { "epoch": 2.82, "learning_rate": 8.5152445666269e-07, "logits/chosen": -1.4566895961761475, "logits/rejected": -2.382108688354492, "logps/chosen": -199.27670288085938, "logps/rejected": -358.84552001953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.727770805358887, "rewards/margins": 9.761268615722656, "rewards/rejected": -15.489039421081543, "step": 18128 }, { "epoch": 2.82, "learning_rate": 8.507910161315421e-07, "logits/chosen": -2.314070224761963, "logits/rejected": -2.3966712951660156, "logps/chosen": -140.62478637695312, "logps/rejected": -157.9735107421875, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": -7.27296257019043, "rewards/margins": 5.068589210510254, "rewards/rejected": -12.341552734375, "step": 18129 }, { "epoch": 2.82, "learning_rate": 8.500575756003943e-07, "logits/chosen": -3.0219430923461914, "logits/rejected": -3.0181658267974854, "logps/chosen": -826.5392456054688, "logps/rejected": -685.4844970703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.788413047790527, "rewards/margins": 10.784669876098633, "rewards/rejected": -18.573081970214844, "step": 18130 }, { "epoch": 2.82, "learning_rate": 8.493241350692463e-07, "logits/chosen": -1.4421833753585815, "logits/rejected": -2.505030870437622, "logps/chosen": -195.35182189941406, "logps/rejected": -449.3108215332031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.236333847045898, "rewards/margins": 10.04129409790039, "rewards/rejected": -17.27762794494629, "step": 18131 }, { "epoch": 2.82, "learning_rate": 8.485906945380985e-07, "logits/chosen": -2.0672616958618164, "logits/rejected": -2.522249937057495, "logps/chosen": -149.28488159179688, "logps/rejected": -300.04931640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.716834545135498, "rewards/margins": 10.754218101501465, "rewards/rejected": -16.471052169799805, "step": 18132 }, { "epoch": 2.82, "learning_rate": 8.478572540069506e-07, "logits/chosen": -1.8520660400390625, "logits/rejected": -2.826667308807373, "logps/chosen": -362.67498779296875, "logps/rejected": -587.2186279296875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.641463756561279, "rewards/margins": 8.305219650268555, "rewards/rejected": -15.946682929992676, "step": 18133 }, { "epoch": 2.82, "learning_rate": 8.471238134758028e-07, "logits/chosen": -1.5463672876358032, "logits/rejected": -2.3740949630737305, "logps/chosen": -244.39297485351562, "logps/rejected": -471.40643310546875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -6.642951011657715, "rewards/margins": 10.271076202392578, "rewards/rejected": -16.91402816772461, "step": 18134 }, { "epoch": 2.82, "learning_rate": 8.463903729446548e-07, "logits/chosen": -1.837958812713623, "logits/rejected": -2.8121626377105713, "logps/chosen": -161.4972686767578, "logps/rejected": -566.077880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.8481202125549316, "rewards/margins": 13.575033187866211, "rewards/rejected": -17.423152923583984, "step": 18135 }, { "epoch": 2.82, "learning_rate": 8.456569324135069e-07, "logits/chosen": -1.8777059316635132, "logits/rejected": -2.7309746742248535, "logps/chosen": -266.8636169433594, "logps/rejected": -529.7655029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.1436767578125, "rewards/margins": 10.953226089477539, "rewards/rejected": -17.09690284729004, "step": 18136 }, { "epoch": 2.82, "learning_rate": 8.44923491882359e-07, "logits/chosen": -1.3155829906463623, "logits/rejected": -2.3783981800079346, "logps/chosen": -285.71533203125, "logps/rejected": -576.3884887695312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.643985748291016, "rewards/margins": 11.1229248046875, "rewards/rejected": -19.766910552978516, "step": 18137 }, { "epoch": 2.82, "learning_rate": 8.441900513512112e-07, "logits/chosen": -2.5870144367218018, "logits/rejected": -2.3057994842529297, "logps/chosen": -335.80291748046875, "logps/rejected": -518.6494140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.201854228973389, "rewards/margins": 10.579687118530273, "rewards/rejected": -17.78154182434082, "step": 18138 }, { "epoch": 2.82, "learning_rate": 8.434566108200633e-07, "logits/chosen": -1.2792633771896362, "logits/rejected": -2.386702537536621, "logps/chosen": -202.1905517578125, "logps/rejected": -546.1845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.522753715515137, "rewards/margins": 13.950933456420898, "rewards/rejected": -22.47368621826172, "step": 18139 }, { "epoch": 2.82, "learning_rate": 8.427231702889155e-07, "logits/chosen": -1.0242269039154053, "logits/rejected": -2.342031955718994, "logps/chosen": -183.47653198242188, "logps/rejected": -544.296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.343918800354004, "rewards/margins": 11.805695533752441, "rewards/rejected": -21.149614334106445, "step": 18140 }, { "epoch": 2.82, "learning_rate": 8.419897297577675e-07, "logits/chosen": -2.5138838291168213, "logits/rejected": -2.903353214263916, "logps/chosen": -105.03128814697266, "logps/rejected": -389.1488037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.8106794357299805, "rewards/margins": 9.649643898010254, "rewards/rejected": -17.460323333740234, "step": 18141 }, { "epoch": 2.82, "learning_rate": 8.412562892266197e-07, "logits/chosen": -2.265843629837036, "logits/rejected": -2.4294400215148926, "logps/chosen": -303.54437255859375, "logps/rejected": -481.49737548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.863395690917969, "rewards/margins": 13.362638473510742, "rewards/rejected": -19.22603416442871, "step": 18142 }, { "epoch": 2.82, "learning_rate": 8.405228486954718e-07, "logits/chosen": -1.6684839725494385, "logits/rejected": -2.5549228191375732, "logps/chosen": -187.6833953857422, "logps/rejected": -336.16015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.593212127685547, "rewards/margins": 8.123994827270508, "rewards/rejected": -17.717206954956055, "step": 18143 }, { "epoch": 2.82, "learning_rate": 8.397894081643239e-07, "logits/chosen": -2.729750394821167, "logits/rejected": -2.0544071197509766, "logps/chosen": -256.8919982910156, "logps/rejected": -341.4659423828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.929495811462402, "rewards/margins": 10.54472541809082, "rewards/rejected": -16.47422218322754, "step": 18144 }, { "epoch": 2.82, "learning_rate": 8.39055967633176e-07, "logits/chosen": -2.6594460010528564, "logits/rejected": -2.7511394023895264, "logps/chosen": -202.34494018554688, "logps/rejected": -265.1475830078125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.614803314208984, "rewards/margins": 7.549169063568115, "rewards/rejected": -13.163972854614258, "step": 18145 }, { "epoch": 2.82, "learning_rate": 8.38322527102028e-07, "logits/chosen": -1.825362205505371, "logits/rejected": -2.7338809967041016, "logps/chosen": -238.43939208984375, "logps/rejected": -471.97528076171875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -12.16745662689209, "rewards/margins": 6.820013046264648, "rewards/rejected": -18.987468719482422, "step": 18146 }, { "epoch": 2.82, "learning_rate": 8.375890865708802e-07, "logits/chosen": -2.5110280513763428, "logits/rejected": -1.798495888710022, "logps/chosen": -308.76385498046875, "logps/rejected": -286.465087890625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -8.691617965698242, "rewards/margins": 6.22875452041626, "rewards/rejected": -14.92037296295166, "step": 18147 }, { "epoch": 2.82, "learning_rate": 8.368556460397323e-07, "logits/chosen": -2.3871450424194336, "logits/rejected": -2.770627021789551, "logps/chosen": -197.39588928222656, "logps/rejected": -336.07769775390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.444849014282227, "rewards/margins": 7.590515613555908, "rewards/rejected": -15.035364151000977, "step": 18148 }, { "epoch": 2.82, "learning_rate": 8.361222055085845e-07, "logits/chosen": -2.5224368572235107, "logits/rejected": -2.6067745685577393, "logps/chosen": -91.10408782958984, "logps/rejected": -234.70091247558594, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.042545795440674, "rewards/margins": 8.58495044708252, "rewards/rejected": -14.627496719360352, "step": 18149 }, { "epoch": 2.82, "learning_rate": 8.353887649774365e-07, "logits/chosen": -2.1547038555145264, "logits/rejected": -2.8454296588897705, "logps/chosen": -189.66700744628906, "logps/rejected": -473.8528747558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.38623046875, "rewards/margins": 17.534984588623047, "rewards/rejected": -21.921215057373047, "step": 18150 }, { "epoch": 2.82, "learning_rate": 8.346553244462887e-07, "logits/chosen": -2.3159027099609375, "logits/rejected": -2.160691022872925, "logps/chosen": -158.7413787841797, "logps/rejected": -257.01708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.866368770599365, "rewards/margins": 9.026394844055176, "rewards/rejected": -15.8927640914917, "step": 18151 }, { "epoch": 2.82, "learning_rate": 8.339218839151408e-07, "logits/chosen": -2.345714807510376, "logits/rejected": -0.8969284892082214, "logps/chosen": -352.11407470703125, "logps/rejected": -288.6474304199219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7855751514434814, "rewards/margins": 10.970672607421875, "rewards/rejected": -13.756247520446777, "step": 18152 }, { "epoch": 2.82, "learning_rate": 8.33188443383993e-07, "logits/chosen": -1.8447506427764893, "logits/rejected": -2.6088743209838867, "logps/chosen": -202.4482421875, "logps/rejected": -353.181640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.056727409362793, "rewards/margins": 8.948198318481445, "rewards/rejected": -18.004924774169922, "step": 18153 }, { "epoch": 2.82, "learning_rate": 8.324550028528451e-07, "logits/chosen": -1.819661259651184, "logits/rejected": -2.3211658000946045, "logps/chosen": -363.8088073730469, "logps/rejected": -582.1829833984375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.301050186157227, "rewards/margins": 10.28316593170166, "rewards/rejected": -18.58421516418457, "step": 18154 }, { "epoch": 2.82, "learning_rate": 8.317215623216972e-07, "logits/chosen": -2.4971110820770264, "logits/rejected": -2.606895923614502, "logps/chosen": -122.22709655761719, "logps/rejected": -234.1927032470703, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.492002487182617, "rewards/margins": 8.085590362548828, "rewards/rejected": -17.577592849731445, "step": 18155 }, { "epoch": 2.82, "learning_rate": 8.309881217905493e-07, "logits/chosen": -2.5573570728302, "logits/rejected": -2.9679722785949707, "logps/chosen": -146.822265625, "logps/rejected": -767.8248901367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.613134384155273, "rewards/margins": 17.223285675048828, "rewards/rejected": -22.8364200592041, "step": 18156 }, { "epoch": 2.82, "learning_rate": 8.302546812594015e-07, "logits/chosen": -1.33464777469635, "logits/rejected": -2.6776511669158936, "logps/chosen": -426.495361328125, "logps/rejected": -765.9276123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.722064018249512, "rewards/margins": 11.390715599060059, "rewards/rejected": -19.11277961730957, "step": 18157 }, { "epoch": 2.82, "learning_rate": 8.295212407282535e-07, "logits/chosen": -2.7683193683624268, "logits/rejected": -1.1044697761535645, "logps/chosen": -455.1353454589844, "logps/rejected": -261.12188720703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.212563991546631, "rewards/margins": 8.131800651550293, "rewards/rejected": -14.344365119934082, "step": 18158 }, { "epoch": 2.82, "learning_rate": 8.287878001971057e-07, "logits/chosen": -1.8089141845703125, "logits/rejected": -2.7405271530151367, "logps/chosen": -322.14178466796875, "logps/rejected": -676.6376953125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -10.118646621704102, "rewards/margins": 6.795871257781982, "rewards/rejected": -16.914518356323242, "step": 18159 }, { "epoch": 2.82, "learning_rate": 8.280543596659577e-07, "logits/chosen": -2.338688850402832, "logits/rejected": -2.4804553985595703, "logps/chosen": -141.5973358154297, "logps/rejected": -171.87660217285156, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -6.694062232971191, "rewards/margins": 6.398144721984863, "rewards/rejected": -13.092206954956055, "step": 18160 }, { "epoch": 2.82, "learning_rate": 8.273209191348099e-07, "logits/chosen": -2.5364675521850586, "logits/rejected": -2.2696120738983154, "logps/chosen": -341.176025390625, "logps/rejected": -495.9974365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.596923351287842, "rewards/margins": 12.181208610534668, "rewards/rejected": -16.77813148498535, "step": 18161 }, { "epoch": 2.82, "learning_rate": 8.26587478603662e-07, "logits/chosen": -2.0374021530151367, "logits/rejected": -2.663011074066162, "logps/chosen": -281.687255859375, "logps/rejected": -475.5165710449219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.228503227233887, "rewards/margins": 9.711688041687012, "rewards/rejected": -19.9401912689209, "step": 18162 }, { "epoch": 2.82, "learning_rate": 8.258540380725142e-07, "logits/chosen": -2.862612009048462, "logits/rejected": -2.9171204566955566, "logps/chosen": -462.4183349609375, "logps/rejected": -472.86602783203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.697209358215332, "rewards/margins": 8.741217613220215, "rewards/rejected": -15.438426971435547, "step": 18163 }, { "epoch": 2.82, "learning_rate": 8.251205975413663e-07, "logits/chosen": -2.782567262649536, "logits/rejected": -2.5844411849975586, "logps/chosen": -205.20663452148438, "logps/rejected": -203.9910888671875, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -8.667716979980469, "rewards/margins": 4.804965972900391, "rewards/rejected": -13.47268295288086, "step": 18164 }, { "epoch": 2.83, "learning_rate": 8.243871570102184e-07, "logits/chosen": -2.4738528728485107, "logits/rejected": -2.3010737895965576, "logps/chosen": -165.2576904296875, "logps/rejected": -269.29376220703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.638545036315918, "rewards/margins": 10.51638412475586, "rewards/rejected": -16.154930114746094, "step": 18165 }, { "epoch": 2.83, "learning_rate": 8.236537164790705e-07, "logits/chosen": -1.8519837856292725, "logits/rejected": -2.3740131855010986, "logps/chosen": -208.5450439453125, "logps/rejected": -377.09716796875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -6.044910430908203, "rewards/margins": 7.750123023986816, "rewards/rejected": -13.79503345489502, "step": 18166 }, { "epoch": 2.83, "learning_rate": 8.229202759479226e-07, "logits/chosen": -2.1199851036071777, "logits/rejected": -2.776078224182129, "logps/chosen": -241.32684326171875, "logps/rejected": -284.31488037109375, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/chosen": -5.21102237701416, "rewards/margins": 6.2516608238220215, "rewards/rejected": -11.462682723999023, "step": 18167 }, { "epoch": 2.83, "learning_rate": 8.221868354167748e-07, "logits/chosen": -2.3736026287078857, "logits/rejected": -2.764880657196045, "logps/chosen": -406.9156799316406, "logps/rejected": -511.3325500488281, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.16572380065918, "rewards/margins": 9.434938430786133, "rewards/rejected": -17.600662231445312, "step": 18168 }, { "epoch": 2.83, "learning_rate": 8.214533948856269e-07, "logits/chosen": -0.9806602001190186, "logits/rejected": -2.696653366088867, "logps/chosen": -158.84683227539062, "logps/rejected": -688.2911376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.71510124206543, "rewards/margins": 11.89094352722168, "rewards/rejected": -21.60604476928711, "step": 18169 }, { "epoch": 2.83, "learning_rate": 8.207199543544789e-07, "logits/chosen": -2.8578484058380127, "logits/rejected": -2.3761727809906006, "logps/chosen": -225.2261505126953, "logps/rejected": -176.77313232421875, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -7.287810325622559, "rewards/margins": 6.678125381469727, "rewards/rejected": -13.965936660766602, "step": 18170 }, { "epoch": 2.83, "learning_rate": 8.19986513823331e-07, "logits/chosen": -0.5177577137947083, "logits/rejected": -2.560570240020752, "logps/chosen": -143.22335815429688, "logps/rejected": -521.77490234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.10391616821289, "rewards/margins": 10.991830825805664, "rewards/rejected": -20.095746994018555, "step": 18171 }, { "epoch": 2.83, "learning_rate": 8.192530732921832e-07, "logits/chosen": -0.9120917916297913, "logits/rejected": -1.8244249820709229, "logps/chosen": -258.8839111328125, "logps/rejected": -457.0276794433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.547236442565918, "rewards/margins": 11.674192428588867, "rewards/rejected": -18.2214298248291, "step": 18172 }, { "epoch": 2.83, "learning_rate": 8.185196327610353e-07, "logits/chosen": -2.208160161972046, "logits/rejected": -2.8708407878875732, "logps/chosen": -362.4439697265625, "logps/rejected": -478.0108642578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.560249328613281, "rewards/margins": 9.768930435180664, "rewards/rejected": -15.329179763793945, "step": 18173 }, { "epoch": 2.83, "learning_rate": 8.177861922298875e-07, "logits/chosen": -3.04660701751709, "logits/rejected": -2.9670262336730957, "logps/chosen": -126.71743774414062, "logps/rejected": -170.68814086914062, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -5.853986740112305, "rewards/margins": 7.532196998596191, "rewards/rejected": -13.386183738708496, "step": 18174 }, { "epoch": 2.83, "learning_rate": 8.170527516987395e-07, "logits/chosen": -1.879122018814087, "logits/rejected": -3.0628702640533447, "logps/chosen": -419.2358703613281, "logps/rejected": -656.6741943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.42143440246582, "rewards/margins": 10.830171585083008, "rewards/rejected": -19.251605987548828, "step": 18175 }, { "epoch": 2.83, "learning_rate": 8.163193111675917e-07, "logits/chosen": -2.1232757568359375, "logits/rejected": -2.7002644538879395, "logps/chosen": -287.15045166015625, "logps/rejected": -480.5374450683594, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -8.836343765258789, "rewards/margins": 10.756256103515625, "rewards/rejected": -19.592601776123047, "step": 18176 }, { "epoch": 2.83, "learning_rate": 8.155858706364438e-07, "logits/chosen": -1.403543472290039, "logits/rejected": -2.52292537689209, "logps/chosen": -220.94161987304688, "logps/rejected": -537.26904296875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.145471572875977, "rewards/margins": 13.938844680786133, "rewards/rejected": -19.08431625366211, "step": 18177 }, { "epoch": 2.83, "learning_rate": 8.14852430105296e-07, "logits/chosen": -2.5259742736816406, "logits/rejected": -2.404819965362549, "logps/chosen": -249.6009521484375, "logps/rejected": -317.6505126953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.843223571777344, "rewards/margins": 8.46182918548584, "rewards/rejected": -16.3050537109375, "step": 18178 }, { "epoch": 2.83, "learning_rate": 8.141189895741481e-07, "logits/chosen": -0.9898509979248047, "logits/rejected": -2.499657154083252, "logps/chosen": -200.15353393554688, "logps/rejected": -457.1298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.303055286407471, "rewards/margins": 11.007430076599121, "rewards/rejected": -17.31048583984375, "step": 18179 }, { "epoch": 2.83, "learning_rate": 8.133855490430002e-07, "logits/chosen": -2.626018524169922, "logits/rejected": -2.1830291748046875, "logps/chosen": -224.17349243164062, "logps/rejected": -155.66156005859375, "loss": 0.2197, "rewards/accuracies": 1.0, "rewards/chosen": -9.302245140075684, "rewards/margins": 1.430260181427002, "rewards/rejected": -10.732504844665527, "step": 18180 }, { "epoch": 2.83, "learning_rate": 8.126521085118523e-07, "logits/chosen": -2.176327705383301, "logits/rejected": -2.4613378047943115, "logps/chosen": -332.53240966796875, "logps/rejected": -434.57415771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.548941135406494, "rewards/margins": 11.27955436706543, "rewards/rejected": -16.828495025634766, "step": 18181 }, { "epoch": 2.83, "learning_rate": 8.119186679807044e-07, "logits/chosen": -2.681950569152832, "logits/rejected": -2.8488283157348633, "logps/chosen": -264.9028625488281, "logps/rejected": -525.2099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.811160564422607, "rewards/margins": 11.155033111572266, "rewards/rejected": -18.96619415283203, "step": 18182 }, { "epoch": 2.83, "learning_rate": 8.111852274495565e-07, "logits/chosen": -2.812283992767334, "logits/rejected": -2.072984457015991, "logps/chosen": -400.0906982421875, "logps/rejected": -250.8193359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.870029449462891, "rewards/margins": 6.811302185058594, "rewards/rejected": -14.681331634521484, "step": 18183 }, { "epoch": 2.83, "learning_rate": 8.104517869184086e-07, "logits/chosen": -2.250981092453003, "logits/rejected": -2.32808518409729, "logps/chosen": -290.4708251953125, "logps/rejected": -404.2534484863281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.858922958374023, "rewards/margins": 9.029565811157227, "rewards/rejected": -20.88848876953125, "step": 18184 }, { "epoch": 2.83, "learning_rate": 8.097183463872607e-07, "logits/chosen": -2.3200736045837402, "logits/rejected": -1.720511794090271, "logps/chosen": -278.6717834472656, "logps/rejected": -343.55706787109375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.575595378875732, "rewards/margins": 9.770014762878418, "rewards/rejected": -15.345609664916992, "step": 18185 }, { "epoch": 2.83, "learning_rate": 8.089849058561128e-07, "logits/chosen": -2.5625908374786377, "logits/rejected": -2.176028251647949, "logps/chosen": -274.87664794921875, "logps/rejected": -322.97857666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.741060733795166, "rewards/margins": 14.762203216552734, "rewards/rejected": -20.503265380859375, "step": 18186 }, { "epoch": 2.83, "learning_rate": 8.08251465324965e-07, "logits/chosen": -2.4884393215179443, "logits/rejected": -2.7539403438568115, "logps/chosen": -227.25543212890625, "logps/rejected": -344.76055908203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.103338241577148, "rewards/margins": 8.16923999786377, "rewards/rejected": -15.272579193115234, "step": 18187 }, { "epoch": 2.83, "learning_rate": 8.075180247938171e-07, "logits/chosen": -2.7995758056640625, "logits/rejected": -1.5272390842437744, "logps/chosen": -233.34706115722656, "logps/rejected": -145.31539916992188, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -7.174445152282715, "rewards/margins": 4.760059356689453, "rewards/rejected": -11.934503555297852, "step": 18188 }, { "epoch": 2.83, "learning_rate": 8.067845842626693e-07, "logits/chosen": -1.8002632856369019, "logits/rejected": -2.8613884449005127, "logps/chosen": -177.98435974121094, "logps/rejected": -477.15496826171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.607349395751953, "rewards/margins": 11.284285545349121, "rewards/rejected": -15.891634941101074, "step": 18189 }, { "epoch": 2.83, "learning_rate": 8.060511437315213e-07, "logits/chosen": -2.642207622528076, "logits/rejected": -2.317725658416748, "logps/chosen": -173.93453979492188, "logps/rejected": -277.65570068359375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -7.823977470397949, "rewards/margins": 6.675226211547852, "rewards/rejected": -14.4992036819458, "step": 18190 }, { "epoch": 2.83, "learning_rate": 8.053177032003735e-07, "logits/chosen": -2.1925759315490723, "logits/rejected": -2.7390055656433105, "logps/chosen": -107.58065032958984, "logps/rejected": -330.10443115234375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -9.192127227783203, "rewards/margins": 8.816370964050293, "rewards/rejected": -18.008499145507812, "step": 18191 }, { "epoch": 2.83, "learning_rate": 8.045842626692256e-07, "logits/chosen": -1.6732593774795532, "logits/rejected": -2.6099331378936768, "logps/chosen": -379.4466857910156, "logps/rejected": -595.767822265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.14432430267334, "rewards/margins": 9.00912857055664, "rewards/rejected": -16.153453826904297, "step": 18192 }, { "epoch": 2.83, "learning_rate": 8.038508221380778e-07, "logits/chosen": -2.53218412399292, "logits/rejected": -2.093005657196045, "logps/chosen": -914.0982666015625, "logps/rejected": -710.232421875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -8.055285453796387, "rewards/margins": 7.141045570373535, "rewards/rejected": -15.196331024169922, "step": 18193 }, { "epoch": 2.83, "learning_rate": 8.031173816069298e-07, "logits/chosen": -1.8417164087295532, "logits/rejected": -1.6000096797943115, "logps/chosen": -277.0775146484375, "logps/rejected": -495.9654235839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.406418800354004, "rewards/margins": 12.13123893737793, "rewards/rejected": -20.53765869140625, "step": 18194 }, { "epoch": 2.83, "learning_rate": 8.023839410757819e-07, "logits/chosen": -2.6981775760650635, "logits/rejected": -2.819032669067383, "logps/chosen": -149.16539001464844, "logps/rejected": -263.97186279296875, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -5.9908576011657715, "rewards/margins": 6.356482028961182, "rewards/rejected": -12.347339630126953, "step": 18195 }, { "epoch": 2.83, "learning_rate": 8.01650500544634e-07, "logits/chosen": -1.482499361038208, "logits/rejected": -2.3992230892181396, "logps/chosen": -258.1757507324219, "logps/rejected": -449.5855712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.289122581481934, "rewards/margins": 12.041869163513184, "rewards/rejected": -16.330991744995117, "step": 18196 }, { "epoch": 2.83, "learning_rate": 8.009170600134862e-07, "logits/chosen": -2.030123472213745, "logits/rejected": -2.491889476776123, "logps/chosen": -143.17904663085938, "logps/rejected": -222.40701293945312, "loss": 0.0472, "rewards/accuracies": 1.0, "rewards/chosen": -8.63934326171875, "rewards/margins": 5.349133014678955, "rewards/rejected": -13.988475799560547, "step": 18197 }, { "epoch": 2.83, "learning_rate": 8.001836194823383e-07, "logits/chosen": -0.9489691853523254, "logits/rejected": -2.006896495819092, "logps/chosen": -299.6462707519531, "logps/rejected": -550.1660766601562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.024648666381836, "rewards/margins": 10.631658554077148, "rewards/rejected": -18.656307220458984, "step": 18198 }, { "epoch": 2.83, "learning_rate": 7.994501789511905e-07, "logits/chosen": -3.0765750408172607, "logits/rejected": -2.214489221572876, "logps/chosen": -611.3521118164062, "logps/rejected": -340.4664306640625, "loss": 0.0484, "rewards/accuracies": 1.0, "rewards/chosen": -6.889523506164551, "rewards/margins": 4.157858848571777, "rewards/rejected": -11.047382354736328, "step": 18199 }, { "epoch": 2.83, "learning_rate": 7.987167384200425e-07, "logits/chosen": -2.0692129135131836, "logits/rejected": -2.545130968093872, "logps/chosen": -264.20489501953125, "logps/rejected": -531.3282470703125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -9.75556755065918, "rewards/margins": 7.38785982131958, "rewards/rejected": -17.143428802490234, "step": 18200 }, { "epoch": 2.83, "learning_rate": 7.979832978888947e-07, "logits/chosen": -2.6654956340789795, "logits/rejected": -2.7318689823150635, "logps/chosen": -138.0515899658203, "logps/rejected": -229.62457275390625, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/chosen": -9.60498046875, "rewards/margins": 4.40311861038208, "rewards/rejected": -14.008098602294922, "step": 18201 }, { "epoch": 2.83, "learning_rate": 7.972498573577468e-07, "logits/chosen": -1.8365225791931152, "logits/rejected": -2.3119003772735596, "logps/chosen": -148.98599243164062, "logps/rejected": -318.47698974609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.660451889038086, "rewards/margins": 8.33311653137207, "rewards/rejected": -17.993568420410156, "step": 18202 }, { "epoch": 2.83, "learning_rate": 7.96516416826599e-07, "logits/chosen": -1.8750407695770264, "logits/rejected": -2.411741256713867, "logps/chosen": -325.807861328125, "logps/rejected": -463.83184814453125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.360047340393066, "rewards/margins": 7.994240760803223, "rewards/rejected": -17.35428810119629, "step": 18203 }, { "epoch": 2.83, "learning_rate": 7.957829762954511e-07, "logits/chosen": -2.6684274673461914, "logits/rejected": -2.8155391216278076, "logps/chosen": -123.57569885253906, "logps/rejected": -238.08407592773438, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -7.103018760681152, "rewards/margins": 7.593791484832764, "rewards/rejected": -14.696809768676758, "step": 18204 }, { "epoch": 2.83, "learning_rate": 7.950495357643032e-07, "logits/chosen": -1.722785472869873, "logits/rejected": -2.7290401458740234, "logps/chosen": -150.1260986328125, "logps/rejected": -426.92596435546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.9592370986938477, "rewards/margins": 10.197517395019531, "rewards/rejected": -13.156753540039062, "step": 18205 }, { "epoch": 2.83, "learning_rate": 7.943160952331552e-07, "logits/chosen": -2.546121835708618, "logits/rejected": -2.0826241970062256, "logps/chosen": -217.4283447265625, "logps/rejected": -385.38177490234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.531830310821533, "rewards/margins": 13.626283645629883, "rewards/rejected": -19.15811538696289, "step": 18206 }, { "epoch": 2.83, "learning_rate": 7.935826547020073e-07, "logits/chosen": -2.664807081222534, "logits/rejected": -2.1560726165771484, "logps/chosen": -394.08953857421875, "logps/rejected": -348.3370666503906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.886621475219727, "rewards/margins": 9.86674690246582, "rewards/rejected": -17.753368377685547, "step": 18207 }, { "epoch": 2.83, "learning_rate": 7.928492141708595e-07, "logits/chosen": -1.9330525398254395, "logits/rejected": -2.4579360485076904, "logps/chosen": -303.5326843261719, "logps/rejected": -410.5721435546875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -6.29505729675293, "rewards/margins": 6.275533676147461, "rewards/rejected": -12.57059097290039, "step": 18208 }, { "epoch": 2.83, "learning_rate": 7.921157736397115e-07, "logits/chosen": -1.9601622819900513, "logits/rejected": -2.6112637519836426, "logps/chosen": -250.37290954589844, "logps/rejected": -453.3782043457031, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -5.154096603393555, "rewards/margins": 9.399303436279297, "rewards/rejected": -14.553400039672852, "step": 18209 }, { "epoch": 2.83, "learning_rate": 7.913823331085637e-07, "logits/chosen": -2.9025843143463135, "logits/rejected": -2.232593059539795, "logps/chosen": -544.614990234375, "logps/rejected": -550.1375732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.96325397491455, "rewards/margins": 16.609426498413086, "rewards/rejected": -25.57267951965332, "step": 18210 }, { "epoch": 2.83, "learning_rate": 7.906488925774158e-07, "logits/chosen": -2.5862627029418945, "logits/rejected": -2.4534056186676025, "logps/chosen": -517.6663208007812, "logps/rejected": -564.9859619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.495500564575195, "rewards/margins": 9.898473739624023, "rewards/rejected": -19.39397430419922, "step": 18211 }, { "epoch": 2.83, "learning_rate": 7.89915452046268e-07, "logits/chosen": -2.640838861465454, "logits/rejected": -2.836688995361328, "logps/chosen": -140.7340087890625, "logps/rejected": -297.099853515625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.249467372894287, "rewards/margins": 8.530794143676758, "rewards/rejected": -15.780261993408203, "step": 18212 }, { "epoch": 2.83, "learning_rate": 7.891820115151201e-07, "logits/chosen": -1.9069905281066895, "logits/rejected": -2.2609927654266357, "logps/chosen": -147.650146484375, "logps/rejected": -338.4120788574219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.769134521484375, "rewards/margins": 10.296191215515137, "rewards/rejected": -19.065326690673828, "step": 18213 }, { "epoch": 2.83, "learning_rate": 7.884485709839722e-07, "logits/chosen": -1.8653501272201538, "logits/rejected": -2.5612075328826904, "logps/chosen": -190.70651245117188, "logps/rejected": -276.3134765625, "loss": 0.5635, "rewards/accuracies": 0.5, "rewards/chosen": -8.922028541564941, "rewards/margins": 4.774794578552246, "rewards/rejected": -13.696823120117188, "step": 18214 }, { "epoch": 2.83, "learning_rate": 7.877151304528243e-07, "logits/chosen": -2.076502561569214, "logits/rejected": -2.9177167415618896, "logps/chosen": -349.774658203125, "logps/rejected": -533.2640380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.92694091796875, "rewards/margins": 13.266302108764648, "rewards/rejected": -23.1932430267334, "step": 18215 }, { "epoch": 2.83, "learning_rate": 7.869816899216765e-07, "logits/chosen": -2.429936647415161, "logits/rejected": -2.1422553062438965, "logps/chosen": -355.95745849609375, "logps/rejected": -403.36090087890625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -8.276495933532715, "rewards/margins": 5.177677154541016, "rewards/rejected": -13.454172134399414, "step": 18216 }, { "epoch": 2.83, "learning_rate": 7.862482493905286e-07, "logits/chosen": -1.4933818578720093, "logits/rejected": -2.406928062438965, "logps/chosen": -286.7982177734375, "logps/rejected": -451.4815673828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.879341125488281, "rewards/margins": 9.571784973144531, "rewards/rejected": -16.451126098632812, "step": 18217 }, { "epoch": 2.83, "learning_rate": 7.855148088593808e-07, "logits/chosen": -2.0404953956604004, "logits/rejected": -2.6931097507476807, "logps/chosen": -163.54150390625, "logps/rejected": -404.7290954589844, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -9.199929237365723, "rewards/margins": 6.979219913482666, "rewards/rejected": -16.179149627685547, "step": 18218 }, { "epoch": 2.83, "learning_rate": 7.847813683282327e-07, "logits/chosen": -1.0049399137496948, "logits/rejected": -1.7204136848449707, "logps/chosen": -327.9429931640625, "logps/rejected": -594.64404296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.100468635559082, "rewards/margins": 12.578163146972656, "rewards/rejected": -21.678630828857422, "step": 18219 }, { "epoch": 2.83, "learning_rate": 7.840479277970849e-07, "logits/chosen": -2.5269641876220703, "logits/rejected": -2.479491710662842, "logps/chosen": -481.20330810546875, "logps/rejected": -350.2618103027344, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.916384696960449, "rewards/margins": 9.526519775390625, "rewards/rejected": -17.44290542602539, "step": 18220 }, { "epoch": 2.83, "learning_rate": 7.83314487265937e-07, "logits/chosen": -2.4246318340301514, "logits/rejected": -2.5487005710601807, "logps/chosen": -401.8997497558594, "logps/rejected": -460.38787841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.7289490699768066, "rewards/margins": 15.204822540283203, "rewards/rejected": -17.93377113342285, "step": 18221 }, { "epoch": 2.83, "learning_rate": 7.825810467347892e-07, "logits/chosen": -2.5619821548461914, "logits/rejected": -2.441845178604126, "logps/chosen": -478.28033447265625, "logps/rejected": -754.5972900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.282905101776123, "rewards/margins": 15.654842376708984, "rewards/rejected": -20.937747955322266, "step": 18222 }, { "epoch": 2.83, "learning_rate": 7.818476062036413e-07, "logits/chosen": -2.759058952331543, "logits/rejected": -2.2560365200042725, "logps/chosen": -271.4150085449219, "logps/rejected": -462.69195556640625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.055150985717773, "rewards/margins": 14.41150188446045, "rewards/rejected": -21.466651916503906, "step": 18223 }, { "epoch": 2.83, "learning_rate": 7.811141656724934e-07, "logits/chosen": -0.9899560809135437, "logits/rejected": -2.58716082572937, "logps/chosen": -250.01548767089844, "logps/rejected": -522.69482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.505110740661621, "rewards/margins": 14.621439933776855, "rewards/rejected": -24.126550674438477, "step": 18224 }, { "epoch": 2.83, "learning_rate": 7.803807251413455e-07, "logits/chosen": -1.5363563299179077, "logits/rejected": -2.6292672157287598, "logps/chosen": -258.1867370605469, "logps/rejected": -456.3143310546875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -9.679704666137695, "rewards/margins": 5.546558380126953, "rewards/rejected": -15.226263046264648, "step": 18225 }, { "epoch": 2.83, "learning_rate": 7.796472846101976e-07, "logits/chosen": -2.7317585945129395, "logits/rejected": -1.9993058443069458, "logps/chosen": -388.15997314453125, "logps/rejected": -386.939453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.2153778076171875, "rewards/margins": 12.101076126098633, "rewards/rejected": -17.31645393371582, "step": 18226 }, { "epoch": 2.83, "learning_rate": 7.789138440790498e-07, "logits/chosen": -2.5287559032440186, "logits/rejected": -2.6879920959472656, "logps/chosen": -200.464599609375, "logps/rejected": -358.13623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.744344711303711, "rewards/margins": 11.8641996383667, "rewards/rejected": -16.608545303344727, "step": 18227 }, { "epoch": 2.83, "learning_rate": 7.781804035479019e-07, "logits/chosen": -2.7534639835357666, "logits/rejected": -1.877140998840332, "logps/chosen": -235.67071533203125, "logps/rejected": -332.1321105957031, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -7.88394832611084, "rewards/margins": 9.879941940307617, "rewards/rejected": -17.763891220092773, "step": 18228 }, { "epoch": 2.83, "learning_rate": 7.77446963016754e-07, "logits/chosen": -2.7122788429260254, "logits/rejected": -2.499872922897339, "logps/chosen": -169.80824279785156, "logps/rejected": -188.01544189453125, "loss": 0.0911, "rewards/accuracies": 1.0, "rewards/chosen": -5.73560905456543, "rewards/margins": 5.694406509399414, "rewards/rejected": -11.430015563964844, "step": 18229 }, { "epoch": 2.84, "learning_rate": 7.76713522485606e-07, "logits/chosen": -2.8434746265411377, "logits/rejected": -1.9125703573226929, "logps/chosen": -203.98745727539062, "logps/rejected": -194.85232543945312, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -7.104652404785156, "rewards/margins": 8.20122241973877, "rewards/rejected": -15.305874824523926, "step": 18230 }, { "epoch": 2.84, "learning_rate": 7.759800819544582e-07, "logits/chosen": -2.5772032737731934, "logits/rejected": -2.0068743228912354, "logps/chosen": -418.89263916015625, "logps/rejected": -379.9652099609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.242484092712402, "rewards/margins": 9.180399894714355, "rewards/rejected": -15.422883987426758, "step": 18231 }, { "epoch": 2.84, "learning_rate": 7.752466414233103e-07, "logits/chosen": -2.481004238128662, "logits/rejected": -2.0149924755096436, "logps/chosen": -170.83297729492188, "logps/rejected": -249.97836303710938, "loss": 0.1251, "rewards/accuracies": 1.0, "rewards/chosen": -8.138465881347656, "rewards/margins": 4.977308750152588, "rewards/rejected": -13.115774154663086, "step": 18232 }, { "epoch": 2.84, "learning_rate": 7.745132008921625e-07, "logits/chosen": -2.214555501937866, "logits/rejected": -2.604498863220215, "logps/chosen": -136.65821838378906, "logps/rejected": -335.3523864746094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.281813621520996, "rewards/margins": 11.569581985473633, "rewards/rejected": -19.851394653320312, "step": 18233 }, { "epoch": 2.84, "learning_rate": 7.737797603610145e-07, "logits/chosen": -1.6864286661148071, "logits/rejected": -2.526097059249878, "logps/chosen": -113.55105590820312, "logps/rejected": -375.054931640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.098396301269531, "rewards/margins": 9.391532897949219, "rewards/rejected": -16.48992919921875, "step": 18234 }, { "epoch": 2.84, "learning_rate": 7.730463198298667e-07, "logits/chosen": -2.3486015796661377, "logits/rejected": -2.281486749649048, "logps/chosen": -414.06787109375, "logps/rejected": -438.99847412109375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.246840476989746, "rewards/margins": 9.63960075378418, "rewards/rejected": -14.886442184448242, "step": 18235 }, { "epoch": 2.84, "learning_rate": 7.723128792987188e-07, "logits/chosen": -1.8279608488082886, "logits/rejected": -2.601400136947632, "logps/chosen": -206.05210876464844, "logps/rejected": -475.58685302734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.827308177947998, "rewards/margins": 13.126564025878906, "rewards/rejected": -19.953872680664062, "step": 18236 }, { "epoch": 2.84, "learning_rate": 7.71579438767571e-07, "logits/chosen": -2.136747360229492, "logits/rejected": -2.729996919631958, "logps/chosen": -114.3522720336914, "logps/rejected": -353.668212890625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -9.300651550292969, "rewards/margins": 8.43940544128418, "rewards/rejected": -17.74005889892578, "step": 18237 }, { "epoch": 2.84, "learning_rate": 7.708459982364231e-07, "logits/chosen": -2.750319480895996, "logits/rejected": -2.736762046813965, "logps/chosen": -323.5881652832031, "logps/rejected": -403.984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.260584831237793, "rewards/margins": 14.458152770996094, "rewards/rejected": -23.71873664855957, "step": 18238 }, { "epoch": 2.84, "learning_rate": 7.701125577052752e-07, "logits/chosen": -2.532391309738159, "logits/rejected": -2.6905884742736816, "logps/chosen": -133.75660705566406, "logps/rejected": -328.3744201660156, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.736702919006348, "rewards/margins": 7.276057720184326, "rewards/rejected": -15.012760162353516, "step": 18239 }, { "epoch": 2.84, "learning_rate": 7.693791171741273e-07, "logits/chosen": -2.600153923034668, "logits/rejected": -2.7071709632873535, "logps/chosen": -233.52223205566406, "logps/rejected": -290.2244873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.638350009918213, "rewards/margins": 8.608901977539062, "rewards/rejected": -15.247251510620117, "step": 18240 }, { "epoch": 2.84, "learning_rate": 7.686456766429795e-07, "logits/chosen": -2.5630948543548584, "logits/rejected": -1.4541161060333252, "logps/chosen": -522.7274780273438, "logps/rejected": -382.2894287109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.201438903808594, "rewards/margins": 9.821651458740234, "rewards/rejected": -18.023090362548828, "step": 18241 }, { "epoch": 2.84, "learning_rate": 7.679122361118315e-07, "logits/chosen": -1.1013758182525635, "logits/rejected": -2.478766918182373, "logps/chosen": -128.03518676757812, "logps/rejected": -498.1645202636719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.349562644958496, "rewards/margins": 11.391132354736328, "rewards/rejected": -18.740694046020508, "step": 18242 }, { "epoch": 2.84, "learning_rate": 7.671787955806837e-07, "logits/chosen": -1.088379979133606, "logits/rejected": -2.232598066329956, "logps/chosen": -159.68719482421875, "logps/rejected": -466.984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.0194783210754395, "rewards/margins": 17.457246780395508, "rewards/rejected": -23.47672462463379, "step": 18243 }, { "epoch": 2.84, "learning_rate": 7.664453550495357e-07, "logits/chosen": -2.555283784866333, "logits/rejected": -2.8775429725646973, "logps/chosen": -154.264892578125, "logps/rejected": -303.0699462890625, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -5.692819595336914, "rewards/margins": 8.322552680969238, "rewards/rejected": -14.015372276306152, "step": 18244 }, { "epoch": 2.84, "learning_rate": 7.657119145183879e-07, "logits/chosen": -2.2843880653381348, "logits/rejected": -2.512599468231201, "logps/chosen": -205.4188690185547, "logps/rejected": -311.5429382324219, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.1812310218811035, "rewards/margins": 7.706626892089844, "rewards/rejected": -12.887857437133789, "step": 18245 }, { "epoch": 2.84, "learning_rate": 7.6497847398724e-07, "logits/chosen": -2.12666654586792, "logits/rejected": -2.5301830768585205, "logps/chosen": -173.15081787109375, "logps/rejected": -294.5860595703125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -8.713968276977539, "rewards/margins": 4.847797870635986, "rewards/rejected": -13.561765670776367, "step": 18246 }, { "epoch": 2.84, "learning_rate": 7.642450334560921e-07, "logits/chosen": -2.8777854442596436, "logits/rejected": -2.960947036743164, "logps/chosen": -232.9715576171875, "logps/rejected": -311.8805847167969, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -7.02153205871582, "rewards/margins": 6.161687850952148, "rewards/rejected": -13.183219909667969, "step": 18247 }, { "epoch": 2.84, "learning_rate": 7.635115929249443e-07, "logits/chosen": -1.3345071077346802, "logits/rejected": -2.1839451789855957, "logps/chosen": -197.16690063476562, "logps/rejected": -531.226806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.318985939025879, "rewards/margins": 15.062166213989258, "rewards/rejected": -22.381153106689453, "step": 18248 }, { "epoch": 2.84, "learning_rate": 7.627781523937963e-07, "logits/chosen": -2.8002426624298096, "logits/rejected": -2.3723220825195312, "logps/chosen": -392.95794677734375, "logps/rejected": -472.7577209472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.9466640949249268, "rewards/margins": 10.343351364135742, "rewards/rejected": -13.290014266967773, "step": 18249 }, { "epoch": 2.84, "learning_rate": 7.620447118626485e-07, "logits/chosen": -2.0065951347351074, "logits/rejected": -2.330495834350586, "logps/chosen": -182.2981414794922, "logps/rejected": -254.30934143066406, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -4.395262241363525, "rewards/margins": 8.166437149047852, "rewards/rejected": -12.561698913574219, "step": 18250 }, { "epoch": 2.84, "learning_rate": 7.613112713315006e-07, "logits/chosen": -2.2390992641448975, "logits/rejected": -2.5729751586914062, "logps/chosen": -193.76522827148438, "logps/rejected": -304.16619873046875, "loss": 0.5504, "rewards/accuracies": 0.5, "rewards/chosen": -9.497112274169922, "rewards/margins": 2.0209977626800537, "rewards/rejected": -11.518110275268555, "step": 18251 }, { "epoch": 2.84, "learning_rate": 7.605778308003528e-07, "logits/chosen": -2.67714262008667, "logits/rejected": -1.4313509464263916, "logps/chosen": -525.6121215820312, "logps/rejected": -385.1340637207031, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -7.536466598510742, "rewards/margins": 6.141989707946777, "rewards/rejected": -13.67845630645752, "step": 18252 }, { "epoch": 2.84, "learning_rate": 7.598443902692049e-07, "logits/chosen": -1.2395042181015015, "logits/rejected": -2.4741787910461426, "logps/chosen": -105.16242980957031, "logps/rejected": -450.39483642578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.563793182373047, "rewards/margins": 12.198199272155762, "rewards/rejected": -15.761992454528809, "step": 18253 }, { "epoch": 2.84, "learning_rate": 7.591109497380569e-07, "logits/chosen": -2.465360164642334, "logits/rejected": -2.597691297531128, "logps/chosen": -398.6453857421875, "logps/rejected": -418.8583679199219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.98085880279541, "rewards/margins": 8.74980354309082, "rewards/rejected": -14.73066234588623, "step": 18254 }, { "epoch": 2.84, "learning_rate": 7.58377509206909e-07, "logits/chosen": -2.640303373336792, "logits/rejected": -2.4189388751983643, "logps/chosen": -232.42813110351562, "logps/rejected": -338.50537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.41505765914917, "rewards/margins": 10.915973663330078, "rewards/rejected": -17.331031799316406, "step": 18255 }, { "epoch": 2.84, "learning_rate": 7.576440686757612e-07, "logits/chosen": -2.150355100631714, "logits/rejected": -2.673241376876831, "logps/chosen": -194.2826385498047, "logps/rejected": -342.88165283203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.24317455291748, "rewards/margins": 11.485033988952637, "rewards/rejected": -19.728208541870117, "step": 18256 }, { "epoch": 2.84, "learning_rate": 7.569106281446133e-07, "logits/chosen": -3.1457467079162598, "logits/rejected": -2.902482509613037, "logps/chosen": -143.76666259765625, "logps/rejected": -192.5168914794922, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.903797149658203, "rewards/margins": 9.419178009033203, "rewards/rejected": -14.322975158691406, "step": 18257 }, { "epoch": 2.84, "learning_rate": 7.561771876134655e-07, "logits/chosen": -2.2227344512939453, "logits/rejected": -2.6747219562530518, "logps/chosen": -188.93307495117188, "logps/rejected": -477.35809326171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.0126371383667, "rewards/margins": 10.775636672973633, "rewards/rejected": -18.78827476501465, "step": 18258 }, { "epoch": 2.84, "learning_rate": 7.554437470823175e-07, "logits/chosen": -2.6017258167266846, "logits/rejected": -2.7660434246063232, "logps/chosen": -187.10137939453125, "logps/rejected": -333.7363586425781, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -5.042831897735596, "rewards/margins": 7.7283735275268555, "rewards/rejected": -12.77120590209961, "step": 18259 }, { "epoch": 2.84, "learning_rate": 7.547103065511697e-07, "logits/chosen": -2.3350515365600586, "logits/rejected": -2.875941038131714, "logps/chosen": -172.86038208007812, "logps/rejected": -360.4309387207031, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -9.700563430786133, "rewards/margins": 6.74475622177124, "rewards/rejected": -16.44532012939453, "step": 18260 }, { "epoch": 2.84, "learning_rate": 7.539768660200218e-07, "logits/chosen": -2.4714155197143555, "logits/rejected": -2.0212929248809814, "logps/chosen": -313.5282897949219, "logps/rejected": -343.2009582519531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.670523643493652, "rewards/margins": 10.759297370910645, "rewards/rejected": -15.429821014404297, "step": 18261 }, { "epoch": 2.84, "learning_rate": 7.53243425488874e-07, "logits/chosen": -2.650418281555176, "logits/rejected": -1.0997949838638306, "logps/chosen": -392.58251953125, "logps/rejected": -216.11090087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4312596321105957, "rewards/margins": 12.973671913146973, "rewards/rejected": -15.404932022094727, "step": 18262 }, { "epoch": 2.84, "learning_rate": 7.525099849577261e-07, "logits/chosen": -2.5513875484466553, "logits/rejected": -2.525806427001953, "logps/chosen": -295.35736083984375, "logps/rejected": -301.7882995605469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.465774059295654, "rewards/margins": 11.070343971252441, "rewards/rejected": -16.536117553710938, "step": 18263 }, { "epoch": 2.84, "learning_rate": 7.517765444265782e-07, "logits/chosen": -2.305227756500244, "logits/rejected": -2.0412278175354004, "logps/chosen": -230.18667602539062, "logps/rejected": -268.83746337890625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.784984588623047, "rewards/margins": 7.35081672668457, "rewards/rejected": -15.135801315307617, "step": 18264 }, { "epoch": 2.84, "learning_rate": 7.510431038954303e-07, "logits/chosen": -1.8149082660675049, "logits/rejected": -2.579376459121704, "logps/chosen": -190.2803955078125, "logps/rejected": -450.58160400390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.440988063812256, "rewards/margins": 13.96119499206543, "rewards/rejected": -21.402183532714844, "step": 18265 }, { "epoch": 2.84, "learning_rate": 7.503096633642824e-07, "logits/chosen": -2.7686755657196045, "logits/rejected": -2.7614097595214844, "logps/chosen": -162.06048583984375, "logps/rejected": -242.10000610351562, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -4.391736030578613, "rewards/margins": 5.820993423461914, "rewards/rejected": -10.212729454040527, "step": 18266 }, { "epoch": 2.84, "learning_rate": 7.495762228331345e-07, "logits/chosen": -2.5719330310821533, "logits/rejected": -2.6943798065185547, "logps/chosen": -147.27377319335938, "logps/rejected": -355.10992431640625, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -7.739426612854004, "rewards/margins": 7.005457878112793, "rewards/rejected": -14.744884490966797, "step": 18267 }, { "epoch": 2.84, "learning_rate": 7.488427823019866e-07, "logits/chosen": -1.8509132862091064, "logits/rejected": -2.905320882797241, "logps/chosen": -279.6629333496094, "logps/rejected": -436.10382080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.802295684814453, "rewards/margins": 10.425299644470215, "rewards/rejected": -16.227596282958984, "step": 18268 }, { "epoch": 2.84, "learning_rate": 7.481093417708387e-07, "logits/chosen": -1.8114529848098755, "logits/rejected": -2.750502347946167, "logps/chosen": -278.76641845703125, "logps/rejected": -668.1930541992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.274347305297852, "rewards/margins": 12.789863586425781, "rewards/rejected": -22.064210891723633, "step": 18269 }, { "epoch": 2.84, "learning_rate": 7.473759012396908e-07, "logits/chosen": -2.5053484439849854, "logits/rejected": -2.6778573989868164, "logps/chosen": -174.36376953125, "logps/rejected": -418.51458740234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -12.181469917297363, "rewards/margins": 8.878507614135742, "rewards/rejected": -21.059978485107422, "step": 18270 }, { "epoch": 2.84, "learning_rate": 7.46642460708543e-07, "logits/chosen": -2.5300724506378174, "logits/rejected": -1.4068820476531982, "logps/chosen": -414.1162109375, "logps/rejected": -396.09588623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.580056190490723, "rewards/margins": 12.566667556762695, "rewards/rejected": -21.146724700927734, "step": 18271 }, { "epoch": 2.84, "learning_rate": 7.459090201773951e-07, "logits/chosen": -2.3907835483551025, "logits/rejected": -2.4235100746154785, "logps/chosen": -328.1766357421875, "logps/rejected": -395.013916015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.879943370819092, "rewards/margins": 10.315267562866211, "rewards/rejected": -16.19521141052246, "step": 18272 }, { "epoch": 2.84, "learning_rate": 7.451755796462472e-07, "logits/chosen": -2.198348045349121, "logits/rejected": -2.5441460609436035, "logps/chosen": -397.8592529296875, "logps/rejected": -574.294921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.459877014160156, "rewards/margins": 13.211051940917969, "rewards/rejected": -19.670928955078125, "step": 18273 }, { "epoch": 2.84, "learning_rate": 7.444421391150993e-07, "logits/chosen": -1.5188716650009155, "logits/rejected": -2.6399285793304443, "logps/chosen": -173.35311889648438, "logps/rejected": -429.0242919921875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -11.479637145996094, "rewards/margins": 7.727529048919678, "rewards/rejected": -19.207164764404297, "step": 18274 }, { "epoch": 2.84, "learning_rate": 7.437086985839515e-07, "logits/chosen": -2.401273727416992, "logits/rejected": -2.346616506576538, "logps/chosen": -219.39012145996094, "logps/rejected": -372.76092529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.500483512878418, "rewards/margins": 13.692947387695312, "rewards/rejected": -19.193431854248047, "step": 18275 }, { "epoch": 2.84, "learning_rate": 7.429752580528036e-07, "logits/chosen": -2.656184434890747, "logits/rejected": -2.6126115322113037, "logps/chosen": -253.2755126953125, "logps/rejected": -504.7887878417969, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.761865615844727, "rewards/margins": 10.25688648223877, "rewards/rejected": -15.01875114440918, "step": 18276 }, { "epoch": 2.84, "learning_rate": 7.422418175216558e-07, "logits/chosen": -2.3424665927886963, "logits/rejected": -2.5652897357940674, "logps/chosen": -273.835693359375, "logps/rejected": -398.2058410644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.4444451332092285, "rewards/margins": 12.491518020629883, "rewards/rejected": -18.935962677001953, "step": 18277 }, { "epoch": 2.84, "learning_rate": 7.415083769905077e-07, "logits/chosen": -2.411856174468994, "logits/rejected": -1.4224843978881836, "logps/chosen": -224.91549682617188, "logps/rejected": -251.14060974121094, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.678362846374512, "rewards/margins": 8.281963348388672, "rewards/rejected": -12.960326194763184, "step": 18278 }, { "epoch": 2.84, "learning_rate": 7.407749364593599e-07, "logits/chosen": -2.7499186992645264, "logits/rejected": -2.823890447616577, "logps/chosen": -443.4186706542969, "logps/rejected": -566.9398803710938, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -10.626876831054688, "rewards/margins": 6.302279949188232, "rewards/rejected": -16.929157257080078, "step": 18279 }, { "epoch": 2.84, "learning_rate": 7.40041495928212e-07, "logits/chosen": -2.7497026920318604, "logits/rejected": -2.9330220222473145, "logps/chosen": -819.7120361328125, "logps/rejected": -709.8504638671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.947439670562744, "rewards/margins": 13.002588272094727, "rewards/rejected": -20.950027465820312, "step": 18280 }, { "epoch": 2.84, "learning_rate": 7.393080553970642e-07, "logits/chosen": -1.6749627590179443, "logits/rejected": -2.955312967300415, "logps/chosen": -161.7755889892578, "logps/rejected": -438.12432861328125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -6.412971496582031, "rewards/margins": 8.513151168823242, "rewards/rejected": -14.926122665405273, "step": 18281 }, { "epoch": 2.84, "learning_rate": 7.385746148659163e-07, "logits/chosen": -2.8133342266082764, "logits/rejected": -2.857611656188965, "logps/chosen": -252.38116455078125, "logps/rejected": -257.2334289550781, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -5.408202171325684, "rewards/margins": 4.940456390380859, "rewards/rejected": -10.348658561706543, "step": 18282 }, { "epoch": 2.84, "learning_rate": 7.378411743347684e-07, "logits/chosen": -1.2610211372375488, "logits/rejected": -2.620563268661499, "logps/chosen": -124.18138885498047, "logps/rejected": -540.2577514648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.881434917449951, "rewards/margins": 11.676340103149414, "rewards/rejected": -15.557775497436523, "step": 18283 }, { "epoch": 2.84, "learning_rate": 7.371077338036205e-07, "logits/chosen": -2.153447151184082, "logits/rejected": -2.6091504096984863, "logps/chosen": -132.96278381347656, "logps/rejected": -386.736083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.372527122497559, "rewards/margins": 14.30941390991211, "rewards/rejected": -18.68194007873535, "step": 18284 }, { "epoch": 2.84, "learning_rate": 7.363742932724727e-07, "logits/chosen": -1.882185459136963, "logits/rejected": -2.4173362255096436, "logps/chosen": -319.5837707519531, "logps/rejected": -494.9107360839844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.71894645690918, "rewards/margins": 10.785080909729004, "rewards/rejected": -19.5040283203125, "step": 18285 }, { "epoch": 2.84, "learning_rate": 7.356408527413248e-07, "logits/chosen": -2.678898572921753, "logits/rejected": -2.9609720706939697, "logps/chosen": -138.5930633544922, "logps/rejected": -390.8851318359375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -9.680381774902344, "rewards/margins": 6.637482166290283, "rewards/rejected": -16.31786346435547, "step": 18286 }, { "epoch": 2.84, "learning_rate": 7.349074122101769e-07, "logits/chosen": -2.556713819503784, "logits/rejected": -2.8488197326660156, "logps/chosen": -1036.798583984375, "logps/rejected": -875.3099975585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.35387897491455, "rewards/margins": 12.24289321899414, "rewards/rejected": -21.596771240234375, "step": 18287 }, { "epoch": 2.84, "learning_rate": 7.34173971679029e-07, "logits/chosen": -2.87459135055542, "logits/rejected": -3.151909351348877, "logps/chosen": -1073.5556640625, "logps/rejected": -849.70703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.702618598937988, "rewards/margins": 14.093978881835938, "rewards/rejected": -20.79659652709961, "step": 18288 }, { "epoch": 2.84, "learning_rate": 7.33440531147881e-07, "logits/chosen": -2.0347774028778076, "logits/rejected": -2.822312355041504, "logps/chosen": -124.11125183105469, "logps/rejected": -430.6319274902344, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.88593053817749, "rewards/margins": 11.103668212890625, "rewards/rejected": -15.989598274230957, "step": 18289 }, { "epoch": 2.84, "learning_rate": 7.327070906167332e-07, "logits/chosen": -2.5830612182617188, "logits/rejected": -2.1332428455352783, "logps/chosen": -286.6151123046875, "logps/rejected": -394.05963134765625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -10.012886047363281, "rewards/margins": 9.944384574890137, "rewards/rejected": -19.9572696685791, "step": 18290 }, { "epoch": 2.84, "learning_rate": 7.319736500855853e-07, "logits/chosen": -2.782565116882324, "logits/rejected": -2.6914756298065186, "logps/chosen": -425.75909423828125, "logps/rejected": -461.07147216796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.606291770935059, "rewards/margins": 12.186203002929688, "rewards/rejected": -18.792495727539062, "step": 18291 }, { "epoch": 2.84, "learning_rate": 7.312402095544375e-07, "logits/chosen": -1.4234498739242554, "logits/rejected": -2.7031357288360596, "logps/chosen": -125.47734069824219, "logps/rejected": -299.4043884277344, "loss": 0.1795, "rewards/accuracies": 1.0, "rewards/chosen": -8.203157424926758, "rewards/margins": 4.340115547180176, "rewards/rejected": -12.543272972106934, "step": 18292 }, { "epoch": 2.84, "learning_rate": 7.305067690232895e-07, "logits/chosen": -2.0569989681243896, "logits/rejected": -2.6320528984069824, "logps/chosen": -208.34091186523438, "logps/rejected": -308.217041015625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -6.030052185058594, "rewards/margins": 9.786794662475586, "rewards/rejected": -15.81684684753418, "step": 18293 }, { "epoch": 2.85, "learning_rate": 7.297733284921417e-07, "logits/chosen": -2.531991958618164, "logits/rejected": -2.6858479976654053, "logps/chosen": -393.208740234375, "logps/rejected": -436.6612548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.82558536529541, "rewards/margins": 10.994357109069824, "rewards/rejected": -16.819942474365234, "step": 18294 }, { "epoch": 2.85, "learning_rate": 7.290398879609938e-07, "logits/chosen": -1.645318865776062, "logits/rejected": -2.5282163619995117, "logps/chosen": -124.30333709716797, "logps/rejected": -360.49969482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.592206001281738, "rewards/margins": 13.20113468170166, "rewards/rejected": -20.7933406829834, "step": 18295 }, { "epoch": 2.85, "learning_rate": 7.28306447429846e-07, "logits/chosen": -2.6308140754699707, "logits/rejected": -2.336833953857422, "logps/chosen": -344.8961181640625, "logps/rejected": -345.12420654296875, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -9.407085418701172, "rewards/margins": 6.373326301574707, "rewards/rejected": -15.780410766601562, "step": 18296 }, { "epoch": 2.85, "learning_rate": 7.275730068986981e-07, "logits/chosen": -2.43021821975708, "logits/rejected": -1.600054144859314, "logps/chosen": -338.50604248046875, "logps/rejected": -379.463623046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.232962131500244, "rewards/margins": 9.741249084472656, "rewards/rejected": -13.974210739135742, "step": 18297 }, { "epoch": 2.85, "learning_rate": 7.268395663675502e-07, "logits/chosen": -2.439460277557373, "logits/rejected": -1.2294796705245972, "logps/chosen": -294.1217956542969, "logps/rejected": -269.965087890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -2.555950164794922, "rewards/margins": 11.04633903503418, "rewards/rejected": -13.602289199829102, "step": 18298 }, { "epoch": 2.85, "learning_rate": 7.261061258364023e-07, "logits/chosen": -2.4014084339141846, "logits/rejected": -2.9132769107818604, "logps/chosen": -157.73529052734375, "logps/rejected": -336.38507080078125, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/chosen": -10.037471771240234, "rewards/margins": 4.462653160095215, "rewards/rejected": -14.50012493133545, "step": 18299 }, { "epoch": 2.85, "learning_rate": 7.253726853052545e-07, "logits/chosen": -2.474761724472046, "logits/rejected": -2.456145763397217, "logps/chosen": -300.304443359375, "logps/rejected": -359.5107727050781, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -5.490930557250977, "rewards/margins": 7.546640872955322, "rewards/rejected": -13.03757095336914, "step": 18300 }, { "epoch": 2.85, "learning_rate": 7.246392447741066e-07, "logits/chosen": -2.847355604171753, "logits/rejected": -2.6620640754699707, "logps/chosen": -234.47239685058594, "logps/rejected": -325.25341796875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.872154712677002, "rewards/margins": 6.914210319519043, "rewards/rejected": -13.786365509033203, "step": 18301 }, { "epoch": 2.85, "learning_rate": 7.239058042429587e-07, "logits/chosen": -2.8342642784118652, "logits/rejected": -3.0119290351867676, "logps/chosen": -548.9774169921875, "logps/rejected": -507.1020812988281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.460226058959961, "rewards/margins": 15.739977836608887, "rewards/rejected": -21.20020294189453, "step": 18302 }, { "epoch": 2.85, "learning_rate": 7.231723637118107e-07, "logits/chosen": -2.459449052810669, "logits/rejected": -2.223376512527466, "logps/chosen": -332.06787109375, "logps/rejected": -357.97412109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.750817775726318, "rewards/margins": 9.298928260803223, "rewards/rejected": -14.049745559692383, "step": 18303 }, { "epoch": 2.85, "learning_rate": 7.224389231806629e-07, "logits/chosen": -1.3574832677841187, "logits/rejected": -2.6216166019439697, "logps/chosen": -163.7467041015625, "logps/rejected": -655.22314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.782709121704102, "rewards/margins": 17.94866371154785, "rewards/rejected": -23.731372833251953, "step": 18304 }, { "epoch": 2.85, "learning_rate": 7.21705482649515e-07, "logits/chosen": -2.897578477859497, "logits/rejected": -2.2359673976898193, "logps/chosen": -958.3584594726562, "logps/rejected": -590.0771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0590057373046875, "rewards/margins": 12.304214477539062, "rewards/rejected": -17.36322021484375, "step": 18305 }, { "epoch": 2.85, "learning_rate": 7.209720421183672e-07, "logits/chosen": -1.520035982131958, "logits/rejected": -2.719722032546997, "logps/chosen": -142.02713012695312, "logps/rejected": -344.8869934082031, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.98721981048584, "rewards/margins": 7.5105881690979, "rewards/rejected": -15.497808456420898, "step": 18306 }, { "epoch": 2.85, "learning_rate": 7.202386015872193e-07, "logits/chosen": -2.285109281539917, "logits/rejected": -2.6950299739837646, "logps/chosen": -202.88523864746094, "logps/rejected": -372.3282165527344, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -7.017941951751709, "rewards/margins": 10.7101411819458, "rewards/rejected": -17.72808265686035, "step": 18307 }, { "epoch": 2.85, "learning_rate": 7.195051610560714e-07, "logits/chosen": -1.7632391452789307, "logits/rejected": -2.603865623474121, "logps/chosen": -176.82467651367188, "logps/rejected": -326.6705322265625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.758787631988525, "rewards/margins": 8.413162231445312, "rewards/rejected": -13.17194938659668, "step": 18308 }, { "epoch": 2.85, "learning_rate": 7.187717205249235e-07, "logits/chosen": -2.8995604515075684, "logits/rejected": -2.7742958068847656, "logps/chosen": -306.5467224121094, "logps/rejected": -316.5880126953125, "loss": 0.4474, "rewards/accuracies": 0.5, "rewards/chosen": -9.921074867248535, "rewards/margins": 2.9417459964752197, "rewards/rejected": -12.862820625305176, "step": 18309 }, { "epoch": 2.85, "learning_rate": 7.180382799937756e-07, "logits/chosen": -2.804499864578247, "logits/rejected": -2.424675703048706, "logps/chosen": -329.52520751953125, "logps/rejected": -478.29071044921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.991593360900879, "rewards/margins": 11.11669921875, "rewards/rejected": -22.108291625976562, "step": 18310 }, { "epoch": 2.85, "learning_rate": 7.173048394626278e-07, "logits/chosen": -2.1788170337677, "logits/rejected": -2.4990973472595215, "logps/chosen": -212.73333740234375, "logps/rejected": -457.66607666015625, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -9.46603775024414, "rewards/margins": 8.430085182189941, "rewards/rejected": -17.896121978759766, "step": 18311 }, { "epoch": 2.85, "learning_rate": 7.165713989314799e-07, "logits/chosen": -1.0960651636123657, "logits/rejected": -2.3221213817596436, "logps/chosen": -136.9078826904297, "logps/rejected": -369.07855224609375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -7.603388786315918, "rewards/margins": 10.094095230102539, "rewards/rejected": -17.69748306274414, "step": 18312 }, { "epoch": 2.85, "learning_rate": 7.15837958400332e-07, "logits/chosen": -2.5994675159454346, "logits/rejected": -2.59047269821167, "logps/chosen": -241.72396850585938, "logps/rejected": -360.0020751953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.18715763092041, "rewards/margins": 9.813055992126465, "rewards/rejected": -17.000213623046875, "step": 18313 }, { "epoch": 2.85, "learning_rate": 7.15104517869184e-07, "logits/chosen": -2.368549108505249, "logits/rejected": -2.618819236755371, "logps/chosen": -283.076171875, "logps/rejected": -429.94952392578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.983423233032227, "rewards/margins": 10.183175086975098, "rewards/rejected": -17.16659927368164, "step": 18314 }, { "epoch": 2.85, "learning_rate": 7.143710773380362e-07, "logits/chosen": -2.2161331176757812, "logits/rejected": -2.605849027633667, "logps/chosen": -158.46343994140625, "logps/rejected": -477.857177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.7832536697387695, "rewards/margins": 15.360612869262695, "rewards/rejected": -20.14386749267578, "step": 18315 }, { "epoch": 2.85, "learning_rate": 7.136376368068883e-07, "logits/chosen": -2.694736957550049, "logits/rejected": -2.0825252532958984, "logps/chosen": -437.30926513671875, "logps/rejected": -522.0156860351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.722640991210938, "rewards/margins": 11.713506698608398, "rewards/rejected": -20.436147689819336, "step": 18316 }, { "epoch": 2.85, "learning_rate": 7.129041962757405e-07, "logits/chosen": -2.5500741004943848, "logits/rejected": -1.0930719375610352, "logps/chosen": -352.8100280761719, "logps/rejected": -231.89913940429688, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -2.120718479156494, "rewards/margins": 10.193868637084961, "rewards/rejected": -12.314587593078613, "step": 18317 }, { "epoch": 2.85, "learning_rate": 7.121707557445925e-07, "logits/chosen": -2.6128296852111816, "logits/rejected": -2.5784964561462402, "logps/chosen": -575.7882080078125, "logps/rejected": -670.7820434570312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.197093963623047, "rewards/margins": 17.07370376586914, "rewards/rejected": -20.270797729492188, "step": 18318 }, { "epoch": 2.85, "learning_rate": 7.114373152134447e-07, "logits/chosen": -2.205141067504883, "logits/rejected": -2.5233640670776367, "logps/chosen": -259.34088134765625, "logps/rejected": -488.4752502441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.679630279541016, "rewards/margins": 14.438084602355957, "rewards/rejected": -21.117713928222656, "step": 18319 }, { "epoch": 2.85, "learning_rate": 7.107038746822968e-07, "logits/chosen": -2.955983877182007, "logits/rejected": -2.4076428413391113, "logps/chosen": -182.3414764404297, "logps/rejected": -221.3138427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4209203720092773, "rewards/margins": 12.57016372680664, "rewards/rejected": -14.991083145141602, "step": 18320 }, { "epoch": 2.85, "learning_rate": 7.09970434151149e-07, "logits/chosen": -3.032352924346924, "logits/rejected": -3.1846301555633545, "logps/chosen": -191.83380126953125, "logps/rejected": -333.8836669921875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -7.260477542877197, "rewards/margins": 8.021409034729004, "rewards/rejected": -15.28188705444336, "step": 18321 }, { "epoch": 2.85, "learning_rate": 7.092369936200011e-07, "logits/chosen": -2.4672722816467285, "logits/rejected": -2.459855556488037, "logps/chosen": -339.89813232421875, "logps/rejected": -431.3157958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.816776275634766, "rewards/margins": 10.437906265258789, "rewards/rejected": -16.254682540893555, "step": 18322 }, { "epoch": 2.85, "learning_rate": 7.085035530888532e-07, "logits/chosen": -2.8874635696411133, "logits/rejected": -3.0341711044311523, "logps/chosen": -105.71678161621094, "logps/rejected": -267.93316650390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.637454986572266, "rewards/margins": 10.851449012756348, "rewards/rejected": -19.48890495300293, "step": 18323 }, { "epoch": 2.85, "learning_rate": 7.077701125577053e-07, "logits/chosen": -2.626350164413452, "logits/rejected": -2.9724972248077393, "logps/chosen": -261.5679931640625, "logps/rejected": -317.8849792480469, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.764698028564453, "rewards/margins": 9.485814094543457, "rewards/rejected": -15.25051212310791, "step": 18324 }, { "epoch": 2.85, "learning_rate": 7.070366720265575e-07, "logits/chosen": -2.1443896293640137, "logits/rejected": -1.923321008682251, "logps/chosen": -193.97128295898438, "logps/rejected": -369.1234436035156, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -5.931371688842773, "rewards/margins": 8.826519012451172, "rewards/rejected": -14.757890701293945, "step": 18325 }, { "epoch": 2.85, "learning_rate": 7.063032314954095e-07, "logits/chosen": -2.2326583862304688, "logits/rejected": -2.7767245769500732, "logps/chosen": -107.98941040039062, "logps/rejected": -345.42913818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.33713150024414, "rewards/margins": 9.43281364440918, "rewards/rejected": -18.76994514465332, "step": 18326 }, { "epoch": 2.85, "learning_rate": 7.055697909642616e-07, "logits/chosen": -1.806303858757019, "logits/rejected": -2.8300702571868896, "logps/chosen": -146.54351806640625, "logps/rejected": -492.3577880859375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.058680534362793, "rewards/margins": 6.943699836730957, "rewards/rejected": -14.00238037109375, "step": 18327 }, { "epoch": 2.85, "learning_rate": 7.048363504331137e-07, "logits/chosen": -1.1296439170837402, "logits/rejected": -2.1423912048339844, "logps/chosen": -270.1291198730469, "logps/rejected": -657.5205078125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -8.012723922729492, "rewards/margins": 9.791339874267578, "rewards/rejected": -17.804065704345703, "step": 18328 }, { "epoch": 2.85, "learning_rate": 7.041029099019658e-07, "logits/chosen": -2.6492648124694824, "logits/rejected": -2.0298638343811035, "logps/chosen": -543.5885009765625, "logps/rejected": -825.9375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.209169864654541, "rewards/margins": 13.635028839111328, "rewards/rejected": -16.84419822692871, "step": 18329 }, { "epoch": 2.85, "learning_rate": 7.03369469370818e-07, "logits/chosen": -2.095057487487793, "logits/rejected": -2.6418652534484863, "logps/chosen": -170.59608459472656, "logps/rejected": -291.4634704589844, "loss": 0.7982, "rewards/accuracies": 0.5, "rewards/chosen": -7.256512641906738, "rewards/margins": 6.839842319488525, "rewards/rejected": -14.096355438232422, "step": 18330 }, { "epoch": 2.85, "learning_rate": 7.026360288396701e-07, "logits/chosen": -2.4713940620422363, "logits/rejected": -3.0639004707336426, "logps/chosen": -113.71782684326172, "logps/rejected": -543.2440795898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.7794342041015625, "rewards/margins": 11.425785064697266, "rewards/rejected": -19.205219268798828, "step": 18331 }, { "epoch": 2.85, "learning_rate": 7.019025883085222e-07, "logits/chosen": -2.6707241535186768, "logits/rejected": -2.985107898712158, "logps/chosen": -345.1944274902344, "logps/rejected": -677.4835815429688, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.710538864135742, "rewards/margins": 9.7801513671875, "rewards/rejected": -16.490690231323242, "step": 18332 }, { "epoch": 2.85, "learning_rate": 7.011691477773743e-07, "logits/chosen": -2.2149498462677, "logits/rejected": -2.868043899536133, "logps/chosen": -88.1424331665039, "logps/rejected": -441.897705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.663423538208008, "rewards/margins": 14.20776081085205, "rewards/rejected": -18.871185302734375, "step": 18333 }, { "epoch": 2.85, "learning_rate": 7.004357072462265e-07, "logits/chosen": -2.12260365486145, "logits/rejected": -2.5399832725524902, "logps/chosen": -192.8671875, "logps/rejected": -336.77099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.84921932220459, "rewards/margins": 9.307619094848633, "rewards/rejected": -17.156837463378906, "step": 18334 }, { "epoch": 2.85, "learning_rate": 6.997022667150786e-07, "logits/chosen": -2.3290836811065674, "logits/rejected": -2.769404649734497, "logps/chosen": -124.90633392333984, "logps/rejected": -289.5704345703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.249695301055908, "rewards/margins": 9.415229797363281, "rewards/rejected": -14.664924621582031, "step": 18335 }, { "epoch": 2.85, "learning_rate": 6.989688261839308e-07, "logits/chosen": -2.3880984783172607, "logits/rejected": -1.8941243886947632, "logps/chosen": -488.46533203125, "logps/rejected": -356.09332275390625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -8.406721115112305, "rewards/margins": 5.43898868560791, "rewards/rejected": -13.845708847045898, "step": 18336 }, { "epoch": 2.85, "learning_rate": 6.982353856527827e-07, "logits/chosen": -2.8232975006103516, "logits/rejected": -2.230400323867798, "logps/chosen": -443.8872375488281, "logps/rejected": -529.6156005859375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.655052185058594, "rewards/margins": 9.789604187011719, "rewards/rejected": -18.444656372070312, "step": 18337 }, { "epoch": 2.85, "learning_rate": 6.975019451216349e-07, "logits/chosen": -1.3730509281158447, "logits/rejected": -2.631943464279175, "logps/chosen": -351.605712890625, "logps/rejected": -575.989013671875, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -11.726644515991211, "rewards/margins": 5.828000068664551, "rewards/rejected": -17.554645538330078, "step": 18338 }, { "epoch": 2.85, "learning_rate": 6.96768504590487e-07, "logits/chosen": -2.9396119117736816, "logits/rejected": -2.306087017059326, "logps/chosen": -312.6258544921875, "logps/rejected": -263.04302978515625, "loss": 0.0412, "rewards/accuracies": 1.0, "rewards/chosen": -6.899481773376465, "rewards/margins": 5.799109935760498, "rewards/rejected": -12.698591232299805, "step": 18339 }, { "epoch": 2.85, "learning_rate": 6.960350640593392e-07, "logits/chosen": -0.699159562587738, "logits/rejected": -2.3676528930664062, "logps/chosen": -127.79106140136719, "logps/rejected": -515.9762573242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.12490177154541, "rewards/margins": 12.11451530456543, "rewards/rejected": -21.239418029785156, "step": 18340 }, { "epoch": 2.85, "learning_rate": 6.953016235281913e-07, "logits/chosen": -2.9652702808380127, "logits/rejected": -1.905378818511963, "logps/chosen": -551.2472534179688, "logps/rejected": -563.3246459960938, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -7.196687698364258, "rewards/margins": 7.346064567565918, "rewards/rejected": -14.542752265930176, "step": 18341 }, { "epoch": 2.85, "learning_rate": 6.945681829970434e-07, "logits/chosen": -1.605197787284851, "logits/rejected": -1.9984805583953857, "logps/chosen": -274.6430358886719, "logps/rejected": -530.010009765625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -6.511338710784912, "rewards/margins": 14.484283447265625, "rewards/rejected": -20.995622634887695, "step": 18342 }, { "epoch": 2.85, "learning_rate": 6.938347424658955e-07, "logits/chosen": -1.4879852533340454, "logits/rejected": -2.6198112964630127, "logps/chosen": -105.48318481445312, "logps/rejected": -303.714599609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.880617141723633, "rewards/margins": 10.065793991088867, "rewards/rejected": -15.9464111328125, "step": 18343 }, { "epoch": 2.85, "learning_rate": 6.931013019347477e-07, "logits/chosen": -2.567976474761963, "logits/rejected": -2.868364095687866, "logps/chosen": -155.8147430419922, "logps/rejected": -332.95062255859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.5136332511901855, "rewards/margins": 10.474431037902832, "rewards/rejected": -15.988064765930176, "step": 18344 }, { "epoch": 2.85, "learning_rate": 6.923678614035998e-07, "logits/chosen": -1.341161847114563, "logits/rejected": -2.5039026737213135, "logps/chosen": -128.5845184326172, "logps/rejected": -299.252685546875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.378750801086426, "rewards/margins": 7.717434883117676, "rewards/rejected": -16.0961856842041, "step": 18345 }, { "epoch": 2.85, "learning_rate": 6.91634420872452e-07, "logits/chosen": -2.3157012462615967, "logits/rejected": -2.4875082969665527, "logps/chosen": -155.501953125, "logps/rejected": -437.2286071777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6812052726745605, "rewards/margins": 12.601764678955078, "rewards/rejected": -17.282970428466797, "step": 18346 }, { "epoch": 2.85, "learning_rate": 6.90900980341304e-07, "logits/chosen": -2.561582088470459, "logits/rejected": -2.705659866333008, "logps/chosen": -331.2440185546875, "logps/rejected": -417.49609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.089536190032959, "rewards/margins": 12.112569808959961, "rewards/rejected": -17.202106475830078, "step": 18347 }, { "epoch": 2.85, "learning_rate": 6.901675398101562e-07, "logits/chosen": -2.4427452087402344, "logits/rejected": -1.8989826440811157, "logps/chosen": -514.9117431640625, "logps/rejected": -474.54888916015625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -7.027381896972656, "rewards/margins": 8.869877815246582, "rewards/rejected": -15.897259712219238, "step": 18348 }, { "epoch": 2.85, "learning_rate": 6.894340992790083e-07, "logits/chosen": -2.3223209381103516, "logits/rejected": -2.7691309452056885, "logps/chosen": -144.56588745117188, "logps/rejected": -356.63494873046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.761002540588379, "rewards/margins": 9.750592231750488, "rewards/rejected": -16.511594772338867, "step": 18349 }, { "epoch": 2.85, "learning_rate": 6.887006587478603e-07, "logits/chosen": -2.6115705966949463, "logits/rejected": -2.725419044494629, "logps/chosen": -149.18710327148438, "logps/rejected": -477.61309814453125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.182195663452148, "rewards/margins": 7.64237117767334, "rewards/rejected": -15.824565887451172, "step": 18350 }, { "epoch": 2.85, "learning_rate": 6.879672182167125e-07, "logits/chosen": -2.584928035736084, "logits/rejected": -2.5840492248535156, "logps/chosen": -294.251953125, "logps/rejected": -302.1640930175781, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.245077610015869, "rewards/margins": 6.8855671882629395, "rewards/rejected": -12.130644798278809, "step": 18351 }, { "epoch": 2.85, "learning_rate": 6.872337776855645e-07, "logits/chosen": -1.911135196685791, "logits/rejected": -2.77008056640625, "logps/chosen": -109.8666763305664, "logps/rejected": -444.88470458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.2082929611206055, "rewards/margins": 10.209981918334961, "rewards/rejected": -17.41827392578125, "step": 18352 }, { "epoch": 2.85, "learning_rate": 6.865003371544167e-07, "logits/chosen": -2.907768726348877, "logits/rejected": -2.8978028297424316, "logps/chosen": -114.71131134033203, "logps/rejected": -195.86099243164062, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.3057966232299805, "rewards/margins": 7.074871063232422, "rewards/rejected": -11.380668640136719, "step": 18353 }, { "epoch": 2.85, "learning_rate": 6.857668966232688e-07, "logits/chosen": -2.3534042835235596, "logits/rejected": -1.794712781906128, "logps/chosen": -247.64852905273438, "logps/rejected": -265.0184631347656, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.632543087005615, "rewards/margins": 9.768736839294434, "rewards/rejected": -16.40127944946289, "step": 18354 }, { "epoch": 2.85, "learning_rate": 6.85033456092121e-07, "logits/chosen": -2.279487371444702, "logits/rejected": -2.7528836727142334, "logps/chosen": -247.39410400390625, "logps/rejected": -458.8032531738281, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -9.858393669128418, "rewards/margins": 7.948645114898682, "rewards/rejected": -17.807039260864258, "step": 18355 }, { "epoch": 2.85, "learning_rate": 6.843000155609731e-07, "logits/chosen": -1.374599814414978, "logits/rejected": -1.9368345737457275, "logps/chosen": -519.5458374023438, "logps/rejected": -671.9998779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -14.03139877319336, "rewards/margins": 12.770807266235352, "rewards/rejected": -26.80220603942871, "step": 18356 }, { "epoch": 2.85, "learning_rate": 6.835665750298252e-07, "logits/chosen": -2.505108594894409, "logits/rejected": -1.6787179708480835, "logps/chosen": -641.7445678710938, "logps/rejected": -480.737548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.861373901367188, "rewards/margins": 11.826879501342773, "rewards/rejected": -22.688251495361328, "step": 18357 }, { "epoch": 2.86, "learning_rate": 6.828331344986773e-07, "logits/chosen": -2.4534859657287598, "logits/rejected": -1.9866783618927002, "logps/chosen": -630.7141723632812, "logps/rejected": -630.98828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.846737861633301, "rewards/margins": 10.590760231018066, "rewards/rejected": -16.437498092651367, "step": 18358 }, { "epoch": 2.86, "learning_rate": 6.820996939675295e-07, "logits/chosen": -2.386108875274658, "logits/rejected": -2.965701103210449, "logps/chosen": -141.4761505126953, "logps/rejected": -408.2799072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.242609024047852, "rewards/margins": 9.663034439086914, "rewards/rejected": -16.905643463134766, "step": 18359 }, { "epoch": 2.86, "learning_rate": 6.813662534363816e-07, "logits/chosen": -2.5327303409576416, "logits/rejected": -2.531411647796631, "logps/chosen": -472.60357666015625, "logps/rejected": -636.8381958007812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.787778854370117, "rewards/margins": 14.212681770324707, "rewards/rejected": -23.00046157836914, "step": 18360 }, { "epoch": 2.86, "learning_rate": 6.806328129052338e-07, "logits/chosen": -2.0370993614196777, "logits/rejected": -2.7398757934570312, "logps/chosen": -288.5204162597656, "logps/rejected": -618.6920166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.174077033996582, "rewards/margins": 14.03849983215332, "rewards/rejected": -23.212575912475586, "step": 18361 }, { "epoch": 2.86, "learning_rate": 6.798993723740857e-07, "logits/chosen": -2.6295275688171387, "logits/rejected": -2.8463969230651855, "logps/chosen": -220.61111450195312, "logps/rejected": -451.08087158203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.351489067077637, "rewards/margins": 11.852773666381836, "rewards/rejected": -20.20426368713379, "step": 18362 }, { "epoch": 2.86, "learning_rate": 6.791659318429379e-07, "logits/chosen": -1.0801796913146973, "logits/rejected": -2.7140331268310547, "logps/chosen": -127.29414367675781, "logps/rejected": -355.93603515625, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/chosen": -5.78051233291626, "rewards/margins": 5.688055992126465, "rewards/rejected": -11.468567848205566, "step": 18363 }, { "epoch": 2.86, "learning_rate": 6.7843249131179e-07, "logits/chosen": -1.8817306756973267, "logits/rejected": -2.8260767459869385, "logps/chosen": -248.48983764648438, "logps/rejected": -446.5710754394531, "loss": 0.0651, "rewards/accuracies": 1.0, "rewards/chosen": -9.400114059448242, "rewards/margins": 8.429548263549805, "rewards/rejected": -17.829662322998047, "step": 18364 }, { "epoch": 2.86, "learning_rate": 6.776990507806422e-07, "logits/chosen": -2.556509017944336, "logits/rejected": -2.0063633918762207, "logps/chosen": -590.4970703125, "logps/rejected": -518.5784912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.743564605712891, "rewards/margins": 10.005255699157715, "rewards/rejected": -17.748821258544922, "step": 18365 }, { "epoch": 2.86, "learning_rate": 6.769656102494943e-07, "logits/chosen": -3.008779764175415, "logits/rejected": -2.4344630241394043, "logps/chosen": -380.0926513671875, "logps/rejected": -187.12376403808594, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -0.26251983642578125, "rewards/margins": 11.805309295654297, "rewards/rejected": -12.067829132080078, "step": 18366 }, { "epoch": 2.86, "learning_rate": 6.762321697183464e-07, "logits/chosen": -1.903266429901123, "logits/rejected": -2.407029867172241, "logps/chosen": -542.8184814453125, "logps/rejected": -684.3663330078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.936677932739258, "rewards/margins": 14.270918846130371, "rewards/rejected": -21.207595825195312, "step": 18367 }, { "epoch": 2.86, "learning_rate": 6.754987291871985e-07, "logits/chosen": -2.628671646118164, "logits/rejected": -2.317115545272827, "logps/chosen": -234.07919311523438, "logps/rejected": -452.1734619140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.978837013244629, "rewards/margins": 13.959185600280762, "rewards/rejected": -19.93802261352539, "step": 18368 }, { "epoch": 2.86, "learning_rate": 6.747652886560506e-07, "logits/chosen": -1.956807017326355, "logits/rejected": -2.388822317123413, "logps/chosen": -420.928466796875, "logps/rejected": -826.033935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.586383819580078, "rewards/margins": 11.554756164550781, "rewards/rejected": -20.14113998413086, "step": 18369 }, { "epoch": 2.86, "learning_rate": 6.740318481249028e-07, "logits/chosen": -1.5324610471725464, "logits/rejected": -2.7569761276245117, "logps/chosen": -265.7604675292969, "logps/rejected": -432.5575866699219, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -8.604159355163574, "rewards/margins": 6.495416641235352, "rewards/rejected": -15.099575996398926, "step": 18370 }, { "epoch": 2.86, "learning_rate": 6.732984075937549e-07, "logits/chosen": -2.668829917907715, "logits/rejected": -2.2340152263641357, "logps/chosen": -163.96719360351562, "logps/rejected": -183.71755981445312, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/chosen": -4.9969072341918945, "rewards/margins": 5.982440948486328, "rewards/rejected": -10.979347229003906, "step": 18371 }, { "epoch": 2.86, "learning_rate": 6.72564967062607e-07, "logits/chosen": -2.735766887664795, "logits/rejected": -1.2159479856491089, "logps/chosen": -361.1412353515625, "logps/rejected": -245.49681091308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.161961555480957, "rewards/margins": 10.159366607666016, "rewards/rejected": -12.321327209472656, "step": 18372 }, { "epoch": 2.86, "learning_rate": 6.71831526531459e-07, "logits/chosen": -2.5212185382843018, "logits/rejected": -2.3014731407165527, "logps/chosen": -239.46868896484375, "logps/rejected": -327.1155700683594, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -11.017873764038086, "rewards/margins": 8.163759231567383, "rewards/rejected": -19.18163299560547, "step": 18373 }, { "epoch": 2.86, "learning_rate": 6.710980860003112e-07, "logits/chosen": -2.357515811920166, "logits/rejected": -2.8585143089294434, "logps/chosen": -132.1634063720703, "logps/rejected": -316.45819091796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5740807056427, "rewards/margins": 11.438843727111816, "rewards/rejected": -15.012924194335938, "step": 18374 }, { "epoch": 2.86, "learning_rate": 6.703646454691633e-07, "logits/chosen": -2.4677891731262207, "logits/rejected": -2.7771944999694824, "logps/chosen": -431.9179382324219, "logps/rejected": -491.47650146484375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -11.355413436889648, "rewards/margins": 9.546689987182617, "rewards/rejected": -20.902103424072266, "step": 18375 }, { "epoch": 2.86, "learning_rate": 6.696312049380155e-07, "logits/chosen": -1.688157558441162, "logits/rejected": -2.8107903003692627, "logps/chosen": -192.07656860351562, "logps/rejected": -644.6621704101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.169672966003418, "rewards/margins": 13.614531517028809, "rewards/rejected": -20.784204483032227, "step": 18376 }, { "epoch": 2.86, "learning_rate": 6.688977644068675e-07, "logits/chosen": -1.8707202672958374, "logits/rejected": -2.8052449226379395, "logps/chosen": -388.6378479003906, "logps/rejected": -650.3092651367188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.929207801818848, "rewards/margins": 10.240406036376953, "rewards/rejected": -18.169612884521484, "step": 18377 }, { "epoch": 2.86, "learning_rate": 6.681643238757197e-07, "logits/chosen": -2.4840943813323975, "logits/rejected": -2.388958215713501, "logps/chosen": -162.35008239746094, "logps/rejected": -356.93170166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.14166259765625, "rewards/margins": 13.016542434692383, "rewards/rejected": -19.158205032348633, "step": 18378 }, { "epoch": 2.86, "learning_rate": 6.674308833445718e-07, "logits/chosen": -2.9092352390289307, "logits/rejected": -2.260476589202881, "logps/chosen": -378.991943359375, "logps/rejected": -218.33163452148438, "loss": 0.186, "rewards/accuracies": 1.0, "rewards/chosen": -10.925089836120605, "rewards/margins": 1.9911799430847168, "rewards/rejected": -12.916269302368164, "step": 18379 }, { "epoch": 2.86, "learning_rate": 6.66697442813424e-07, "logits/chosen": -2.6529903411865234, "logits/rejected": -2.005908250808716, "logps/chosen": -168.21798706054688, "logps/rejected": -254.303955078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.077077865600586, "rewards/margins": 9.701732635498047, "rewards/rejected": -15.778810501098633, "step": 18380 }, { "epoch": 2.86, "learning_rate": 6.659640022822761e-07, "logits/chosen": -2.214839220046997, "logits/rejected": -2.559699058532715, "logps/chosen": -736.9255981445312, "logps/rejected": -681.5269775390625, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -11.004981994628906, "rewards/margins": 7.929525852203369, "rewards/rejected": -18.93450927734375, "step": 18381 }, { "epoch": 2.86, "learning_rate": 6.652305617511282e-07, "logits/chosen": -2.4480888843536377, "logits/rejected": -2.9242513179779053, "logps/chosen": -262.2750244140625, "logps/rejected": -398.51947021484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.180977821350098, "rewards/margins": 10.144725799560547, "rewards/rejected": -20.325702667236328, "step": 18382 }, { "epoch": 2.86, "learning_rate": 6.644971212199803e-07, "logits/chosen": -2.494584798812866, "logits/rejected": -1.684430718421936, "logps/chosen": -250.79776000976562, "logps/rejected": -418.845703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.083706855773926, "rewards/margins": 9.00419807434082, "rewards/rejected": -15.087904930114746, "step": 18383 }, { "epoch": 2.86, "learning_rate": 6.637636806888325e-07, "logits/chosen": -1.4086697101593018, "logits/rejected": -2.8445534706115723, "logps/chosen": -254.1815948486328, "logps/rejected": -601.6477661132812, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.902383804321289, "rewards/margins": 9.444867134094238, "rewards/rejected": -17.34724998474121, "step": 18384 }, { "epoch": 2.86, "learning_rate": 6.630302401576846e-07, "logits/chosen": -2.6913886070251465, "logits/rejected": -2.8608031272888184, "logps/chosen": -266.7304382324219, "logps/rejected": -546.7486572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.247137069702148, "rewards/margins": 13.980804443359375, "rewards/rejected": -22.227941513061523, "step": 18385 }, { "epoch": 2.86, "learning_rate": 6.622967996265367e-07, "logits/chosen": -2.6265225410461426, "logits/rejected": -2.6860296726226807, "logps/chosen": -154.17398071289062, "logps/rejected": -262.8555603027344, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.34083080291748, "rewards/margins": 8.398981094360352, "rewards/rejected": -16.739810943603516, "step": 18386 }, { "epoch": 2.86, "learning_rate": 6.615633590953887e-07, "logits/chosen": -2.496737003326416, "logits/rejected": -2.0143117904663086, "logps/chosen": -710.2471923828125, "logps/rejected": -625.3131103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.122457504272461, "rewards/margins": 12.53669548034668, "rewards/rejected": -19.65915298461914, "step": 18387 }, { "epoch": 2.86, "learning_rate": 6.608299185642409e-07, "logits/chosen": -2.58779239654541, "logits/rejected": -1.8765164613723755, "logps/chosen": -271.1156921386719, "logps/rejected": -251.7331085205078, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -10.275390625, "rewards/margins": 7.299704074859619, "rewards/rejected": -17.575096130371094, "step": 18388 }, { "epoch": 2.86, "learning_rate": 6.60096478033093e-07, "logits/chosen": -2.5339250564575195, "logits/rejected": -2.2027788162231445, "logps/chosen": -191.81869506835938, "logps/rejected": -357.57098388671875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.8547492027282715, "rewards/margins": 10.994245529174805, "rewards/rejected": -16.848995208740234, "step": 18389 }, { "epoch": 2.86, "learning_rate": 6.593630375019451e-07, "logits/chosen": -2.5458972454071045, "logits/rejected": -2.683046579360962, "logps/chosen": -141.00448608398438, "logps/rejected": -283.32958984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.085875511169434, "rewards/margins": 9.220993041992188, "rewards/rejected": -17.306869506835938, "step": 18390 }, { "epoch": 2.86, "learning_rate": 6.586295969707972e-07, "logits/chosen": -2.271904706954956, "logits/rejected": -2.5352447032928467, "logps/chosen": -331.47503662109375, "logps/rejected": -379.63201904296875, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -8.839351654052734, "rewards/margins": 6.54312801361084, "rewards/rejected": -15.382479667663574, "step": 18391 }, { "epoch": 2.86, "learning_rate": 6.578961564396493e-07, "logits/chosen": -2.6226038932800293, "logits/rejected": -2.9551193714141846, "logps/chosen": -267.5768737792969, "logps/rejected": -398.8782958984375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -11.386638641357422, "rewards/margins": 7.796975135803223, "rewards/rejected": -19.183612823486328, "step": 18392 }, { "epoch": 2.86, "learning_rate": 6.571627159085015e-07, "logits/chosen": -1.9269336462020874, "logits/rejected": -2.883908987045288, "logps/chosen": -311.7524108886719, "logps/rejected": -533.1632080078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.747945785522461, "rewards/margins": 9.181900978088379, "rewards/rejected": -15.92984676361084, "step": 18393 }, { "epoch": 2.86, "learning_rate": 6.564292753773536e-07, "logits/chosen": -2.274038076400757, "logits/rejected": -2.178536891937256, "logps/chosen": -428.17022705078125, "logps/rejected": -426.237060546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.556468963623047, "rewards/margins": 8.34054946899414, "rewards/rejected": -15.897018432617188, "step": 18394 }, { "epoch": 2.86, "learning_rate": 6.556958348462058e-07, "logits/chosen": -2.4315128326416016, "logits/rejected": -2.9678633213043213, "logps/chosen": -538.4930419921875, "logps/rejected": -593.1519165039062, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -7.474334716796875, "rewards/margins": 6.307943820953369, "rewards/rejected": -13.782278060913086, "step": 18395 }, { "epoch": 2.86, "learning_rate": 6.549623943150578e-07, "logits/chosen": -2.4481029510498047, "logits/rejected": -2.383122444152832, "logps/chosen": -173.53811645507812, "logps/rejected": -295.03350830078125, "loss": 0.4539, "rewards/accuracies": 0.5, "rewards/chosen": -10.399270057678223, "rewards/margins": 8.241387367248535, "rewards/rejected": -18.640657424926758, "step": 18396 }, { "epoch": 2.86, "learning_rate": 6.5422895378391e-07, "logits/chosen": -2.5892598628997803, "logits/rejected": -1.696786642074585, "logps/chosen": -225.23724365234375, "logps/rejected": -185.72824096679688, "loss": 0.0926, "rewards/accuracies": 1.0, "rewards/chosen": -6.122251510620117, "rewards/margins": 4.789907932281494, "rewards/rejected": -10.912158966064453, "step": 18397 }, { "epoch": 2.86, "learning_rate": 6.53495513252762e-07, "logits/chosen": -0.6994962692260742, "logits/rejected": -1.2860982418060303, "logps/chosen": -539.0242919921875, "logps/rejected": -734.00439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -13.808557510375977, "rewards/margins": 14.996567726135254, "rewards/rejected": -28.805126190185547, "step": 18398 }, { "epoch": 2.86, "learning_rate": 6.527620727216142e-07, "logits/chosen": -2.808366298675537, "logits/rejected": -2.4878528118133545, "logps/chosen": -803.4788818359375, "logps/rejected": -504.1891174316406, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -5.3419647216796875, "rewards/margins": 9.45399284362793, "rewards/rejected": -14.795957565307617, "step": 18399 }, { "epoch": 2.86, "learning_rate": 6.520286321904663e-07, "logits/chosen": -2.8240230083465576, "logits/rejected": -2.7107186317443848, "logps/chosen": -235.03147888183594, "logps/rejected": -347.8367919921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -8.83894157409668, "rewards/margins": 10.70831298828125, "rewards/rejected": -19.547256469726562, "step": 18400 }, { "epoch": 2.86, "learning_rate": 6.512951916593184e-07, "logits/chosen": -1.1198508739471436, "logits/rejected": -1.788763403892517, "logps/chosen": -264.18695068359375, "logps/rejected": -417.00360107421875, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -10.859983444213867, "rewards/margins": 5.785058498382568, "rewards/rejected": -16.645042419433594, "step": 18401 }, { "epoch": 2.86, "learning_rate": 6.505617511281705e-07, "logits/chosen": -2.100752592086792, "logits/rejected": -2.9840636253356934, "logps/chosen": -384.8763427734375, "logps/rejected": -627.2335815429688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.452659606933594, "rewards/margins": 8.782879829406738, "rewards/rejected": -17.235538482666016, "step": 18402 }, { "epoch": 2.86, "learning_rate": 6.498283105970227e-07, "logits/chosen": -1.3267631530761719, "logits/rejected": -2.5619492530822754, "logps/chosen": -140.92330932617188, "logps/rejected": -558.2138671875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.650575637817383, "rewards/margins": 12.744745254516602, "rewards/rejected": -20.395320892333984, "step": 18403 }, { "epoch": 2.86, "learning_rate": 6.490948700658748e-07, "logits/chosen": -1.7039852142333984, "logits/rejected": -2.4202237129211426, "logps/chosen": -134.4945831298828, "logps/rejected": -346.2618408203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.863572120666504, "rewards/margins": 8.658825874328613, "rewards/rejected": -14.522397994995117, "step": 18404 }, { "epoch": 2.86, "learning_rate": 6.48361429534727e-07, "logits/chosen": -1.41460382938385, "logits/rejected": -2.7300803661346436, "logps/chosen": -111.1986083984375, "logps/rejected": -528.0924072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.517546653747559, "rewards/margins": 13.870129585266113, "rewards/rejected": -23.387676239013672, "step": 18405 }, { "epoch": 2.86, "learning_rate": 6.47627989003579e-07, "logits/chosen": -2.4002439975738525, "logits/rejected": -2.1142587661743164, "logps/chosen": -225.93597412109375, "logps/rejected": -324.7578430175781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.713430404663086, "rewards/margins": 13.309261322021484, "rewards/rejected": -18.02269172668457, "step": 18406 }, { "epoch": 2.86, "learning_rate": 6.468945484724312e-07, "logits/chosen": -2.466611385345459, "logits/rejected": -2.6020708084106445, "logps/chosen": -370.68487548828125, "logps/rejected": -478.90252685546875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.319741249084473, "rewards/margins": 9.811359405517578, "rewards/rejected": -16.131099700927734, "step": 18407 }, { "epoch": 2.86, "learning_rate": 6.461611079412833e-07, "logits/chosen": -0.6544032096862793, "logits/rejected": -2.523939371109009, "logps/chosen": -119.57939147949219, "logps/rejected": -648.4759521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.34621524810791, "rewards/margins": 16.96082305908203, "rewards/rejected": -24.307039260864258, "step": 18408 }, { "epoch": 2.86, "learning_rate": 6.454276674101355e-07, "logits/chosen": -2.9570722579956055, "logits/rejected": -2.4775989055633545, "logps/chosen": -221.19161987304688, "logps/rejected": -272.7890930175781, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.879225730895996, "rewards/margins": 10.68912410736084, "rewards/rejected": -16.568349838256836, "step": 18409 }, { "epoch": 2.86, "learning_rate": 6.446942268789875e-07, "logits/chosen": -2.7381153106689453, "logits/rejected": -1.6359816789627075, "logps/chosen": -376.7000732421875, "logps/rejected": -272.5782470703125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.418866157531738, "rewards/margins": 7.81347131729126, "rewards/rejected": -15.232337951660156, "step": 18410 }, { "epoch": 2.86, "learning_rate": 6.439607863478396e-07, "logits/chosen": -1.3347688913345337, "logits/rejected": -2.054137945175171, "logps/chosen": -305.5333557128906, "logps/rejected": -468.26629638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.708256721496582, "rewards/margins": 11.960329055786133, "rewards/rejected": -20.66858673095703, "step": 18411 }, { "epoch": 2.86, "learning_rate": 6.432273458166917e-07, "logits/chosen": -2.5142345428466797, "logits/rejected": -2.098660945892334, "logps/chosen": -152.25115966796875, "logps/rejected": -329.14349365234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.628782272338867, "rewards/margins": 11.945819854736328, "rewards/rejected": -16.574602127075195, "step": 18412 }, { "epoch": 2.86, "learning_rate": 6.424939052855438e-07, "logits/chosen": -2.299093723297119, "logits/rejected": -2.2071423530578613, "logps/chosen": -174.24737548828125, "logps/rejected": -327.8929443359375, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -8.07274055480957, "rewards/margins": 9.955778121948242, "rewards/rejected": -18.028518676757812, "step": 18413 }, { "epoch": 2.86, "learning_rate": 6.41760464754396e-07, "logits/chosen": -2.7309796810150146, "logits/rejected": -2.5487730503082275, "logps/chosen": -216.33639526367188, "logps/rejected": -188.88082885742188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -2.8068504333496094, "rewards/margins": 9.339385032653809, "rewards/rejected": -12.146235466003418, "step": 18414 }, { "epoch": 2.86, "learning_rate": 6.410270242232481e-07, "logits/chosen": -2.0605521202087402, "logits/rejected": -2.6120028495788574, "logps/chosen": -461.184814453125, "logps/rejected": -561.117431640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.446819305419922, "rewards/margins": 10.22347640991211, "rewards/rejected": -16.67029571533203, "step": 18415 }, { "epoch": 2.86, "learning_rate": 6.402935836921002e-07, "logits/chosen": -2.2581942081451416, "logits/rejected": -2.944366216659546, "logps/chosen": -95.15373229980469, "logps/rejected": -258.1361999511719, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -5.92593240737915, "rewards/margins": 6.823707580566406, "rewards/rejected": -12.749639511108398, "step": 18416 }, { "epoch": 2.86, "learning_rate": 6.395601431609523e-07, "logits/chosen": -2.5957820415496826, "logits/rejected": -2.7209620475769043, "logps/chosen": -156.62600708007812, "logps/rejected": -428.42730712890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.138679027557373, "rewards/margins": 8.25515079498291, "rewards/rejected": -12.393829345703125, "step": 18417 }, { "epoch": 2.86, "learning_rate": 6.388267026298045e-07, "logits/chosen": -2.263007164001465, "logits/rejected": -2.749586343765259, "logps/chosen": -149.71917724609375, "logps/rejected": -274.76239013671875, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -6.7088823318481445, "rewards/margins": 8.5904541015625, "rewards/rejected": -15.299335479736328, "step": 18418 }, { "epoch": 2.86, "learning_rate": 6.380932620986566e-07, "logits/chosen": -2.1326379776000977, "logits/rejected": -2.663607597351074, "logps/chosen": -335.41473388671875, "logps/rejected": -628.056396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.562492370605469, "rewards/margins": 12.187932014465332, "rewards/rejected": -17.750423431396484, "step": 18419 }, { "epoch": 2.86, "learning_rate": 6.373598215675088e-07, "logits/chosen": -1.9615609645843506, "logits/rejected": -2.6052210330963135, "logps/chosen": -242.13986206054688, "logps/rejected": -501.476806640625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.225858688354492, "rewards/margins": 11.12751579284668, "rewards/rejected": -19.353374481201172, "step": 18420 }, { "epoch": 2.86, "learning_rate": 6.366263810363607e-07, "logits/chosen": -2.1491963863372803, "logits/rejected": -2.309946060180664, "logps/chosen": -208.04849243164062, "logps/rejected": -326.23883056640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.717824935913086, "rewards/margins": 9.928592681884766, "rewards/rejected": -15.646417617797852, "step": 18421 }, { "epoch": 2.87, "learning_rate": 6.358929405052129e-07, "logits/chosen": -2.4809083938598633, "logits/rejected": -2.4292140007019043, "logps/chosen": -255.65475463867188, "logps/rejected": -597.1106567382812, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.5348968505859375, "rewards/margins": 10.296468734741211, "rewards/rejected": -14.831365585327148, "step": 18422 }, { "epoch": 2.87, "learning_rate": 6.35159499974065e-07, "logits/chosen": -3.10546612739563, "logits/rejected": -3.2086124420166016, "logps/chosen": -77.0394287109375, "logps/rejected": -276.1135559082031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.853137016296387, "rewards/margins": 12.22041130065918, "rewards/rejected": -18.07354736328125, "step": 18423 }, { "epoch": 2.87, "learning_rate": 6.344260594429172e-07, "logits/chosen": -1.8172860145568848, "logits/rejected": -2.3196561336517334, "logps/chosen": -216.1682586669922, "logps/rejected": -431.7179870605469, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.22972297668457, "rewards/margins": 10.282625198364258, "rewards/rejected": -19.512348175048828, "step": 18424 }, { "epoch": 2.87, "learning_rate": 6.336926189117693e-07, "logits/chosen": -2.6689789295196533, "logits/rejected": -2.425137519836426, "logps/chosen": -149.31263732910156, "logps/rejected": -209.6519317626953, "loss": 1.9212, "rewards/accuracies": 0.5, "rewards/chosen": -9.225592613220215, "rewards/margins": 0.4745628833770752, "rewards/rejected": -9.700155258178711, "step": 18425 }, { "epoch": 2.87, "learning_rate": 6.329591783806214e-07, "logits/chosen": -2.3850789070129395, "logits/rejected": -2.086149215698242, "logps/chosen": -153.63232421875, "logps/rejected": -251.92364501953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.370931625366211, "rewards/margins": 11.570850372314453, "rewards/rejected": -15.941781997680664, "step": 18426 }, { "epoch": 2.87, "learning_rate": 6.322257378494735e-07, "logits/chosen": -2.6511945724487305, "logits/rejected": -2.9576005935668945, "logps/chosen": -541.5162963867188, "logps/rejected": -425.018310546875, "loss": 0.3072, "rewards/accuracies": 1.0, "rewards/chosen": -12.063277244567871, "rewards/margins": 5.280529022216797, "rewards/rejected": -17.343807220458984, "step": 18427 }, { "epoch": 2.87, "learning_rate": 6.314922973183257e-07, "logits/chosen": -1.6946234703063965, "logits/rejected": -2.6122467517852783, "logps/chosen": -282.48809814453125, "logps/rejected": -443.0269775390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.877951622009277, "rewards/margins": 11.861063957214355, "rewards/rejected": -18.739015579223633, "step": 18428 }, { "epoch": 2.87, "learning_rate": 6.307588567871778e-07, "logits/chosen": -2.498133420944214, "logits/rejected": -2.6181485652923584, "logps/chosen": -246.6140594482422, "logps/rejected": -242.4149169921875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -9.582864761352539, "rewards/margins": 6.377405643463135, "rewards/rejected": -15.960270881652832, "step": 18429 }, { "epoch": 2.87, "learning_rate": 6.300254162560299e-07, "logits/chosen": -1.596744418144226, "logits/rejected": -2.3167238235473633, "logps/chosen": -143.3324737548828, "logps/rejected": -449.17181396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.098388671875, "rewards/margins": 17.819568634033203, "rewards/rejected": -24.917957305908203, "step": 18430 }, { "epoch": 2.87, "learning_rate": 6.29291975724882e-07, "logits/chosen": -2.6494944095611572, "logits/rejected": -2.4527587890625, "logps/chosen": -302.9363098144531, "logps/rejected": -393.7130126953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.595223426818848, "rewards/margins": 7.503239631652832, "rewards/rejected": -12.09846305847168, "step": 18431 }, { "epoch": 2.87, "learning_rate": 6.285585351937341e-07, "logits/chosen": -2.696681499481201, "logits/rejected": -1.5710232257843018, "logps/chosen": -242.69468688964844, "logps/rejected": -149.35446166992188, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.575261116027832, "rewards/margins": 6.073940277099609, "rewards/rejected": -12.649200439453125, "step": 18432 }, { "epoch": 2.87, "learning_rate": 6.278250946625863e-07, "logits/chosen": -1.0347719192504883, "logits/rejected": -2.125288724899292, "logps/chosen": -326.0474853515625, "logps/rejected": -705.77880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.82574462890625, "rewards/margins": 14.568133354187012, "rewards/rejected": -20.393878936767578, "step": 18433 }, { "epoch": 2.87, "learning_rate": 6.270916541314383e-07, "logits/chosen": -2.2380552291870117, "logits/rejected": -2.814342975616455, "logps/chosen": -187.54046630859375, "logps/rejected": -449.4517822265625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.0240559577941895, "rewards/margins": 7.090319633483887, "rewards/rejected": -12.114376068115234, "step": 18434 }, { "epoch": 2.87, "learning_rate": 6.263582136002905e-07, "logits/chosen": -2.993018627166748, "logits/rejected": -2.375290632247925, "logps/chosen": -258.83258056640625, "logps/rejected": -188.79727172851562, "loss": 1.8739, "rewards/accuracies": 0.5, "rewards/chosen": -12.313085556030273, "rewards/margins": 2.97359561920166, "rewards/rejected": -15.286681175231934, "step": 18435 }, { "epoch": 2.87, "learning_rate": 6.256247730691425e-07, "logits/chosen": -2.433898687362671, "logits/rejected": -2.5133001804351807, "logps/chosen": -218.59811401367188, "logps/rejected": -359.79974365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.118689060211182, "rewards/margins": 10.062945365905762, "rewards/rejected": -16.1816349029541, "step": 18436 }, { "epoch": 2.87, "learning_rate": 6.248913325379947e-07, "logits/chosen": -1.7127331495285034, "logits/rejected": -2.7045345306396484, "logps/chosen": -149.70132446289062, "logps/rejected": -465.8165283203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.927938461303711, "rewards/margins": 10.022418975830078, "rewards/rejected": -19.95035743713379, "step": 18437 }, { "epoch": 2.87, "learning_rate": 6.241578920068468e-07, "logits/chosen": -1.2310080528259277, "logits/rejected": -1.4330523014068604, "logps/chosen": -259.8299560546875, "logps/rejected": -364.44403076171875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.888317108154297, "rewards/margins": 8.65388298034668, "rewards/rejected": -15.542200088500977, "step": 18438 }, { "epoch": 2.87, "learning_rate": 6.23424451475699e-07, "logits/chosen": -2.0264248847961426, "logits/rejected": -2.5058350563049316, "logps/chosen": -315.83575439453125, "logps/rejected": -568.5327758789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.549449920654297, "rewards/margins": 14.167745590209961, "rewards/rejected": -23.717195510864258, "step": 18439 }, { "epoch": 2.87, "learning_rate": 6.226910109445511e-07, "logits/chosen": -1.937284231185913, "logits/rejected": -2.8252413272857666, "logps/chosen": -447.7906494140625, "logps/rejected": -610.685546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.805896759033203, "rewards/margins": 12.750289916992188, "rewards/rejected": -20.55618667602539, "step": 18440 }, { "epoch": 2.87, "learning_rate": 6.219575704134032e-07, "logits/chosen": -2.697598695755005, "logits/rejected": -1.9297829866409302, "logps/chosen": -199.9794921875, "logps/rejected": -387.4364318847656, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.043399810791016, "rewards/margins": 10.73147964477539, "rewards/rejected": -15.774879455566406, "step": 18441 }, { "epoch": 2.87, "learning_rate": 6.212241298822553e-07, "logits/chosen": -2.4478652477264404, "logits/rejected": -2.6333506107330322, "logps/chosen": -294.58416748046875, "logps/rejected": -401.9571228027344, "loss": 0.1724, "rewards/accuracies": 1.0, "rewards/chosen": -7.914978981018066, "rewards/margins": 7.196352481842041, "rewards/rejected": -15.111331939697266, "step": 18442 }, { "epoch": 2.87, "learning_rate": 6.204906893511075e-07, "logits/chosen": -2.4182705879211426, "logits/rejected": -1.316293716430664, "logps/chosen": -246.900390625, "logps/rejected": -295.0958251953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.194226264953613, "rewards/margins": 9.394096374511719, "rewards/rejected": -16.588321685791016, "step": 18443 }, { "epoch": 2.87, "learning_rate": 6.197572488199596e-07, "logits/chosen": -2.124178409576416, "logits/rejected": -2.990114688873291, "logps/chosen": -90.08901977539062, "logps/rejected": -369.2099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.297310829162598, "rewards/margins": 10.155878067016602, "rewards/rejected": -17.453189849853516, "step": 18444 }, { "epoch": 2.87, "learning_rate": 6.190238082888118e-07, "logits/chosen": -1.991920828819275, "logits/rejected": -2.5651049613952637, "logps/chosen": -207.41883850097656, "logps/rejected": -356.4940185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.6608686447143555, "rewards/margins": 10.847803115844727, "rewards/rejected": -15.508671760559082, "step": 18445 }, { "epoch": 2.87, "learning_rate": 6.182903677576637e-07, "logits/chosen": -2.6501286029815674, "logits/rejected": -1.9981341361999512, "logps/chosen": -650.459716796875, "logps/rejected": -493.4543151855469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.577733039855957, "rewards/margins": 10.97391414642334, "rewards/rejected": -21.551647186279297, "step": 18446 }, { "epoch": 2.87, "learning_rate": 6.175569272265159e-07, "logits/chosen": -2.1763715744018555, "logits/rejected": -2.7006964683532715, "logps/chosen": -617.9739990234375, "logps/rejected": -729.427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9575486183166504, "rewards/margins": 11.926182746887207, "rewards/rejected": -15.883731842041016, "step": 18447 }, { "epoch": 2.87, "learning_rate": 6.16823486695368e-07, "logits/chosen": -2.7947633266448975, "logits/rejected": -2.7382395267486572, "logps/chosen": -872.5467529296875, "logps/rejected": -618.291259765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.074579238891602, "rewards/margins": 8.658559799194336, "rewards/rejected": -16.733139038085938, "step": 18448 }, { "epoch": 2.87, "learning_rate": 6.160900461642202e-07, "logits/chosen": -2.079026699066162, "logits/rejected": -2.7107455730438232, "logps/chosen": -157.6575927734375, "logps/rejected": -343.50921630859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.680068016052246, "rewards/margins": 7.288238525390625, "rewards/rejected": -11.968306541442871, "step": 18449 }, { "epoch": 2.87, "learning_rate": 6.153566056330723e-07, "logits/chosen": -2.733849048614502, "logits/rejected": -2.1204185485839844, "logps/chosen": -386.5208740234375, "logps/rejected": -302.4029541015625, "loss": 0.1043, "rewards/accuracies": 1.0, "rewards/chosen": -7.93404483795166, "rewards/margins": 7.28346061706543, "rewards/rejected": -15.21750545501709, "step": 18450 }, { "epoch": 2.87, "learning_rate": 6.146231651019244e-07, "logits/chosen": -2.920189142227173, "logits/rejected": -2.8385086059570312, "logps/chosen": -196.0627899169922, "logps/rejected": -271.8333435058594, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": -8.971002578735352, "rewards/margins": 5.104378700256348, "rewards/rejected": -14.075380325317383, "step": 18451 }, { "epoch": 2.87, "learning_rate": 6.138897245707765e-07, "logits/chosen": -2.3804996013641357, "logits/rejected": -2.4105424880981445, "logps/chosen": -331.2915954589844, "logps/rejected": -323.6034240722656, "loss": 0.0513, "rewards/accuracies": 1.0, "rewards/chosen": -8.111371994018555, "rewards/margins": 7.338101387023926, "rewards/rejected": -15.44947338104248, "step": 18452 }, { "epoch": 2.87, "learning_rate": 6.131562840396286e-07, "logits/chosen": -1.4430698156356812, "logits/rejected": -2.287205696105957, "logps/chosen": -205.0113525390625, "logps/rejected": -400.6845703125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.953611373901367, "rewards/margins": 10.923205375671387, "rewards/rejected": -17.876815795898438, "step": 18453 }, { "epoch": 2.87, "learning_rate": 6.124228435084808e-07, "logits/chosen": -1.4199841022491455, "logits/rejected": -2.5898046493530273, "logps/chosen": -157.31414794921875, "logps/rejected": -569.458251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.095953941345215, "rewards/margins": 15.679580688476562, "rewards/rejected": -23.77553367614746, "step": 18454 }, { "epoch": 2.87, "learning_rate": 6.116894029773328e-07, "logits/chosen": -2.900006055831909, "logits/rejected": -1.8583271503448486, "logps/chosen": -286.797119140625, "logps/rejected": -211.3935546875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.2207183837890625, "rewards/margins": 10.67155647277832, "rewards/rejected": -15.892274856567383, "step": 18455 }, { "epoch": 2.87, "learning_rate": 6.10955962446185e-07, "logits/chosen": -1.7452300786972046, "logits/rejected": -2.4686546325683594, "logps/chosen": -169.29559326171875, "logps/rejected": -348.31866455078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.514146327972412, "rewards/margins": 9.810147285461426, "rewards/rejected": -17.32429313659668, "step": 18456 }, { "epoch": 2.87, "learning_rate": 6.10222521915037e-07, "logits/chosen": -2.6305763721466064, "logits/rejected": -1.3603999614715576, "logps/chosen": -286.5706787109375, "logps/rejected": -237.50839233398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.167806625366211, "rewards/margins": 10.721437454223633, "rewards/rejected": -17.889244079589844, "step": 18457 }, { "epoch": 2.87, "learning_rate": 6.094890813838892e-07, "logits/chosen": -2.2759389877319336, "logits/rejected": -1.5581140518188477, "logps/chosen": -173.08395385742188, "logps/rejected": -229.7290496826172, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.743417263031006, "rewards/margins": 7.6425580978393555, "rewards/rejected": -15.385974884033203, "step": 18458 }, { "epoch": 2.87, "learning_rate": 6.087556408527413e-07, "logits/chosen": -2.1611108779907227, "logits/rejected": -2.7822225093841553, "logps/chosen": -149.44363403320312, "logps/rejected": -352.6614990234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.897096633911133, "rewards/margins": 9.079610824584961, "rewards/rejected": -12.976707458496094, "step": 18459 }, { "epoch": 2.87, "learning_rate": 6.080222003215934e-07, "logits/chosen": -2.628657579421997, "logits/rejected": -2.2013838291168213, "logps/chosen": -628.486083984375, "logps/rejected": -561.5861206054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.894838809967041, "rewards/margins": 11.268978118896484, "rewards/rejected": -19.163818359375, "step": 18460 }, { "epoch": 2.87, "learning_rate": 6.072887597904455e-07, "logits/chosen": -2.6922175884246826, "logits/rejected": -2.7403860092163086, "logps/chosen": -215.77159118652344, "logps/rejected": -581.8529663085938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.098010063171387, "rewards/margins": 13.939213752746582, "rewards/rejected": -20.03722381591797, "step": 18461 }, { "epoch": 2.87, "learning_rate": 6.065553192592977e-07, "logits/chosen": -2.733055353164673, "logits/rejected": -2.780407428741455, "logps/chosen": -256.32427978515625, "logps/rejected": -310.7288818359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.296205997467041, "rewards/margins": 8.586800575256348, "rewards/rejected": -15.883007049560547, "step": 18462 }, { "epoch": 2.87, "learning_rate": 6.058218787281498e-07, "logits/chosen": -1.4925628900527954, "logits/rejected": -2.28173828125, "logps/chosen": -143.37002563476562, "logps/rejected": -276.4189147949219, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -7.591800689697266, "rewards/margins": 6.151474952697754, "rewards/rejected": -13.74327564239502, "step": 18463 }, { "epoch": 2.87, "learning_rate": 6.05088438197002e-07, "logits/chosen": -2.700953245162964, "logits/rejected": -2.7893359661102295, "logps/chosen": -306.2724304199219, "logps/rejected": -409.54595947265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.883437156677246, "rewards/margins": 10.507791519165039, "rewards/rejected": -19.39122772216797, "step": 18464 }, { "epoch": 2.87, "learning_rate": 6.04354997665854e-07, "logits/chosen": -2.3633291721343994, "logits/rejected": -3.0377883911132812, "logps/chosen": -165.54129028320312, "logps/rejected": -409.21417236328125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -6.6667070388793945, "rewards/margins": 6.637258529663086, "rewards/rejected": -13.303964614868164, "step": 18465 }, { "epoch": 2.87, "learning_rate": 6.036215571347062e-07, "logits/chosen": -2.6634092330932617, "logits/rejected": -2.8954217433929443, "logps/chosen": -190.879150390625, "logps/rejected": -400.85888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.7313971519470215, "rewards/margins": 11.5695161819458, "rewards/rejected": -17.300914764404297, "step": 18466 }, { "epoch": 2.87, "learning_rate": 6.028881166035583e-07, "logits/chosen": -1.5920764207839966, "logits/rejected": -2.5150723457336426, "logps/chosen": -158.55975341796875, "logps/rejected": -449.5027770996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3133745193481445, "rewards/margins": 12.369511604309082, "rewards/rejected": -19.682886123657227, "step": 18467 }, { "epoch": 2.87, "learning_rate": 6.021546760724105e-07, "logits/chosen": -2.8522441387176514, "logits/rejected": -2.4650659561157227, "logps/chosen": -334.79119873046875, "logps/rejected": -347.55767822265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -8.825533866882324, "rewards/margins": 7.905555725097656, "rewards/rejected": -16.731090545654297, "step": 18468 }, { "epoch": 2.87, "learning_rate": 6.014212355412626e-07, "logits/chosen": -2.891038656234741, "logits/rejected": -2.6509854793548584, "logps/chosen": -213.29653930664062, "logps/rejected": -239.94544982910156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.981427192687988, "rewards/margins": 8.654413223266602, "rewards/rejected": -16.635841369628906, "step": 18469 }, { "epoch": 2.87, "learning_rate": 6.006877950101146e-07, "logits/chosen": -1.4974348545074463, "logits/rejected": -1.4908097982406616, "logps/chosen": -104.3724594116211, "logps/rejected": -296.7410888671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.937658309936523, "rewards/margins": 12.203594207763672, "rewards/rejected": -20.141250610351562, "step": 18470 }, { "epoch": 2.87, "learning_rate": 5.999543544789667e-07, "logits/chosen": -1.675315022468567, "logits/rejected": -2.432678699493408, "logps/chosen": -116.62995147705078, "logps/rejected": -297.0801696777344, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.369388580322266, "rewards/margins": 6.914134979248047, "rewards/rejected": -15.283523559570312, "step": 18471 }, { "epoch": 2.87, "learning_rate": 5.992209139478188e-07, "logits/chosen": -2.2225182056427, "logits/rejected": -2.6276257038116455, "logps/chosen": -174.77243041992188, "logps/rejected": -477.33062744140625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -9.910445213317871, "rewards/margins": 9.728897094726562, "rewards/rejected": -19.63934326171875, "step": 18472 }, { "epoch": 2.87, "learning_rate": 5.98487473416671e-07, "logits/chosen": -2.952564001083374, "logits/rejected": -1.839054822921753, "logps/chosen": -467.4426574707031, "logps/rejected": -391.87823486328125, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -5.895057678222656, "rewards/margins": 9.532918930053711, "rewards/rejected": -15.427976608276367, "step": 18473 }, { "epoch": 2.87, "learning_rate": 5.977540328855231e-07, "logits/chosen": -2.3575384616851807, "logits/rejected": -2.7766458988189697, "logps/chosen": -313.01605224609375, "logps/rejected": -447.0574951171875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -9.606460571289062, "rewards/margins": 8.038246154785156, "rewards/rejected": -17.64470672607422, "step": 18474 }, { "epoch": 2.87, "learning_rate": 5.970205923543752e-07, "logits/chosen": -2.7186830043792725, "logits/rejected": -2.74784517288208, "logps/chosen": -181.3426513671875, "logps/rejected": -339.1041564941406, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": -7.89255428314209, "rewards/margins": 6.650508880615234, "rewards/rejected": -14.543062210083008, "step": 18475 }, { "epoch": 2.87, "learning_rate": 5.962871518232273e-07, "logits/chosen": -2.364180326461792, "logits/rejected": -1.893646478652954, "logps/chosen": -302.10906982421875, "logps/rejected": -364.8441162109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.736324310302734, "rewards/margins": 9.783559799194336, "rewards/rejected": -15.51988410949707, "step": 18476 }, { "epoch": 2.87, "learning_rate": 5.955537112920795e-07, "logits/chosen": -2.732389211654663, "logits/rejected": -2.5486536026000977, "logps/chosen": -276.4018859863281, "logps/rejected": -380.4906005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.888505935668945, "rewards/margins": 11.19829273223877, "rewards/rejected": -19.0867977142334, "step": 18477 }, { "epoch": 2.87, "learning_rate": 5.948202707609316e-07, "logits/chosen": -2.223773241043091, "logits/rejected": -2.7314233779907227, "logps/chosen": -125.79247283935547, "logps/rejected": -288.9302978515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.917897701263428, "rewards/margins": 7.225663185119629, "rewards/rejected": -15.143560409545898, "step": 18478 }, { "epoch": 2.87, "learning_rate": 5.940868302297838e-07, "logits/chosen": -0.987900972366333, "logits/rejected": -2.0958802700042725, "logps/chosen": -221.59568786621094, "logps/rejected": -429.1112976074219, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.8541951179504395, "rewards/margins": 11.859350204467773, "rewards/rejected": -19.713544845581055, "step": 18479 }, { "epoch": 2.87, "learning_rate": 5.933533896986358e-07, "logits/chosen": -2.280841588973999, "logits/rejected": -3.0417261123657227, "logps/chosen": -129.42947387695312, "logps/rejected": -404.35687255859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -4.3769402503967285, "rewards/margins": 8.359881401062012, "rewards/rejected": -12.736822128295898, "step": 18480 }, { "epoch": 2.87, "learning_rate": 5.92619949167488e-07, "logits/chosen": -2.513370990753174, "logits/rejected": -1.7998454570770264, "logps/chosen": -263.68157958984375, "logps/rejected": -272.5314636230469, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.570978164672852, "rewards/margins": 6.693410873413086, "rewards/rejected": -13.264389038085938, "step": 18481 }, { "epoch": 2.87, "learning_rate": 5.9188650863634e-07, "logits/chosen": -2.5187230110168457, "logits/rejected": -1.9023832082748413, "logps/chosen": -313.2784423828125, "logps/rejected": -533.0325927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.921063423156738, "rewards/margins": 11.509990692138672, "rewards/rejected": -19.431053161621094, "step": 18482 }, { "epoch": 2.87, "learning_rate": 5.911530681051922e-07, "logits/chosen": -1.6025290489196777, "logits/rejected": -2.5509324073791504, "logps/chosen": -170.32086181640625, "logps/rejected": -448.6981506347656, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.549346923828125, "rewards/margins": 10.15397834777832, "rewards/rejected": -17.703325271606445, "step": 18483 }, { "epoch": 2.87, "learning_rate": 5.904196275740443e-07, "logits/chosen": -2.2372348308563232, "logits/rejected": -2.6530351638793945, "logps/chosen": -424.79351806640625, "logps/rejected": -427.55059814453125, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -4.926625728607178, "rewards/margins": 8.29797077178955, "rewards/rejected": -13.224596977233887, "step": 18484 }, { "epoch": 2.87, "learning_rate": 5.896861870428964e-07, "logits/chosen": -2.208691120147705, "logits/rejected": -2.562821865081787, "logps/chosen": -392.85137939453125, "logps/rejected": -516.0624389648438, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -8.892862319946289, "rewards/margins": 9.523324966430664, "rewards/rejected": -18.416187286376953, "step": 18485 }, { "epoch": 2.87, "learning_rate": 5.889527465117485e-07, "logits/chosen": -2.631410598754883, "logits/rejected": -1.9785202741622925, "logps/chosen": -400.8017883300781, "logps/rejected": -326.75189208984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.842859268188477, "rewards/margins": 14.102856636047363, "rewards/rejected": -20.945716857910156, "step": 18486 }, { "epoch": 2.88, "learning_rate": 5.882193059806007e-07, "logits/chosen": -1.8303821086883545, "logits/rejected": -2.519412040710449, "logps/chosen": -116.35002136230469, "logps/rejected": -250.89926147460938, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.130766868591309, "rewards/margins": 7.519007682800293, "rewards/rejected": -15.649774551391602, "step": 18487 }, { "epoch": 2.88, "learning_rate": 5.874858654494528e-07, "logits/chosen": -0.6877333521842957, "logits/rejected": -2.2213220596313477, "logps/chosen": -138.99073791503906, "logps/rejected": -395.3137512207031, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -8.473164558410645, "rewards/margins": 6.965702056884766, "rewards/rejected": -15.438867568969727, "step": 18488 }, { "epoch": 2.88, "learning_rate": 5.86752424918305e-07, "logits/chosen": -2.703730821609497, "logits/rejected": -2.278935432434082, "logps/chosen": -176.37173461914062, "logps/rejected": -223.446044921875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -5.552393436431885, "rewards/margins": 6.378533363342285, "rewards/rejected": -11.930927276611328, "step": 18489 }, { "epoch": 2.88, "learning_rate": 5.86018984387157e-07, "logits/chosen": -1.843183994293213, "logits/rejected": -2.4009058475494385, "logps/chosen": -224.17347717285156, "logps/rejected": -439.89764404296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.686391353607178, "rewards/margins": 11.441832542419434, "rewards/rejected": -18.128223419189453, "step": 18490 }, { "epoch": 2.88, "learning_rate": 5.852855438560092e-07, "logits/chosen": -2.945338249206543, "logits/rejected": -2.214388370513916, "logps/chosen": -1002.5460205078125, "logps/rejected": -631.9065551757812, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -7.696856498718262, "rewards/margins": 6.955230236053467, "rewards/rejected": -14.65208625793457, "step": 18491 }, { "epoch": 2.88, "learning_rate": 5.845521033248613e-07, "logits/chosen": -2.3773796558380127, "logits/rejected": -2.897745370864868, "logps/chosen": -168.78604125976562, "logps/rejected": -329.74224853515625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -7.577187538146973, "rewards/margins": 10.351070404052734, "rewards/rejected": -17.92825698852539, "step": 18492 }, { "epoch": 2.88, "learning_rate": 5.838186627937133e-07, "logits/chosen": -2.532353162765503, "logits/rejected": -2.585676670074463, "logps/chosen": -180.47802734375, "logps/rejected": -386.9462890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.631743431091309, "rewards/margins": 8.60866641998291, "rewards/rejected": -16.24040985107422, "step": 18493 }, { "epoch": 2.88, "learning_rate": 5.830852222625655e-07, "logits/chosen": -2.2038397789001465, "logits/rejected": -2.737027645111084, "logps/chosen": -236.26556396484375, "logps/rejected": -602.051025390625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -11.097234725952148, "rewards/margins": 10.195585250854492, "rewards/rejected": -21.29281997680664, "step": 18494 }, { "epoch": 2.88, "learning_rate": 5.823517817314175e-07, "logits/chosen": -2.051866054534912, "logits/rejected": -2.6039395332336426, "logps/chosen": -190.7970733642578, "logps/rejected": -498.3466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.370257377624512, "rewards/margins": 11.328643798828125, "rewards/rejected": -19.698902130126953, "step": 18495 }, { "epoch": 2.88, "learning_rate": 5.816183412002697e-07, "logits/chosen": -2.6916613578796387, "logits/rejected": -2.2656214237213135, "logps/chosen": -289.8982238769531, "logps/rejected": -276.902099609375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.524508476257324, "rewards/margins": 8.544775009155273, "rewards/rejected": -16.06928253173828, "step": 18496 }, { "epoch": 2.88, "learning_rate": 5.808849006691218e-07, "logits/chosen": -2.6622698307037354, "logits/rejected": -2.760507822036743, "logps/chosen": -242.22088623046875, "logps/rejected": -318.65106201171875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.479057312011719, "rewards/margins": 9.209760665893555, "rewards/rejected": -15.688817977905273, "step": 18497 }, { "epoch": 2.88, "learning_rate": 5.80151460137974e-07, "logits/chosen": -2.658195972442627, "logits/rejected": -2.9109270572662354, "logps/chosen": -87.17430114746094, "logps/rejected": -304.47491455078125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.905086517333984, "rewards/margins": 7.804364204406738, "rewards/rejected": -14.709450721740723, "step": 18498 }, { "epoch": 2.88, "learning_rate": 5.794180196068261e-07, "logits/chosen": -1.814914345741272, "logits/rejected": -2.599151134490967, "logps/chosen": -135.21176147460938, "logps/rejected": -309.2314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.013990879058838, "rewards/margins": 11.486394882202148, "rewards/rejected": -16.500385284423828, "step": 18499 }, { "epoch": 2.88, "learning_rate": 5.786845790756782e-07, "logits/chosen": -2.5057389736175537, "logits/rejected": -2.3486976623535156, "logps/chosen": -617.7598876953125, "logps/rejected": -657.0926513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.23570442199707, "rewards/margins": 12.523455619812012, "rewards/rejected": -23.759159088134766, "step": 18500 }, { "epoch": 2.88, "learning_rate": 5.779511385445303e-07, "logits/chosen": -1.6171915531158447, "logits/rejected": -1.6544865369796753, "logps/chosen": -228.5218505859375, "logps/rejected": -407.491943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.152660369873047, "rewards/margins": 10.825718879699707, "rewards/rejected": -18.978378295898438, "step": 18501 }, { "epoch": 2.88, "learning_rate": 5.772176980133825e-07, "logits/chosen": -2.413867712020874, "logits/rejected": -2.980285882949829, "logps/chosen": -156.95590209960938, "logps/rejected": -292.3160095214844, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.996396064758301, "rewards/margins": 7.9549713134765625, "rewards/rejected": -14.951367378234863, "step": 18502 }, { "epoch": 2.88, "learning_rate": 5.764842574822346e-07, "logits/chosen": -2.396273374557495, "logits/rejected": -2.5072968006134033, "logps/chosen": -508.6412658691406, "logps/rejected": -608.1644287109375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -8.387619018554688, "rewards/margins": 10.040962219238281, "rewards/rejected": -18.42858123779297, "step": 18503 }, { "epoch": 2.88, "learning_rate": 5.757508169510868e-07, "logits/chosen": -2.9903430938720703, "logits/rejected": -2.659679889678955, "logps/chosen": -695.2459106445312, "logps/rejected": -518.9295654296875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.935765743255615, "rewards/margins": 7.874382972717285, "rewards/rejected": -12.810148239135742, "step": 18504 }, { "epoch": 2.88, "learning_rate": 5.750173764199387e-07, "logits/chosen": -1.9673373699188232, "logits/rejected": -2.5611345767974854, "logps/chosen": -683.4095458984375, "logps/rejected": -770.9205322265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.190144538879395, "rewards/margins": 11.706953048706055, "rewards/rejected": -20.897098541259766, "step": 18505 }, { "epoch": 2.88, "learning_rate": 5.742839358887909e-07, "logits/chosen": -2.2484068870544434, "logits/rejected": -2.418217182159424, "logps/chosen": -302.5169677734375, "logps/rejected": -544.7017822265625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.067416191101074, "rewards/margins": 10.611380577087402, "rewards/rejected": -16.678796768188477, "step": 18506 }, { "epoch": 2.88, "learning_rate": 5.73550495357643e-07, "logits/chosen": -2.5454680919647217, "logits/rejected": -2.7697439193725586, "logps/chosen": -677.65283203125, "logps/rejected": -1099.2913818359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.341330528259277, "rewards/margins": 13.863442420959473, "rewards/rejected": -20.20477294921875, "step": 18507 }, { "epoch": 2.88, "learning_rate": 5.728170548264952e-07, "logits/chosen": -2.1512908935546875, "logits/rejected": -2.882286310195923, "logps/chosen": -241.71240234375, "logps/rejected": -478.61614990234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -9.136622428894043, "rewards/margins": 7.954887390136719, "rewards/rejected": -17.091510772705078, "step": 18508 }, { "epoch": 2.88, "learning_rate": 5.720836142953473e-07, "logits/chosen": -2.1605234146118164, "logits/rejected": -2.294029474258423, "logps/chosen": -187.6673583984375, "logps/rejected": -360.457275390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.940969944000244, "rewards/margins": 9.779338836669922, "rewards/rejected": -16.72031021118164, "step": 18509 }, { "epoch": 2.88, "learning_rate": 5.713501737641994e-07, "logits/chosen": -2.7185487747192383, "logits/rejected": -2.8486571311950684, "logps/chosen": -242.73388671875, "logps/rejected": -425.3808288574219, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -3.3874940872192383, "rewards/margins": 8.2124605178833, "rewards/rejected": -11.599954605102539, "step": 18510 }, { "epoch": 2.88, "learning_rate": 5.706167332330515e-07, "logits/chosen": -2.4592020511627197, "logits/rejected": -1.6244617700576782, "logps/chosen": -191.5726318359375, "logps/rejected": -172.51129150390625, "loss": 0.6598, "rewards/accuracies": 0.5, "rewards/chosen": -8.940692901611328, "rewards/margins": 1.85194993019104, "rewards/rejected": -10.792642593383789, "step": 18511 }, { "epoch": 2.88, "learning_rate": 5.698832927019036e-07, "logits/chosen": -2.465404510498047, "logits/rejected": -2.8922488689422607, "logps/chosen": -791.152099609375, "logps/rejected": -644.3994140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.490499496459961, "rewards/margins": 9.766618728637695, "rewards/rejected": -20.257118225097656, "step": 18512 }, { "epoch": 2.88, "learning_rate": 5.691498521707558e-07, "logits/chosen": -2.8183393478393555, "logits/rejected": -2.9381773471832275, "logps/chosen": -106.26732635498047, "logps/rejected": -206.02438354492188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.271007537841797, "rewards/margins": 8.238306999206543, "rewards/rejected": -14.50931453704834, "step": 18513 }, { "epoch": 2.88, "learning_rate": 5.684164116396079e-07, "logits/chosen": -2.4076929092407227, "logits/rejected": -2.826303243637085, "logps/chosen": -366.84967041015625, "logps/rejected": -537.2023315429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.000162124633789, "rewards/margins": 13.108288764953613, "rewards/rejected": -24.10845184326172, "step": 18514 }, { "epoch": 2.88, "learning_rate": 5.6768297110846e-07, "logits/chosen": -2.517739772796631, "logits/rejected": -2.55039381980896, "logps/chosen": -146.83050537109375, "logps/rejected": -395.86737060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.378115653991699, "rewards/margins": 14.157675743103027, "rewards/rejected": -18.535791397094727, "step": 18515 }, { "epoch": 2.88, "learning_rate": 5.669495305773121e-07, "logits/chosen": -2.415416717529297, "logits/rejected": -2.434126853942871, "logps/chosen": -364.939453125, "logps/rejected": -412.872314453125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.16861629486084, "rewards/margins": 7.059578895568848, "rewards/rejected": -14.228195190429688, "step": 18516 }, { "epoch": 2.88, "learning_rate": 5.662160900461643e-07, "logits/chosen": -2.89486026763916, "logits/rejected": -2.8761587142944336, "logps/chosen": -173.90237426757812, "logps/rejected": -319.02606201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.938667297363281, "rewards/margins": 11.302613258361816, "rewards/rejected": -16.24127960205078, "step": 18517 }, { "epoch": 2.88, "learning_rate": 5.654826495150163e-07, "logits/chosen": -2.7306973934173584, "logits/rejected": -2.506639003753662, "logps/chosen": -185.11862182617188, "logps/rejected": -489.9483337402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.789059638977051, "rewards/margins": 15.391948699951172, "rewards/rejected": -19.181007385253906, "step": 18518 }, { "epoch": 2.88, "learning_rate": 5.647492089838684e-07, "logits/chosen": -2.424335241317749, "logits/rejected": -2.883174419403076, "logps/chosen": -215.62979125976562, "logps/rejected": -546.9789428710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.7501420974731445, "rewards/margins": 12.304216384887695, "rewards/rejected": -19.054357528686523, "step": 18519 }, { "epoch": 2.88, "learning_rate": 5.640157684527205e-07, "logits/chosen": -2.7956042289733887, "logits/rejected": -2.716171979904175, "logps/chosen": -207.28994750976562, "logps/rejected": -280.2678527832031, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -4.354850769042969, "rewards/margins": 7.1771321296691895, "rewards/rejected": -11.531982421875, "step": 18520 }, { "epoch": 2.88, "learning_rate": 5.632823279215727e-07, "logits/chosen": -1.191425085067749, "logits/rejected": -2.590061664581299, "logps/chosen": -237.34666442871094, "logps/rejected": -614.0162353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.428452491760254, "rewards/margins": 16.55017852783203, "rewards/rejected": -23.97863006591797, "step": 18521 }, { "epoch": 2.88, "learning_rate": 5.625488873904248e-07, "logits/chosen": -1.5838485956192017, "logits/rejected": -2.57332181930542, "logps/chosen": -162.032470703125, "logps/rejected": -538.5726928710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.333086967468262, "rewards/margins": 14.440388679504395, "rewards/rejected": -21.773475646972656, "step": 18522 }, { "epoch": 2.88, "learning_rate": 5.61815446859277e-07, "logits/chosen": -1.6445926427841187, "logits/rejected": -2.534367561340332, "logps/chosen": -128.74658203125, "logps/rejected": -358.56439208984375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.4694623947143555, "rewards/margins": 9.683097839355469, "rewards/rejected": -17.15256118774414, "step": 18523 }, { "epoch": 2.88, "learning_rate": 5.61082006328129e-07, "logits/chosen": -2.7810230255126953, "logits/rejected": -2.5613203048706055, "logps/chosen": -302.9959716796875, "logps/rejected": -188.8205108642578, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -6.959861755371094, "rewards/margins": 6.681220054626465, "rewards/rejected": -13.641081809997559, "step": 18524 }, { "epoch": 2.88, "learning_rate": 5.603485657969812e-07, "logits/chosen": -2.170862913131714, "logits/rejected": -2.4173743724823, "logps/chosen": -205.80490112304688, "logps/rejected": -406.0806579589844, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -8.427168846130371, "rewards/margins": 7.809051513671875, "rewards/rejected": -16.23621940612793, "step": 18525 }, { "epoch": 2.88, "learning_rate": 5.596151252658333e-07, "logits/chosen": -1.9891529083251953, "logits/rejected": -2.5702855587005615, "logps/chosen": -282.77001953125, "logps/rejected": -642.4727172851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.282306671142578, "rewards/margins": 13.398078918457031, "rewards/rejected": -21.68038558959961, "step": 18526 }, { "epoch": 2.88, "learning_rate": 5.588816847346855e-07, "logits/chosen": -2.776153564453125, "logits/rejected": -1.4433931112289429, "logps/chosen": -399.45892333984375, "logps/rejected": -214.0185546875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -4.164614677429199, "rewards/margins": 7.871784210205078, "rewards/rejected": -12.036398887634277, "step": 18527 }, { "epoch": 2.88, "learning_rate": 5.581482442035376e-07, "logits/chosen": -1.4591742753982544, "logits/rejected": -2.4391255378723145, "logps/chosen": -668.0870361328125, "logps/rejected": -717.8154296875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.25068473815918, "rewards/margins": 8.89244270324707, "rewards/rejected": -18.14312744140625, "step": 18528 }, { "epoch": 2.88, "learning_rate": 5.574148036723896e-07, "logits/chosen": -1.0432798862457275, "logits/rejected": -2.685971736907959, "logps/chosen": -106.46383666992188, "logps/rejected": -444.2757568359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.159370422363281, "rewards/margins": 9.373281478881836, "rewards/rejected": -15.532651901245117, "step": 18529 }, { "epoch": 2.88, "learning_rate": 5.566813631412417e-07, "logits/chosen": -2.7817041873931885, "logits/rejected": -2.0141379833221436, "logps/chosen": -194.4117431640625, "logps/rejected": -396.407470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.126690864562988, "rewards/margins": 13.300925254821777, "rewards/rejected": -17.427616119384766, "step": 18530 }, { "epoch": 2.88, "learning_rate": 5.559479226100939e-07, "logits/chosen": -2.819424867630005, "logits/rejected": -2.867827892303467, "logps/chosen": -113.01318359375, "logps/rejected": -220.42510986328125, "loss": 0.4562, "rewards/accuracies": 0.5, "rewards/chosen": -9.030517578125, "rewards/margins": 5.105714321136475, "rewards/rejected": -14.136232376098633, "step": 18531 }, { "epoch": 2.88, "learning_rate": 5.55214482078946e-07, "logits/chosen": -1.7791647911071777, "logits/rejected": -2.587587356567383, "logps/chosen": -275.7622985839844, "logps/rejected": -493.259521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.58220911026001, "rewards/margins": 15.628859519958496, "rewards/rejected": -21.211069107055664, "step": 18532 }, { "epoch": 2.88, "learning_rate": 5.544810415477981e-07, "logits/chosen": -2.7581658363342285, "logits/rejected": -2.140651226043701, "logps/chosen": -804.728515625, "logps/rejected": -513.5202026367188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.154651165008545, "rewards/margins": 10.436090469360352, "rewards/rejected": -16.590742111206055, "step": 18533 }, { "epoch": 2.88, "learning_rate": 5.537476010166502e-07, "logits/chosen": -1.5434101819992065, "logits/rejected": -2.7316513061523438, "logps/chosen": -216.53147888183594, "logps/rejected": -521.3317260742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.608836650848389, "rewards/margins": 10.706148147583008, "rewards/rejected": -17.314985275268555, "step": 18534 }, { "epoch": 2.88, "learning_rate": 5.530141604855023e-07, "logits/chosen": -2.7599759101867676, "logits/rejected": -2.747184991836548, "logps/chosen": -176.4755401611328, "logps/rejected": -228.16168212890625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.051780700683594, "rewards/margins": 8.133800506591797, "rewards/rejected": -14.18558120727539, "step": 18535 }, { "epoch": 2.88, "learning_rate": 5.522807199543545e-07, "logits/chosen": -2.5858395099639893, "logits/rejected": -2.1334493160247803, "logps/chosen": -269.4283142089844, "logps/rejected": -371.1226806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.499408721923828, "rewards/margins": 11.429153442382812, "rewards/rejected": -16.92856216430664, "step": 18536 }, { "epoch": 2.88, "learning_rate": 5.515472794232066e-07, "logits/chosen": -2.7192134857177734, "logits/rejected": -3.2251734733581543, "logps/chosen": -96.86273193359375, "logps/rejected": -281.286376953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.11958122253418, "rewards/margins": 8.422798156738281, "rewards/rejected": -16.542381286621094, "step": 18537 }, { "epoch": 2.88, "learning_rate": 5.508138388920588e-07, "logits/chosen": -2.0499684810638428, "logits/rejected": -2.7115259170532227, "logps/chosen": -315.82989501953125, "logps/rejected": -527.5650024414062, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -8.335593223571777, "rewards/margins": 9.182233810424805, "rewards/rejected": -17.5178279876709, "step": 18538 }, { "epoch": 2.88, "learning_rate": 5.500803983609108e-07, "logits/chosen": -2.0432283878326416, "logits/rejected": -2.574176549911499, "logps/chosen": -244.4159393310547, "logps/rejected": -410.6512451171875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.757467269897461, "rewards/margins": 9.22962474822998, "rewards/rejected": -15.987092971801758, "step": 18539 }, { "epoch": 2.88, "learning_rate": 5.49346957829763e-07, "logits/chosen": -1.7929571866989136, "logits/rejected": -2.6510627269744873, "logps/chosen": -151.7113800048828, "logps/rejected": -496.66888427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.472691535949707, "rewards/margins": 13.692456245422363, "rewards/rejected": -18.16514778137207, "step": 18540 }, { "epoch": 2.88, "learning_rate": 5.48613517298615e-07, "logits/chosen": -2.4317140579223633, "logits/rejected": -1.5458347797393799, "logps/chosen": -216.98626708984375, "logps/rejected": -145.55618286132812, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -6.859121322631836, "rewards/margins": 5.161404609680176, "rewards/rejected": -12.020524978637695, "step": 18541 }, { "epoch": 2.88, "learning_rate": 5.478800767674672e-07, "logits/chosen": -2.9912898540496826, "logits/rejected": -2.68843412399292, "logps/chosen": -262.9320068359375, "logps/rejected": -293.4515380859375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.733500957489014, "rewards/margins": 7.270440101623535, "rewards/rejected": -13.003941535949707, "step": 18542 }, { "epoch": 2.88, "learning_rate": 5.471466362363193e-07, "logits/chosen": -2.508530616760254, "logits/rejected": -2.755389451980591, "logps/chosen": -120.51986694335938, "logps/rejected": -378.25506591796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.577962398529053, "rewards/margins": 14.768978118896484, "rewards/rejected": -20.346940994262695, "step": 18543 }, { "epoch": 2.88, "learning_rate": 5.464131957051714e-07, "logits/chosen": -1.7310540676116943, "logits/rejected": -2.818333625793457, "logps/chosen": -113.48371887207031, "logps/rejected": -463.55267333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.356644630432129, "rewards/margins": 14.036087036132812, "rewards/rejected": -22.392730712890625, "step": 18544 }, { "epoch": 2.88, "learning_rate": 5.456797551740235e-07, "logits/chosen": -2.3428537845611572, "logits/rejected": -2.1604089736938477, "logps/chosen": -270.21832275390625, "logps/rejected": -313.3487548828125, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.902096271514893, "rewards/margins": 10.441385269165039, "rewards/rejected": -17.343481063842773, "step": 18545 }, { "epoch": 2.88, "learning_rate": 5.449463146428757e-07, "logits/chosen": -2.4507484436035156, "logits/rejected": -2.63198184967041, "logps/chosen": -221.99659729003906, "logps/rejected": -196.76483154296875, "loss": 0.9668, "rewards/accuracies": 0.5, "rewards/chosen": -9.461088180541992, "rewards/margins": 2.592937469482422, "rewards/rejected": -12.054025650024414, "step": 18546 }, { "epoch": 2.88, "learning_rate": 5.442128741117278e-07, "logits/chosen": -2.9651339054107666, "logits/rejected": -2.4973907470703125, "logps/chosen": -645.1053466796875, "logps/rejected": -539.761474609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.988923072814941, "rewards/margins": 10.43082332611084, "rewards/rejected": -17.41974639892578, "step": 18547 }, { "epoch": 2.88, "learning_rate": 5.4347943358058e-07, "logits/chosen": -2.719614028930664, "logits/rejected": -2.81062650680542, "logps/chosen": -355.70428466796875, "logps/rejected": -235.12765502929688, "loss": 3.9319, "rewards/accuracies": 0.5, "rewards/chosen": -9.905035972595215, "rewards/margins": -0.7872257232666016, "rewards/rejected": -9.117810249328613, "step": 18548 }, { "epoch": 2.88, "learning_rate": 5.42745993049432e-07, "logits/chosen": -2.303408622741699, "logits/rejected": -2.967766284942627, "logps/chosen": -318.9166564941406, "logps/rejected": -608.9874877929688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.471215724945068, "rewards/margins": 13.049448013305664, "rewards/rejected": -19.52066421508789, "step": 18549 }, { "epoch": 2.88, "learning_rate": 5.420125525182842e-07, "logits/chosen": -2.502061367034912, "logits/rejected": -2.679429531097412, "logps/chosen": -228.28407287597656, "logps/rejected": -370.49951171875, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -12.583230972290039, "rewards/margins": 6.274267196655273, "rewards/rejected": -18.857498168945312, "step": 18550 }, { "epoch": 2.89, "learning_rate": 5.412791119871363e-07, "logits/chosen": -0.7137211561203003, "logits/rejected": -2.7264058589935303, "logps/chosen": -199.6903076171875, "logps/rejected": -738.0655517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.663987159729004, "rewards/margins": 13.935430526733398, "rewards/rejected": -19.59941864013672, "step": 18551 }, { "epoch": 2.89, "learning_rate": 5.405456714559885e-07, "logits/chosen": -1.5990322828292847, "logits/rejected": -2.666506052017212, "logps/chosen": -192.30551147460938, "logps/rejected": -548.6370239257812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.309370517730713, "rewards/margins": 12.378199577331543, "rewards/rejected": -18.687570571899414, "step": 18552 }, { "epoch": 2.89, "learning_rate": 5.398122309248406e-07, "logits/chosen": -2.541076421737671, "logits/rejected": -2.3388195037841797, "logps/chosen": -757.9947509765625, "logps/rejected": -753.4791259765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.631065368652344, "rewards/margins": 10.627668380737305, "rewards/rejected": -20.25873374938965, "step": 18553 }, { "epoch": 2.89, "learning_rate": 5.390787903936926e-07, "logits/chosen": -2.4549875259399414, "logits/rejected": -2.912290334701538, "logps/chosen": -150.7191925048828, "logps/rejected": -376.15875244140625, "loss": 0.0485, "rewards/accuracies": 1.0, "rewards/chosen": -7.207345485687256, "rewards/margins": 7.329912185668945, "rewards/rejected": -14.53725814819336, "step": 18554 }, { "epoch": 2.89, "learning_rate": 5.383453498625447e-07, "logits/chosen": -2.4565131664276123, "logits/rejected": -2.328549861907959, "logps/chosen": -214.06427001953125, "logps/rejected": -356.2170104980469, "loss": 0.0445, "rewards/accuracies": 1.0, "rewards/chosen": -7.712524890899658, "rewards/margins": 5.561220169067383, "rewards/rejected": -13.273744583129883, "step": 18555 }, { "epoch": 2.89, "learning_rate": 5.376119093313968e-07, "logits/chosen": -2.8048033714294434, "logits/rejected": -2.50709867477417, "logps/chosen": -326.4212951660156, "logps/rejected": -401.99737548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.308999061584473, "rewards/margins": 12.12795639038086, "rewards/rejected": -18.436954498291016, "step": 18556 }, { "epoch": 2.89, "learning_rate": 5.36878468800249e-07, "logits/chosen": -2.5662975311279297, "logits/rejected": -2.0408554077148438, "logps/chosen": -380.5096435546875, "logps/rejected": -394.75067138671875, "loss": 0.0746, "rewards/accuracies": 1.0, "rewards/chosen": -11.226595878601074, "rewards/margins": 7.045116424560547, "rewards/rejected": -18.271713256835938, "step": 18557 }, { "epoch": 2.89, "learning_rate": 5.361450282691011e-07, "logits/chosen": -1.464385747909546, "logits/rejected": -2.4342803955078125, "logps/chosen": -168.62545776367188, "logps/rejected": -444.6436767578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.25772476196289, "rewards/margins": 9.007857322692871, "rewards/rejected": -17.265583038330078, "step": 18558 }, { "epoch": 2.89, "learning_rate": 5.354115877379532e-07, "logits/chosen": -1.6657201051712036, "logits/rejected": -2.271254062652588, "logps/chosen": -136.87405395507812, "logps/rejected": -319.54150390625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -6.18926477432251, "rewards/margins": 10.407310485839844, "rewards/rejected": -16.596574783325195, "step": 18559 }, { "epoch": 2.89, "learning_rate": 5.346781472068053e-07, "logits/chosen": -2.5819358825683594, "logits/rejected": -2.7279269695281982, "logps/chosen": -111.84127807617188, "logps/rejected": -342.8380126953125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.026803970336914, "rewards/margins": 8.056300163269043, "rewards/rejected": -16.08310317993164, "step": 18560 }, { "epoch": 2.89, "learning_rate": 5.339447066756575e-07, "logits/chosen": -2.304253339767456, "logits/rejected": -2.778089761734009, "logps/chosen": -319.05474853515625, "logps/rejected": -435.36639404296875, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -7.192502021789551, "rewards/margins": 8.824312210083008, "rewards/rejected": -16.016813278198242, "step": 18561 }, { "epoch": 2.89, "learning_rate": 5.332112661445096e-07, "logits/chosen": -1.206888198852539, "logits/rejected": -2.6325912475585938, "logps/chosen": -193.60504150390625, "logps/rejected": -586.5407104492188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.206683158874512, "rewards/margins": 10.170392990112305, "rewards/rejected": -18.377077102661133, "step": 18562 }, { "epoch": 2.89, "learning_rate": 5.324778256133618e-07, "logits/chosen": -2.807300567626953, "logits/rejected": -2.9260120391845703, "logps/chosen": -249.4589080810547, "logps/rejected": -311.4942626953125, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.340714454650879, "rewards/margins": 7.752793312072754, "rewards/rejected": -14.093507766723633, "step": 18563 }, { "epoch": 2.89, "learning_rate": 5.317443850822138e-07, "logits/chosen": -2.4855949878692627, "logits/rejected": -2.77059006690979, "logps/chosen": -348.3238220214844, "logps/rejected": -472.6293029785156, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.137239933013916, "rewards/margins": 11.428322792053223, "rewards/rejected": -14.565563201904297, "step": 18564 }, { "epoch": 2.89, "learning_rate": 5.31010944551066e-07, "logits/chosen": -2.492928981781006, "logits/rejected": -2.6947574615478516, "logps/chosen": -124.130615234375, "logps/rejected": -319.75347900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.646024703979492, "rewards/margins": 9.619258880615234, "rewards/rejected": -16.265283584594727, "step": 18565 }, { "epoch": 2.89, "learning_rate": 5.30277504019918e-07, "logits/chosen": -2.3562052249908447, "logits/rejected": -2.7126100063323975, "logps/chosen": -275.12176513671875, "logps/rejected": -424.959716796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.259150981903076, "rewards/margins": 10.777509689331055, "rewards/rejected": -16.03666114807129, "step": 18566 }, { "epoch": 2.89, "learning_rate": 5.295440634887702e-07, "logits/chosen": -1.800903081893921, "logits/rejected": -2.5411272048950195, "logps/chosen": -153.96578979492188, "logps/rejected": -344.5228271484375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.306791305541992, "rewards/margins": 9.815860748291016, "rewards/rejected": -18.122652053833008, "step": 18567 }, { "epoch": 2.89, "learning_rate": 5.288106229576223e-07, "logits/chosen": -1.6597118377685547, "logits/rejected": -2.5054261684417725, "logps/chosen": -422.130126953125, "logps/rejected": -688.276611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.584985733032227, "rewards/margins": 15.280357360839844, "rewards/rejected": -21.86534309387207, "step": 18568 }, { "epoch": 2.89, "learning_rate": 5.280771824264744e-07, "logits/chosen": -2.8264694213867188, "logits/rejected": -1.7975293397903442, "logps/chosen": -249.6752471923828, "logps/rejected": -177.6201171875, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -5.741703033447266, "rewards/margins": 6.213352203369141, "rewards/rejected": -11.955055236816406, "step": 18569 }, { "epoch": 2.89, "learning_rate": 5.273437418953265e-07, "logits/chosen": -2.6412546634674072, "logits/rejected": -3.191722869873047, "logps/chosen": -138.20069885253906, "logps/rejected": -381.40631103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.9623332023620605, "rewards/margins": 11.48613166809082, "rewards/rejected": -15.448465347290039, "step": 18570 }, { "epoch": 2.89, "learning_rate": 5.266103013641787e-07, "logits/chosen": -2.201608419418335, "logits/rejected": -2.7432291507720947, "logps/chosen": -81.83930206298828, "logps/rejected": -295.87701416015625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -6.277140140533447, "rewards/margins": 5.048180103302002, "rewards/rejected": -11.32532024383545, "step": 18571 }, { "epoch": 2.89, "learning_rate": 5.258768608330308e-07, "logits/chosen": -2.1333699226379395, "logits/rejected": -2.7945773601531982, "logps/chosen": -131.9669647216797, "logps/rejected": -285.0852966308594, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -5.542347431182861, "rewards/margins": 6.927120208740234, "rewards/rejected": -12.469468116760254, "step": 18572 }, { "epoch": 2.89, "learning_rate": 5.251434203018829e-07, "logits/chosen": -1.8403433561325073, "logits/rejected": -2.514477252960205, "logps/chosen": -131.7789764404297, "logps/rejected": -383.2548828125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -8.687942504882812, "rewards/margins": 11.421005249023438, "rewards/rejected": -20.10894775390625, "step": 18573 }, { "epoch": 2.89, "learning_rate": 5.24409979770735e-07, "logits/chosen": -2.5657122135162354, "logits/rejected": -2.495527982711792, "logps/chosen": -137.35086059570312, "logps/rejected": -197.0517578125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.1587653160095215, "rewards/margins": 8.25072193145752, "rewards/rejected": -13.409486770629883, "step": 18574 }, { "epoch": 2.89, "learning_rate": 5.236765392395871e-07, "logits/chosen": -2.189819574356079, "logits/rejected": -2.641555070877075, "logps/chosen": -345.20501708984375, "logps/rejected": -763.8460083007812, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.279192924499512, "rewards/margins": 8.657598495483398, "rewards/rejected": -15.936790466308594, "step": 18575 }, { "epoch": 2.89, "learning_rate": 5.229430987084393e-07, "logits/chosen": -2.5781853199005127, "logits/rejected": -2.687352180480957, "logps/chosen": -122.8472900390625, "logps/rejected": -325.5311584472656, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.230722427368164, "rewards/margins": 10.022140502929688, "rewards/rejected": -17.252864837646484, "step": 18576 }, { "epoch": 2.89, "learning_rate": 5.222096581772913e-07, "logits/chosen": -2.4861650466918945, "logits/rejected": -2.589451313018799, "logps/chosen": -285.42254638671875, "logps/rejected": -284.9803771972656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.598467826843262, "rewards/margins": 10.26534652709961, "rewards/rejected": -16.863815307617188, "step": 18577 }, { "epoch": 2.89, "learning_rate": 5.214762176461434e-07, "logits/chosen": -1.378248691558838, "logits/rejected": -2.584341287612915, "logps/chosen": -167.51283264160156, "logps/rejected": -412.62628173828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.127381324768066, "rewards/margins": 10.12012004852295, "rewards/rejected": -17.247501373291016, "step": 18578 }, { "epoch": 2.89, "learning_rate": 5.207427771149955e-07, "logits/chosen": -2.6808595657348633, "logits/rejected": -2.67354679107666, "logps/chosen": -721.461669921875, "logps/rejected": -554.2842407226562, "loss": 0.0206, "rewards/accuracies": 1.0, "rewards/chosen": -6.5103912353515625, "rewards/margins": 8.260580062866211, "rewards/rejected": -14.770971298217773, "step": 18579 }, { "epoch": 2.89, "learning_rate": 5.200093365838477e-07, "logits/chosen": -1.6707236766815186, "logits/rejected": -2.89204478263855, "logps/chosen": -541.0591430664062, "logps/rejected": -727.5364990234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -11.560704231262207, "rewards/margins": 8.86589241027832, "rewards/rejected": -20.426597595214844, "step": 18580 }, { "epoch": 2.89, "learning_rate": 5.192758960526998e-07, "logits/chosen": -2.175722599029541, "logits/rejected": -2.9085030555725098, "logps/chosen": -125.62139892578125, "logps/rejected": -446.9804382324219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.409429550170898, "rewards/margins": 10.367393493652344, "rewards/rejected": -17.77682113647461, "step": 18581 }, { "epoch": 2.89, "learning_rate": 5.18542455521552e-07, "logits/chosen": -1.205995798110962, "logits/rejected": -2.275728464126587, "logps/chosen": -90.57391357421875, "logps/rejected": -381.1938171386719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.991522789001465, "rewards/margins": 10.814098358154297, "rewards/rejected": -16.805622100830078, "step": 18582 }, { "epoch": 2.89, "learning_rate": 5.17809014990404e-07, "logits/chosen": -2.6128056049346924, "logits/rejected": -2.7368593215942383, "logps/chosen": -195.82574462890625, "logps/rejected": -294.57647705078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.758303642272949, "rewards/margins": 9.1854887008667, "rewards/rejected": -14.943792343139648, "step": 18583 }, { "epoch": 2.89, "learning_rate": 5.170755744592562e-07, "logits/chosen": -2.597081184387207, "logits/rejected": -3.0438551902770996, "logps/chosen": -426.6376647949219, "logps/rejected": -546.35302734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.3752522468566895, "rewards/margins": 11.477688789367676, "rewards/rejected": -15.852941513061523, "step": 18584 }, { "epoch": 2.89, "learning_rate": 5.163421339281083e-07, "logits/chosen": -2.0409183502197266, "logits/rejected": -2.709571599960327, "logps/chosen": -188.1905059814453, "logps/rejected": -462.3387145996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.339444637298584, "rewards/margins": 13.141475677490234, "rewards/rejected": -20.480918884277344, "step": 18585 }, { "epoch": 2.89, "learning_rate": 5.156086933969605e-07, "logits/chosen": -2.2275378704071045, "logits/rejected": -2.658639907836914, "logps/chosen": -127.64427185058594, "logps/rejected": -323.09930419921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.554214477539062, "rewards/margins": 9.699190139770508, "rewards/rejected": -19.25340461730957, "step": 18586 }, { "epoch": 2.89, "learning_rate": 5.148752528658126e-07, "logits/chosen": -2.4547066688537598, "logits/rejected": -2.429192066192627, "logps/chosen": -294.2336730957031, "logps/rejected": -376.92816162109375, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -7.168909072875977, "rewards/margins": 5.439454078674316, "rewards/rejected": -12.608363151550293, "step": 18587 }, { "epoch": 2.89, "learning_rate": 5.141418123346647e-07, "logits/chosen": -2.0225110054016113, "logits/rejected": -2.8621158599853516, "logps/chosen": -355.935791015625, "logps/rejected": -580.9625244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.616158962249756, "rewards/margins": 9.682621002197266, "rewards/rejected": -17.298778533935547, "step": 18588 }, { "epoch": 2.89, "learning_rate": 5.134083718035167e-07, "logits/chosen": -2.4404494762420654, "logits/rejected": -2.649658441543579, "logps/chosen": -292.42681884765625, "logps/rejected": -391.47772216796875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.219882965087891, "rewards/margins": 7.606797218322754, "rewards/rejected": -12.826680183410645, "step": 18589 }, { "epoch": 2.89, "learning_rate": 5.126749312723689e-07, "logits/chosen": -2.7590994834899902, "logits/rejected": -1.693773627281189, "logps/chosen": -292.4765625, "logps/rejected": -218.20843505859375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -7.452761650085449, "rewards/margins": 6.804843902587891, "rewards/rejected": -14.257606506347656, "step": 18590 }, { "epoch": 2.89, "learning_rate": 5.11941490741221e-07, "logits/chosen": -2.411191701889038, "logits/rejected": -0.8648892641067505, "logps/chosen": -225.8293914794922, "logps/rejected": -127.50357818603516, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -2.7064766883850098, "rewards/margins": 7.965228080749512, "rewards/rejected": -10.67170524597168, "step": 18591 }, { "epoch": 2.89, "learning_rate": 5.112080502100732e-07, "logits/chosen": -2.2848076820373535, "logits/rejected": -2.915008068084717, "logps/chosen": -129.01316833496094, "logps/rejected": -353.3751220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.412602424621582, "rewards/margins": 11.368671417236328, "rewards/rejected": -15.78127384185791, "step": 18592 }, { "epoch": 2.89, "learning_rate": 5.104746096789252e-07, "logits/chosen": -2.5304160118103027, "logits/rejected": -2.990185260772705, "logps/chosen": -125.62361145019531, "logps/rejected": -257.8147888183594, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -4.763403415679932, "rewards/margins": 11.10438346862793, "rewards/rejected": -15.867786407470703, "step": 18593 }, { "epoch": 2.89, "learning_rate": 5.097411691477774e-07, "logits/chosen": -0.7531331777572632, "logits/rejected": -2.31058406829834, "logps/chosen": -180.22479248046875, "logps/rejected": -610.1319580078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.846019744873047, "rewards/margins": 9.647449493408203, "rewards/rejected": -17.49346923828125, "step": 18594 }, { "epoch": 2.89, "learning_rate": 5.090077286166295e-07, "logits/chosen": -2.9393038749694824, "logits/rejected": -2.640338659286499, "logps/chosen": -293.61492919921875, "logps/rejected": -282.83837890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.258265733718872, "rewards/margins": 10.464006423950195, "rewards/rejected": -12.722271919250488, "step": 18595 }, { "epoch": 2.89, "learning_rate": 5.082742880854816e-07, "logits/chosen": -2.6464297771453857, "logits/rejected": -2.721113443374634, "logps/chosen": -91.74223327636719, "logps/rejected": -232.61865234375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.63588809967041, "rewards/margins": 9.178772926330566, "rewards/rejected": -15.814661026000977, "step": 18596 }, { "epoch": 2.89, "learning_rate": 5.075408475543338e-07, "logits/chosen": -2.707901954650879, "logits/rejected": -2.833977222442627, "logps/chosen": -131.4781951904297, "logps/rejected": -338.33380126953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.948685169219971, "rewards/margins": 9.747125625610352, "rewards/rejected": -16.695810317993164, "step": 18597 }, { "epoch": 2.89, "learning_rate": 5.068074070231858e-07, "logits/chosen": -2.561481475830078, "logits/rejected": -2.3139774799346924, "logps/chosen": -134.16824340820312, "logps/rejected": -302.57501220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.0048322677612305, "rewards/margins": 13.525535583496094, "rewards/rejected": -17.530366897583008, "step": 18598 }, { "epoch": 2.89, "learning_rate": 5.06073966492038e-07, "logits/chosen": -2.197214365005493, "logits/rejected": -2.7373526096343994, "logps/chosen": -146.0303955078125, "logps/rejected": -495.5280456542969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.691823482513428, "rewards/margins": 11.972715377807617, "rewards/rejected": -18.664539337158203, "step": 18599 }, { "epoch": 2.89, "learning_rate": 5.053405259608901e-07, "logits/chosen": -2.392301082611084, "logits/rejected": -2.5116326808929443, "logps/chosen": -213.749267578125, "logps/rejected": -318.590087890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.733247756958008, "rewards/margins": 9.585405349731445, "rewards/rejected": -15.318653106689453, "step": 18600 }, { "epoch": 2.89, "learning_rate": 5.046070854297422e-07, "logits/chosen": -2.5482332706451416, "logits/rejected": -2.7500548362731934, "logps/chosen": -175.54554748535156, "logps/rejected": -301.66986083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.851378440856934, "rewards/margins": 10.221658706665039, "rewards/rejected": -17.073036193847656, "step": 18601 }, { "epoch": 2.89, "learning_rate": 5.038736448985943e-07, "logits/chosen": -1.5714775323867798, "logits/rejected": -2.749929428100586, "logps/chosen": -267.700439453125, "logps/rejected": -609.9412841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.83629035949707, "rewards/margins": 11.304794311523438, "rewards/rejected": -18.141084671020508, "step": 18602 }, { "epoch": 2.89, "learning_rate": 5.031402043674464e-07, "logits/chosen": -1.0295244455337524, "logits/rejected": -2.5331430435180664, "logps/chosen": -163.42080688476562, "logps/rejected": -574.46533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.3248090744018555, "rewards/margins": 13.46247386932373, "rewards/rejected": -20.787282943725586, "step": 18603 }, { "epoch": 2.89, "learning_rate": 5.024067638362985e-07, "logits/chosen": -0.9937705993652344, "logits/rejected": -2.4592204093933105, "logps/chosen": -145.53155517578125, "logps/rejected": -480.1504211425781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.397529602050781, "rewards/margins": 9.59144401550293, "rewards/rejected": -16.98897361755371, "step": 18604 }, { "epoch": 2.89, "learning_rate": 5.016733233051507e-07, "logits/chosen": -2.913644790649414, "logits/rejected": -2.0893166065216064, "logps/chosen": -435.17108154296875, "logps/rejected": -327.00030517578125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.974905490875244, "rewards/margins": 7.1369476318359375, "rewards/rejected": -14.111852645874023, "step": 18605 }, { "epoch": 2.89, "learning_rate": 5.009398827740028e-07, "logits/chosen": -2.5858004093170166, "logits/rejected": -1.9681452512741089, "logps/chosen": -663.1568603515625, "logps/rejected": -535.748291015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.846957206726074, "rewards/margins": 9.248334884643555, "rewards/rejected": -16.095293045043945, "step": 18606 }, { "epoch": 2.89, "learning_rate": 5.00206442242855e-07, "logits/chosen": -2.6672253608703613, "logits/rejected": -2.304065465927124, "logps/chosen": -158.88458251953125, "logps/rejected": -393.63140869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.955722332000732, "rewards/margins": 13.03270149230957, "rewards/rejected": -19.98842430114746, "step": 18607 }, { "epoch": 2.89, "learning_rate": 4.99473001711707e-07, "logits/chosen": -2.698800563812256, "logits/rejected": -1.6021440029144287, "logps/chosen": -342.3026123046875, "logps/rejected": -405.9443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.428375244140625, "rewards/margins": 12.028621673583984, "rewards/rejected": -16.45699691772461, "step": 18608 }, { "epoch": 2.89, "learning_rate": 4.987395611805592e-07, "logits/chosen": -2.484792947769165, "logits/rejected": -2.5681750774383545, "logps/chosen": -261.366455078125, "logps/rejected": -407.5680847167969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.093852043151855, "rewards/margins": 9.390320777893066, "rewards/rejected": -18.484172821044922, "step": 18609 }, { "epoch": 2.89, "learning_rate": 4.980061206494113e-07, "logits/chosen": -2.2220640182495117, "logits/rejected": -2.374603748321533, "logps/chosen": -609.0311279296875, "logps/rejected": -712.6204223632812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.652579307556152, "rewards/margins": 9.561820983886719, "rewards/rejected": -17.214399337768555, "step": 18610 }, { "epoch": 2.89, "learning_rate": 4.972726801182635e-07, "logits/chosen": -2.643688678741455, "logits/rejected": -2.519219398498535, "logps/chosen": -319.6878662109375, "logps/rejected": -403.5680847167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.95676326751709, "rewards/margins": 10.2886381149292, "rewards/rejected": -17.24540138244629, "step": 18611 }, { "epoch": 2.89, "learning_rate": 4.965392395871156e-07, "logits/chosen": -2.5284407138824463, "logits/rejected": -1.7720904350280762, "logps/chosen": -208.236328125, "logps/rejected": -194.50146484375, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/chosen": -4.708502292633057, "rewards/margins": 4.911680221557617, "rewards/rejected": -9.620182037353516, "step": 18612 }, { "epoch": 2.89, "learning_rate": 4.958057990559676e-07, "logits/chosen": -2.195213556289673, "logits/rejected": -2.6070139408111572, "logps/chosen": -208.01345825195312, "logps/rejected": -335.6082763671875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.865082740783691, "rewards/margins": 8.428387641906738, "rewards/rejected": -13.29347038269043, "step": 18613 }, { "epoch": 2.89, "learning_rate": 4.950723585248197e-07, "logits/chosen": -1.9861862659454346, "logits/rejected": -2.668224811553955, "logps/chosen": -244.06808471679688, "logps/rejected": -536.0015258789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.088672161102295, "rewards/margins": 14.249011993408203, "rewards/rejected": -20.337684631347656, "step": 18614 }, { "epoch": 2.9, "learning_rate": 4.943389179936718e-07, "logits/chosen": -2.2278547286987305, "logits/rejected": -2.5447356700897217, "logps/chosen": -275.74359130859375, "logps/rejected": -412.29730224609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.3217997550964355, "rewards/margins": 10.185739517211914, "rewards/rejected": -16.507539749145508, "step": 18615 }, { "epoch": 2.9, "learning_rate": 4.93605477462524e-07, "logits/chosen": -1.8588695526123047, "logits/rejected": -2.4964916706085205, "logps/chosen": -206.70802307128906, "logps/rejected": -399.8271789550781, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -11.686750411987305, "rewards/margins": 8.104974746704102, "rewards/rejected": -19.791725158691406, "step": 18616 }, { "epoch": 2.9, "learning_rate": 4.928720369313761e-07, "logits/chosen": -2.4738311767578125, "logits/rejected": -2.4940855503082275, "logps/chosen": -173.59970092773438, "logps/rejected": -219.8313751220703, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -8.893017768859863, "rewards/margins": 6.318316459655762, "rewards/rejected": -15.211334228515625, "step": 18617 }, { "epoch": 2.9, "learning_rate": 4.921385964002282e-07, "logits/chosen": -2.1572482585906982, "logits/rejected": -2.7839436531066895, "logps/chosen": -383.5894775390625, "logps/rejected": -685.4537353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.20280647277832, "rewards/margins": 13.113822937011719, "rewards/rejected": -23.31662940979004, "step": 18618 }, { "epoch": 2.9, "learning_rate": 4.914051558690803e-07, "logits/chosen": -2.127082586288452, "logits/rejected": -2.887387990951538, "logps/chosen": -264.45904541015625, "logps/rejected": -479.2657775878906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.232502937316895, "rewards/margins": 7.6414875984191895, "rewards/rejected": -15.873991012573242, "step": 18619 }, { "epoch": 2.9, "learning_rate": 4.906717153379325e-07, "logits/chosen": -2.002518892288208, "logits/rejected": -2.6083593368530273, "logps/chosen": -270.85845947265625, "logps/rejected": -359.9521484375, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -9.959810256958008, "rewards/margins": 3.073812484741211, "rewards/rejected": -13.033622741699219, "step": 18620 }, { "epoch": 2.9, "learning_rate": 4.899382748067846e-07, "logits/chosen": -2.3040771484375, "logits/rejected": -2.4839770793914795, "logps/chosen": -340.97076416015625, "logps/rejected": -362.820068359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.902382850646973, "rewards/margins": 12.252497673034668, "rewards/rejected": -20.15488052368164, "step": 18621 }, { "epoch": 2.9, "learning_rate": 4.892048342756368e-07, "logits/chosen": -2.4049899578094482, "logits/rejected": -2.6238932609558105, "logps/chosen": -118.63383483886719, "logps/rejected": -242.3551025390625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -8.775969505310059, "rewards/margins": 8.604479789733887, "rewards/rejected": -17.380449295043945, "step": 18622 }, { "epoch": 2.9, "learning_rate": 4.884713937444888e-07, "logits/chosen": -1.6683242321014404, "logits/rejected": -2.7136850357055664, "logps/chosen": -149.23635864257812, "logps/rejected": -439.452880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.972705841064453, "rewards/margins": 12.085309028625488, "rewards/rejected": -18.058013916015625, "step": 18623 }, { "epoch": 2.9, "learning_rate": 4.87737953213341e-07, "logits/chosen": -2.0998051166534424, "logits/rejected": -2.6258113384246826, "logps/chosen": -303.4069519042969, "logps/rejected": -264.530029296875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.573281288146973, "rewards/margins": 9.250926971435547, "rewards/rejected": -15.82420825958252, "step": 18624 }, { "epoch": 2.9, "learning_rate": 4.87004512682193e-07, "logits/chosen": -2.882402181625366, "logits/rejected": -2.353464126586914, "logps/chosen": -282.1245422363281, "logps/rejected": -258.49713134765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.8037919998168945, "rewards/margins": 9.738584518432617, "rewards/rejected": -15.542377471923828, "step": 18625 }, { "epoch": 2.9, "learning_rate": 4.862710721510452e-07, "logits/chosen": -2.200098991394043, "logits/rejected": -2.505624532699585, "logps/chosen": -121.8291244506836, "logps/rejected": -277.4298400878906, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -5.918092250823975, "rewards/margins": 7.771794319152832, "rewards/rejected": -13.689886093139648, "step": 18626 }, { "epoch": 2.9, "learning_rate": 4.855376316198973e-07, "logits/chosen": -1.8180147409439087, "logits/rejected": -2.880420446395874, "logps/chosen": -263.4202575683594, "logps/rejected": -504.50872802734375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -10.962132453918457, "rewards/margins": 5.782055854797363, "rewards/rejected": -16.74418830871582, "step": 18627 }, { "epoch": 2.9, "learning_rate": 4.848041910887494e-07, "logits/chosen": -2.8086636066436768, "logits/rejected": -2.797980308532715, "logps/chosen": -187.46609497070312, "logps/rejected": -182.88919067382812, "loss": 0.9418, "rewards/accuracies": 0.5, "rewards/chosen": -7.462141990661621, "rewards/margins": 2.952819585800171, "rewards/rejected": -10.414961814880371, "step": 18628 }, { "epoch": 2.9, "learning_rate": 4.840707505576015e-07, "logits/chosen": -1.5045629739761353, "logits/rejected": -2.7697198390960693, "logps/chosen": -126.9610366821289, "logps/rejected": -499.35693359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.160895347595215, "rewards/margins": 10.174788475036621, "rewards/rejected": -18.335683822631836, "step": 18629 }, { "epoch": 2.9, "learning_rate": 4.833373100264537e-07, "logits/chosen": -2.745126724243164, "logits/rejected": -3.073276996612549, "logps/chosen": -79.75852966308594, "logps/rejected": -186.29629516601562, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -6.414341926574707, "rewards/margins": 5.331172943115234, "rewards/rejected": -11.745513916015625, "step": 18630 }, { "epoch": 2.9, "learning_rate": 4.826038694953058e-07, "logits/chosen": -2.185070514678955, "logits/rejected": -2.77294659614563, "logps/chosen": -210.93222045898438, "logps/rejected": -464.42950439453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.505983352661133, "rewards/margins": 11.705118179321289, "rewards/rejected": -18.211101531982422, "step": 18631 }, { "epoch": 2.9, "learning_rate": 4.81870428964158e-07, "logits/chosen": -1.4129995107650757, "logits/rejected": -2.7208504676818848, "logps/chosen": -177.52276611328125, "logps/rejected": -435.20733642578125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.369329452514648, "rewards/margins": 7.748297691345215, "rewards/rejected": -16.11762809753418, "step": 18632 }, { "epoch": 2.9, "learning_rate": 4.8113698843301e-07, "logits/chosen": -2.7380433082580566, "logits/rejected": -2.8847925662994385, "logps/chosen": -735.815673828125, "logps/rejected": -878.8270263671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.443872451782227, "rewards/margins": 15.252496719360352, "rewards/rejected": -22.696369171142578, "step": 18633 }, { "epoch": 2.9, "learning_rate": 4.804035479018622e-07, "logits/chosen": -2.6209158897399902, "logits/rejected": -2.6309304237365723, "logps/chosen": -189.37069702148438, "logps/rejected": -400.80645751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.345327854156494, "rewards/margins": 10.331189155578613, "rewards/rejected": -16.676517486572266, "step": 18634 }, { "epoch": 2.9, "learning_rate": 4.796701073707143e-07, "logits/chosen": -2.644554853439331, "logits/rejected": -2.520211935043335, "logps/chosen": -169.98388671875, "logps/rejected": -244.38153076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.4292426109313965, "rewards/margins": 12.905818939208984, "rewards/rejected": -16.33506202697754, "step": 18635 }, { "epoch": 2.9, "learning_rate": 4.789366668395664e-07, "logits/chosen": -2.5013279914855957, "logits/rejected": -2.618980646133423, "logps/chosen": -383.2698059082031, "logps/rejected": -588.0132446289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.583281517028809, "rewards/margins": 11.146669387817383, "rewards/rejected": -18.729949951171875, "step": 18636 }, { "epoch": 2.9, "learning_rate": 4.782032263084185e-07, "logits/chosen": -1.8676694631576538, "logits/rejected": -2.7150847911834717, "logps/chosen": -153.30703735351562, "logps/rejected": -288.9080810546875, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": -8.110301971435547, "rewards/margins": 5.27947473526001, "rewards/rejected": -13.389777183532715, "step": 18637 }, { "epoch": 2.9, "learning_rate": 4.774697857772705e-07, "logits/chosen": -2.004672050476074, "logits/rejected": -2.3102474212646484, "logps/chosen": -127.27500915527344, "logps/rejected": -338.4950256347656, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -7.283417701721191, "rewards/margins": 7.139152526855469, "rewards/rejected": -14.42257022857666, "step": 18638 }, { "epoch": 2.9, "learning_rate": 4.7673634524612273e-07, "logits/chosen": -2.770827054977417, "logits/rejected": -2.2909348011016846, "logps/chosen": -261.51934814453125, "logps/rejected": -299.3201904296875, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -5.982513427734375, "rewards/margins": 9.186332702636719, "rewards/rejected": -15.168846130371094, "step": 18639 }, { "epoch": 2.9, "learning_rate": 4.7600290471497487e-07, "logits/chosen": -1.0937416553497314, "logits/rejected": -2.4760897159576416, "logps/chosen": -129.37399291992188, "logps/rejected": -363.6627197265625, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.543377876281738, "rewards/margins": 7.437727928161621, "rewards/rejected": -13.98110580444336, "step": 18640 }, { "epoch": 2.9, "learning_rate": 4.75269464183827e-07, "logits/chosen": -2.3021106719970703, "logits/rejected": -2.5295939445495605, "logps/chosen": -121.63540649414062, "logps/rejected": -320.6239318847656, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -7.548064708709717, "rewards/margins": 7.745217323303223, "rewards/rejected": -15.293281555175781, "step": 18641 }, { "epoch": 2.9, "learning_rate": 4.7453602365267905e-07, "logits/chosen": -2.989182949066162, "logits/rejected": -2.6765847206115723, "logps/chosen": -178.55010986328125, "logps/rejected": -301.8105163574219, "loss": 0.0353, "rewards/accuracies": 1.0, "rewards/chosen": -8.669219017028809, "rewards/margins": 8.893035888671875, "rewards/rejected": -17.562253952026367, "step": 18642 }, { "epoch": 2.9, "learning_rate": 4.738025831215312e-07, "logits/chosen": -2.326219081878662, "logits/rejected": -2.615251064300537, "logps/chosen": -210.26181030273438, "logps/rejected": -331.63787841796875, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.584078788757324, "rewards/margins": 7.275690078735352, "rewards/rejected": -13.859768867492676, "step": 18643 }, { "epoch": 2.9, "learning_rate": 4.7306914259038334e-07, "logits/chosen": -2.1844871044158936, "logits/rejected": -3.027939796447754, "logps/chosen": -89.02798461914062, "logps/rejected": -529.3319091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.828007936477661, "rewards/margins": 13.03147029876709, "rewards/rejected": -16.859477996826172, "step": 18644 }, { "epoch": 2.9, "learning_rate": 4.7233570205923543e-07, "logits/chosen": -1.1594878435134888, "logits/rejected": -2.5400662422180176, "logps/chosen": -162.53713989257812, "logps/rejected": -386.5010986328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.443011283874512, "rewards/margins": 9.822319030761719, "rewards/rejected": -15.265331268310547, "step": 18645 }, { "epoch": 2.9, "learning_rate": 4.7160226152808757e-07, "logits/chosen": -2.314981460571289, "logits/rejected": -2.4815611839294434, "logps/chosen": -303.8072509765625, "logps/rejected": -430.32708740234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -11.527542114257812, "rewards/margins": 11.111888885498047, "rewards/rejected": -22.63943099975586, "step": 18646 }, { "epoch": 2.9, "learning_rate": 4.708688209969396e-07, "logits/chosen": -1.8197262287139893, "logits/rejected": -2.7356107234954834, "logps/chosen": -180.314208984375, "logps/rejected": -676.7452392578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.739860534667969, "rewards/margins": 17.226533889770508, "rewards/rejected": -27.96639633178711, "step": 18647 }, { "epoch": 2.9, "learning_rate": 4.7013538046579175e-07, "logits/chosen": -2.6161043643951416, "logits/rejected": -3.0088109970092773, "logps/chosen": -524.7243041992188, "logps/rejected": -670.8181762695312, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.25343132019043, "rewards/margins": 9.124834060668945, "rewards/rejected": -15.378265380859375, "step": 18648 }, { "epoch": 2.9, "learning_rate": 4.694019399346439e-07, "logits/chosen": -2.619009256362915, "logits/rejected": -2.687251091003418, "logps/chosen": -144.00091552734375, "logps/rejected": -277.1597595214844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.846245765686035, "rewards/margins": 6.782132148742676, "rewards/rejected": -12.628377914428711, "step": 18649 }, { "epoch": 2.9, "learning_rate": 4.6866849940349604e-07, "logits/chosen": -1.8070495128631592, "logits/rejected": -2.6514363288879395, "logps/chosen": -351.757080078125, "logps/rejected": -556.3576049804688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.371087074279785, "rewards/margins": 10.689983367919922, "rewards/rejected": -16.06106948852539, "step": 18650 }, { "epoch": 2.9, "learning_rate": 4.679350588723482e-07, "logits/chosen": -1.6085354089736938, "logits/rejected": -2.6454367637634277, "logps/chosen": -208.3314208984375, "logps/rejected": -574.43115234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.483445644378662, "rewards/margins": 12.085553169250488, "rewards/rejected": -18.569000244140625, "step": 18651 }, { "epoch": 2.9, "learning_rate": 4.672016183412002e-07, "logits/chosen": -1.6056945323944092, "logits/rejected": -2.6312191486358643, "logps/chosen": -121.72512817382812, "logps/rejected": -307.34381103515625, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -8.967915534973145, "rewards/margins": 6.264941692352295, "rewards/rejected": -15.232856750488281, "step": 18652 }, { "epoch": 2.9, "learning_rate": 4.6646817781005236e-07, "logits/chosen": -2.673919916152954, "logits/rejected": -1.700822114944458, "logps/chosen": -285.411376953125, "logps/rejected": -231.7027587890625, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": -9.141621589660645, "rewards/margins": 5.358870029449463, "rewards/rejected": -14.500492095947266, "step": 18653 }, { "epoch": 2.9, "learning_rate": 4.657347372789045e-07, "logits/chosen": -1.729947566986084, "logits/rejected": -2.7822983264923096, "logps/chosen": -233.54934692382812, "logps/rejected": -508.79248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.70252799987793, "rewards/margins": 11.41555404663086, "rewards/rejected": -18.11808204650879, "step": 18654 }, { "epoch": 2.9, "learning_rate": 4.6500129674775665e-07, "logits/chosen": -2.605355978012085, "logits/rejected": -3.004889726638794, "logps/chosen": -104.68070983886719, "logps/rejected": -265.4987487792969, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -7.351802349090576, "rewards/margins": 8.464637756347656, "rewards/rejected": -15.81644058227539, "step": 18655 }, { "epoch": 2.9, "learning_rate": 4.642678562166088e-07, "logits/chosen": -2.4751081466674805, "logits/rejected": -2.5665290355682373, "logps/chosen": -219.4440155029297, "logps/rejected": -361.7998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.270763397216797, "rewards/margins": 11.430742263793945, "rewards/rejected": -16.701505661010742, "step": 18656 }, { "epoch": 2.9, "learning_rate": 4.6353441568546083e-07, "logits/chosen": -2.632493019104004, "logits/rejected": -1.9035961627960205, "logps/chosen": -228.42083740234375, "logps/rejected": -231.4085693359375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.6237106323242188, "rewards/margins": 8.071734428405762, "rewards/rejected": -11.695444107055664, "step": 18657 }, { "epoch": 2.9, "learning_rate": 4.6280097515431297e-07, "logits/chosen": -2.475022554397583, "logits/rejected": -2.8196353912353516, "logps/chosen": -89.31441497802734, "logps/rejected": -441.0888671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.056495666503906, "rewards/margins": 11.853527069091797, "rewards/rejected": -18.910022735595703, "step": 18658 }, { "epoch": 2.9, "learning_rate": 4.620675346231651e-07, "logits/chosen": -2.2013394832611084, "logits/rejected": -2.717456817626953, "logps/chosen": -131.34872436523438, "logps/rejected": -545.848388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.456964492797852, "rewards/margins": 13.478437423706055, "rewards/rejected": -20.935401916503906, "step": 18659 }, { "epoch": 2.9, "learning_rate": 4.6133409409201726e-07, "logits/chosen": -1.5453753471374512, "logits/rejected": -2.4098854064941406, "logps/chosen": -198.75823974609375, "logps/rejected": -287.65386962890625, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -8.937186241149902, "rewards/margins": 7.02275276184082, "rewards/rejected": -15.959939956665039, "step": 18660 }, { "epoch": 2.9, "learning_rate": 4.606006535608694e-07, "logits/chosen": -2.0341622829437256, "logits/rejected": -2.714150905609131, "logps/chosen": -264.15289306640625, "logps/rejected": -379.37432861328125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.708781242370605, "rewards/margins": 9.99109172821045, "rewards/rejected": -19.699872970581055, "step": 18661 }, { "epoch": 2.9, "learning_rate": 4.5986721302972144e-07, "logits/chosen": -2.4894485473632812, "logits/rejected": -2.351897954940796, "logps/chosen": -609.4962768554688, "logps/rejected": -488.59356689453125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.581705570220947, "rewards/margins": 10.509647369384766, "rewards/rejected": -15.091352462768555, "step": 18662 }, { "epoch": 2.9, "learning_rate": 4.591337724985736e-07, "logits/chosen": -2.0444586277008057, "logits/rejected": -2.875499963760376, "logps/chosen": -339.29852294921875, "logps/rejected": -735.6047973632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.743231773376465, "rewards/margins": 10.786698341369629, "rewards/rejected": -17.529930114746094, "step": 18663 }, { "epoch": 2.9, "learning_rate": 4.584003319674257e-07, "logits/chosen": -2.0572917461395264, "logits/rejected": -2.6156985759735107, "logps/chosen": -473.6158142089844, "logps/rejected": -598.3643798828125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.810286521911621, "rewards/margins": 8.69693374633789, "rewards/rejected": -15.507219314575195, "step": 18664 }, { "epoch": 2.9, "learning_rate": 4.576668914362778e-07, "logits/chosen": -2.5838379859924316, "logits/rejected": -2.627607583999634, "logps/chosen": -194.416259765625, "logps/rejected": -334.0249328613281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.0045166015625, "rewards/margins": 8.473918914794922, "rewards/rejected": -18.478435516357422, "step": 18665 }, { "epoch": 2.9, "learning_rate": 4.5693345090512996e-07, "logits/chosen": -1.6104927062988281, "logits/rejected": -2.583651304244995, "logps/chosen": -288.2064514160156, "logps/rejected": -552.3916625976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.416926383972168, "rewards/margins": 12.429166793823242, "rewards/rejected": -19.846094131469727, "step": 18666 }, { "epoch": 2.9, "learning_rate": 4.56200010373982e-07, "logits/chosen": -1.7190431356430054, "logits/rejected": -2.56789231300354, "logps/chosen": -160.69332885742188, "logps/rejected": -459.59808349609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.981593132019043, "rewards/margins": 13.502238273620605, "rewards/rejected": -19.48383140563965, "step": 18667 }, { "epoch": 2.9, "learning_rate": 4.5546656984283414e-07, "logits/chosen": -2.706847906112671, "logits/rejected": -2.803025960922241, "logps/chosen": -389.5489196777344, "logps/rejected": -366.921142578125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -8.282828330993652, "rewards/margins": 6.333285331726074, "rewards/rejected": -14.616113662719727, "step": 18668 }, { "epoch": 2.9, "learning_rate": 4.547331293116863e-07, "logits/chosen": -2.1685643196105957, "logits/rejected": -2.6860663890838623, "logps/chosen": -365.50958251953125, "logps/rejected": -498.1565246582031, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.142356872558594, "rewards/margins": 12.544120788574219, "rewards/rejected": -19.686477661132812, "step": 18669 }, { "epoch": 2.9, "learning_rate": 4.539996887805384e-07, "logits/chosen": -2.7794487476348877, "logits/rejected": -2.312321662902832, "logps/chosen": -412.90087890625, "logps/rejected": -340.1052551269531, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/chosen": -1.2354873418807983, "rewards/margins": 9.771940231323242, "rewards/rejected": -11.007428169250488, "step": 18670 }, { "epoch": 2.9, "learning_rate": 4.5326624824939056e-07, "logits/chosen": -2.867894411087036, "logits/rejected": -2.5801496505737305, "logps/chosen": -250.21412658691406, "logps/rejected": -178.65562438964844, "loss": 0.1113, "rewards/accuracies": 1.0, "rewards/chosen": -7.765258312225342, "rewards/margins": 2.317202568054199, "rewards/rejected": -10.082460403442383, "step": 18671 }, { "epoch": 2.9, "learning_rate": 4.525328077182426e-07, "logits/chosen": -2.462397575378418, "logits/rejected": -1.9116058349609375, "logps/chosen": -348.09967041015625, "logps/rejected": -496.1641540527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.233987808227539, "rewards/margins": 14.816731452941895, "rewards/rejected": -23.05072021484375, "step": 18672 }, { "epoch": 2.9, "learning_rate": 4.5179936718709474e-07, "logits/chosen": -2.5555922985076904, "logits/rejected": -1.402125358581543, "logps/chosen": -492.641357421875, "logps/rejected": -513.000732421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.929065704345703, "rewards/margins": 19.46609878540039, "rewards/rejected": -24.395164489746094, "step": 18673 }, { "epoch": 2.9, "learning_rate": 4.510659266559469e-07, "logits/chosen": -2.1194047927856445, "logits/rejected": -2.467987537384033, "logps/chosen": -162.09945678710938, "logps/rejected": -385.92340087890625, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.981398105621338, "rewards/margins": 11.744114875793457, "rewards/rejected": -17.725513458251953, "step": 18674 }, { "epoch": 2.9, "learning_rate": 4.5033248612479903e-07, "logits/chosen": -2.839212417602539, "logits/rejected": -2.884272336959839, "logps/chosen": -206.0244598388672, "logps/rejected": -393.4033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.859931945800781, "rewards/margins": 9.73324966430664, "rewards/rejected": -15.593181610107422, "step": 18675 }, { "epoch": 2.9, "learning_rate": 4.4959904559365117e-07, "logits/chosen": -2.7992255687713623, "logits/rejected": -2.7986245155334473, "logps/chosen": -316.49847412109375, "logps/rejected": -369.74053955078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.762385368347168, "rewards/margins": 10.790407180786133, "rewards/rejected": -15.5527925491333, "step": 18676 }, { "epoch": 2.9, "learning_rate": 4.488656050625032e-07, "logits/chosen": -1.2729740142822266, "logits/rejected": -2.5178215503692627, "logps/chosen": -193.3583221435547, "logps/rejected": -459.192138671875, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -8.795503616333008, "rewards/margins": 6.899382591247559, "rewards/rejected": -15.69488525390625, "step": 18677 }, { "epoch": 2.9, "learning_rate": 4.4813216453135535e-07, "logits/chosen": -1.3009638786315918, "logits/rejected": -2.100630044937134, "logps/chosen": -309.27593994140625, "logps/rejected": -431.34100341796875, "loss": 0.1691, "rewards/accuracies": 1.0, "rewards/chosen": -11.198643684387207, "rewards/margins": 6.9236931800842285, "rewards/rejected": -18.122337341308594, "step": 18678 }, { "epoch": 2.9, "learning_rate": 4.473987240002075e-07, "logits/chosen": -2.65069580078125, "logits/rejected": -2.9806113243103027, "logps/chosen": -451.83648681640625, "logps/rejected": -608.2637939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.06593132019043, "rewards/margins": 13.505170822143555, "rewards/rejected": -21.571102142333984, "step": 18679 }, { "epoch": 2.91, "learning_rate": 4.4666528346905964e-07, "logits/chosen": -1.827199101448059, "logits/rejected": -3.0358834266662598, "logps/chosen": -139.48390197753906, "logps/rejected": -420.0011901855469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.252023220062256, "rewards/margins": 10.556497573852539, "rewards/rejected": -14.808521270751953, "step": 18680 }, { "epoch": 2.91, "learning_rate": 4.459318429379118e-07, "logits/chosen": -2.545372247695923, "logits/rejected": -1.924538016319275, "logps/chosen": -139.9765625, "logps/rejected": -187.11936950683594, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.915111064910889, "rewards/margins": 6.874257564544678, "rewards/rejected": -14.789368629455566, "step": 18681 }, { "epoch": 2.91, "learning_rate": 4.451984024067638e-07, "logits/chosen": -2.2637531757354736, "logits/rejected": -2.525446653366089, "logps/chosen": -648.4151611328125, "logps/rejected": -671.5492553710938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.020027160644531, "rewards/margins": 13.456785202026367, "rewards/rejected": -20.47681427001953, "step": 18682 }, { "epoch": 2.91, "learning_rate": 4.4446496187561596e-07, "logits/chosen": -2.9749879837036133, "logits/rejected": -2.621458053588867, "logps/chosen": -332.2127990722656, "logps/rejected": -386.805908203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.210993766784668, "rewards/margins": 9.033891677856445, "rewards/rejected": -16.244884490966797, "step": 18683 }, { "epoch": 2.91, "learning_rate": 4.437315213444681e-07, "logits/chosen": -1.3243787288665771, "logits/rejected": -2.5461018085479736, "logps/chosen": -222.5646209716797, "logps/rejected": -342.4974670410156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.349569320678711, "rewards/margins": 9.428600311279297, "rewards/rejected": -13.778169631958008, "step": 18684 }, { "epoch": 2.91, "learning_rate": 4.4299808081332025e-07, "logits/chosen": -2.6334359645843506, "logits/rejected": -2.3635141849517822, "logps/chosen": -314.89117431640625, "logps/rejected": -365.48236083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.017663955688477, "rewards/margins": 10.398311614990234, "rewards/rejected": -15.415975570678711, "step": 18685 }, { "epoch": 2.91, "learning_rate": 4.4226464028217234e-07, "logits/chosen": -1.744194746017456, "logits/rejected": -2.3852717876434326, "logps/chosen": -230.07675170898438, "logps/rejected": -392.74005126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.006833076477051, "rewards/margins": 10.391573905944824, "rewards/rejected": -16.398406982421875, "step": 18686 }, { "epoch": 2.91, "learning_rate": 4.4153119975102443e-07, "logits/chosen": -2.9056484699249268, "logits/rejected": -2.2754621505737305, "logps/chosen": -774.1845092773438, "logps/rejected": -528.4169921875, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -9.223919868469238, "rewards/margins": 8.939825057983398, "rewards/rejected": -18.163745880126953, "step": 18687 }, { "epoch": 2.91, "learning_rate": 4.407977592198765e-07, "logits/chosen": -2.4737110137939453, "logits/rejected": -1.7136242389678955, "logps/chosen": -362.9807434082031, "logps/rejected": -356.21820068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.234346389770508, "rewards/margins": 10.353715896606445, "rewards/rejected": -15.588061332702637, "step": 18688 }, { "epoch": 2.91, "learning_rate": 4.4006431868872866e-07, "logits/chosen": -2.71901535987854, "logits/rejected": -2.5663561820983887, "logps/chosen": -204.4636993408203, "logps/rejected": -322.68115234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.0227766036987305, "rewards/margins": 13.137532234191895, "rewards/rejected": -17.160308837890625, "step": 18689 }, { "epoch": 2.91, "learning_rate": 4.3933087815758075e-07, "logits/chosen": -1.533562421798706, "logits/rejected": -2.5720231533050537, "logps/chosen": -181.52467346191406, "logps/rejected": -515.6455688476562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.090056419372559, "rewards/margins": 16.12666130065918, "rewards/rejected": -22.216716766357422, "step": 18690 }, { "epoch": 2.91, "learning_rate": 4.385974376264329e-07, "logits/chosen": -2.1295764446258545, "logits/rejected": -2.527169942855835, "logps/chosen": -112.79136657714844, "logps/rejected": -289.85748291015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.345815658569336, "rewards/margins": 9.886178016662598, "rewards/rejected": -16.231992721557617, "step": 18691 }, { "epoch": 2.91, "learning_rate": 4.3786399709528504e-07, "logits/chosen": -2.387587785720825, "logits/rejected": -2.2779641151428223, "logps/chosen": -210.3545684814453, "logps/rejected": -199.11349487304688, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -6.678906440734863, "rewards/margins": 6.399367332458496, "rewards/rejected": -13.07827377319336, "step": 18692 }, { "epoch": 2.91, "learning_rate": 4.3713055656413713e-07, "logits/chosen": -2.4280261993408203, "logits/rejected": -2.544487953186035, "logps/chosen": -338.2823486328125, "logps/rejected": -369.5881652832031, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.398326873779297, "rewards/margins": 8.400394439697266, "rewards/rejected": -15.798721313476562, "step": 18693 }, { "epoch": 2.91, "learning_rate": 4.3639711603298927e-07, "logits/chosen": -2.160184621810913, "logits/rejected": -2.5643668174743652, "logps/chosen": -199.012451171875, "logps/rejected": -424.578125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.890729904174805, "rewards/margins": 8.74959945678711, "rewards/rejected": -16.640329360961914, "step": 18694 }, { "epoch": 2.91, "learning_rate": 4.3566367550184136e-07, "logits/chosen": -2.3182880878448486, "logits/rejected": -2.5513999462127686, "logps/chosen": -216.7530517578125, "logps/rejected": -188.6680450439453, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -7.786444187164307, "rewards/margins": 5.151690483093262, "rewards/rejected": -12.938135147094727, "step": 18695 }, { "epoch": 2.91, "learning_rate": 4.349302349706935e-07, "logits/chosen": -2.501250982284546, "logits/rejected": -2.7389814853668213, "logps/chosen": -380.61651611328125, "logps/rejected": -355.5347900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.716897010803223, "rewards/margins": 9.812246322631836, "rewards/rejected": -15.529142379760742, "step": 18696 }, { "epoch": 2.91, "learning_rate": 4.3419679443954565e-07, "logits/chosen": -2.771031618118286, "logits/rejected": -2.4479148387908936, "logps/chosen": -367.73504638671875, "logps/rejected": -347.1270751953125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.289495468139648, "rewards/margins": 9.823393821716309, "rewards/rejected": -19.11288833618164, "step": 18697 }, { "epoch": 2.91, "learning_rate": 4.3346335390839774e-07, "logits/chosen": -2.599778890609741, "logits/rejected": -2.7150704860687256, "logps/chosen": -132.88796997070312, "logps/rejected": -222.80496215820312, "loss": 0.0431, "rewards/accuracies": 1.0, "rewards/chosen": -10.368101119995117, "rewards/margins": 4.817117691040039, "rewards/rejected": -15.185218811035156, "step": 18698 }, { "epoch": 2.91, "learning_rate": 4.327299133772499e-07, "logits/chosen": -2.721081495285034, "logits/rejected": -2.4176907539367676, "logps/chosen": -432.66326904296875, "logps/rejected": -476.43548583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.28033447265625, "rewards/margins": 11.365485191345215, "rewards/rejected": -18.64582061767578, "step": 18699 }, { "epoch": 2.91, "learning_rate": 4.3199647284610197e-07, "logits/chosen": -2.55287504196167, "logits/rejected": -2.3907673358917236, "logps/chosen": -571.9494018554688, "logps/rejected": -519.78466796875, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/chosen": -9.336578369140625, "rewards/margins": 7.152261734008789, "rewards/rejected": -16.488840103149414, "step": 18700 }, { "epoch": 2.91, "learning_rate": 4.312630323149541e-07, "logits/chosen": -2.801393508911133, "logits/rejected": -2.859452962875366, "logps/chosen": -202.26846313476562, "logps/rejected": -343.7227783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.197800159454346, "rewards/margins": 13.010692596435547, "rewards/rejected": -19.208492279052734, "step": 18701 }, { "epoch": 2.91, "learning_rate": 4.3052959178380626e-07, "logits/chosen": -2.10585880279541, "logits/rejected": -2.7555923461914062, "logps/chosen": -155.24319458007812, "logps/rejected": -406.3402099609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.5118091106414795, "rewards/margins": 15.585498809814453, "rewards/rejected": -19.097307205200195, "step": 18702 }, { "epoch": 2.91, "learning_rate": 4.2979615125265835e-07, "logits/chosen": -2.5131263732910156, "logits/rejected": -2.6921660900115967, "logps/chosen": -97.26335144042969, "logps/rejected": -267.4394836425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.272638320922852, "rewards/margins": 12.246315002441406, "rewards/rejected": -18.518953323364258, "step": 18703 }, { "epoch": 2.91, "learning_rate": 4.290627107215105e-07, "logits/chosen": -2.4884183406829834, "logits/rejected": -2.183839797973633, "logps/chosen": -459.03167724609375, "logps/rejected": -533.3545532226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.937148571014404, "rewards/margins": 15.221460342407227, "rewards/rejected": -21.15860939025879, "step": 18704 }, { "epoch": 2.91, "learning_rate": 4.283292701903626e-07, "logits/chosen": -1.8751927614212036, "logits/rejected": -2.7295637130737305, "logps/chosen": -111.48904418945312, "logps/rejected": -370.2427978515625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -7.342044353485107, "rewards/margins": 7.56149435043335, "rewards/rejected": -14.903538703918457, "step": 18705 }, { "epoch": 2.91, "learning_rate": 4.275958296592147e-07, "logits/chosen": -2.7942774295806885, "logits/rejected": -2.8873636722564697, "logps/chosen": -157.63172912597656, "logps/rejected": -296.670654296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.484735488891602, "rewards/margins": 9.678728103637695, "rewards/rejected": -20.163463592529297, "step": 18706 }, { "epoch": 2.91, "learning_rate": 4.268623891280668e-07, "logits/chosen": -1.6098048686981201, "logits/rejected": -2.5520689487457275, "logps/chosen": -164.5967254638672, "logps/rejected": -424.9259338378906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.986183166503906, "rewards/margins": 9.0867919921875, "rewards/rejected": -17.072975158691406, "step": 18707 }, { "epoch": 2.91, "learning_rate": 4.261289485969189e-07, "logits/chosen": -2.1414217948913574, "logits/rejected": -2.027636766433716, "logps/chosen": -890.8602294921875, "logps/rejected": -669.1837158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.156765937805176, "rewards/margins": 12.702075004577637, "rewards/rejected": -22.858840942382812, "step": 18708 }, { "epoch": 2.91, "learning_rate": 4.2539550806577104e-07, "logits/chosen": -2.4854063987731934, "logits/rejected": -2.369654655456543, "logps/chosen": -215.84963989257812, "logps/rejected": -385.49066162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.983574867248535, "rewards/margins": 10.702217102050781, "rewards/rejected": -16.685791015625, "step": 18709 }, { "epoch": 2.91, "learning_rate": 4.2466206753462313e-07, "logits/chosen": -2.662626266479492, "logits/rejected": -1.973101019859314, "logps/chosen": -489.35601806640625, "logps/rejected": -451.9812927246094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.693696975708008, "rewards/margins": 11.060526847839355, "rewards/rejected": -19.754222869873047, "step": 18710 }, { "epoch": 2.91, "learning_rate": 4.239286270034753e-07, "logits/chosen": -1.9853813648223877, "logits/rejected": -1.9046564102172852, "logps/chosen": -292.53582763671875, "logps/rejected": -420.6255798339844, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -7.863121032714844, "rewards/margins": 9.451408386230469, "rewards/rejected": -17.314529418945312, "step": 18711 }, { "epoch": 2.91, "learning_rate": 4.231951864723274e-07, "logits/chosen": -2.2445573806762695, "logits/rejected": -2.7214810848236084, "logps/chosen": -147.25537109375, "logps/rejected": -349.35345458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.851249694824219, "rewards/margins": 13.855851173400879, "rewards/rejected": -18.707101821899414, "step": 18712 }, { "epoch": 2.91, "learning_rate": 4.224617459411795e-07, "logits/chosen": -2.5769190788269043, "logits/rejected": -2.8566734790802, "logps/chosen": -98.08568572998047, "logps/rejected": -364.3919982910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.975090503692627, "rewards/margins": 12.820596694946289, "rewards/rejected": -18.795686721801758, "step": 18713 }, { "epoch": 2.91, "learning_rate": 4.2172830541003165e-07, "logits/chosen": -1.8149468898773193, "logits/rejected": -2.241281270980835, "logps/chosen": -241.6315460205078, "logps/rejected": -361.29730224609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.480574131011963, "rewards/margins": 10.345420837402344, "rewards/rejected": -16.82599449157715, "step": 18714 }, { "epoch": 2.91, "learning_rate": 4.2099486487888374e-07, "logits/chosen": -2.6264302730560303, "logits/rejected": -2.695730447769165, "logps/chosen": -200.21824645996094, "logps/rejected": -295.2738037109375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.872939586639404, "rewards/margins": 9.087154388427734, "rewards/rejected": -14.960094451904297, "step": 18715 }, { "epoch": 2.91, "learning_rate": 4.202614243477359e-07, "logits/chosen": -2.265984535217285, "logits/rejected": -2.28271484375, "logps/chosen": -212.43649291992188, "logps/rejected": -393.4590148925781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.245667457580566, "rewards/margins": 13.3135986328125, "rewards/rejected": -20.559267044067383, "step": 18716 }, { "epoch": 2.91, "learning_rate": 4.19527983816588e-07, "logits/chosen": -2.161778211593628, "logits/rejected": -2.8999786376953125, "logps/chosen": -179.49221801757812, "logps/rejected": -328.3690490722656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.462688446044922, "rewards/margins": 9.591524124145508, "rewards/rejected": -17.05421257019043, "step": 18717 }, { "epoch": 2.91, "learning_rate": 4.187945432854401e-07, "logits/chosen": -2.2842788696289062, "logits/rejected": -2.475480556488037, "logps/chosen": -233.02520751953125, "logps/rejected": -316.95013427734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.325016498565674, "rewards/margins": 9.58596420288086, "rewards/rejected": -14.910980224609375, "step": 18718 }, { "epoch": 2.91, "learning_rate": 4.1806110275429226e-07, "logits/chosen": -2.615403652191162, "logits/rejected": -2.2807044982910156, "logps/chosen": -389.03662109375, "logps/rejected": -373.7242431640625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/chosen": -6.25765323638916, "rewards/margins": 9.432476997375488, "rewards/rejected": -15.690130233764648, "step": 18719 }, { "epoch": 2.91, "learning_rate": 4.1732766222314435e-07, "logits/chosen": -2.555372953414917, "logits/rejected": -2.744612216949463, "logps/chosen": -346.5836486816406, "logps/rejected": -407.5739440917969, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.493782043457031, "rewards/margins": 8.604256629943848, "rewards/rejected": -14.098039627075195, "step": 18720 }, { "epoch": 2.91, "learning_rate": 4.165942216919965e-07, "logits/chosen": -2.8990728855133057, "logits/rejected": -2.1827919483184814, "logps/chosen": -254.6143798828125, "logps/rejected": -215.19454956054688, "loss": 0.5481, "rewards/accuracies": 0.5, "rewards/chosen": -5.109672546386719, "rewards/margins": 6.962897777557373, "rewards/rejected": -12.07257080078125, "step": 18721 }, { "epoch": 2.91, "learning_rate": 4.158607811608486e-07, "logits/chosen": -2.197749137878418, "logits/rejected": -1.5727046728134155, "logps/chosen": -313.6884765625, "logps/rejected": -492.2455749511719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.6229567527770996, "rewards/margins": 14.110710144042969, "rewards/rejected": -17.733667373657227, "step": 18722 }, { "epoch": 2.91, "learning_rate": 4.1512734062970073e-07, "logits/chosen": -2.7123970985412598, "logits/rejected": -1.9562149047851562, "logps/chosen": -322.875244140625, "logps/rejected": -386.0871276855469, "loss": 0.1784, "rewards/accuracies": 1.0, "rewards/chosen": -10.106255531311035, "rewards/margins": 7.335739612579346, "rewards/rejected": -17.44199562072754, "step": 18723 }, { "epoch": 2.91, "learning_rate": 4.1439390009855287e-07, "logits/chosen": -2.6160640716552734, "logits/rejected": -1.6569868326187134, "logps/chosen": -223.62911987304688, "logps/rejected": -140.10137939453125, "loss": 0.293, "rewards/accuracies": 1.0, "rewards/chosen": -7.950230598449707, "rewards/margins": 4.257955074310303, "rewards/rejected": -12.208185195922852, "step": 18724 }, { "epoch": 2.91, "learning_rate": 4.1366045956740496e-07, "logits/chosen": -2.6343305110931396, "logits/rejected": -2.3179619312286377, "logps/chosen": -352.91748046875, "logps/rejected": -333.2412414550781, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -7.818127632141113, "rewards/margins": 9.192315101623535, "rewards/rejected": -17.01044273376465, "step": 18725 }, { "epoch": 2.91, "learning_rate": 4.129270190362571e-07, "logits/chosen": -2.1543002128601074, "logits/rejected": -2.8132243156433105, "logps/chosen": -209.5484619140625, "logps/rejected": -623.637451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.920516014099121, "rewards/margins": 12.987428665161133, "rewards/rejected": -20.907943725585938, "step": 18726 }, { "epoch": 2.91, "learning_rate": 4.121935785051092e-07, "logits/chosen": -2.79803204536438, "logits/rejected": -2.416350841522217, "logps/chosen": -238.86520385742188, "logps/rejected": -281.567626953125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.209023475646973, "rewards/margins": 8.060550689697266, "rewards/rejected": -15.269573211669922, "step": 18727 }, { "epoch": 2.91, "learning_rate": 4.114601379739613e-07, "logits/chosen": -1.6376047134399414, "logits/rejected": -2.380706787109375, "logps/chosen": -127.8189697265625, "logps/rejected": -234.64796447753906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.6318020820617676, "rewards/margins": 10.13240909576416, "rewards/rejected": -13.764211654663086, "step": 18728 }, { "epoch": 2.91, "learning_rate": 4.1072669744281343e-07, "logits/chosen": -1.1983985900878906, "logits/rejected": -2.545997142791748, "logps/chosen": -166.94110107421875, "logps/rejected": -418.6180419921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.647829055786133, "rewards/margins": 10.882852554321289, "rewards/rejected": -18.530681610107422, "step": 18729 }, { "epoch": 2.91, "learning_rate": 4.099932569116655e-07, "logits/chosen": -2.4504239559173584, "logits/rejected": -2.7269093990325928, "logps/chosen": -265.67059326171875, "logps/rejected": -359.6172180175781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.64699935913086, "rewards/margins": 10.29707145690918, "rewards/rejected": -19.944068908691406, "step": 18730 }, { "epoch": 2.91, "learning_rate": 4.0925981638051766e-07, "logits/chosen": -2.6866068840026855, "logits/rejected": -2.7564327716827393, "logps/chosen": -355.40350341796875, "logps/rejected": -318.2113037109375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.126569747924805, "rewards/margins": 6.894261360168457, "rewards/rejected": -15.020832061767578, "step": 18731 }, { "epoch": 2.91, "learning_rate": 4.0852637584936975e-07, "logits/chosen": -2.585113525390625, "logits/rejected": -2.646604299545288, "logps/chosen": -180.55459594726562, "logps/rejected": -417.8662109375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -6.2284321784973145, "rewards/margins": 9.389091491699219, "rewards/rejected": -15.617523193359375, "step": 18732 }, { "epoch": 2.91, "learning_rate": 4.077929353182219e-07, "logits/chosen": -1.9991368055343628, "logits/rejected": -2.5355076789855957, "logps/chosen": -100.72996520996094, "logps/rejected": -277.3848571777344, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.651065826416016, "rewards/margins": 8.059152603149414, "rewards/rejected": -16.71021842956543, "step": 18733 }, { "epoch": 2.91, "learning_rate": 4.0705949478707404e-07, "logits/chosen": -2.438563108444214, "logits/rejected": -2.5618457794189453, "logps/chosen": -990.6954345703125, "logps/rejected": -732.6696166992188, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -10.963190078735352, "rewards/margins": 7.490895748138428, "rewards/rejected": -18.454086303710938, "step": 18734 }, { "epoch": 2.91, "learning_rate": 4.0632605425592613e-07, "logits/chosen": -2.431062936782837, "logits/rejected": -3.086256504058838, "logps/chosen": -93.23869323730469, "logps/rejected": -402.0099792480469, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.04159688949585, "rewards/margins": 14.435552597045898, "rewards/rejected": -20.477149963378906, "step": 18735 }, { "epoch": 2.91, "learning_rate": 4.0559261372477827e-07, "logits/chosen": -2.4266088008880615, "logits/rejected": -1.7435957193374634, "logps/chosen": -126.3207015991211, "logps/rejected": -180.609375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.034600257873535, "rewards/margins": 8.179373741149902, "rewards/rejected": -14.213973999023438, "step": 18736 }, { "epoch": 2.91, "learning_rate": 4.0485917319363036e-07, "logits/chosen": -2.5995993614196777, "logits/rejected": -2.185176134109497, "logps/chosen": -715.3711547851562, "logps/rejected": -599.3023681640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.061551094055176, "rewards/margins": 10.820037841796875, "rewards/rejected": -15.881589889526367, "step": 18737 }, { "epoch": 2.91, "learning_rate": 4.041257326624825e-07, "logits/chosen": -2.944096803665161, "logits/rejected": -2.6016359329223633, "logps/chosen": -295.00421142578125, "logps/rejected": -410.63677978515625, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -9.593864440917969, "rewards/margins": 6.885955333709717, "rewards/rejected": -16.479820251464844, "step": 18738 }, { "epoch": 2.91, "learning_rate": 4.0339229213133465e-07, "logits/chosen": -2.4058825969696045, "logits/rejected": -2.779740333557129, "logps/chosen": -106.17304992675781, "logps/rejected": -211.20257568359375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -4.978521347045898, "rewards/margins": 6.137947082519531, "rewards/rejected": -11.11646842956543, "step": 18739 }, { "epoch": 2.91, "learning_rate": 4.0265885160018674e-07, "logits/chosen": -2.6506502628326416, "logits/rejected": -2.0169944763183594, "logps/chosen": -228.03501892089844, "logps/rejected": -197.87432861328125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -8.755468368530273, "rewards/margins": 5.887601852416992, "rewards/rejected": -14.643070220947266, "step": 18740 }, { "epoch": 2.91, "learning_rate": 4.019254110690389e-07, "logits/chosen": -2.357866048812866, "logits/rejected": -2.593717575073242, "logps/chosen": -96.13664245605469, "logps/rejected": -337.79248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.710204124450684, "rewards/margins": 14.198442459106445, "rewards/rejected": -20.908645629882812, "step": 18741 }, { "epoch": 2.91, "learning_rate": 4.0119197053789097e-07, "logits/chosen": -1.2517154216766357, "logits/rejected": -2.569692373275757, "logps/chosen": -200.47683715820312, "logps/rejected": -499.9224853515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.746094703674316, "rewards/margins": 9.422663688659668, "rewards/rejected": -19.168758392333984, "step": 18742 }, { "epoch": 2.91, "learning_rate": 4.004585300067431e-07, "logits/chosen": -2.7505240440368652, "logits/rejected": -3.078778028488159, "logps/chosen": -97.50354766845703, "logps/rejected": -335.06976318359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -3.7328860759735107, "rewards/margins": 8.916509628295898, "rewards/rejected": -12.649394989013672, "step": 18743 }, { "epoch": 2.92, "learning_rate": 3.9972508947559525e-07, "logits/chosen": -2.9536311626434326, "logits/rejected": -1.3657292127609253, "logps/chosen": -341.9576721191406, "logps/rejected": -205.67935180664062, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -3.187486410140991, "rewards/margins": 8.413971900939941, "rewards/rejected": -11.601457595825195, "step": 18744 }, { "epoch": 2.92, "learning_rate": 3.9899164894444735e-07, "logits/chosen": -2.3613791465759277, "logits/rejected": -2.4977290630340576, "logps/chosen": -165.62135314941406, "logps/rejected": -308.476806640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.7501983642578125, "rewards/margins": 11.878259658813477, "rewards/rejected": -19.628459930419922, "step": 18745 }, { "epoch": 2.92, "learning_rate": 3.982582084132995e-07, "logits/chosen": -2.6137168407440186, "logits/rejected": -2.7052035331726074, "logps/chosen": -104.1429672241211, "logps/rejected": -337.0646057128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.042257308959961, "rewards/margins": 10.476114273071289, "rewards/rejected": -16.51837158203125, "step": 18746 }, { "epoch": 2.92, "learning_rate": 3.975247678821516e-07, "logits/chosen": -2.4743306636810303, "logits/rejected": -2.843507766723633, "logps/chosen": -122.70792388916016, "logps/rejected": -287.7593688964844, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.600876331329346, "rewards/margins": 9.828039169311523, "rewards/rejected": -15.428915977478027, "step": 18747 }, { "epoch": 2.92, "learning_rate": 3.9679132735100367e-07, "logits/chosen": -1.476462721824646, "logits/rejected": -2.2626705169677734, "logps/chosen": -306.78070068359375, "logps/rejected": -578.5150146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.962810039520264, "rewards/margins": 15.771671295166016, "rewards/rejected": -21.734481811523438, "step": 18748 }, { "epoch": 2.92, "learning_rate": 3.9605788681985576e-07, "logits/chosen": -2.243866205215454, "logits/rejected": -2.6964468955993652, "logps/chosen": -386.25592041015625, "logps/rejected": -566.1983642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.040985107421875, "rewards/margins": 15.981640815734863, "rewards/rejected": -22.022624969482422, "step": 18749 }, { "epoch": 2.92, "learning_rate": 3.953244462887079e-07, "logits/chosen": -2.614179849624634, "logits/rejected": -2.8869712352752686, "logps/chosen": -145.28089904785156, "logps/rejected": -353.976318359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.577070713043213, "rewards/margins": 9.400992393493652, "rewards/rejected": -15.978063583374023, "step": 18750 }, { "epoch": 2.92, "learning_rate": 3.9459100575756004e-07, "logits/chosen": -2.6043951511383057, "logits/rejected": -2.5788683891296387, "logps/chosen": -201.06246948242188, "logps/rejected": -349.0135498046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.880062580108643, "rewards/margins": 9.244189262390137, "rewards/rejected": -15.124252319335938, "step": 18751 }, { "epoch": 2.92, "learning_rate": 3.9385756522641213e-07, "logits/chosen": -2.4747254848480225, "logits/rejected": -2.3728463649749756, "logps/chosen": -254.51007080078125, "logps/rejected": -448.4769287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.77606725692749, "rewards/margins": 14.481534957885742, "rewards/rejected": -19.25760269165039, "step": 18752 }, { "epoch": 2.92, "learning_rate": 3.931241246952643e-07, "logits/chosen": -2.3391151428222656, "logits/rejected": -2.7283101081848145, "logps/chosen": -122.9915542602539, "logps/rejected": -289.2114562988281, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.500319004058838, "rewards/margins": 10.038153648376465, "rewards/rejected": -14.538473129272461, "step": 18753 }, { "epoch": 2.92, "learning_rate": 3.9239068416411637e-07, "logits/chosen": -1.9862172603607178, "logits/rejected": -2.7700023651123047, "logps/chosen": -174.11907958984375, "logps/rejected": -365.35748291015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.303642272949219, "rewards/margins": 8.030513763427734, "rewards/rejected": -16.334156036376953, "step": 18754 }, { "epoch": 2.92, "learning_rate": 3.916572436329685e-07, "logits/chosen": -2.6984574794769287, "logits/rejected": -2.8479561805725098, "logps/chosen": -300.9827880859375, "logps/rejected": -312.9735412597656, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -12.527865409851074, "rewards/margins": 5.121669769287109, "rewards/rejected": -17.6495361328125, "step": 18755 }, { "epoch": 2.92, "learning_rate": 3.9092380310182065e-07, "logits/chosen": -2.339536428451538, "logits/rejected": -3.0632245540618896, "logps/chosen": -197.10934448242188, "logps/rejected": -399.33709716796875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.774019241333008, "rewards/margins": 8.154905319213867, "rewards/rejected": -14.928924560546875, "step": 18756 }, { "epoch": 2.92, "learning_rate": 3.9019036257067274e-07, "logits/chosen": -2.38875150680542, "logits/rejected": -2.5880367755889893, "logps/chosen": -99.02172088623047, "logps/rejected": -235.35845947265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.515909194946289, "rewards/margins": 9.64708423614502, "rewards/rejected": -16.162994384765625, "step": 18757 }, { "epoch": 2.92, "learning_rate": 3.894569220395249e-07, "logits/chosen": -2.2583301067352295, "logits/rejected": -2.7307708263397217, "logps/chosen": -250.76292419433594, "logps/rejected": -293.3485107421875, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -8.647320747375488, "rewards/margins": 5.769905090332031, "rewards/rejected": -14.41722583770752, "step": 18758 }, { "epoch": 2.92, "learning_rate": 3.88723481508377e-07, "logits/chosen": -0.7016515731811523, "logits/rejected": -2.591542959213257, "logps/chosen": -106.80589294433594, "logps/rejected": -522.8031005859375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.967251777648926, "rewards/margins": 10.804031372070312, "rewards/rejected": -18.771282196044922, "step": 18759 }, { "epoch": 2.92, "learning_rate": 3.879900409772291e-07, "logits/chosen": -1.8725322484970093, "logits/rejected": -2.9471969604492188, "logps/chosen": -163.285400390625, "logps/rejected": -483.3885498046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.33363151550293, "rewards/margins": 8.716148376464844, "rewards/rejected": -18.049779891967773, "step": 18760 }, { "epoch": 2.92, "learning_rate": 3.8725660044608126e-07, "logits/chosen": -1.4921507835388184, "logits/rejected": -2.723550796508789, "logps/chosen": -186.15414428710938, "logps/rejected": -503.98052978515625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -9.530387878417969, "rewards/margins": 7.376707553863525, "rewards/rejected": -16.907094955444336, "step": 18761 }, { "epoch": 2.92, "learning_rate": 3.8652315991493335e-07, "logits/chosen": -2.6621041297912598, "logits/rejected": -2.322011709213257, "logps/chosen": -492.6388854980469, "logps/rejected": -460.1458740234375, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -5.029529571533203, "rewards/margins": 8.952730178833008, "rewards/rejected": -13.982259750366211, "step": 18762 }, { "epoch": 2.92, "learning_rate": 3.857897193837855e-07, "logits/chosen": -2.660702705383301, "logits/rejected": -2.840221881866455, "logps/chosen": -214.47657775878906, "logps/rejected": -414.4039306640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.9383769035339355, "rewards/margins": 7.744443416595459, "rewards/rejected": -14.682820320129395, "step": 18763 }, { "epoch": 2.92, "learning_rate": 3.850562788526376e-07, "logits/chosen": -2.5626256465911865, "logits/rejected": -2.4243581295013428, "logps/chosen": -533.9224853515625, "logps/rejected": -652.5534057617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.79876184463501, "rewards/margins": 15.80020809173584, "rewards/rejected": -22.598970413208008, "step": 18764 }, { "epoch": 2.92, "learning_rate": 3.8432283832148973e-07, "logits/chosen": -1.2428758144378662, "logits/rejected": -2.224050521850586, "logps/chosen": -240.4170684814453, "logps/rejected": -361.237548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.282697677612305, "rewards/margins": 9.583127975463867, "rewards/rejected": -15.865825653076172, "step": 18765 }, { "epoch": 2.92, "learning_rate": 3.8358939779034187e-07, "logits/chosen": -1.80922269821167, "logits/rejected": -2.7484734058380127, "logps/chosen": -84.32717895507812, "logps/rejected": -341.84197998046875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -7.043668746948242, "rewards/margins": 10.724175453186035, "rewards/rejected": -17.767845153808594, "step": 18766 }, { "epoch": 2.92, "learning_rate": 3.8285595725919396e-07, "logits/chosen": -2.8396549224853516, "logits/rejected": -2.5533628463745117, "logps/chosen": -184.37229919433594, "logps/rejected": -232.79916381835938, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.139772415161133, "rewards/margins": 6.616090774536133, "rewards/rejected": -12.755863189697266, "step": 18767 }, { "epoch": 2.92, "learning_rate": 3.8212251672804605e-07, "logits/chosen": -1.9286227226257324, "logits/rejected": -3.1878175735473633, "logps/chosen": -149.66839599609375, "logps/rejected": -546.3043212890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.841589450836182, "rewards/margins": 9.227392196655273, "rewards/rejected": -16.068981170654297, "step": 18768 }, { "epoch": 2.92, "learning_rate": 3.8138907619689814e-07, "logits/chosen": -2.74589467048645, "logits/rejected": -2.202976703643799, "logps/chosen": -191.90965270996094, "logps/rejected": -95.64215087890625, "loss": 0.2611, "rewards/accuracies": 1.0, "rewards/chosen": -4.880174160003662, "rewards/margins": 3.3418972492218018, "rewards/rejected": -8.222071647644043, "step": 18769 }, { "epoch": 2.92, "learning_rate": 3.806556356657503e-07, "logits/chosen": -2.1000280380249023, "logits/rejected": -2.7921230792999268, "logps/chosen": -180.4382781982422, "logps/rejected": -409.8120422363281, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.212400436401367, "rewards/margins": 8.90335464477539, "rewards/rejected": -16.115755081176758, "step": 18770 }, { "epoch": 2.92, "learning_rate": 3.7992219513460243e-07, "logits/chosen": -2.708521842956543, "logits/rejected": -2.971754312515259, "logps/chosen": -459.65252685546875, "logps/rejected": -436.9056396484375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -8.186461448669434, "rewards/margins": 5.652789115905762, "rewards/rejected": -13.839250564575195, "step": 18771 }, { "epoch": 2.92, "learning_rate": 3.791887546034545e-07, "logits/chosen": -2.8088831901550293, "logits/rejected": -1.483480453491211, "logps/chosen": -396.39288330078125, "logps/rejected": -314.3805236816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.46913743019104, "rewards/margins": 13.424297332763672, "rewards/rejected": -15.893434524536133, "step": 18772 }, { "epoch": 2.92, "learning_rate": 3.7845531407230666e-07, "logits/chosen": -2.467725992202759, "logits/rejected": -2.6103477478027344, "logps/chosen": -90.08084106445312, "logps/rejected": -162.9017333984375, "loss": 0.03, "rewards/accuracies": 1.0, "rewards/chosen": -7.554079055786133, "rewards/margins": 4.3182244300842285, "rewards/rejected": -11.872303009033203, "step": 18773 }, { "epoch": 2.92, "learning_rate": 3.7772187354115875e-07, "logits/chosen": -2.423804998397827, "logits/rejected": -2.0297651290893555, "logps/chosen": -226.74609375, "logps/rejected": -398.81689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.51948356628418, "rewards/margins": 11.282732009887695, "rewards/rejected": -19.802215576171875, "step": 18774 }, { "epoch": 2.92, "learning_rate": 3.769884330100109e-07, "logits/chosen": -1.9004316329956055, "logits/rejected": -2.7962656021118164, "logps/chosen": -350.9380187988281, "logps/rejected": -773.0404052734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.547561645507812, "rewards/margins": 9.899927139282227, "rewards/rejected": -20.44748878479004, "step": 18775 }, { "epoch": 2.92, "learning_rate": 3.7625499247886304e-07, "logits/chosen": -2.783660888671875, "logits/rejected": -2.4193718433380127, "logps/chosen": -603.4368286132812, "logps/rejected": -405.8132019042969, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -7.1374192237854, "rewards/margins": 6.495416164398193, "rewards/rejected": -13.632835388183594, "step": 18776 }, { "epoch": 2.92, "learning_rate": 3.755215519477151e-07, "logits/chosen": -2.1136882305145264, "logits/rejected": -2.4178075790405273, "logps/chosen": -188.7140350341797, "logps/rejected": -508.3471984863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.605157852172852, "rewards/margins": 12.45224380493164, "rewards/rejected": -20.057403564453125, "step": 18777 }, { "epoch": 2.92, "learning_rate": 3.7478811141656727e-07, "logits/chosen": -2.6873347759246826, "logits/rejected": -2.9349365234375, "logps/chosen": -302.07550048828125, "logps/rejected": -371.208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.507917881011963, "rewards/margins": 11.655298233032227, "rewards/rejected": -18.16321563720703, "step": 18778 }, { "epoch": 2.92, "learning_rate": 3.7405467088541936e-07, "logits/chosen": -2.1831791400909424, "logits/rejected": -2.6761395931243896, "logps/chosen": -374.32916259765625, "logps/rejected": -391.45782470703125, "loss": 4.5327, "rewards/accuracies": 0.5, "rewards/chosen": -12.262918472290039, "rewards/margins": -0.8693280220031738, "rewards/rejected": -11.393590927124023, "step": 18779 }, { "epoch": 2.92, "learning_rate": 3.733212303542715e-07, "logits/chosen": -2.1435210704803467, "logits/rejected": -2.6410248279571533, "logps/chosen": -89.25787353515625, "logps/rejected": -402.30413818359375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -7.315644264221191, "rewards/margins": 8.398510932922363, "rewards/rejected": -15.714155197143555, "step": 18780 }, { "epoch": 2.92, "learning_rate": 3.725877898231236e-07, "logits/chosen": -1.5331448316574097, "logits/rejected": -2.921387195587158, "logps/chosen": -265.25067138671875, "logps/rejected": -610.2477416992188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.110278606414795, "rewards/margins": 9.619832038879395, "rewards/rejected": -16.73011016845703, "step": 18781 }, { "epoch": 2.92, "learning_rate": 3.7185434929197574e-07, "logits/chosen": -1.599181056022644, "logits/rejected": -2.593707323074341, "logps/chosen": -226.3354949951172, "logps/rejected": -448.96142578125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -10.24009895324707, "rewards/margins": 9.519758224487305, "rewards/rejected": -19.759857177734375, "step": 18782 }, { "epoch": 2.92, "learning_rate": 3.711209087608279e-07, "logits/chosen": -1.2550208568572998, "logits/rejected": -2.5644350051879883, "logps/chosen": -154.96168518066406, "logps/rejected": -481.895751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.557557106018066, "rewards/margins": 12.70492172241211, "rewards/rejected": -23.26247787475586, "step": 18783 }, { "epoch": 2.92, "learning_rate": 3.7038746822967997e-07, "logits/chosen": -2.3070645332336426, "logits/rejected": -2.7404539585113525, "logps/chosen": -198.97854614257812, "logps/rejected": -427.0027770996094, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -9.115686416625977, "rewards/margins": 8.295576095581055, "rewards/rejected": -17.41126251220703, "step": 18784 }, { "epoch": 2.92, "learning_rate": 3.696540276985321e-07, "logits/chosen": -2.9803500175476074, "logits/rejected": -2.621718168258667, "logps/chosen": -559.4896850585938, "logps/rejected": -279.17572021484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -2.5399703979492188, "rewards/margins": 9.179207801818848, "rewards/rejected": -11.719179153442383, "step": 18785 }, { "epoch": 2.92, "learning_rate": 3.689205871673842e-07, "logits/chosen": -2.0568251609802246, "logits/rejected": -2.6387338638305664, "logps/chosen": -693.49072265625, "logps/rejected": -747.4361572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.05375862121582, "rewards/margins": 11.557136535644531, "rewards/rejected": -19.61089515686035, "step": 18786 }, { "epoch": 2.92, "learning_rate": 3.6818714663623634e-07, "logits/chosen": -2.7384471893310547, "logits/rejected": -2.058424949645996, "logps/chosen": -276.34576416015625, "logps/rejected": -235.85830688476562, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.355118751525879, "rewards/margins": 8.385286331176758, "rewards/rejected": -13.74040412902832, "step": 18787 }, { "epoch": 2.92, "learning_rate": 3.6745370610508843e-07, "logits/chosen": -1.8454667329788208, "logits/rejected": -2.815911054611206, "logps/chosen": -411.6719055175781, "logps/rejected": -677.4380493164062, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/chosen": -9.323554992675781, "rewards/margins": 6.68092155456543, "rewards/rejected": -16.00447654724121, "step": 18788 }, { "epoch": 2.92, "learning_rate": 3.667202655739405e-07, "logits/chosen": -2.384216547012329, "logits/rejected": -2.691516399383545, "logps/chosen": -164.96688842773438, "logps/rejected": -377.445556640625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.827160835266113, "rewards/margins": 10.909159660339355, "rewards/rejected": -15.736320495605469, "step": 18789 }, { "epoch": 2.92, "learning_rate": 3.6598682504279267e-07, "logits/chosen": -2.7297616004943848, "logits/rejected": -2.071133852005005, "logps/chosen": -608.3880004882812, "logps/rejected": -450.1812744140625, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -7.400912284851074, "rewards/margins": 8.883620262145996, "rewards/rejected": -16.28453254699707, "step": 18790 }, { "epoch": 2.92, "learning_rate": 3.6525338451164476e-07, "logits/chosen": -2.66780948638916, "logits/rejected": -2.6980955600738525, "logps/chosen": -178.59274291992188, "logps/rejected": -404.2914123535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.285769462585449, "rewards/margins": 12.776565551757812, "rewards/rejected": -18.062335968017578, "step": 18791 }, { "epoch": 2.92, "learning_rate": 3.645199439804969e-07, "logits/chosen": -2.337333917617798, "logits/rejected": -2.5248420238494873, "logps/chosen": -171.92465209960938, "logps/rejected": -335.484619140625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -8.284214973449707, "rewards/margins": 8.69308853149414, "rewards/rejected": -16.97730255126953, "step": 18792 }, { "epoch": 2.92, "learning_rate": 3.6378650344934904e-07, "logits/chosen": -2.1408371925354004, "logits/rejected": -2.5587401390075684, "logps/chosen": -214.68605041503906, "logps/rejected": -376.103271484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.38960075378418, "rewards/margins": 8.135961532592773, "rewards/rejected": -16.525562286376953, "step": 18793 }, { "epoch": 2.92, "learning_rate": 3.6305306291820113e-07, "logits/chosen": -2.638594388961792, "logits/rejected": -2.705441474914551, "logps/chosen": -94.71971130371094, "logps/rejected": -265.5115966796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.898099899291992, "rewards/margins": 9.213356018066406, "rewards/rejected": -16.1114559173584, "step": 18794 }, { "epoch": 2.92, "learning_rate": 3.623196223870533e-07, "logits/chosen": -3.056567668914795, "logits/rejected": -2.7763123512268066, "logps/chosen": -535.6205444335938, "logps/rejected": -393.50482177734375, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.9826765060424805, "rewards/margins": 10.705116271972656, "rewards/rejected": -17.68779182434082, "step": 18795 }, { "epoch": 2.92, "learning_rate": 3.6158618185590537e-07, "logits/chosen": -2.420999765396118, "logits/rejected": -2.7785792350769043, "logps/chosen": -519.2762451171875, "logps/rejected": -844.785888671875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -9.145042419433594, "rewards/margins": 11.534416198730469, "rewards/rejected": -20.679458618164062, "step": 18796 }, { "epoch": 2.92, "learning_rate": 3.608527413247575e-07, "logits/chosen": -2.192222833633423, "logits/rejected": -1.6385204792022705, "logps/chosen": -189.40628051757812, "logps/rejected": -110.87813568115234, "loss": 2.2158, "rewards/accuracies": 0.5, "rewards/chosen": -10.43972396850586, "rewards/margins": -1.4529120922088623, "rewards/rejected": -8.986811637878418, "step": 18797 }, { "epoch": 2.92, "learning_rate": 3.6011930079360965e-07, "logits/chosen": -2.7786929607391357, "logits/rejected": -2.682934522628784, "logps/chosen": -361.7860412597656, "logps/rejected": -479.8622741699219, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.396060943603516, "rewards/margins": 8.71495246887207, "rewards/rejected": -17.111013412475586, "step": 18798 }, { "epoch": 2.92, "learning_rate": 3.5938586026246174e-07, "logits/chosen": -2.180431604385376, "logits/rejected": -2.525516986846924, "logps/chosen": -245.9479217529297, "logps/rejected": -265.1683044433594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -3.950068950653076, "rewards/margins": 8.326000213623047, "rewards/rejected": -12.276068687438965, "step": 18799 }, { "epoch": 2.92, "learning_rate": 3.586524197313139e-07, "logits/chosen": -2.097093343734741, "logits/rejected": -2.774555206298828, "logps/chosen": -202.1151580810547, "logps/rejected": -570.647216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.070683479309082, "rewards/margins": 13.504064559936523, "rewards/rejected": -20.57474708557129, "step": 18800 }, { "epoch": 2.92, "learning_rate": 3.57918979200166e-07, "logits/chosen": -1.8666080236434937, "logits/rejected": -2.6731414794921875, "logps/chosen": -188.70156860351562, "logps/rejected": -388.657470703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.850122451782227, "rewards/margins": 10.86776351928711, "rewards/rejected": -17.717885971069336, "step": 18801 }, { "epoch": 2.92, "learning_rate": 3.571855386690181e-07, "logits/chosen": -1.6625275611877441, "logits/rejected": -2.339240312576294, "logps/chosen": -114.19013214111328, "logps/rejected": -399.21832275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.4181928634643555, "rewards/margins": 15.185468673706055, "rewards/rejected": -20.603660583496094, "step": 18802 }, { "epoch": 2.92, "learning_rate": 3.5645209813787026e-07, "logits/chosen": -2.6298341751098633, "logits/rejected": -2.9245243072509766, "logps/chosen": -534.129638671875, "logps/rejected": -576.207275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.424640655517578, "rewards/margins": 12.811433792114258, "rewards/rejected": -17.23607635498047, "step": 18803 }, { "epoch": 2.92, "learning_rate": 3.5571865760672235e-07, "logits/chosen": -2.258328437805176, "logits/rejected": -2.3367719650268555, "logps/chosen": -392.8110656738281, "logps/rejected": -384.0076904296875, "loss": 0.0943, "rewards/accuracies": 1.0, "rewards/chosen": -8.862810134887695, "rewards/margins": 6.342288017272949, "rewards/rejected": -15.205097198486328, "step": 18804 }, { "epoch": 2.92, "learning_rate": 3.549852170755745e-07, "logits/chosen": -2.6810848712921143, "logits/rejected": -2.5884575843811035, "logps/chosen": -161.01693725585938, "logps/rejected": -203.98611450195312, "loss": 0.7924, "rewards/accuracies": 0.5, "rewards/chosen": -11.649073600769043, "rewards/margins": 1.7816858291625977, "rewards/rejected": -13.43075942993164, "step": 18805 }, { "epoch": 2.92, "learning_rate": 3.542517765444266e-07, "logits/chosen": -2.7934954166412354, "logits/rejected": -3.00799298286438, "logps/chosen": -706.228759765625, "logps/rejected": -807.491943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.493462562561035, "rewards/margins": 12.02341079711914, "rewards/rejected": -16.51687240600586, "step": 18806 }, { "epoch": 2.92, "learning_rate": 3.5351833601327873e-07, "logits/chosen": -2.4658102989196777, "logits/rejected": -2.944420576095581, "logps/chosen": -109.69245910644531, "logps/rejected": -594.0432739257812, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.398462295532227, "rewards/margins": 18.312786102294922, "rewards/rejected": -24.71125030517578, "step": 18807 }, { "epoch": 2.93, "learning_rate": 3.527848954821308e-07, "logits/chosen": -2.2413101196289062, "logits/rejected": -1.4593396186828613, "logps/chosen": -218.83731079101562, "logps/rejected": -286.5639953613281, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.974956035614014, "rewards/margins": 11.100979804992676, "rewards/rejected": -17.07593536376953, "step": 18808 }, { "epoch": 2.93, "learning_rate": 3.520514549509829e-07, "logits/chosen": -1.5063196420669556, "logits/rejected": -2.6466457843780518, "logps/chosen": -143.19967651367188, "logps/rejected": -347.85418701171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.952986717224121, "rewards/margins": 9.843369483947754, "rewards/rejected": -16.796356201171875, "step": 18809 }, { "epoch": 2.93, "learning_rate": 3.5131801441983505e-07, "logits/chosen": -2.571046829223633, "logits/rejected": -2.775578498840332, "logps/chosen": -203.744873046875, "logps/rejected": -233.70054626464844, "loss": 0.1358, "rewards/accuracies": 1.0, "rewards/chosen": -9.643803596496582, "rewards/margins": 3.8162877559661865, "rewards/rejected": -13.460091590881348, "step": 18810 }, { "epoch": 2.93, "learning_rate": 3.5058457388868714e-07, "logits/chosen": -2.065692186355591, "logits/rejected": -2.6815185546875, "logps/chosen": -292.7181091308594, "logps/rejected": -389.82861328125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -7.188305854797363, "rewards/margins": 5.541992664337158, "rewards/rejected": -12.73029899597168, "step": 18811 }, { "epoch": 2.93, "learning_rate": 3.498511333575393e-07, "logits/chosen": -1.0702533721923828, "logits/rejected": -2.496286392211914, "logps/chosen": -125.03324127197266, "logps/rejected": -435.0982971191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.117074966430664, "rewards/margins": 11.980239868164062, "rewards/rejected": -22.097314834594727, "step": 18812 }, { "epoch": 2.93, "learning_rate": 3.491176928263914e-07, "logits/chosen": -2.547255277633667, "logits/rejected": -1.8799043893814087, "logps/chosen": -234.68490600585938, "logps/rejected": -309.2339172363281, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/chosen": -7.248687744140625, "rewards/margins": 7.7300896644592285, "rewards/rejected": -14.978776931762695, "step": 18813 }, { "epoch": 2.93, "learning_rate": 3.483842522952435e-07, "logits/chosen": -1.4672526121139526, "logits/rejected": -2.3961501121520996, "logps/chosen": -242.29803466796875, "logps/rejected": -445.53717041015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.296687126159668, "rewards/margins": 10.119829177856445, "rewards/rejected": -18.41651725769043, "step": 18814 }, { "epoch": 2.93, "learning_rate": 3.4765081176409566e-07, "logits/chosen": -1.1977698802947998, "logits/rejected": -1.494130253791809, "logps/chosen": -402.14068603515625, "logps/rejected": -404.5018310546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.582403659820557, "rewards/margins": 11.675504684448242, "rewards/rejected": -16.25790786743164, "step": 18815 }, { "epoch": 2.93, "learning_rate": 3.4691737123294775e-07, "logits/chosen": -2.18542742729187, "logits/rejected": -2.3028810024261475, "logps/chosen": -244.50515747070312, "logps/rejected": -432.978271484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.8568291664123535, "rewards/margins": 9.900213241577148, "rewards/rejected": -15.757041931152344, "step": 18816 }, { "epoch": 2.93, "learning_rate": 3.461839307017999e-07, "logits/chosen": -2.122180461883545, "logits/rejected": -2.5815694332122803, "logps/chosen": -355.92822265625, "logps/rejected": -642.624755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.505934715270996, "rewards/margins": 14.359559059143066, "rewards/rejected": -21.865493774414062, "step": 18817 }, { "epoch": 2.93, "learning_rate": 3.45450490170652e-07, "logits/chosen": -2.548142433166504, "logits/rejected": -1.6672046184539795, "logps/chosen": -276.9064025878906, "logps/rejected": -244.8675079345703, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": -10.350194931030273, "rewards/margins": 5.497496604919434, "rewards/rejected": -15.847691535949707, "step": 18818 }, { "epoch": 2.93, "learning_rate": 3.447170496395041e-07, "logits/chosen": -2.831749439239502, "logits/rejected": -2.6804592609405518, "logps/chosen": -285.50677490234375, "logps/rejected": -474.97149658203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.403580188751221, "rewards/margins": 13.275045394897461, "rewards/rejected": -18.678625106811523, "step": 18819 }, { "epoch": 2.93, "learning_rate": 3.4398360910835627e-07, "logits/chosen": -2.1843411922454834, "logits/rejected": -1.548082947731018, "logps/chosen": -398.4264221191406, "logps/rejected": -231.24374389648438, "loss": 0.1697, "rewards/accuracies": 1.0, "rewards/chosen": -7.627194404602051, "rewards/margins": 5.407037734985352, "rewards/rejected": -13.034232139587402, "step": 18820 }, { "epoch": 2.93, "learning_rate": 3.4325016857720836e-07, "logits/chosen": -2.5834598541259766, "logits/rejected": -2.4619390964508057, "logps/chosen": -165.90667724609375, "logps/rejected": -299.3948974609375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.883698463439941, "rewards/margins": 8.995502471923828, "rewards/rejected": -13.87920093536377, "step": 18821 }, { "epoch": 2.93, "learning_rate": 3.425167280460605e-07, "logits/chosen": -2.337831974029541, "logits/rejected": -2.6683433055877686, "logps/chosen": -230.9139404296875, "logps/rejected": -394.2919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.349446773529053, "rewards/margins": 11.787449836730957, "rewards/rejected": -18.13689613342285, "step": 18822 }, { "epoch": 2.93, "learning_rate": 3.417832875149126e-07, "logits/chosen": -2.3034207820892334, "logits/rejected": -2.8680920600891113, "logps/chosen": -151.75765991210938, "logps/rejected": -381.41021728515625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -8.48528003692627, "rewards/margins": 8.027220726013184, "rewards/rejected": -16.512500762939453, "step": 18823 }, { "epoch": 2.93, "learning_rate": 3.4104984698376474e-07, "logits/chosen": -1.661833643913269, "logits/rejected": -2.5611684322357178, "logps/chosen": -162.439697265625, "logps/rejected": -352.534912109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.585964679718018, "rewards/margins": 9.741291999816895, "rewards/rejected": -15.32725715637207, "step": 18824 }, { "epoch": 2.93, "learning_rate": 3.403164064526169e-07, "logits/chosen": -1.2829867601394653, "logits/rejected": -2.3415236473083496, "logps/chosen": -209.56427001953125, "logps/rejected": -552.0687255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.537310600280762, "rewards/margins": 10.757926940917969, "rewards/rejected": -19.295236587524414, "step": 18825 }, { "epoch": 2.93, "learning_rate": 3.3958296592146897e-07, "logits/chosen": -1.8355952501296997, "logits/rejected": -2.666259527206421, "logps/chosen": -347.82763671875, "logps/rejected": -720.2684326171875, "loss": 0.0183, "rewards/accuracies": 1.0, "rewards/chosen": -7.304340362548828, "rewards/margins": 8.642135620117188, "rewards/rejected": -15.946475982666016, "step": 18826 }, { "epoch": 2.93, "learning_rate": 3.388495253903211e-07, "logits/chosen": -2.521695375442505, "logits/rejected": -1.0254920721054077, "logps/chosen": -344.4106750488281, "logps/rejected": -354.11767578125, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -4.977150917053223, "rewards/margins": 7.217432975769043, "rewards/rejected": -12.194583892822266, "step": 18827 }, { "epoch": 2.93, "learning_rate": 3.381160848591732e-07, "logits/chosen": -2.3473145961761475, "logits/rejected": -1.7720067501068115, "logps/chosen": -525.1687622070312, "logps/rejected": -618.41259765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.814824104309082, "rewards/margins": 12.377519607543945, "rewards/rejected": -22.192344665527344, "step": 18828 }, { "epoch": 2.93, "learning_rate": 3.373826443280253e-07, "logits/chosen": -2.3462493419647217, "logits/rejected": -1.639848232269287, "logps/chosen": -250.90550231933594, "logps/rejected": -444.8617248535156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.175204277038574, "rewards/margins": 16.49734115600586, "rewards/rejected": -20.672544479370117, "step": 18829 }, { "epoch": 2.93, "learning_rate": 3.3664920379687743e-07, "logits/chosen": -2.4517037868499756, "logits/rejected": -2.9506170749664307, "logps/chosen": -398.7060546875, "logps/rejected": -756.5119018554688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.063420295715332, "rewards/margins": 13.82080364227295, "rewards/rejected": -18.88422393798828, "step": 18830 }, { "epoch": 2.93, "learning_rate": 3.359157632657295e-07, "logits/chosen": -2.211422920227051, "logits/rejected": -2.678997278213501, "logps/chosen": -233.78131103515625, "logps/rejected": -389.7294921875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -8.763062477111816, "rewards/margins": 9.616764068603516, "rewards/rejected": -18.379825592041016, "step": 18831 }, { "epoch": 2.93, "learning_rate": 3.3518232273458167e-07, "logits/chosen": -2.72538161277771, "logits/rejected": -3.0316872596740723, "logps/chosen": -220.57725524902344, "logps/rejected": -415.687255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.448394775390625, "rewards/margins": 11.056211471557617, "rewards/rejected": -19.50460433959961, "step": 18832 }, { "epoch": 2.93, "learning_rate": 3.3444888220343376e-07, "logits/chosen": -2.8472163677215576, "logits/rejected": -2.229800224304199, "logps/chosen": -478.1413269042969, "logps/rejected": -491.04583740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.809443473815918, "rewards/margins": 11.947168350219727, "rewards/rejected": -16.756610870361328, "step": 18833 }, { "epoch": 2.93, "learning_rate": 3.337154416722859e-07, "logits/chosen": -1.3356889486312866, "logits/rejected": -2.6849560737609863, "logps/chosen": -109.11763763427734, "logps/rejected": -377.205078125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -9.145028114318848, "rewards/margins": 6.667732238769531, "rewards/rejected": -15.812759399414062, "step": 18834 }, { "epoch": 2.93, "learning_rate": 3.3298200114113804e-07, "logits/chosen": -1.295422077178955, "logits/rejected": -2.634230136871338, "logps/chosen": -182.9676513671875, "logps/rejected": -512.6727294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.196640968322754, "rewards/margins": 14.03140640258789, "rewards/rejected": -21.228046417236328, "step": 18835 }, { "epoch": 2.93, "learning_rate": 3.3224856060999013e-07, "logits/chosen": -2.7134809494018555, "logits/rejected": -2.6254942417144775, "logps/chosen": -331.0944519042969, "logps/rejected": -470.43670654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.64870834350586, "rewards/margins": 11.566041946411133, "rewards/rejected": -20.214750289916992, "step": 18836 }, { "epoch": 2.93, "learning_rate": 3.315151200788423e-07, "logits/chosen": -0.7853880524635315, "logits/rejected": -1.4385312795639038, "logps/chosen": -226.74868774414062, "logps/rejected": -500.3293151855469, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.624661445617676, "rewards/margins": 10.224733352661133, "rewards/rejected": -15.849394798278809, "step": 18837 }, { "epoch": 2.93, "learning_rate": 3.3078167954769437e-07, "logits/chosen": -1.4275333881378174, "logits/rejected": -2.3840272426605225, "logps/chosen": -146.32696533203125, "logps/rejected": -408.509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.717583179473877, "rewards/margins": 13.845046043395996, "rewards/rejected": -20.56262969970703, "step": 18838 }, { "epoch": 2.93, "learning_rate": 3.300482390165465e-07, "logits/chosen": -2.2601685523986816, "logits/rejected": -2.472637176513672, "logps/chosen": -288.41461181640625, "logps/rejected": -294.80584716796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.632765769958496, "rewards/margins": 8.795452117919922, "rewards/rejected": -13.428218841552734, "step": 18839 }, { "epoch": 2.93, "learning_rate": 3.293147984853986e-07, "logits/chosen": -2.084303140640259, "logits/rejected": -2.759089231491089, "logps/chosen": -224.98793029785156, "logps/rejected": -443.794921875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.455659866333008, "rewards/margins": 11.00680923461914, "rewards/rejected": -18.46247100830078, "step": 18840 }, { "epoch": 2.93, "learning_rate": 3.2858135795425074e-07, "logits/chosen": -1.4145385026931763, "logits/rejected": -2.461501121520996, "logps/chosen": -130.63990783691406, "logps/rejected": -449.62841796875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.498794555664062, "rewards/margins": 11.560599327087402, "rewards/rejected": -20.05939483642578, "step": 18841 }, { "epoch": 2.93, "learning_rate": 3.278479174231029e-07, "logits/chosen": -1.2814453840255737, "logits/rejected": -2.536595344543457, "logps/chosen": -120.71844482421875, "logps/rejected": -438.6734619140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.249956130981445, "rewards/margins": 9.742717742919922, "rewards/rejected": -18.992673873901367, "step": 18842 }, { "epoch": 2.93, "learning_rate": 3.27114476891955e-07, "logits/chosen": -2.0767791271209717, "logits/rejected": -2.6803345680236816, "logps/chosen": -252.86782836914062, "logps/rejected": -592.348876953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.835873603820801, "rewards/margins": 13.235597610473633, "rewards/rejected": -19.07147216796875, "step": 18843 }, { "epoch": 2.93, "learning_rate": 3.263810363608071e-07, "logits/chosen": -2.8018643856048584, "logits/rejected": -2.6182467937469482, "logps/chosen": -246.72337341308594, "logps/rejected": -332.2781677246094, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -8.666168212890625, "rewards/margins": 8.562374114990234, "rewards/rejected": -17.22854232788086, "step": 18844 }, { "epoch": 2.93, "learning_rate": 3.256475958296592e-07, "logits/chosen": -2.483853816986084, "logits/rejected": -2.82340931892395, "logps/chosen": -275.0423583984375, "logps/rejected": -494.65521240234375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.149338722229004, "rewards/margins": 9.95733642578125, "rewards/rejected": -17.10667610168457, "step": 18845 }, { "epoch": 2.93, "learning_rate": 3.2491415529851135e-07, "logits/chosen": -2.3548026084899902, "logits/rejected": -2.8561360836029053, "logps/chosen": -177.59249877929688, "logps/rejected": -391.517333984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.051953315734863, "rewards/margins": 9.874229431152344, "rewards/rejected": -15.92618179321289, "step": 18846 }, { "epoch": 2.93, "learning_rate": 3.241807147673635e-07, "logits/chosen": -2.629556655883789, "logits/rejected": -2.697464942932129, "logps/chosen": -212.75079345703125, "logps/rejected": -323.7200927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.345500946044922, "rewards/margins": 13.782389640808105, "rewards/rejected": -18.127891540527344, "step": 18847 }, { "epoch": 2.93, "learning_rate": 3.234472742362156e-07, "logits/chosen": -2.8483574390411377, "logits/rejected": -2.4703736305236816, "logps/chosen": -288.95404052734375, "logps/rejected": -198.37954711914062, "loss": 0.3471, "rewards/accuracies": 0.5, "rewards/chosen": -5.496448040008545, "rewards/margins": 5.837108135223389, "rewards/rejected": -11.333556175231934, "step": 18848 }, { "epoch": 2.93, "learning_rate": 3.2271383370506773e-07, "logits/chosen": -2.5891313552856445, "logits/rejected": -3.007967710494995, "logps/chosen": -89.75000762939453, "logps/rejected": -324.0198974609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.177949905395508, "rewards/margins": 9.876803398132324, "rewards/rejected": -16.054752349853516, "step": 18849 }, { "epoch": 2.93, "learning_rate": 3.219803931739198e-07, "logits/chosen": -2.7101798057556152, "logits/rejected": -2.0983808040618896, "logps/chosen": -609.2386474609375, "logps/rejected": -393.7227783203125, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -5.892642021179199, "rewards/margins": 7.994819641113281, "rewards/rejected": -13.88746166229248, "step": 18850 }, { "epoch": 2.93, "learning_rate": 3.212469526427719e-07, "logits/chosen": -2.3957622051239014, "logits/rejected": -1.258323311805725, "logps/chosen": -341.7261962890625, "logps/rejected": -306.0069580078125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -7.277441024780273, "rewards/margins": 8.885513305664062, "rewards/rejected": -16.162954330444336, "step": 18851 }, { "epoch": 2.93, "learning_rate": 3.2051351211162405e-07, "logits/chosen": -1.8642709255218506, "logits/rejected": -2.7271454334259033, "logps/chosen": -140.6502685546875, "logps/rejected": -472.8709411621094, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -7.446549415588379, "rewards/margins": 8.720285415649414, "rewards/rejected": -16.166833877563477, "step": 18852 }, { "epoch": 2.93, "learning_rate": 3.1978007158047614e-07, "logits/chosen": -1.853920578956604, "logits/rejected": -1.0284812450408936, "logps/chosen": -275.65423583984375, "logps/rejected": -192.47308349609375, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": -6.176327228546143, "rewards/margins": 4.248667240142822, "rewards/rejected": -10.424994468688965, "step": 18853 }, { "epoch": 2.93, "learning_rate": 3.190466310493283e-07, "logits/chosen": -2.489560127258301, "logits/rejected": -2.566404104232788, "logps/chosen": -199.809814453125, "logps/rejected": -350.664794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.326044082641602, "rewards/margins": 13.733818054199219, "rewards/rejected": -19.05986213684082, "step": 18854 }, { "epoch": 2.93, "learning_rate": 3.1831319051818037e-07, "logits/chosen": -2.2959680557250977, "logits/rejected": -2.8415703773498535, "logps/chosen": -363.57720947265625, "logps/rejected": -460.1807861328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.7687349319458, "rewards/margins": 10.545384407043457, "rewards/rejected": -19.314119338989258, "step": 18855 }, { "epoch": 2.93, "learning_rate": 3.175797499870325e-07, "logits/chosen": -1.860554814338684, "logits/rejected": -2.5474650859832764, "logps/chosen": -134.9241485595703, "logps/rejected": -390.656982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.171358108520508, "rewards/margins": 14.075422286987305, "rewards/rejected": -19.246780395507812, "step": 18856 }, { "epoch": 2.93, "learning_rate": 3.1684630945588466e-07, "logits/chosen": -1.8368747234344482, "logits/rejected": -2.7579174041748047, "logps/chosen": -230.34634399414062, "logps/rejected": -527.5841674804688, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.624880313873291, "rewards/margins": 9.672672271728516, "rewards/rejected": -14.297552108764648, "step": 18857 }, { "epoch": 2.93, "learning_rate": 3.1611286892473675e-07, "logits/chosen": -2.474236488342285, "logits/rejected": -1.1374317407608032, "logps/chosen": -407.6608581542969, "logps/rejected": -338.60260009765625, "loss": 0.1346, "rewards/accuracies": 1.0, "rewards/chosen": -12.12109661102295, "rewards/margins": 9.190580368041992, "rewards/rejected": -21.311676025390625, "step": 18858 }, { "epoch": 2.93, "learning_rate": 3.153794283935889e-07, "logits/chosen": -1.3192670345306396, "logits/rejected": -2.2554047107696533, "logps/chosen": -249.513671875, "logps/rejected": -585.19287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.276449203491211, "rewards/margins": 11.730663299560547, "rewards/rejected": -20.007112503051758, "step": 18859 }, { "epoch": 2.93, "learning_rate": 3.14645987862441e-07, "logits/chosen": -2.423799753189087, "logits/rejected": -2.7928454875946045, "logps/chosen": -366.218994140625, "logps/rejected": -416.0055847167969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.770243167877197, "rewards/margins": 10.51822280883789, "rewards/rejected": -16.28846549987793, "step": 18860 }, { "epoch": 2.93, "learning_rate": 3.139125473312931e-07, "logits/chosen": -2.614386796951294, "logits/rejected": -2.6344194412231445, "logps/chosen": -505.6445007324219, "logps/rejected": -541.1038818359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.933285713195801, "rewards/margins": 9.191787719726562, "rewards/rejected": -17.12507438659668, "step": 18861 }, { "epoch": 2.93, "learning_rate": 3.1317910680014527e-07, "logits/chosen": -2.277087926864624, "logits/rejected": -2.766220808029175, "logps/chosen": -353.26727294921875, "logps/rejected": -496.33221435546875, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": -7.914619445800781, "rewards/margins": 7.908449649810791, "rewards/rejected": -15.823068618774414, "step": 18862 }, { "epoch": 2.93, "learning_rate": 3.1244566626899736e-07, "logits/chosen": -2.3892154693603516, "logits/rejected": -2.6183643341064453, "logps/chosen": -109.146484375, "logps/rejected": -211.9031982421875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.303744316101074, "rewards/margins": 8.798471450805664, "rewards/rejected": -15.102215766906738, "step": 18863 }, { "epoch": 2.93, "learning_rate": 3.117122257378495e-07, "logits/chosen": -2.837214231491089, "logits/rejected": -1.842665195465088, "logps/chosen": -323.06231689453125, "logps/rejected": -294.3359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.112247943878174, "rewards/margins": 10.356054306030273, "rewards/rejected": -16.46830177307129, "step": 18864 }, { "epoch": 2.93, "learning_rate": 3.109787852067016e-07, "logits/chosen": -1.3679962158203125, "logits/rejected": -2.539400339126587, "logps/chosen": -238.11109924316406, "logps/rejected": -439.6318359375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -11.392980575561523, "rewards/margins": 9.709896087646484, "rewards/rejected": -21.102876663208008, "step": 18865 }, { "epoch": 2.93, "learning_rate": 3.1024534467555373e-07, "logits/chosen": -2.509383201599121, "logits/rejected": -1.4074335098266602, "logps/chosen": -238.81980895996094, "logps/rejected": -333.7620849609375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -3.4256184101104736, "rewards/margins": 7.579038143157959, "rewards/rejected": -11.004655838012695, "step": 18866 }, { "epoch": 2.93, "learning_rate": 3.095119041444059e-07, "logits/chosen": -1.6177420616149902, "logits/rejected": -2.490734100341797, "logps/chosen": -196.06130981445312, "logps/rejected": -382.53350830078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.279386520385742, "rewards/margins": 9.95014762878418, "rewards/rejected": -19.229534149169922, "step": 18867 }, { "epoch": 2.93, "learning_rate": 3.0877846361325797e-07, "logits/chosen": -2.355445146560669, "logits/rejected": -1.8451064825057983, "logps/chosen": -747.74951171875, "logps/rejected": -576.0264282226562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.216251373291016, "rewards/margins": 10.354506492614746, "rewards/rejected": -16.570758819580078, "step": 18868 }, { "epoch": 2.93, "learning_rate": 3.080450230821101e-07, "logits/chosen": -1.1323946714401245, "logits/rejected": -2.448549270629883, "logps/chosen": -167.34767150878906, "logps/rejected": -409.417236328125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.535158157348633, "rewards/margins": 8.937454223632812, "rewards/rejected": -17.472612380981445, "step": 18869 }, { "epoch": 2.93, "learning_rate": 3.073115825509622e-07, "logits/chosen": -1.7815420627593994, "logits/rejected": -2.334981918334961, "logps/chosen": -147.58641052246094, "logps/rejected": -357.1083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.075961589813232, "rewards/margins": 13.083757400512695, "rewards/rejected": -20.159719467163086, "step": 18870 }, { "epoch": 2.93, "learning_rate": 3.065781420198143e-07, "logits/chosen": -2.4912283420562744, "logits/rejected": -2.7738823890686035, "logps/chosen": -251.1517333984375, "logps/rejected": -347.3687744140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.237372398376465, "rewards/margins": 11.241445541381836, "rewards/rejected": -17.478818893432617, "step": 18871 }, { "epoch": 2.93, "learning_rate": 3.058447014886664e-07, "logits/chosen": -2.588486433029175, "logits/rejected": -2.578153371810913, "logps/chosen": -146.55686950683594, "logps/rejected": -283.96392822265625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.943099021911621, "rewards/margins": 10.034290313720703, "rewards/rejected": -16.97739028930664, "step": 18872 }, { "epoch": 2.94, "learning_rate": 3.051112609575185e-07, "logits/chosen": -1.3793141841888428, "logits/rejected": -2.4942891597747803, "logps/chosen": -193.23504638671875, "logps/rejected": -568.00244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.812880516052246, "rewards/margins": 11.193302154541016, "rewards/rejected": -19.006183624267578, "step": 18873 }, { "epoch": 2.94, "learning_rate": 3.0437782042637067e-07, "logits/chosen": -2.4357752799987793, "logits/rejected": -2.103095054626465, "logps/chosen": -415.414794921875, "logps/rejected": -533.0767211914062, "loss": 0.0293, "rewards/accuracies": 1.0, "rewards/chosen": -8.706700325012207, "rewards/margins": 7.7218828201293945, "rewards/rejected": -16.4285831451416, "step": 18874 }, { "epoch": 2.94, "learning_rate": 3.0364437989522276e-07, "logits/chosen": -3.1125760078430176, "logits/rejected": -3.1636276245117188, "logps/chosen": -390.78778076171875, "logps/rejected": -432.6512451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.154609680175781, "rewards/margins": 14.359393119812012, "rewards/rejected": -23.51400375366211, "step": 18875 }, { "epoch": 2.94, "learning_rate": 3.029109393640749e-07, "logits/chosen": -2.1689541339874268, "logits/rejected": -2.8949077129364014, "logps/chosen": -835.2288208007812, "logps/rejected": -941.8522338867188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.353590965270996, "rewards/margins": 13.717418670654297, "rewards/rejected": -19.07101058959961, "step": 18876 }, { "epoch": 2.94, "learning_rate": 3.02177498832927e-07, "logits/chosen": -2.5868453979492188, "logits/rejected": -2.5257298946380615, "logps/chosen": -194.41650390625, "logps/rejected": -218.62680053710938, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.886661052703857, "rewards/margins": 8.785863876342773, "rewards/rejected": -15.672525405883789, "step": 18877 }, { "epoch": 2.94, "learning_rate": 3.0144405830177913e-07, "logits/chosen": -2.462517261505127, "logits/rejected": -2.264580249786377, "logps/chosen": -311.5230407714844, "logps/rejected": -291.8616943359375, "loss": 0.7356, "rewards/accuracies": 0.5, "rewards/chosen": -8.038164138793945, "rewards/margins": 4.09128999710083, "rewards/rejected": -12.129453659057617, "step": 18878 }, { "epoch": 2.94, "learning_rate": 3.007106177706313e-07, "logits/chosen": -2.2637267112731934, "logits/rejected": -2.743389368057251, "logps/chosen": -298.13494873046875, "logps/rejected": -374.36968994140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.3032145500183105, "rewards/margins": 11.740976333618164, "rewards/rejected": -15.044191360473633, "step": 18879 }, { "epoch": 2.94, "learning_rate": 2.9997717723948337e-07, "logits/chosen": -1.9138250350952148, "logits/rejected": -2.848375082015991, "logps/chosen": -227.706298828125, "logps/rejected": -437.0945129394531, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -8.122544288635254, "rewards/margins": 8.561779022216797, "rewards/rejected": -16.684322357177734, "step": 18880 }, { "epoch": 2.94, "learning_rate": 2.992437367083355e-07, "logits/chosen": -2.5209438800811768, "logits/rejected": -2.166653633117676, "logps/chosen": -344.8150329589844, "logps/rejected": -340.70135498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.630476951599121, "rewards/margins": 11.928434371948242, "rewards/rejected": -19.558910369873047, "step": 18881 }, { "epoch": 2.94, "learning_rate": 2.985102961771876e-07, "logits/chosen": -2.340040683746338, "logits/rejected": -2.48783278465271, "logps/chosen": -226.82662963867188, "logps/rejected": -397.2970886230469, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.513167381286621, "rewards/margins": 13.075267791748047, "rewards/rejected": -20.588436126708984, "step": 18882 }, { "epoch": 2.94, "learning_rate": 2.9777685564603974e-07, "logits/chosen": -2.5849602222442627, "logits/rejected": -2.839958429336548, "logps/chosen": -159.6077880859375, "logps/rejected": -317.6552429199219, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.01876449584961, "rewards/margins": 8.16162109375, "rewards/rejected": -16.18038558959961, "step": 18883 }, { "epoch": 2.94, "learning_rate": 2.970434151148919e-07, "logits/chosen": -2.13114857673645, "logits/rejected": -2.473238945007324, "logps/chosen": -188.69186401367188, "logps/rejected": -441.48992919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.076837062835693, "rewards/margins": 14.492830276489258, "rewards/rejected": -20.56966781616211, "step": 18884 }, { "epoch": 2.94, "learning_rate": 2.96309974583744e-07, "logits/chosen": -1.6564291715621948, "logits/rejected": -2.4916160106658936, "logps/chosen": -456.943359375, "logps/rejected": -579.2960205078125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -5.985971927642822, "rewards/margins": 11.382426261901855, "rewards/rejected": -17.368398666381836, "step": 18885 }, { "epoch": 2.94, "learning_rate": 2.955765340525961e-07, "logits/chosen": -2.2702488899230957, "logits/rejected": -2.529956817626953, "logps/chosen": -135.38070678710938, "logps/rejected": -314.6088562011719, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.871326446533203, "rewards/margins": 11.465729713439941, "rewards/rejected": -18.33705711364746, "step": 18886 }, { "epoch": 2.94, "learning_rate": 2.948430935214482e-07, "logits/chosen": -1.754923939704895, "logits/rejected": -2.7232675552368164, "logps/chosen": -211.58755493164062, "logps/rejected": -495.1635437011719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.321878433227539, "rewards/margins": 9.56039047241211, "rewards/rejected": -17.88226890563965, "step": 18887 }, { "epoch": 2.94, "learning_rate": 2.9410965299030035e-07, "logits/chosen": -1.9384393692016602, "logits/rejected": -2.4895076751708984, "logps/chosen": -187.01356506347656, "logps/rejected": -389.61376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.0732831954956055, "rewards/margins": 13.383766174316406, "rewards/rejected": -18.457048416137695, "step": 18888 }, { "epoch": 2.94, "learning_rate": 2.933762124591525e-07, "logits/chosen": -2.754257917404175, "logits/rejected": -2.6441874504089355, "logps/chosen": -274.96588134765625, "logps/rejected": -399.7723388671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.631237030029297, "rewards/margins": 9.092041015625, "rewards/rejected": -14.723278045654297, "step": 18889 }, { "epoch": 2.94, "learning_rate": 2.926427719280046e-07, "logits/chosen": -2.4631571769714355, "logits/rejected": -1.968494176864624, "logps/chosen": -314.75091552734375, "logps/rejected": -454.7203369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.887490272521973, "rewards/margins": 14.303542137145996, "rewards/rejected": -23.19103240966797, "step": 18890 }, { "epoch": 2.94, "learning_rate": 2.919093313968567e-07, "logits/chosen": -2.2384819984436035, "logits/rejected": -2.826535940170288, "logps/chosen": -135.35507202148438, "logps/rejected": -338.1702880859375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.594260215759277, "rewards/margins": 7.517877578735352, "rewards/rejected": -16.112138748168945, "step": 18891 }, { "epoch": 2.94, "learning_rate": 2.9117589086570876e-07, "logits/chosen": -2.256922721862793, "logits/rejected": -2.596752882003784, "logps/chosen": -193.95315551757812, "logps/rejected": -384.75732421875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.75556755065918, "rewards/margins": 11.55308723449707, "rewards/rejected": -19.30865478515625, "step": 18892 }, { "epoch": 2.94, "learning_rate": 2.904424503345609e-07, "logits/chosen": -2.543921709060669, "logits/rejected": -2.468907356262207, "logps/chosen": -333.6689758300781, "logps/rejected": -384.6968994140625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.660066604614258, "rewards/margins": 8.973175048828125, "rewards/rejected": -14.633241653442383, "step": 18893 }, { "epoch": 2.94, "learning_rate": 2.8970900980341305e-07, "logits/chosen": -1.8094475269317627, "logits/rejected": -2.5604498386383057, "logps/chosen": -301.017578125, "logps/rejected": -695.9892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.0206756591796875, "rewards/margins": 11.58646011352539, "rewards/rejected": -18.607135772705078, "step": 18894 }, { "epoch": 2.94, "learning_rate": 2.8897556927226514e-07, "logits/chosen": -2.798292636871338, "logits/rejected": -2.6623544692993164, "logps/chosen": -392.1468505859375, "logps/rejected": -496.5435791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.378544807434082, "rewards/margins": 12.383729934692383, "rewards/rejected": -17.76227378845215, "step": 18895 }, { "epoch": 2.94, "learning_rate": 2.882421287411173e-07, "logits/chosen": -2.5653560161590576, "logits/rejected": -2.464405059814453, "logps/chosen": -667.0364990234375, "logps/rejected": -592.7442016601562, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.033476829528809, "rewards/margins": 8.30243968963623, "rewards/rejected": -14.335916519165039, "step": 18896 }, { "epoch": 2.94, "learning_rate": 2.8750868820996937e-07, "logits/chosen": -2.6777217388153076, "logits/rejected": -2.5183069705963135, "logps/chosen": -320.458740234375, "logps/rejected": -350.7098083496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.976589202880859, "rewards/margins": 9.848522186279297, "rewards/rejected": -14.825111389160156, "step": 18897 }, { "epoch": 2.94, "learning_rate": 2.867752476788215e-07, "logits/chosen": -1.8647396564483643, "logits/rejected": -2.3770885467529297, "logps/chosen": -419.7578430175781, "logps/rejected": -689.86962890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.395930290222168, "rewards/margins": 16.607070922851562, "rewards/rejected": -25.003002166748047, "step": 18898 }, { "epoch": 2.94, "learning_rate": 2.8604180714767366e-07, "logits/chosen": -2.5890729427337646, "logits/rejected": -2.4920575618743896, "logps/chosen": -133.95684814453125, "logps/rejected": -167.19345092773438, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -6.683719635009766, "rewards/margins": 5.043008804321289, "rewards/rejected": -11.726728439331055, "step": 18899 }, { "epoch": 2.94, "learning_rate": 2.8530836661652575e-07, "logits/chosen": -0.7102642059326172, "logits/rejected": -1.412095546722412, "logps/chosen": -259.2605285644531, "logps/rejected": -642.490478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.95637035369873, "rewards/margins": 15.437433242797852, "rewards/rejected": -25.393802642822266, "step": 18900 }, { "epoch": 2.94, "learning_rate": 2.845749260853779e-07, "logits/chosen": -1.5725644826889038, "logits/rejected": -2.5347070693969727, "logps/chosen": -155.62669372558594, "logps/rejected": -427.886474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.822649002075195, "rewards/margins": 11.34164810180664, "rewards/rejected": -21.164297103881836, "step": 18901 }, { "epoch": 2.94, "learning_rate": 2.8384148555423e-07, "logits/chosen": -2.305142402648926, "logits/rejected": -2.661221981048584, "logps/chosen": -369.2122497558594, "logps/rejected": -597.722412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.712419509887695, "rewards/margins": 12.928985595703125, "rewards/rejected": -20.64140510559082, "step": 18902 }, { "epoch": 2.94, "learning_rate": 2.831080450230821e-07, "logits/chosen": -1.4906233549118042, "logits/rejected": -2.573932409286499, "logps/chosen": -501.94683837890625, "logps/rejected": -662.1419677734375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.822916030883789, "rewards/margins": 9.491588592529297, "rewards/rejected": -18.314504623413086, "step": 18903 }, { "epoch": 2.94, "learning_rate": 2.823746044919342e-07, "logits/chosen": -1.8181513547897339, "logits/rejected": -2.145328998565674, "logps/chosen": -143.2379150390625, "logps/rejected": -239.21307373046875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -8.901230812072754, "rewards/margins": 6.347186088562012, "rewards/rejected": -15.248416900634766, "step": 18904 }, { "epoch": 2.94, "learning_rate": 2.8164116396078636e-07, "logits/chosen": -2.7478044033050537, "logits/rejected": -1.972798466682434, "logps/chosen": -295.150390625, "logps/rejected": -516.24072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.291574954986572, "rewards/margins": 11.894436836242676, "rewards/rejected": -17.186012268066406, "step": 18905 }, { "epoch": 2.94, "learning_rate": 2.809077234296385e-07, "logits/chosen": -2.5499191284179688, "logits/rejected": -2.7123279571533203, "logps/chosen": -205.80335998535156, "logps/rejected": -332.34698486328125, "loss": 0.9424, "rewards/accuracies": 0.5, "rewards/chosen": -8.411028861999512, "rewards/margins": 3.492642402648926, "rewards/rejected": -11.903671264648438, "step": 18906 }, { "epoch": 2.94, "learning_rate": 2.801742828984906e-07, "logits/chosen": -1.6692466735839844, "logits/rejected": -2.7788519859313965, "logps/chosen": -257.49761962890625, "logps/rejected": -542.0724487304688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.979818344116211, "rewards/margins": 13.608221054077148, "rewards/rejected": -18.58803939819336, "step": 18907 }, { "epoch": 2.94, "learning_rate": 2.7944084236734273e-07, "logits/chosen": -1.6727014780044556, "logits/rejected": -2.2947025299072266, "logps/chosen": -168.40798950195312, "logps/rejected": -318.74737548828125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.8719024658203125, "rewards/margins": 9.0458345413208, "rewards/rejected": -15.91773796081543, "step": 18908 }, { "epoch": 2.94, "learning_rate": 2.787074018361948e-07, "logits/chosen": -2.419569969177246, "logits/rejected": -2.275555372238159, "logps/chosen": -159.33657836914062, "logps/rejected": -374.73858642578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.12308406829834, "rewards/margins": 11.651405334472656, "rewards/rejected": -17.774490356445312, "step": 18909 }, { "epoch": 2.94, "learning_rate": 2.7797396130504697e-07, "logits/chosen": -2.2656121253967285, "logits/rejected": -2.638052463531494, "logps/chosen": -110.16443634033203, "logps/rejected": -276.51239013671875, "loss": 0.4749, "rewards/accuracies": 0.5, "rewards/chosen": -6.9792375564575195, "rewards/margins": 4.812047004699707, "rewards/rejected": -11.791284561157227, "step": 18910 }, { "epoch": 2.94, "learning_rate": 2.7724052077389906e-07, "logits/chosen": -2.013849973678589, "logits/rejected": -2.770115613937378, "logps/chosen": -203.711181640625, "logps/rejected": -384.0949401855469, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -9.103010177612305, "rewards/margins": 6.971335411071777, "rewards/rejected": -16.0743465423584, "step": 18911 }, { "epoch": 2.94, "learning_rate": 2.7650708024275115e-07, "logits/chosen": -2.6979739665985107, "logits/rejected": -1.9321935176849365, "logps/chosen": -297.1674499511719, "logps/rejected": -278.54998779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.597428321838379, "rewards/margins": 11.728700637817383, "rewards/rejected": -14.326128005981445, "step": 18912 }, { "epoch": 2.94, "learning_rate": 2.757736397116033e-07, "logits/chosen": -2.245349168777466, "logits/rejected": -2.819408416748047, "logps/chosen": -326.25372314453125, "logps/rejected": -507.3467102050781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.274011135101318, "rewards/margins": 9.868376731872559, "rewards/rejected": -16.14238739013672, "step": 18913 }, { "epoch": 2.94, "learning_rate": 2.750401991804554e-07, "logits/chosen": -2.453096866607666, "logits/rejected": -2.6209218502044678, "logps/chosen": -181.57481384277344, "logps/rejected": -338.5518798828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.252206802368164, "rewards/margins": 10.116836547851562, "rewards/rejected": -17.369043350219727, "step": 18914 }, { "epoch": 2.94, "learning_rate": 2.743067586493075e-07, "logits/chosen": -0.7936317920684814, "logits/rejected": -2.7053961753845215, "logps/chosen": -151.55755615234375, "logps/rejected": -520.1719970703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.320853233337402, "rewards/margins": 17.252309799194336, "rewards/rejected": -24.573162078857422, "step": 18915 }, { "epoch": 2.94, "learning_rate": 2.7357331811815967e-07, "logits/chosen": -1.3381552696228027, "logits/rejected": -2.7788186073303223, "logps/chosen": -132.30105590820312, "logps/rejected": -808.62451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.492847442626953, "rewards/margins": 11.63581657409668, "rewards/rejected": -21.128664016723633, "step": 18916 }, { "epoch": 2.94, "learning_rate": 2.7283987758701176e-07, "logits/chosen": -1.202065348625183, "logits/rejected": -1.9819223880767822, "logps/chosen": -292.65679931640625, "logps/rejected": -577.2635498046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -9.35081958770752, "rewards/margins": 12.00418758392334, "rewards/rejected": -21.35500717163086, "step": 18917 }, { "epoch": 2.94, "learning_rate": 2.721064370558639e-07, "logits/chosen": -0.8872813582420349, "logits/rejected": -2.3220603466033936, "logps/chosen": -104.09615325927734, "logps/rejected": -365.72491455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.674753665924072, "rewards/margins": 13.838658332824707, "rewards/rejected": -20.513412475585938, "step": 18918 }, { "epoch": 2.94, "learning_rate": 2.71372996524716e-07, "logits/chosen": -2.2508134841918945, "logits/rejected": -2.6254725456237793, "logps/chosen": -145.15664672851562, "logps/rejected": -324.22735595703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.522424697875977, "rewards/margins": 8.773910522460938, "rewards/rejected": -18.296335220336914, "step": 18919 }, { "epoch": 2.94, "learning_rate": 2.7063955599356813e-07, "logits/chosen": -1.4093210697174072, "logits/rejected": -2.459623336791992, "logps/chosen": -234.90274047851562, "logps/rejected": -554.331298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.383544921875, "rewards/margins": 16.57894515991211, "rewards/rejected": -24.96249008178711, "step": 18920 }, { "epoch": 2.94, "learning_rate": 2.699061154624203e-07, "logits/chosen": -2.3777565956115723, "logits/rejected": -2.689188003540039, "logps/chosen": -184.87307739257812, "logps/rejected": -369.8393249511719, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.467315673828125, "rewards/margins": 9.667803764343262, "rewards/rejected": -18.135120391845703, "step": 18921 }, { "epoch": 2.94, "learning_rate": 2.6917267493127237e-07, "logits/chosen": -1.803399682044983, "logits/rejected": -2.751406669616699, "logps/chosen": -126.16119384765625, "logps/rejected": -370.3211669921875, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -7.497805595397949, "rewards/margins": 8.746082305908203, "rewards/rejected": -16.24388885498047, "step": 18922 }, { "epoch": 2.94, "learning_rate": 2.684392344001245e-07, "logits/chosen": -2.2758586406707764, "logits/rejected": -2.623599052429199, "logps/chosen": -124.21876525878906, "logps/rejected": -431.2268371582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.398576736450195, "rewards/margins": 10.472105026245117, "rewards/rejected": -17.870681762695312, "step": 18923 }, { "epoch": 2.94, "learning_rate": 2.677057938689766e-07, "logits/chosen": -2.3263020515441895, "logits/rejected": -1.9703561067581177, "logps/chosen": -541.6587524414062, "logps/rejected": -754.403076171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.299681663513184, "rewards/margins": 13.071022033691406, "rewards/rejected": -19.370704650878906, "step": 18924 }, { "epoch": 2.94, "learning_rate": 2.6697235333782874e-07, "logits/chosen": -2.4768941402435303, "logits/rejected": -2.6380960941314697, "logps/chosen": -142.19512939453125, "logps/rejected": -202.39065551757812, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.974088191986084, "rewards/margins": 7.5110273361206055, "rewards/rejected": -12.485115051269531, "step": 18925 }, { "epoch": 2.94, "learning_rate": 2.662389128066809e-07, "logits/chosen": -2.8264009952545166, "logits/rejected": -2.896629571914673, "logps/chosen": -149.87225341796875, "logps/rejected": -239.6106414794922, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -9.402128219604492, "rewards/margins": 5.99384880065918, "rewards/rejected": -15.395977020263672, "step": 18926 }, { "epoch": 2.94, "learning_rate": 2.65505472275533e-07, "logits/chosen": -2.5901448726654053, "logits/rejected": -1.7863836288452148, "logps/chosen": -253.59475708007812, "logps/rejected": -295.8540344238281, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.3186464309692383, "rewards/margins": 11.151415824890137, "rewards/rejected": -13.470062255859375, "step": 18927 }, { "epoch": 2.94, "learning_rate": 2.647720317443851e-07, "logits/chosen": -2.082240104675293, "logits/rejected": -2.6372437477111816, "logps/chosen": -439.6470947265625, "logps/rejected": -705.2615356445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -10.657539367675781, "rewards/margins": 15.921136856079102, "rewards/rejected": -26.578676223754883, "step": 18928 }, { "epoch": 2.94, "learning_rate": 2.640385912132372e-07, "logits/chosen": -2.8754096031188965, "logits/rejected": -2.792412757873535, "logps/chosen": -564.8719482421875, "logps/rejected": -557.9788818359375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.985291957855225, "rewards/margins": 8.319597244262695, "rewards/rejected": -15.304889678955078, "step": 18929 }, { "epoch": 2.94, "learning_rate": 2.6330515068208935e-07, "logits/chosen": -2.3616116046905518, "logits/rejected": -2.6914706230163574, "logps/chosen": -131.64285278320312, "logps/rejected": -292.7615966796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.07822322845459, "rewards/margins": 11.71013069152832, "rewards/rejected": -15.78835391998291, "step": 18930 }, { "epoch": 2.94, "learning_rate": 2.6257171015094144e-07, "logits/chosen": -2.381685733795166, "logits/rejected": -2.7103545665740967, "logps/chosen": -249.6533203125, "logps/rejected": -434.6991271972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.034128189086914, "rewards/margins": 11.40627670288086, "rewards/rejected": -17.440404891967773, "step": 18931 }, { "epoch": 2.94, "learning_rate": 2.6183826961979353e-07, "logits/chosen": -2.899812936782837, "logits/rejected": -2.988731622695923, "logps/chosen": -363.1912841796875, "logps/rejected": -562.2061157226562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.379164695739746, "rewards/margins": 10.755806922912598, "rewards/rejected": -18.134971618652344, "step": 18932 }, { "epoch": 2.94, "learning_rate": 2.6110482908864567e-07, "logits/chosen": -1.7751567363739014, "logits/rejected": -2.6650898456573486, "logps/chosen": -200.9692840576172, "logps/rejected": -465.7021484375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.000232696533203, "rewards/margins": 8.972307205200195, "rewards/rejected": -17.9725399017334, "step": 18933 }, { "epoch": 2.94, "learning_rate": 2.6037138855749776e-07, "logits/chosen": -2.385280132293701, "logits/rejected": -1.8217202425003052, "logps/chosen": -235.0879669189453, "logps/rejected": -410.97161865234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.463799476623535, "rewards/margins": 14.898366928100586, "rewards/rejected": -21.362167358398438, "step": 18934 }, { "epoch": 2.94, "learning_rate": 2.596379480263499e-07, "logits/chosen": -2.795694589614868, "logits/rejected": -2.9017527103424072, "logps/chosen": -242.73236083984375, "logps/rejected": -352.3551940917969, "loss": 0.051, "rewards/accuracies": 1.0, "rewards/chosen": -11.75939655303955, "rewards/margins": 5.380215167999268, "rewards/rejected": -17.139612197875977, "step": 18935 }, { "epoch": 2.94, "learning_rate": 2.58904507495202e-07, "logits/chosen": -2.304237127304077, "logits/rejected": -2.8518502712249756, "logps/chosen": -111.19913482666016, "logps/rejected": -339.0517578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.49397087097168, "rewards/margins": 10.977794647216797, "rewards/rejected": -19.471765518188477, "step": 18936 }, { "epoch": 2.95, "learning_rate": 2.5817106696405414e-07, "logits/chosen": -1.4722055196762085, "logits/rejected": -2.690150022506714, "logps/chosen": -225.44357299804688, "logps/rejected": -569.4324951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.596973419189453, "rewards/margins": 13.68977165222168, "rewards/rejected": -20.286746978759766, "step": 18937 }, { "epoch": 2.95, "learning_rate": 2.574376264329063e-07, "logits/chosen": -0.9714342355728149, "logits/rejected": -2.4973888397216797, "logps/chosen": -217.2201385498047, "logps/rejected": -492.1318359375, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -6.052045822143555, "rewards/margins": 7.104203224182129, "rewards/rejected": -13.156249046325684, "step": 18938 }, { "epoch": 2.95, "learning_rate": 2.5670418590175837e-07, "logits/chosen": -1.8502557277679443, "logits/rejected": -2.0596420764923096, "logps/chosen": -469.3775329589844, "logps/rejected": -465.6393127441406, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.282487869262695, "rewards/margins": 9.945383071899414, "rewards/rejected": -18.22787094116211, "step": 18939 }, { "epoch": 2.95, "learning_rate": 2.559707453706105e-07, "logits/chosen": -2.6293692588806152, "logits/rejected": -1.997923493385315, "logps/chosen": -308.46148681640625, "logps/rejected": -316.4730224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.22593879699707, "rewards/margins": 11.896397590637207, "rewards/rejected": -20.122337341308594, "step": 18940 }, { "epoch": 2.95, "learning_rate": 2.552373048394626e-07, "logits/chosen": -2.5381734371185303, "logits/rejected": -2.912733793258667, "logps/chosen": -167.88494873046875, "logps/rejected": -345.80401611328125, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.218973159790039, "rewards/margins": 7.576205253601074, "rewards/rejected": -13.795178413391113, "step": 18941 }, { "epoch": 2.95, "learning_rate": 2.5450386430831475e-07, "logits/chosen": -0.6483456492424011, "logits/rejected": -2.385709524154663, "logps/chosen": -111.1565933227539, "logps/rejected": -546.2669677734375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.563613891601562, "rewards/margins": 12.055008888244629, "rewards/rejected": -20.618621826171875, "step": 18942 }, { "epoch": 2.95, "learning_rate": 2.537704237771669e-07, "logits/chosen": -2.5197947025299072, "logits/rejected": -2.4530837535858154, "logps/chosen": -136.08718872070312, "logps/rejected": -232.43087768554688, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -7.414679527282715, "rewards/margins": 6.924072265625, "rewards/rejected": -14.338752746582031, "step": 18943 }, { "epoch": 2.95, "learning_rate": 2.53036983246019e-07, "logits/chosen": -1.1735734939575195, "logits/rejected": -2.5508363246917725, "logps/chosen": -163.31900024414062, "logps/rejected": -420.0323486328125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.9077229499816895, "rewards/margins": 8.847705841064453, "rewards/rejected": -15.7554292678833, "step": 18944 }, { "epoch": 2.95, "learning_rate": 2.523035427148711e-07, "logits/chosen": -2.7562918663024902, "logits/rejected": -2.9184165000915527, "logps/chosen": -124.06139373779297, "logps/rejected": -326.56451416015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.101141929626465, "rewards/margins": 11.374703407287598, "rewards/rejected": -20.475845336914062, "step": 18945 }, { "epoch": 2.95, "learning_rate": 2.515701021837232e-07, "logits/chosen": -2.5506362915039062, "logits/rejected": -2.4407284259796143, "logps/chosen": -388.8182373046875, "logps/rejected": -470.6861572265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.9887590408325195, "rewards/margins": 9.240720748901367, "rewards/rejected": -17.22947883605957, "step": 18946 }, { "epoch": 2.95, "learning_rate": 2.5083666165257536e-07, "logits/chosen": -1.8758118152618408, "logits/rejected": -2.711899995803833, "logps/chosen": -132.9425048828125, "logps/rejected": -432.8700866699219, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.930880546569824, "rewards/margins": 9.821869850158691, "rewards/rejected": -16.752750396728516, "step": 18947 }, { "epoch": 2.95, "learning_rate": 2.501032211214275e-07, "logits/chosen": -2.047175645828247, "logits/rejected": -2.543926239013672, "logps/chosen": -156.250244140625, "logps/rejected": -421.4300231933594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.163076400756836, "rewards/margins": 16.388460159301758, "rewards/rejected": -23.551536560058594, "step": 18948 }, { "epoch": 2.95, "learning_rate": 2.493697805902796e-07, "logits/chosen": -1.7014926671981812, "logits/rejected": -2.739220380783081, "logps/chosen": -161.59609985351562, "logps/rejected": -413.8507385253906, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.65904426574707, "rewards/margins": 10.783958435058594, "rewards/rejected": -18.443002700805664, "step": 18949 }, { "epoch": 2.95, "learning_rate": 2.4863634005913173e-07, "logits/chosen": -1.4306460618972778, "logits/rejected": -1.5301425457000732, "logps/chosen": -367.5482177734375, "logps/rejected": -375.104736328125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -8.880621910095215, "rewards/margins": 9.33454704284668, "rewards/rejected": -18.21516990661621, "step": 18950 }, { "epoch": 2.95, "learning_rate": 2.479028995279838e-07, "logits/chosen": -2.404686450958252, "logits/rejected": -2.4860284328460693, "logps/chosen": -230.81219482421875, "logps/rejected": -344.10003662109375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -8.398149490356445, "rewards/margins": 7.519415855407715, "rewards/rejected": -15.917564392089844, "step": 18951 }, { "epoch": 2.95, "learning_rate": 2.471694589968359e-07, "logits/chosen": -2.53609561920166, "logits/rejected": -2.3639121055603027, "logps/chosen": -791.671630859375, "logps/rejected": -681.047119140625, "loss": 0.0203, "rewards/accuracies": 1.0, "rewards/chosen": -6.444027900695801, "rewards/margins": 8.686395645141602, "rewards/rejected": -15.130423545837402, "step": 18952 }, { "epoch": 2.95, "learning_rate": 2.4643601846568806e-07, "logits/chosen": -1.775767207145691, "logits/rejected": -2.7593581676483154, "logps/chosen": -152.74522399902344, "logps/rejected": -376.3121337890625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -7.641574859619141, "rewards/margins": 9.141709327697754, "rewards/rejected": -16.783283233642578, "step": 18953 }, { "epoch": 2.95, "learning_rate": 2.4570257793454015e-07, "logits/chosen": -1.3617315292358398, "logits/rejected": -1.9949854612350464, "logps/chosen": -188.33946228027344, "logps/rejected": -618.4186401367188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.240253448486328, "rewards/margins": 10.691075325012207, "rewards/rejected": -18.93132972717285, "step": 18954 }, { "epoch": 2.95, "learning_rate": 2.449691374033923e-07, "logits/chosen": -1.6717798709869385, "logits/rejected": -2.573657512664795, "logps/chosen": -221.42935180664062, "logps/rejected": -385.16961669921875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.8297624588012695, "rewards/margins": 7.7650227546691895, "rewards/rejected": -13.594785690307617, "step": 18955 }, { "epoch": 2.95, "learning_rate": 2.442356968722444e-07, "logits/chosen": -2.085698366165161, "logits/rejected": -2.4340004920959473, "logps/chosen": -347.06719970703125, "logps/rejected": -435.6273193359375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -6.6367974281311035, "rewards/margins": 10.669520378112793, "rewards/rejected": -17.306318283081055, "step": 18956 }, { "epoch": 2.95, "learning_rate": 2.435022563410965e-07, "logits/chosen": -1.8286385536193848, "logits/rejected": -2.5790693759918213, "logps/chosen": -220.50393676757812, "logps/rejected": -363.84197998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.836350440979004, "rewards/margins": 10.863775253295898, "rewards/rejected": -13.700124740600586, "step": 18957 }, { "epoch": 2.95, "learning_rate": 2.4276881580994867e-07, "logits/chosen": -1.6976128816604614, "logits/rejected": -2.8454349040985107, "logps/chosen": -219.34304809570312, "logps/rejected": -604.0638427734375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.793943405151367, "rewards/margins": 10.46334457397461, "rewards/rejected": -19.257287979125977, "step": 18958 }, { "epoch": 2.95, "learning_rate": 2.4203537527880076e-07, "logits/chosen": -1.7481319904327393, "logits/rejected": -2.9816558361053467, "logps/chosen": -118.80083465576172, "logps/rejected": -376.6817321777344, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -6.703507423400879, "rewards/margins": 8.681183815002441, "rewards/rejected": -15.38469123840332, "step": 18959 }, { "epoch": 2.95, "learning_rate": 2.413019347476529e-07, "logits/chosen": -2.164968729019165, "logits/rejected": -2.623476266860962, "logps/chosen": -483.3316345214844, "logps/rejected": -504.8409423828125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -8.977602005004883, "rewards/margins": 7.9230570793151855, "rewards/rejected": -16.900659561157227, "step": 18960 }, { "epoch": 2.95, "learning_rate": 2.40568494216505e-07, "logits/chosen": -2.60791015625, "logits/rejected": -2.8025877475738525, "logps/chosen": -68.17533874511719, "logps/rejected": -280.9932556152344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -3.1094582080841064, "rewards/margins": 9.394079208374023, "rewards/rejected": -12.50353717803955, "step": 18961 }, { "epoch": 2.95, "learning_rate": 2.3983505368535713e-07, "logits/chosen": -1.739243507385254, "logits/rejected": -2.5941898822784424, "logps/chosen": -92.48046875, "logps/rejected": -292.14666748046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.2872514724731445, "rewards/margins": 10.78903579711914, "rewards/rejected": -17.07628631591797, "step": 18962 }, { "epoch": 2.95, "learning_rate": 2.391016131542093e-07, "logits/chosen": -2.8512609004974365, "logits/rejected": -2.629448413848877, "logps/chosen": -366.018310546875, "logps/rejected": -414.8503723144531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.4123687744140625, "rewards/margins": 12.056111335754395, "rewards/rejected": -19.46847915649414, "step": 18963 }, { "epoch": 2.95, "learning_rate": 2.3836817262306136e-07, "logits/chosen": -2.308718681335449, "logits/rejected": -2.9529407024383545, "logps/chosen": -104.56425476074219, "logps/rejected": -429.43817138671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.225574493408203, "rewards/margins": 13.352869033813477, "rewards/rejected": -19.57844352722168, "step": 18964 }, { "epoch": 2.95, "learning_rate": 2.376347320919135e-07, "logits/chosen": -1.9750272035598755, "logits/rejected": -2.6836540699005127, "logps/chosen": -451.83868408203125, "logps/rejected": -585.419677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.547677993774414, "rewards/margins": 10.4705171585083, "rewards/rejected": -20.01819610595703, "step": 18965 }, { "epoch": 2.95, "learning_rate": 2.369012915607656e-07, "logits/chosen": -1.4707469940185547, "logits/rejected": -2.8136367797851562, "logps/chosen": -405.527587890625, "logps/rejected": -843.9854125976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.673144340515137, "rewards/margins": 11.34750747680664, "rewards/rejected": -20.020652770996094, "step": 18966 }, { "epoch": 2.95, "learning_rate": 2.3616785102961771e-07, "logits/chosen": -1.7943692207336426, "logits/rejected": -2.510143518447876, "logps/chosen": -177.60548400878906, "logps/rejected": -351.8841552734375, "loss": 0.6938, "rewards/accuracies": 0.5, "rewards/chosen": -8.8540620803833, "rewards/margins": 7.121432781219482, "rewards/rejected": -15.975494384765625, "step": 18967 }, { "epoch": 2.95, "learning_rate": 2.354344104984698e-07, "logits/chosen": -1.898089051246643, "logits/rejected": -2.6980855464935303, "logps/chosen": -155.2734375, "logps/rejected": -297.22442626953125, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.782464027404785, "rewards/margins": 8.355987548828125, "rewards/rejected": -15.138452529907227, "step": 18968 }, { "epoch": 2.95, "learning_rate": 2.3470096996732195e-07, "logits/chosen": -2.691027879714966, "logits/rejected": -2.078651189804077, "logps/chosen": -582.7059326171875, "logps/rejected": -526.9696655273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.6313018798828125, "rewards/margins": 10.784346580505371, "rewards/rejected": -17.4156494140625, "step": 18969 }, { "epoch": 2.95, "learning_rate": 2.339675294361741e-07, "logits/chosen": -3.1281886100769043, "logits/rejected": -3.1031651496887207, "logps/chosen": -121.37933349609375, "logps/rejected": -248.76275634765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.070544242858887, "rewards/margins": 8.99174690246582, "rewards/rejected": -17.062292098999023, "step": 18970 }, { "epoch": 2.95, "learning_rate": 2.3323408890502618e-07, "logits/chosen": -2.476508378982544, "logits/rejected": -2.5276966094970703, "logps/chosen": -397.95697021484375, "logps/rejected": -537.2139892578125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -11.2305908203125, "rewards/margins": 9.799717903137207, "rewards/rejected": -21.03030776977539, "step": 18971 }, { "epoch": 2.95, "learning_rate": 2.3250064837387832e-07, "logits/chosen": -2.9433600902557373, "logits/rejected": -2.997457504272461, "logps/chosen": -202.64443969726562, "logps/rejected": -264.6602478027344, "loss": 0.1242, "rewards/accuracies": 1.0, "rewards/chosen": -13.654224395751953, "rewards/margins": 4.554950714111328, "rewards/rejected": -18.20917510986328, "step": 18972 }, { "epoch": 2.95, "learning_rate": 2.3176720784273041e-07, "logits/chosen": -2.1182918548583984, "logits/rejected": -2.7188720703125, "logps/chosen": -271.2797546386719, "logps/rejected": -473.9773254394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.4498298168182373, "rewards/margins": 15.922065734863281, "rewards/rejected": -18.371896743774414, "step": 18973 }, { "epoch": 2.95, "learning_rate": 2.3103376731158256e-07, "logits/chosen": -2.872910261154175, "logits/rejected": -2.8097012042999268, "logps/chosen": -266.4822082519531, "logps/rejected": -299.4437255859375, "loss": 0.0021, "rewards/accuracies": 1.0, "rewards/chosen": -6.546754837036133, "rewards/margins": 8.077807426452637, "rewards/rejected": -14.62456226348877, "step": 18974 }, { "epoch": 2.95, "learning_rate": 2.303003267804347e-07, "logits/chosen": -2.1308460235595703, "logits/rejected": -2.5091938972473145, "logps/chosen": -146.342529296875, "logps/rejected": -371.3887939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.659376621246338, "rewards/margins": 14.51700210571289, "rewards/rejected": -19.176380157470703, "step": 18975 }, { "epoch": 2.95, "learning_rate": 2.295668862492868e-07, "logits/chosen": -1.7750743627548218, "logits/rejected": -2.704357385635376, "logps/chosen": -195.59469604492188, "logps/rejected": -442.34320068359375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -9.466727256774902, "rewards/margins": 8.645881652832031, "rewards/rejected": -18.11260986328125, "step": 18976 }, { "epoch": 2.95, "learning_rate": 2.288334457181389e-07, "logits/chosen": -2.770012855529785, "logits/rejected": -3.00612211227417, "logps/chosen": -509.260986328125, "logps/rejected": -420.3524169921875, "loss": 0.0169, "rewards/accuracies": 1.0, "rewards/chosen": -5.525407791137695, "rewards/margins": 8.629737854003906, "rewards/rejected": -14.155144691467285, "step": 18977 }, { "epoch": 2.95, "learning_rate": 2.28100005186991e-07, "logits/chosen": -1.8583768606185913, "logits/rejected": -2.5390138626098633, "logps/chosen": -292.54833984375, "logps/rejected": -530.4188232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.62246322631836, "rewards/margins": 14.745030403137207, "rewards/rejected": -23.36749267578125, "step": 18978 }, { "epoch": 2.95, "learning_rate": 2.2736656465584314e-07, "logits/chosen": -2.7711246013641357, "logits/rejected": -2.776560068130493, "logps/chosen": -108.11505126953125, "logps/rejected": -342.9002990722656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.694372653961182, "rewards/margins": 12.250444412231445, "rewards/rejected": -18.94481658935547, "step": 18979 }, { "epoch": 2.95, "learning_rate": 2.2663312412469528e-07, "logits/chosen": -2.6547579765319824, "logits/rejected": -2.5341033935546875, "logps/chosen": -646.5953369140625, "logps/rejected": -579.0756225585938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.83184289932251, "rewards/margins": 12.277557373046875, "rewards/rejected": -20.109399795532227, "step": 18980 }, { "epoch": 2.95, "learning_rate": 2.2589968359354737e-07, "logits/chosen": -1.790525197982788, "logits/rejected": -2.340517282485962, "logps/chosen": -221.15921020507812, "logps/rejected": -339.1471862792969, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -10.560201644897461, "rewards/margins": 5.8131608963012695, "rewards/rejected": -16.373363494873047, "step": 18981 }, { "epoch": 2.95, "learning_rate": 2.2516624306239952e-07, "logits/chosen": -2.1120238304138184, "logits/rejected": -2.832022190093994, "logps/chosen": -179.25701904296875, "logps/rejected": -462.3466491699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.742574691772461, "rewards/margins": 12.650529861450195, "rewards/rejected": -21.393104553222656, "step": 18982 }, { "epoch": 2.95, "learning_rate": 2.244328025312516e-07, "logits/chosen": -2.470834255218506, "logits/rejected": -1.6555038690567017, "logps/chosen": -316.7388610839844, "logps/rejected": -330.573974609375, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -8.458032608032227, "rewards/margins": 11.530191421508789, "rewards/rejected": -19.988224029541016, "step": 18983 }, { "epoch": 2.95, "learning_rate": 2.2369936200010375e-07, "logits/chosen": -2.111786365509033, "logits/rejected": -2.736041307449341, "logps/chosen": -122.4560546875, "logps/rejected": -437.9183044433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.31371545791626, "rewards/margins": 11.563907623291016, "rewards/rejected": -18.87762451171875, "step": 18984 }, { "epoch": 2.95, "learning_rate": 2.229659214689559e-07, "logits/chosen": -2.5907602310180664, "logits/rejected": -2.417843818664551, "logps/chosen": -463.21075439453125, "logps/rejected": -566.7025756835938, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -11.790377616882324, "rewards/margins": 11.546436309814453, "rewards/rejected": -23.336814880371094, "step": 18985 }, { "epoch": 2.95, "learning_rate": 2.2223248093780798e-07, "logits/chosen": -2.986513137817383, "logits/rejected": -2.7499210834503174, "logps/chosen": -196.70477294921875, "logps/rejected": -183.31069946289062, "loss": 0.1288, "rewards/accuracies": 1.0, "rewards/chosen": -6.84798526763916, "rewards/margins": 5.82725715637207, "rewards/rejected": -12.675241470336914, "step": 18986 }, { "epoch": 2.95, "learning_rate": 2.2149904040666012e-07, "logits/chosen": -2.490021228790283, "logits/rejected": -2.8776121139526367, "logps/chosen": -212.4473876953125, "logps/rejected": -437.98883056640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.5087103843688965, "rewards/margins": 11.849554061889648, "rewards/rejected": -18.358264923095703, "step": 18987 }, { "epoch": 2.95, "learning_rate": 2.2076559987551221e-07, "logits/chosen": -2.4677956104278564, "logits/rejected": -2.2277140617370605, "logps/chosen": -269.12469482421875, "logps/rejected": -398.8697204589844, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -5.718169212341309, "rewards/margins": 14.53736686706543, "rewards/rejected": -20.255535125732422, "step": 18988 }, { "epoch": 2.95, "learning_rate": 2.2003215934436433e-07, "logits/chosen": -2.5802018642425537, "logits/rejected": -2.314162492752075, "logps/chosen": -342.02215576171875, "logps/rejected": -645.4889526367188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.798967361450195, "rewards/margins": 9.697772026062012, "rewards/rejected": -14.496739387512207, "step": 18989 }, { "epoch": 2.95, "learning_rate": 2.1929871881321645e-07, "logits/chosen": -2.620985746383667, "logits/rejected": -2.626211643218994, "logps/chosen": -154.5302276611328, "logps/rejected": -185.28677368164062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.788531303405762, "rewards/margins": 8.678817749023438, "rewards/rejected": -13.467348098754883, "step": 18990 }, { "epoch": 2.95, "learning_rate": 2.1856527828206856e-07, "logits/chosen": -1.6187756061553955, "logits/rejected": -2.2080109119415283, "logps/chosen": -116.12686920166016, "logps/rejected": -273.8847961425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.136327743530273, "rewards/margins": 11.208072662353516, "rewards/rejected": -16.34440040588379, "step": 18991 }, { "epoch": 2.95, "learning_rate": 2.1783183775092068e-07, "logits/chosen": -2.1402950286865234, "logits/rejected": -2.946446418762207, "logps/chosen": -138.8985595703125, "logps/rejected": -582.4444580078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.9166436195373535, "rewards/margins": 15.445402145385742, "rewards/rejected": -21.362045288085938, "step": 18992 }, { "epoch": 2.95, "learning_rate": 2.1709839721977282e-07, "logits/chosen": -2.1783697605133057, "logits/rejected": -2.9230539798736572, "logps/chosen": -244.5362548828125, "logps/rejected": -402.6657409667969, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -6.424888610839844, "rewards/margins": 8.788005828857422, "rewards/rejected": -15.212894439697266, "step": 18993 }, { "epoch": 2.95, "learning_rate": 2.1636495668862494e-07, "logits/chosen": -2.80710768699646, "logits/rejected": -2.4459660053253174, "logps/chosen": -259.0157470703125, "logps/rejected": -131.90748596191406, "loss": 0.1116, "rewards/accuracies": 1.0, "rewards/chosen": -4.373785972595215, "rewards/margins": 5.084169864654541, "rewards/rejected": -9.457956314086914, "step": 18994 }, { "epoch": 2.95, "learning_rate": 2.1563151615747706e-07, "logits/chosen": -1.8878090381622314, "logits/rejected": -2.3803348541259766, "logps/chosen": -151.2628631591797, "logps/rejected": -289.1854248046875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -7.326414108276367, "rewards/margins": 8.96674919128418, "rewards/rejected": -16.293163299560547, "step": 18995 }, { "epoch": 2.95, "learning_rate": 2.1489807562632917e-07, "logits/chosen": -2.317216396331787, "logits/rejected": -2.508652925491333, "logps/chosen": -438.57293701171875, "logps/rejected": -581.3689575195312, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.9878568649292, "rewards/margins": 10.570951461791992, "rewards/rejected": -20.558809280395508, "step": 18996 }, { "epoch": 2.95, "learning_rate": 2.141646350951813e-07, "logits/chosen": -2.468417167663574, "logits/rejected": -2.084296941757202, "logps/chosen": -372.36785888671875, "logps/rejected": -565.7152709960938, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.018589973449707, "rewards/margins": 9.185084342956543, "rewards/rejected": -19.20367431640625, "step": 18997 }, { "epoch": 2.95, "learning_rate": 2.134311945640334e-07, "logits/chosen": -2.6257293224334717, "logits/rejected": -2.490064859390259, "logps/chosen": -236.72418212890625, "logps/rejected": -431.870849609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.713493347167969, "rewards/margins": 8.987438201904297, "rewards/rejected": -19.700931549072266, "step": 18998 }, { "epoch": 2.95, "learning_rate": 2.1269775403288552e-07, "logits/chosen": -2.707048177719116, "logits/rejected": -2.237658739089966, "logps/chosen": -323.7169189453125, "logps/rejected": -269.9205322265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.985116004943848, "rewards/margins": 7.402628421783447, "rewards/rejected": -13.387743949890137, "step": 18999 }, { "epoch": 2.95, "learning_rate": 2.1196431350173764e-07, "logits/chosen": -1.3747835159301758, "logits/rejected": -2.2433104515075684, "logps/chosen": -230.11390686035156, "logps/rejected": -488.4846496582031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.206530570983887, "rewards/margins": 13.463630676269531, "rewards/rejected": -21.670162200927734, "step": 19000 }, { "epoch": 2.96, "learning_rate": 2.1123087297058976e-07, "logits/chosen": -2.0874838829040527, "logits/rejected": -2.6553852558135986, "logps/chosen": -333.5572509765625, "logps/rejected": -683.321533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -11.784191131591797, "rewards/margins": 16.38452911376953, "rewards/rejected": -28.168720245361328, "step": 19001 }, { "epoch": 2.96, "learning_rate": 2.1049743243944187e-07, "logits/chosen": -2.430586338043213, "logits/rejected": -0.9768565893173218, "logps/chosen": -298.04205322265625, "logps/rejected": -374.47662353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.453715801239014, "rewards/margins": 14.847757339477539, "rewards/rejected": -22.301471710205078, "step": 19002 }, { "epoch": 2.96, "learning_rate": 2.09763991908294e-07, "logits/chosen": -2.6226887702941895, "logits/rejected": -2.123952865600586, "logps/chosen": -471.9903869628906, "logps/rejected": -459.9610595703125, "loss": 0.1525, "rewards/accuracies": 1.0, "rewards/chosen": -10.172849655151367, "rewards/margins": 4.2792067527771, "rewards/rejected": -14.452056884765625, "step": 19003 }, { "epoch": 2.96, "learning_rate": 2.0903055137714613e-07, "logits/chosen": -2.8812105655670166, "logits/rejected": -2.3952202796936035, "logps/chosen": -944.3026123046875, "logps/rejected": -668.4180908203125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -12.955738067626953, "rewards/margins": 7.06188440322876, "rewards/rejected": -20.017621994018555, "step": 19004 }, { "epoch": 2.96, "learning_rate": 2.0829711084599825e-07, "logits/chosen": -2.601306200027466, "logits/rejected": -2.2378249168395996, "logps/chosen": -162.65850830078125, "logps/rejected": -389.6835632324219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.5065999031066895, "rewards/margins": 12.15855884552002, "rewards/rejected": -17.665159225463867, "step": 19005 }, { "epoch": 2.96, "learning_rate": 2.0756367031485036e-07, "logits/chosen": -1.6997205018997192, "logits/rejected": -2.6417438983917236, "logps/chosen": -444.0167541503906, "logps/rejected": -862.4913940429688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.919856071472168, "rewards/margins": 13.225136756896973, "rewards/rejected": -23.14499282836914, "step": 19006 }, { "epoch": 2.96, "learning_rate": 2.0683022978370248e-07, "logits/chosen": -2.3369133472442627, "logits/rejected": -2.510840892791748, "logps/chosen": -112.21459197998047, "logps/rejected": -240.73916625976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.161355018615723, "rewards/margins": 13.490203857421875, "rewards/rejected": -17.651559829711914, "step": 19007 }, { "epoch": 2.96, "learning_rate": 2.060967892525546e-07, "logits/chosen": -1.7197606563568115, "logits/rejected": -2.38271427154541, "logps/chosen": -223.73440551757812, "logps/rejected": -368.30865478515625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.557625770568848, "rewards/margins": 8.587641716003418, "rewards/rejected": -14.145267486572266, "step": 19008 }, { "epoch": 2.96, "learning_rate": 2.0536334872140671e-07, "logits/chosen": -2.0060980319976807, "logits/rejected": -2.565117835998535, "logps/chosen": -185.701171875, "logps/rejected": -376.611083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.750301361083984, "rewards/margins": 10.722384452819824, "rewards/rejected": -15.472685813903809, "step": 19009 }, { "epoch": 2.96, "learning_rate": 2.0462990819025883e-07, "logits/chosen": -1.9919674396514893, "logits/rejected": -2.3295390605926514, "logps/chosen": -168.22103881835938, "logps/rejected": -388.2993469238281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.1978654861450195, "rewards/margins": 9.235204696655273, "rewards/rejected": -15.433070182800293, "step": 19010 }, { "epoch": 2.96, "learning_rate": 2.0389646765911095e-07, "logits/chosen": -2.1234304904937744, "logits/rejected": -2.449748992919922, "logps/chosen": -363.0210266113281, "logps/rejected": -477.59393310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -2.733597755432129, "rewards/margins": 12.524712562561035, "rewards/rejected": -15.258310317993164, "step": 19011 }, { "epoch": 2.96, "learning_rate": 2.0316302712796306e-07, "logits/chosen": -2.690086841583252, "logits/rejected": -2.8630552291870117, "logps/chosen": -162.8746337890625, "logps/rejected": -376.48162841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.213529586791992, "rewards/margins": 11.373672485351562, "rewards/rejected": -17.587203979492188, "step": 19012 }, { "epoch": 2.96, "learning_rate": 2.0242958659681518e-07, "logits/chosen": -2.6303815841674805, "logits/rejected": -2.6471922397613525, "logps/chosen": -132.66168212890625, "logps/rejected": -193.843505859375, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.986208915710449, "rewards/margins": 8.706840515136719, "rewards/rejected": -15.693049430847168, "step": 19013 }, { "epoch": 2.96, "learning_rate": 2.0169614606566732e-07, "logits/chosen": -2.1796278953552246, "logits/rejected": -2.5703938007354736, "logps/chosen": -253.02972412109375, "logps/rejected": -431.588134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.162067413330078, "rewards/margins": 12.13901424407959, "rewards/rejected": -19.301082611083984, "step": 19014 }, { "epoch": 2.96, "learning_rate": 2.0096270553451944e-07, "logits/chosen": -2.7219364643096924, "logits/rejected": -2.66865873336792, "logps/chosen": -273.4757385253906, "logps/rejected": -397.44476318359375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -5.849460124969482, "rewards/margins": 11.391423225402832, "rewards/rejected": -17.240882873535156, "step": 19015 }, { "epoch": 2.96, "learning_rate": 2.0022926500337156e-07, "logits/chosen": -2.612800121307373, "logits/rejected": -2.5604827404022217, "logps/chosen": -670.6717529296875, "logps/rejected": -861.7509155273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.4520487785339355, "rewards/margins": 12.588103294372559, "rewards/rejected": -19.04015350341797, "step": 19016 }, { "epoch": 2.96, "learning_rate": 1.9949582447222367e-07, "logits/chosen": -1.1106153726577759, "logits/rejected": -2.6645195484161377, "logps/chosen": -145.69833374023438, "logps/rejected": -567.0377197265625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.303827285766602, "rewards/margins": 10.860901832580566, "rewards/rejected": -16.164730072021484, "step": 19017 }, { "epoch": 2.96, "learning_rate": 1.987623839410758e-07, "logits/chosen": -2.348642349243164, "logits/rejected": -2.475613832473755, "logps/chosen": -173.79531860351562, "logps/rejected": -350.8483581542969, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -5.400914669036865, "rewards/margins": 8.624281883239746, "rewards/rejected": -14.025196075439453, "step": 19018 }, { "epoch": 2.96, "learning_rate": 1.9802894340992788e-07, "logits/chosen": -1.1193715333938599, "logits/rejected": -1.8034626245498657, "logps/chosen": -177.04269409179688, "logps/rejected": -590.40478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.475249290466309, "rewards/margins": 13.829261779785156, "rewards/rejected": -21.30451011657715, "step": 19019 }, { "epoch": 2.96, "learning_rate": 1.9729550287878002e-07, "logits/chosen": -3.058814287185669, "logits/rejected": -2.786494016647339, "logps/chosen": -187.14137268066406, "logps/rejected": -317.10882568359375, "loss": 0.9302, "rewards/accuracies": 0.5, "rewards/chosen": -8.747014045715332, "rewards/margins": 4.990099906921387, "rewards/rejected": -13.737113952636719, "step": 19020 }, { "epoch": 2.96, "learning_rate": 1.9656206234763214e-07, "logits/chosen": -1.8690119981765747, "logits/rejected": -2.488748550415039, "logps/chosen": -400.28173828125, "logps/rejected": -585.4559936523438, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.7549409866333, "rewards/margins": 12.723762512207031, "rewards/rejected": -21.478702545166016, "step": 19021 }, { "epoch": 2.96, "learning_rate": 1.9582862181648426e-07, "logits/chosen": -2.521919012069702, "logits/rejected": -2.9344749450683594, "logps/chosen": -381.3399353027344, "logps/rejected": -626.0018310546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.244344711303711, "rewards/margins": 11.581365585327148, "rewards/rejected": -16.82571029663086, "step": 19022 }, { "epoch": 2.96, "learning_rate": 1.9509518128533637e-07, "logits/chosen": -1.1382466554641724, "logits/rejected": -2.4443907737731934, "logps/chosen": -121.22529602050781, "logps/rejected": -357.5986633300781, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.614349365234375, "rewards/margins": 9.467459678649902, "rewards/rejected": -14.081808090209961, "step": 19023 }, { "epoch": 2.96, "learning_rate": 1.943617407541885e-07, "logits/chosen": -2.3429412841796875, "logits/rejected": -2.3419597148895264, "logps/chosen": -550.483154296875, "logps/rejected": -342.28466796875, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -4.001457214355469, "rewards/margins": 8.036694526672363, "rewards/rejected": -12.038152694702148, "step": 19024 }, { "epoch": 2.96, "learning_rate": 1.9362830022304063e-07, "logits/chosen": -1.6157256364822388, "logits/rejected": -2.804805040359497, "logps/chosen": -177.44386291503906, "logps/rejected": -540.731689453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.525153636932373, "rewards/margins": 11.792655944824219, "rewards/rejected": -18.31781005859375, "step": 19025 }, { "epoch": 2.96, "learning_rate": 1.9289485969189275e-07, "logits/chosen": -2.5986831188201904, "logits/rejected": -2.0862531661987305, "logps/chosen": -476.1324462890625, "logps/rejected": -541.8125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.676553726196289, "rewards/margins": 13.642964363098145, "rewards/rejected": -19.31951904296875, "step": 19026 }, { "epoch": 2.96, "learning_rate": 1.9216141916074486e-07, "logits/chosen": -1.9804314374923706, "logits/rejected": -2.8582091331481934, "logps/chosen": -161.51199340820312, "logps/rejected": -609.2154541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.468732833862305, "rewards/margins": 12.293573379516602, "rewards/rejected": -21.762306213378906, "step": 19027 }, { "epoch": 2.96, "learning_rate": 1.9142797862959698e-07, "logits/chosen": -2.759429454803467, "logits/rejected": -2.8851406574249268, "logps/chosen": -158.80633544921875, "logps/rejected": -299.14666748046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.160235404968262, "rewards/margins": 9.785492897033691, "rewards/rejected": -16.945728302001953, "step": 19028 }, { "epoch": 2.96, "learning_rate": 1.9069453809844907e-07, "logits/chosen": -2.595346212387085, "logits/rejected": -2.639176607131958, "logps/chosen": -133.30300903320312, "logps/rejected": -375.54705810546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -8.327433586120605, "rewards/margins": 9.347448348999023, "rewards/rejected": -17.674882888793945, "step": 19029 }, { "epoch": 2.96, "learning_rate": 1.8996109756730121e-07, "logits/chosen": -2.3975794315338135, "logits/rejected": -1.8507739305496216, "logps/chosen": -301.64959716796875, "logps/rejected": -310.41094970703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.6583428382873535, "rewards/margins": 8.753583908081055, "rewards/rejected": -15.411927223205566, "step": 19030 }, { "epoch": 2.96, "learning_rate": 1.8922765703615333e-07, "logits/chosen": -2.665830612182617, "logits/rejected": -0.8249461650848389, "logps/chosen": -762.8344116210938, "logps/rejected": -331.7969970703125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -10.365142822265625, "rewards/margins": 5.8027663230896, "rewards/rejected": -16.16790771484375, "step": 19031 }, { "epoch": 2.96, "learning_rate": 1.8849421650500545e-07, "logits/chosen": -2.3523778915405273, "logits/rejected": -2.4921560287475586, "logps/chosen": -115.44611358642578, "logps/rejected": -233.18299865722656, "loss": 0.0207, "rewards/accuracies": 1.0, "rewards/chosen": -9.017143249511719, "rewards/margins": 5.367494583129883, "rewards/rejected": -14.384637832641602, "step": 19032 }, { "epoch": 2.96, "learning_rate": 1.8776077597385756e-07, "logits/chosen": -1.5813735723495483, "logits/rejected": -2.923020601272583, "logps/chosen": -216.30043029785156, "logps/rejected": -555.545166015625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -7.072589874267578, "rewards/margins": 9.396200180053711, "rewards/rejected": -16.46879005432129, "step": 19033 }, { "epoch": 2.96, "learning_rate": 1.8702733544270968e-07, "logits/chosen": -1.9919806718826294, "logits/rejected": -2.7460858821868896, "logps/chosen": -183.02542114257812, "logps/rejected": -542.2842407226562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.143416404724121, "rewards/margins": 18.72262954711914, "rewards/rejected": -27.866046905517578, "step": 19034 }, { "epoch": 2.96, "learning_rate": 1.862938949115618e-07, "logits/chosen": -2.5487608909606934, "logits/rejected": -1.5978739261627197, "logps/chosen": -241.85426330566406, "logps/rejected": -395.8785095214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.625944137573242, "rewards/margins": 11.148324966430664, "rewards/rejected": -17.774269104003906, "step": 19035 }, { "epoch": 2.96, "learning_rate": 1.8556045438041394e-07, "logits/chosen": -1.5889257192611694, "logits/rejected": -2.4786458015441895, "logps/chosen": -308.02099609375, "logps/rejected": -396.464111328125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -7.861918926239014, "rewards/margins": 7.773350715637207, "rewards/rejected": -15.635269165039062, "step": 19036 }, { "epoch": 2.96, "learning_rate": 1.8482701384926606e-07, "logits/chosen": -2.373676300048828, "logits/rejected": -2.7679834365844727, "logps/chosen": -586.8204956054688, "logps/rejected": -620.9036865234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.9569292068481445, "rewards/margins": 10.311553001403809, "rewards/rejected": -18.268482208251953, "step": 19037 }, { "epoch": 2.96, "learning_rate": 1.8409357331811817e-07, "logits/chosen": -1.7924671173095703, "logits/rejected": -2.6631531715393066, "logps/chosen": -169.14427185058594, "logps/rejected": -385.5782165527344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -3.7303171157836914, "rewards/margins": 10.834306716918945, "rewards/rejected": -14.56462287902832, "step": 19038 }, { "epoch": 2.96, "learning_rate": 1.8336013278697026e-07, "logits/chosen": -1.7680878639221191, "logits/rejected": -2.6998496055603027, "logps/chosen": -119.92755126953125, "logps/rejected": -290.0492248535156, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.431143283843994, "rewards/margins": 6.8862690925598145, "rewards/rejected": -11.317412376403809, "step": 19039 }, { "epoch": 2.96, "learning_rate": 1.8262669225582238e-07, "logits/chosen": -1.2496825456619263, "logits/rejected": -2.297649621963501, "logps/chosen": -118.1668701171875, "logps/rejected": -470.85821533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.063630104064941, "rewards/margins": 12.296987533569336, "rewards/rejected": -17.360618591308594, "step": 19040 }, { "epoch": 2.96, "learning_rate": 1.8189325172467452e-07, "logits/chosen": -2.622469663619995, "logits/rejected": -2.5028536319732666, "logps/chosen": -170.5787811279297, "logps/rejected": -288.07958984375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -6.942318916320801, "rewards/margins": 8.591490745544434, "rewards/rejected": -15.533809661865234, "step": 19041 }, { "epoch": 2.96, "learning_rate": 1.8115981119352664e-07, "logits/chosen": -1.7831950187683105, "logits/rejected": -2.689040422439575, "logps/chosen": -228.62387084960938, "logps/rejected": -600.5003662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.468656063079834, "rewards/margins": 12.60906982421875, "rewards/rejected": -20.07772445678711, "step": 19042 }, { "epoch": 2.96, "learning_rate": 1.8042637066237875e-07, "logits/chosen": -2.2718682289123535, "logits/rejected": -2.2833597660064697, "logps/chosen": -160.9216766357422, "logps/rejected": -307.66363525390625, "loss": 0.1819, "rewards/accuracies": 1.0, "rewards/chosen": -11.019829750061035, "rewards/margins": 6.137545585632324, "rewards/rejected": -17.15737533569336, "step": 19043 }, { "epoch": 2.96, "learning_rate": 1.7969293013123087e-07, "logits/chosen": -2.5557775497436523, "logits/rejected": -2.52167010307312, "logps/chosen": -231.42041015625, "logps/rejected": -416.8719177246094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.729269027709961, "rewards/margins": 10.812074661254883, "rewards/rejected": -19.541343688964844, "step": 19044 }, { "epoch": 2.96, "learning_rate": 1.78959489600083e-07, "logits/chosen": -2.6284985542297363, "logits/rejected": -2.496488571166992, "logps/chosen": -223.48114013671875, "logps/rejected": -510.9772644042969, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.431930541992188, "rewards/margins": 13.800152778625488, "rewards/rejected": -23.23208236694336, "step": 19045 }, { "epoch": 2.96, "learning_rate": 1.7822604906893513e-07, "logits/chosen": -2.5105462074279785, "logits/rejected": -1.884416937828064, "logps/chosen": -186.6327362060547, "logps/rejected": -216.9853973388672, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -6.84295129776001, "rewards/margins": 5.9747467041015625, "rewards/rejected": -12.817697525024414, "step": 19046 }, { "epoch": 2.96, "learning_rate": 1.7749260853778725e-07, "logits/chosen": -0.9810943007469177, "logits/rejected": -2.6973490715026855, "logps/chosen": -141.42080688476562, "logps/rejected": -631.8507080078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.856632232666016, "rewards/margins": 10.713397979736328, "rewards/rejected": -19.570030212402344, "step": 19047 }, { "epoch": 2.96, "learning_rate": 1.7675916800663936e-07, "logits/chosen": -2.31475567817688, "logits/rejected": -2.7533695697784424, "logps/chosen": -399.8540954589844, "logps/rejected": -512.7421264648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.482770919799805, "rewards/margins": 11.88092041015625, "rewards/rejected": -21.363691329956055, "step": 19048 }, { "epoch": 2.96, "learning_rate": 1.7602572747549145e-07, "logits/chosen": -2.7484374046325684, "logits/rejected": -2.412872791290283, "logps/chosen": -311.6519775390625, "logps/rejected": -381.86279296875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -7.886993408203125, "rewards/margins": 9.108461380004883, "rewards/rejected": -16.995454788208008, "step": 19049 }, { "epoch": 2.96, "learning_rate": 1.7529228694434357e-07, "logits/chosen": -2.3854949474334717, "logits/rejected": -2.9173169136047363, "logps/chosen": -132.30508422851562, "logps/rejected": -349.6373596191406, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -9.017366409301758, "rewards/margins": 5.4463043212890625, "rewards/rejected": -14.46367073059082, "step": 19050 }, { "epoch": 2.96, "learning_rate": 1.745588464131957e-07, "logits/chosen": -0.6168097853660583, "logits/rejected": -2.657026529312134, "logps/chosen": -124.87826538085938, "logps/rejected": -388.8525390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.477065086364746, "rewards/margins": 9.50344467163086, "rewards/rejected": -16.98050880432129, "step": 19051 }, { "epoch": 2.96, "learning_rate": 1.7382540588204783e-07, "logits/chosen": -2.210573434829712, "logits/rejected": -1.9515886306762695, "logps/chosen": -153.4573974609375, "logps/rejected": -378.60675048828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.077932834625244, "rewards/margins": 13.04883098602295, "rewards/rejected": -18.12676429748535, "step": 19052 }, { "epoch": 2.96, "learning_rate": 1.7309196535089995e-07, "logits/chosen": -2.5250465869903564, "logits/rejected": -2.345872640609741, "logps/chosen": -186.8997802734375, "logps/rejected": -363.4684143066406, "loss": 0.2013, "rewards/accuracies": 1.0, "rewards/chosen": -10.416370391845703, "rewards/margins": 5.056814193725586, "rewards/rejected": -15.473184585571289, "step": 19053 }, { "epoch": 2.96, "learning_rate": 1.7235852481975206e-07, "logits/chosen": -2.7940945625305176, "logits/rejected": -2.642153024673462, "logps/chosen": -273.7930908203125, "logps/rejected": -235.1688995361328, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": -7.49349308013916, "rewards/margins": 3.149846315383911, "rewards/rejected": -10.643339157104492, "step": 19054 }, { "epoch": 2.96, "learning_rate": 1.7162508428860418e-07, "logits/chosen": -2.4113521575927734, "logits/rejected": -2.0905778408050537, "logps/chosen": -137.324462890625, "logps/rejected": -204.67559814453125, "loss": 0.0399, "rewards/accuracies": 1.0, "rewards/chosen": -9.263273239135742, "rewards/margins": 4.837871551513672, "rewards/rejected": -14.101144790649414, "step": 19055 }, { "epoch": 2.96, "learning_rate": 1.708916437574563e-07, "logits/chosen": -1.5117695331573486, "logits/rejected": -2.824791431427002, "logps/chosen": -119.82325744628906, "logps/rejected": -297.44677734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -9.718528747558594, "rewards/margins": 7.948886871337891, "rewards/rejected": -17.667415618896484, "step": 19056 }, { "epoch": 2.96, "learning_rate": 1.7015820322630844e-07, "logits/chosen": -1.519160509109497, "logits/rejected": -2.4882609844207764, "logps/chosen": -219.95535278320312, "logps/rejected": -391.84832763671875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -10.060741424560547, "rewards/margins": 9.093069076538086, "rewards/rejected": -19.153810501098633, "step": 19057 }, { "epoch": 2.96, "learning_rate": 1.6942476269516056e-07, "logits/chosen": -2.274341583251953, "logits/rejected": -2.4883131980895996, "logps/chosen": -271.62982177734375, "logps/rejected": -418.5281066894531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.450829982757568, "rewards/margins": 11.117815017700195, "rewards/rejected": -18.568645477294922, "step": 19058 }, { "epoch": 2.96, "learning_rate": 1.6869132216401265e-07, "logits/chosen": -2.8615877628326416, "logits/rejected": -2.692514419555664, "logps/chosen": -476.77752685546875, "logps/rejected": -536.19482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.230513095855713, "rewards/margins": 12.891982078552246, "rewards/rejected": -17.122495651245117, "step": 19059 }, { "epoch": 2.96, "learning_rate": 1.6795788163286476e-07, "logits/chosen": -2.369133710861206, "logits/rejected": -1.7341071367263794, "logps/chosen": -496.8960266113281, "logps/rejected": -449.4176025390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.831023216247559, "rewards/margins": 12.266372680664062, "rewards/rejected": -19.097396850585938, "step": 19060 }, { "epoch": 2.96, "learning_rate": 1.6722444110171688e-07, "logits/chosen": -2.798722267150879, "logits/rejected": -2.716913938522339, "logps/chosen": -192.11288452148438, "logps/rejected": -257.8700866699219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.193487167358398, "rewards/margins": 11.011482238769531, "rewards/rejected": -15.20496940612793, "step": 19061 }, { "epoch": 2.96, "learning_rate": 1.6649100057056902e-07, "logits/chosen": -2.0727450847625732, "logits/rejected": -2.621574640274048, "logps/chosen": -284.26751708984375, "logps/rejected": -394.33087158203125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -11.936963081359863, "rewards/margins": 7.088338851928711, "rewards/rejected": -19.02530288696289, "step": 19062 }, { "epoch": 2.96, "learning_rate": 1.6575756003942114e-07, "logits/chosen": -2.6550521850585938, "logits/rejected": -2.5356969833374023, "logps/chosen": -243.7657470703125, "logps/rejected": -473.556884765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.123247146606445, "rewards/margins": 10.514766693115234, "rewards/rejected": -19.63801383972168, "step": 19063 }, { "epoch": 2.96, "learning_rate": 1.6502411950827325e-07, "logits/chosen": -2.426835775375366, "logits/rejected": -1.4702014923095703, "logps/chosen": -330.683349609375, "logps/rejected": -247.4168701171875, "loss": 0.2488, "rewards/accuracies": 1.0, "rewards/chosen": -7.424315929412842, "rewards/margins": 3.5950350761413574, "rewards/rejected": -11.0193510055542, "step": 19064 }, { "epoch": 2.97, "learning_rate": 1.6429067897712537e-07, "logits/chosen": -1.7357200384140015, "logits/rejected": -2.6100833415985107, "logps/chosen": -244.74749755859375, "logps/rejected": -583.18310546875, "loss": 0.3845, "rewards/accuracies": 0.5, "rewards/chosen": -11.733648300170898, "rewards/margins": 9.865543365478516, "rewards/rejected": -21.599191665649414, "step": 19065 }, { "epoch": 2.97, "learning_rate": 1.635572384459775e-07, "logits/chosen": -2.8156046867370605, "logits/rejected": -2.085265874862671, "logps/chosen": -364.328125, "logps/rejected": -351.4370422363281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.575204849243164, "rewards/margins": 9.105205535888672, "rewards/rejected": -15.680410385131836, "step": 19066 }, { "epoch": 2.97, "learning_rate": 1.628237979148296e-07, "logits/chosen": -2.1999380588531494, "logits/rejected": -2.6114754676818848, "logps/chosen": -248.46949768066406, "logps/rejected": -475.4289855957031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.251863479614258, "rewards/margins": 13.934869766235352, "rewards/rejected": -23.18673324584961, "step": 19067 }, { "epoch": 2.97, "learning_rate": 1.6209035738368175e-07, "logits/chosen": -1.174144983291626, "logits/rejected": -2.408795118331909, "logps/chosen": -131.7835693359375, "logps/rejected": -413.6653137207031, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -9.56778335571289, "rewards/margins": 8.637818336486816, "rewards/rejected": -18.205602645874023, "step": 19068 }, { "epoch": 2.97, "learning_rate": 1.6135691685253386e-07, "logits/chosen": -2.5470950603485107, "logits/rejected": -2.3803181648254395, "logps/chosen": -614.5120849609375, "logps/rejected": -655.4639892578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.951395511627197, "rewards/margins": 10.431282043457031, "rewards/rejected": -18.382678985595703, "step": 19069 }, { "epoch": 2.97, "learning_rate": 1.6062347632138595e-07, "logits/chosen": -1.0096246004104614, "logits/rejected": -2.3929827213287354, "logps/chosen": -153.5941162109375, "logps/rejected": -517.4793701171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.061927795410156, "rewards/margins": 13.827849388122559, "rewards/rejected": -22.88977813720703, "step": 19070 }, { "epoch": 2.97, "learning_rate": 1.5989003579023807e-07, "logits/chosen": -2.0491466522216797, "logits/rejected": -0.7526338696479797, "logps/chosen": -464.9854736328125, "logps/rejected": -298.3088073730469, "loss": 0.1051, "rewards/accuracies": 1.0, "rewards/chosen": -8.493510246276855, "rewards/margins": 8.614163398742676, "rewards/rejected": -17.10767364501953, "step": 19071 }, { "epoch": 2.97, "learning_rate": 1.5915659525909019e-07, "logits/chosen": -2.7420895099639893, "logits/rejected": -2.8678901195526123, "logps/chosen": -201.16384887695312, "logps/rejected": -517.859130859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.741616249084473, "rewards/margins": 10.566890716552734, "rewards/rejected": -18.30850601196289, "step": 19072 }, { "epoch": 2.97, "learning_rate": 1.5842315472794233e-07, "logits/chosen": -2.132434844970703, "logits/rejected": -1.946095585823059, "logps/chosen": -453.35797119140625, "logps/rejected": -447.99554443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.342462539672852, "rewards/margins": 13.262918472290039, "rewards/rejected": -21.60538101196289, "step": 19073 }, { "epoch": 2.97, "learning_rate": 1.5768971419679445e-07, "logits/chosen": -2.3431732654571533, "logits/rejected": -2.6829936504364014, "logps/chosen": -152.17459106445312, "logps/rejected": -219.7495574951172, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.628833293914795, "rewards/margins": 6.969512462615967, "rewards/rejected": -14.598345756530762, "step": 19074 }, { "epoch": 2.97, "learning_rate": 1.5695627366564656e-07, "logits/chosen": -2.5791587829589844, "logits/rejected": -1.790961503982544, "logps/chosen": -306.7355041503906, "logps/rejected": -247.00469970703125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -7.468268394470215, "rewards/margins": 6.8043670654296875, "rewards/rejected": -14.272635459899902, "step": 19075 }, { "epoch": 2.97, "learning_rate": 1.5622283313449868e-07, "logits/chosen": -1.5944918394088745, "logits/rejected": -2.3471152782440186, "logps/chosen": -249.10084533691406, "logps/rejected": -405.1387939453125, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.791894912719727, "rewards/margins": 12.50179672241211, "rewards/rejected": -21.293691635131836, "step": 19076 }, { "epoch": 2.97, "learning_rate": 1.554893926033508e-07, "logits/chosen": -2.7948005199432373, "logits/rejected": -2.3704729080200195, "logps/chosen": -177.7332305908203, "logps/rejected": -258.94708251953125, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.126274108886719, "rewards/margins": 8.477439880371094, "rewards/rejected": -15.603713989257812, "step": 19077 }, { "epoch": 2.97, "learning_rate": 1.5475595207220294e-07, "logits/chosen": -1.758459210395813, "logits/rejected": -2.035909652709961, "logps/chosen": -379.22625732421875, "logps/rejected": -589.4036865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.538518905639648, "rewards/margins": 14.447853088378906, "rewards/rejected": -20.986371994018555, "step": 19078 }, { "epoch": 2.97, "learning_rate": 1.5402251154105506e-07, "logits/chosen": -2.663553476333618, "logits/rejected": -2.719391345977783, "logps/chosen": -636.555908203125, "logps/rejected": -683.86181640625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.596038818359375, "rewards/margins": 10.992680549621582, "rewards/rejected": -17.58871841430664, "step": 19079 }, { "epoch": 2.97, "learning_rate": 1.5328907100990715e-07, "logits/chosen": -2.203000545501709, "logits/rejected": -2.733351230621338, "logps/chosen": -259.82818603515625, "logps/rejected": -486.598388671875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.50258731842041, "rewards/margins": 12.139911651611328, "rewards/rejected": -19.642499923706055, "step": 19080 }, { "epoch": 2.97, "learning_rate": 1.5255563047875926e-07, "logits/chosen": -2.8681349754333496, "logits/rejected": -3.102487087249756, "logps/chosen": -131.59625244140625, "logps/rejected": -169.02981567382812, "loss": 0.0231, "rewards/accuracies": 1.0, "rewards/chosen": -6.386994361877441, "rewards/margins": 4.401566028594971, "rewards/rejected": -10.78856086730957, "step": 19081 }, { "epoch": 2.97, "learning_rate": 1.5182218994761138e-07, "logits/chosen": -2.7008609771728516, "logits/rejected": -2.624295949935913, "logps/chosen": -274.4342956542969, "logps/rejected": -253.29879760742188, "loss": 0.0238, "rewards/accuracies": 1.0, "rewards/chosen": -7.242617607116699, "rewards/margins": 4.236432075500488, "rewards/rejected": -11.479049682617188, "step": 19082 }, { "epoch": 2.97, "learning_rate": 1.510887494164635e-07, "logits/chosen": -2.574838638305664, "logits/rejected": -2.660511016845703, "logps/chosen": -131.33770751953125, "logps/rejected": -265.5016174316406, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -6.3557047843933105, "rewards/margins": 7.301990509033203, "rewards/rejected": -13.657695770263672, "step": 19083 }, { "epoch": 2.97, "learning_rate": 1.5035530888531564e-07, "logits/chosen": -2.758673906326294, "logits/rejected": -3.151841402053833, "logps/chosen": -183.6673583984375, "logps/rejected": -355.38946533203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.364170551300049, "rewards/margins": 9.668905258178711, "rewards/rejected": -15.033075332641602, "step": 19084 }, { "epoch": 2.97, "learning_rate": 1.4962186835416775e-07, "logits/chosen": -1.3821858167648315, "logits/rejected": -2.4167726039886475, "logps/chosen": -315.9491271972656, "logps/rejected": -638.86767578125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.727307319641113, "rewards/margins": 10.295141220092773, "rewards/rejected": -20.022449493408203, "step": 19085 }, { "epoch": 2.97, "learning_rate": 1.4888842782301987e-07, "logits/chosen": -2.5852138996124268, "logits/rejected": -2.67965030670166, "logps/chosen": -180.334228515625, "logps/rejected": -318.16876220703125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -7.411653518676758, "rewards/margins": 9.41702938079834, "rewards/rejected": -16.82868194580078, "step": 19086 }, { "epoch": 2.97, "learning_rate": 1.48154987291872e-07, "logits/chosen": -1.7393029928207397, "logits/rejected": -2.427839756011963, "logps/chosen": -210.01177978515625, "logps/rejected": -244.65164184570312, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -4.891584873199463, "rewards/margins": 6.299524307250977, "rewards/rejected": -11.191108703613281, "step": 19087 }, { "epoch": 2.97, "learning_rate": 1.474215467607241e-07, "logits/chosen": -2.6516225337982178, "logits/rejected": -2.5375564098358154, "logps/chosen": -371.86749267578125, "logps/rejected": -415.22216796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -7.809068202972412, "rewards/margins": 8.455693244934082, "rewards/rejected": -16.264760971069336, "step": 19088 }, { "epoch": 2.97, "learning_rate": 1.4668810622957625e-07, "logits/chosen": -2.1586406230926514, "logits/rejected": -2.8472824096679688, "logps/chosen": -194.53738403320312, "logps/rejected": -361.910400390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.577813148498535, "rewards/margins": 7.359620571136475, "rewards/rejected": -13.937433242797852, "step": 19089 }, { "epoch": 2.97, "learning_rate": 1.4595466569842834e-07, "logits/chosen": -2.7369532585144043, "logits/rejected": -2.646030902862549, "logps/chosen": -534.384033203125, "logps/rejected": -778.5657348632812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.761500835418701, "rewards/margins": 13.209644317626953, "rewards/rejected": -18.971145629882812, "step": 19090 }, { "epoch": 2.97, "learning_rate": 1.4522122516728045e-07, "logits/chosen": -1.9082998037338257, "logits/rejected": -2.9245972633361816, "logps/chosen": -143.16522216796875, "logps/rejected": -471.2339782714844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.118868827819824, "rewards/margins": 12.108344078063965, "rewards/rejected": -19.22721290588379, "step": 19091 }, { "epoch": 2.97, "learning_rate": 1.4448778463613257e-07, "logits/chosen": -1.7326388359069824, "logits/rejected": -2.878467082977295, "logps/chosen": -152.97271728515625, "logps/rejected": -472.2371826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.002318859100342, "rewards/margins": 13.427215576171875, "rewards/rejected": -18.429534912109375, "step": 19092 }, { "epoch": 2.97, "learning_rate": 1.4375434410498469e-07, "logits/chosen": -2.6931345462799072, "logits/rejected": -2.7050771713256836, "logps/chosen": -720.6776123046875, "logps/rejected": -836.1663818359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.0438055992126465, "rewards/margins": 10.895485877990723, "rewards/rejected": -15.939291000366211, "step": 19093 }, { "epoch": 2.97, "learning_rate": 1.4302090357383683e-07, "logits/chosen": -2.556580066680908, "logits/rejected": -1.748243808746338, "logps/chosen": -409.0548095703125, "logps/rejected": -359.9565734863281, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.2793354988098145, "rewards/margins": 11.445087432861328, "rewards/rejected": -18.724422454833984, "step": 19094 }, { "epoch": 2.97, "learning_rate": 1.4228746304268895e-07, "logits/chosen": -2.1435484886169434, "logits/rejected": -1.5774235725402832, "logps/chosen": -166.5990447998047, "logps/rejected": -401.25531005859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.648308753967285, "rewards/margins": 9.304149627685547, "rewards/rejected": -14.952458381652832, "step": 19095 }, { "epoch": 2.97, "learning_rate": 1.4155402251154106e-07, "logits/chosen": -1.7856807708740234, "logits/rejected": -2.655379295349121, "logps/chosen": -197.12274169921875, "logps/rejected": -447.9402160644531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.996335983276367, "rewards/margins": 12.854385375976562, "rewards/rejected": -20.85072135925293, "step": 19096 }, { "epoch": 2.97, "learning_rate": 1.4082058198039318e-07, "logits/chosen": -2.3615946769714355, "logits/rejected": -2.6917552947998047, "logps/chosen": -861.0949096679688, "logps/rejected": -1049.51318359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.472794532775879, "rewards/margins": 18.48470687866211, "rewards/rejected": -26.957502365112305, "step": 19097 }, { "epoch": 2.97, "learning_rate": 1.400871414492453e-07, "logits/chosen": -2.1885077953338623, "logits/rejected": -3.088948965072632, "logps/chosen": -180.0452423095703, "logps/rejected": -448.51446533203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.543004989624023, "rewards/margins": 11.052849769592285, "rewards/rejected": -18.595855712890625, "step": 19098 }, { "epoch": 2.97, "learning_rate": 1.393537009180974e-07, "logits/chosen": -1.7479110956192017, "logits/rejected": -2.6893579959869385, "logps/chosen": -88.31024169921875, "logps/rejected": -282.006591796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.657326698303223, "rewards/margins": 7.379037857055664, "rewards/rejected": -14.036364555358887, "step": 19099 }, { "epoch": 2.97, "learning_rate": 1.3862026038694953e-07, "logits/chosen": -2.1274611949920654, "logits/rejected": -2.911092758178711, "logps/chosen": -125.81385803222656, "logps/rejected": -328.57666015625, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -7.573151588439941, "rewards/margins": 5.685488700866699, "rewards/rejected": -13.25864028930664, "step": 19100 }, { "epoch": 2.97, "learning_rate": 1.3788681985580165e-07, "logits/chosen": -1.8637433052062988, "logits/rejected": -2.8323421478271484, "logps/chosen": -422.6379699707031, "logps/rejected": -625.9697265625, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.82470703125, "rewards/margins": 7.693894863128662, "rewards/rejected": -14.51860237121582, "step": 19101 }, { "epoch": 2.97, "learning_rate": 1.3715337932465376e-07, "logits/chosen": -2.210749864578247, "logits/rejected": -2.7448182106018066, "logps/chosen": -187.01409912109375, "logps/rejected": -367.7186584472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.387420654296875, "rewards/margins": 11.412155151367188, "rewards/rejected": -16.799575805664062, "step": 19102 }, { "epoch": 2.97, "learning_rate": 1.3641993879350588e-07, "logits/chosen": -2.148073196411133, "logits/rejected": -2.6486032009124756, "logps/chosen": -204.6815643310547, "logps/rejected": -383.4400939941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.227780818939209, "rewards/margins": 12.804986953735352, "rewards/rejected": -18.03276824951172, "step": 19103 }, { "epoch": 2.97, "learning_rate": 1.35686498262358e-07, "logits/chosen": -2.6193928718566895, "logits/rejected": -2.54575514793396, "logps/chosen": -298.83966064453125, "logps/rejected": -383.8813171386719, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.620537757873535, "rewards/margins": 8.150452613830566, "rewards/rejected": -13.770990371704102, "step": 19104 }, { "epoch": 2.97, "learning_rate": 1.3495305773121014e-07, "logits/chosen": -2.457751750946045, "logits/rejected": -1.8253575563430786, "logps/chosen": -684.5809936523438, "logps/rejected": -603.1901245117188, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.7942962646484375, "rewards/margins": 9.555729866027832, "rewards/rejected": -14.35002613067627, "step": 19105 }, { "epoch": 2.97, "learning_rate": 1.3421961720006225e-07, "logits/chosen": -0.44568130373954773, "logits/rejected": -2.2476646900177, "logps/chosen": -119.2876205444336, "logps/rejected": -626.64599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.838157653808594, "rewards/margins": 12.690718650817871, "rewards/rejected": -20.52887725830078, "step": 19106 }, { "epoch": 2.97, "learning_rate": 1.3348617666891437e-07, "logits/chosen": -1.2145048379898071, "logits/rejected": -2.6429126262664795, "logps/chosen": -290.26171875, "logps/rejected": -672.1776123046875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.049947738647461, "rewards/margins": 9.227893829345703, "rewards/rejected": -19.277841567993164, "step": 19107 }, { "epoch": 2.97, "learning_rate": 1.327527361377665e-07, "logits/chosen": -2.3098270893096924, "logits/rejected": -2.7480123043060303, "logps/chosen": -207.06100463867188, "logps/rejected": -366.09466552734375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -7.578746795654297, "rewards/margins": 10.019882202148438, "rewards/rejected": -17.598628997802734, "step": 19108 }, { "epoch": 2.97, "learning_rate": 1.320192956066186e-07, "logits/chosen": -2.7100980281829834, "logits/rejected": -1.9993395805358887, "logps/chosen": -294.0229797363281, "logps/rejected": -261.274658203125, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": -7.009499549865723, "rewards/margins": 6.669850826263428, "rewards/rejected": -13.679349899291992, "step": 19109 }, { "epoch": 2.97, "learning_rate": 1.3128585507547072e-07, "logits/chosen": -2.875424385070801, "logits/rejected": -2.759680986404419, "logps/chosen": -213.06410217285156, "logps/rejected": -262.9372253417969, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -9.00794792175293, "rewards/margins": 6.284368515014648, "rewards/rejected": -15.292316436767578, "step": 19110 }, { "epoch": 2.97, "learning_rate": 1.3055241454432284e-07, "logits/chosen": -2.5000128746032715, "logits/rejected": -2.714402914047241, "logps/chosen": -106.97013854980469, "logps/rejected": -259.48663330078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -7.094491004943848, "rewards/margins": 9.393848419189453, "rewards/rejected": -16.488340377807617, "step": 19111 }, { "epoch": 2.97, "learning_rate": 1.2981897401317495e-07, "logits/chosen": -1.4360800981521606, "logits/rejected": -2.7547333240509033, "logps/chosen": -206.23410034179688, "logps/rejected": -490.12396240234375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -9.283447265625, "rewards/margins": 8.800660133361816, "rewards/rejected": -18.0841064453125, "step": 19112 }, { "epoch": 2.97, "learning_rate": 1.2908553348202707e-07, "logits/chosen": -2.3546407222747803, "logits/rejected": -2.6636886596679688, "logps/chosen": -254.88992309570312, "logps/rejected": -327.0433654785156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.02110481262207, "rewards/margins": 8.885554313659668, "rewards/rejected": -16.906658172607422, "step": 19113 }, { "epoch": 2.97, "learning_rate": 1.2835209295087919e-07, "logits/chosen": -2.489121675491333, "logits/rejected": -2.5546000003814697, "logps/chosen": -745.090087890625, "logps/rejected": -574.3533935546875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -4.008480072021484, "rewards/margins": 10.086898803710938, "rewards/rejected": -14.095378875732422, "step": 19114 }, { "epoch": 2.97, "learning_rate": 1.276186524197313e-07, "logits/chosen": -2.5091612339019775, "logits/rejected": -2.6139416694641113, "logps/chosen": -525.4904174804688, "logps/rejected": -436.6147766113281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.623095512390137, "rewards/margins": 11.854360580444336, "rewards/rejected": -19.47745704650879, "step": 19115 }, { "epoch": 2.97, "learning_rate": 1.2688521188858345e-07, "logits/chosen": -1.8626474142074585, "logits/rejected": -2.597062349319458, "logps/chosen": -288.57843017578125, "logps/rejected": -425.998291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.741056442260742, "rewards/margins": 12.504274368286133, "rewards/rejected": -19.245330810546875, "step": 19116 }, { "epoch": 2.97, "learning_rate": 1.2615177135743556e-07, "logits/chosen": -2.4504263401031494, "logits/rejected": -1.5260193347930908, "logps/chosen": -364.67376708984375, "logps/rejected": -402.6969909667969, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -7.294451713562012, "rewards/margins": 11.535140991210938, "rewards/rejected": -18.829591751098633, "step": 19117 }, { "epoch": 2.97, "learning_rate": 1.2541833082628768e-07, "logits/chosen": -2.4483683109283447, "logits/rejected": -2.783414602279663, "logps/chosen": -124.70223999023438, "logps/rejected": -319.021240234375, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -5.8693647384643555, "rewards/margins": 9.630535125732422, "rewards/rejected": -15.499898910522461, "step": 19118 }, { "epoch": 2.97, "learning_rate": 1.246848902951398e-07, "logits/chosen": -2.5652449131011963, "logits/rejected": -3.0979626178741455, "logps/chosen": -362.13092041015625, "logps/rejected": -516.8114624023438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.300162315368652, "rewards/margins": 8.730412483215332, "rewards/rejected": -17.030574798583984, "step": 19119 }, { "epoch": 2.97, "learning_rate": 1.239514497639919e-07, "logits/chosen": -2.0889437198638916, "logits/rejected": -2.712186813354492, "logps/chosen": -371.48748779296875, "logps/rejected": -486.06671142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.016205787658691, "rewards/margins": 10.837242126464844, "rewards/rejected": -14.853448867797852, "step": 19120 }, { "epoch": 2.97, "learning_rate": 1.2321800923284403e-07, "logits/chosen": -2.3201258182525635, "logits/rejected": -2.8757851123809814, "logps/chosen": -171.11862182617188, "logps/rejected": -395.53955078125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.269136428833008, "rewards/margins": 9.073941230773926, "rewards/rejected": -15.34307861328125, "step": 19121 }, { "epoch": 2.97, "learning_rate": 1.2248456870169614e-07, "logits/chosen": -2.6426022052764893, "logits/rejected": -3.109694004058838, "logps/chosen": -184.57562255859375, "logps/rejected": -531.2257080078125, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/chosen": -5.3809814453125, "rewards/margins": 11.273292541503906, "rewards/rejected": -16.654273986816406, "step": 19122 }, { "epoch": 2.97, "learning_rate": 1.2175112817054826e-07, "logits/chosen": -1.7418044805526733, "logits/rejected": -2.691314220428467, "logps/chosen": -173.8684539794922, "logps/rejected": -476.9433288574219, "loss": 0.3343, "rewards/accuracies": 1.0, "rewards/chosen": -10.151956558227539, "rewards/margins": 8.76274299621582, "rewards/rejected": -18.91469955444336, "step": 19123 }, { "epoch": 2.97, "learning_rate": 1.2101768763940038e-07, "logits/chosen": -2.625978946685791, "logits/rejected": -2.637162446975708, "logps/chosen": -135.91331481933594, "logps/rejected": -229.8770751953125, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -10.850011825561523, "rewards/margins": 6.29594087600708, "rewards/rejected": -17.145952224731445, "step": 19124 }, { "epoch": 2.97, "learning_rate": 1.202842471082525e-07, "logits/chosen": -2.136469602584839, "logits/rejected": -2.7981550693511963, "logps/chosen": -165.00144958496094, "logps/rejected": -382.6473693847656, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.285716533660889, "rewards/margins": 11.956021308898926, "rewards/rejected": -16.241737365722656, "step": 19125 }, { "epoch": 2.97, "learning_rate": 1.1955080657710464e-07, "logits/chosen": -1.1726386547088623, "logits/rejected": -2.8738420009613037, "logps/chosen": -94.85491180419922, "logps/rejected": -490.672607421875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -7.819885730743408, "rewards/margins": 7.930416107177734, "rewards/rejected": -15.750301361083984, "step": 19126 }, { "epoch": 2.97, "learning_rate": 1.1881736604595675e-07, "logits/chosen": -2.3357627391815186, "logits/rejected": -1.965308666229248, "logps/chosen": -560.9675903320312, "logps/rejected": -489.0429992675781, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -9.201504707336426, "rewards/margins": 9.624181747436523, "rewards/rejected": -18.825687408447266, "step": 19127 }, { "epoch": 2.97, "learning_rate": 1.1808392551480886e-07, "logits/chosen": -1.4589102268218994, "logits/rejected": -2.3013622760772705, "logps/chosen": -424.9705505371094, "logps/rejected": -514.0821533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.807954788208008, "rewards/margins": 14.71937370300293, "rewards/rejected": -23.527328491210938, "step": 19128 }, { "epoch": 2.97, "learning_rate": 1.1735048498366097e-07, "logits/chosen": -2.6493008136749268, "logits/rejected": -2.6224169731140137, "logps/chosen": -181.85430908203125, "logps/rejected": -316.9486083984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -5.927004814147949, "rewards/margins": 9.410289764404297, "rewards/rejected": -15.33729362487793, "step": 19129 }, { "epoch": 2.98, "learning_rate": 1.1661704445251309e-07, "logits/chosen": -2.057368516921997, "logits/rejected": -2.5518529415130615, "logps/chosen": -306.2223815917969, "logps/rejected": -569.8911743164062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.888889312744141, "rewards/margins": 12.811529159545898, "rewards/rejected": -18.70041847229004, "step": 19130 }, { "epoch": 2.98, "learning_rate": 1.1588360392136521e-07, "logits/chosen": -2.052497625350952, "logits/rejected": -2.7249019145965576, "logps/chosen": -263.0509338378906, "logps/rejected": -466.4974060058594, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": -5.898456573486328, "rewards/margins": 7.322031021118164, "rewards/rejected": -13.220487594604492, "step": 19131 }, { "epoch": 2.98, "learning_rate": 1.1515016339021735e-07, "logits/chosen": -2.1488635540008545, "logits/rejected": -2.6290335655212402, "logps/chosen": -305.0023498535156, "logps/rejected": -431.7136535644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.693887233734131, "rewards/margins": 10.574739456176758, "rewards/rejected": -18.268627166748047, "step": 19132 }, { "epoch": 2.98, "learning_rate": 1.1441672285906945e-07, "logits/chosen": -2.809202194213867, "logits/rejected": -2.985119342803955, "logps/chosen": -101.56732940673828, "logps/rejected": -523.7538452148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.776839256286621, "rewards/margins": 16.16452407836914, "rewards/rejected": -20.941364288330078, "step": 19133 }, { "epoch": 2.98, "learning_rate": 1.1368328232792157e-07, "logits/chosen": -2.7131903171539307, "logits/rejected": -2.6665360927581787, "logps/chosen": -276.0090026855469, "logps/rejected": -634.7730712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.96011209487915, "rewards/margins": 14.40761661529541, "rewards/rejected": -20.36772918701172, "step": 19134 }, { "epoch": 2.98, "learning_rate": 1.1294984179677369e-07, "logits/chosen": -2.500214099884033, "logits/rejected": -2.6323723793029785, "logps/chosen": -139.85202026367188, "logps/rejected": -251.54953002929688, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.574066162109375, "rewards/margins": 9.948437690734863, "rewards/rejected": -16.522504806518555, "step": 19135 }, { "epoch": 2.98, "learning_rate": 1.122164012656258e-07, "logits/chosen": -2.2489101886749268, "logits/rejected": -2.3394596576690674, "logps/chosen": -316.3008117675781, "logps/rejected": -454.14471435546875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.368412017822266, "rewards/margins": 9.291751861572266, "rewards/rejected": -16.66016387939453, "step": 19136 }, { "epoch": 2.98, "learning_rate": 1.1148296073447795e-07, "logits/chosen": -1.2318761348724365, "logits/rejected": -2.489784002304077, "logps/chosen": -173.47640991210938, "logps/rejected": -606.8145751953125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -9.297752380371094, "rewards/margins": 6.730052471160889, "rewards/rejected": -16.02780532836914, "step": 19137 }, { "epoch": 2.98, "learning_rate": 1.1074952020333006e-07, "logits/chosen": -2.4829022884368896, "logits/rejected": -1.9977221488952637, "logps/chosen": -269.388427734375, "logps/rejected": -330.15264892578125, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -9.33392333984375, "rewards/margins": 4.63692045211792, "rewards/rejected": -13.970843315124512, "step": 19138 }, { "epoch": 2.98, "learning_rate": 1.1001607967218217e-07, "logits/chosen": -2.763202667236328, "logits/rejected": -1.8012900352478027, "logps/chosen": -537.3834838867188, "logps/rejected": -294.40972900390625, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -8.664335250854492, "rewards/margins": 9.63298225402832, "rewards/rejected": -18.297317504882812, "step": 19139 }, { "epoch": 2.98, "learning_rate": 1.0928263914103428e-07, "logits/chosen": -2.615079641342163, "logits/rejected": -1.7398701906204224, "logps/chosen": -409.02593994140625, "logps/rejected": -508.68035888671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -5.499731540679932, "rewards/margins": 7.440723896026611, "rewards/rejected": -12.940455436706543, "step": 19140 }, { "epoch": 2.98, "learning_rate": 1.0854919860988641e-07, "logits/chosen": -2.596026659011841, "logits/rejected": -2.2410361766815186, "logps/chosen": -974.2889404296875, "logps/rejected": -613.7554321289062, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.5034403800964355, "rewards/margins": 7.481741905212402, "rewards/rejected": -14.98518180847168, "step": 19141 }, { "epoch": 2.98, "learning_rate": 1.0781575807873853e-07, "logits/chosen": -2.2960619926452637, "logits/rejected": -2.6767964363098145, "logps/chosen": -550.7213134765625, "logps/rejected": -702.2489013671875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/chosen": -6.767695426940918, "rewards/margins": 9.421941757202148, "rewards/rejected": -16.189638137817383, "step": 19142 }, { "epoch": 2.98, "learning_rate": 1.0708231754759064e-07, "logits/chosen": -1.5875771045684814, "logits/rejected": -2.638488292694092, "logps/chosen": -565.4486083984375, "logps/rejected": -639.6236572265625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -6.6040730476379395, "rewards/margins": 11.996076583862305, "rewards/rejected": -18.60015106201172, "step": 19143 }, { "epoch": 2.98, "learning_rate": 1.0634887701644276e-07, "logits/chosen": -2.5128188133239746, "logits/rejected": -2.2764227390289307, "logps/chosen": -453.2022399902344, "logps/rejected": -634.7613525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.595599174499512, "rewards/margins": 15.053074836730957, "rewards/rejected": -22.64867401123047, "step": 19144 }, { "epoch": 2.98, "learning_rate": 1.0561543648529488e-07, "logits/chosen": -1.749024510383606, "logits/rejected": -2.5164976119995117, "logps/chosen": -152.60812377929688, "logps/rejected": -390.1241760253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.090889930725098, "rewards/margins": 13.40884017944336, "rewards/rejected": -19.49972915649414, "step": 19145 }, { "epoch": 2.98, "learning_rate": 1.04881995954147e-07, "logits/chosen": -2.598198890686035, "logits/rejected": -2.546407461166382, "logps/chosen": -322.6362609863281, "logps/rejected": -369.9894104003906, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -10.13774299621582, "rewards/margins": 6.9193949699401855, "rewards/rejected": -17.057138442993164, "step": 19146 }, { "epoch": 2.98, "learning_rate": 1.0414855542299912e-07, "logits/chosen": -2.3805835247039795, "logits/rejected": -2.8546078205108643, "logps/chosen": -481.1724548339844, "logps/rejected": -811.8882446289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.938512802124023, "rewards/margins": 15.950757026672363, "rewards/rejected": -24.889270782470703, "step": 19147 }, { "epoch": 2.98, "learning_rate": 1.0341511489185124e-07, "logits/chosen": -2.7439746856689453, "logits/rejected": -2.3574178218841553, "logps/chosen": -223.2359161376953, "logps/rejected": -316.33929443359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.8852128982543945, "rewards/margins": 8.181351661682129, "rewards/rejected": -14.066564559936523, "step": 19148 }, { "epoch": 2.98, "learning_rate": 1.0268167436070336e-07, "logits/chosen": -2.9405109882354736, "logits/rejected": -2.4326260089874268, "logps/chosen": -298.3089599609375, "logps/rejected": -300.6380615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.410870552062988, "rewards/margins": 11.568415641784668, "rewards/rejected": -18.979286193847656, "step": 19149 }, { "epoch": 2.98, "learning_rate": 1.0194823382955547e-07, "logits/chosen": -1.429914116859436, "logits/rejected": -2.7287168502807617, "logps/chosen": -172.16015625, "logps/rejected": -793.0330810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.526549339294434, "rewards/margins": 16.153385162353516, "rewards/rejected": -22.679935455322266, "step": 19150 }, { "epoch": 2.98, "learning_rate": 1.0121479329840759e-07, "logits/chosen": -2.8314363956451416, "logits/rejected": -0.758253812789917, "logps/chosen": -1061.0565185546875, "logps/rejected": -339.34844970703125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.2293381690979, "rewards/margins": 9.622905731201172, "rewards/rejected": -15.852243423461914, "step": 19151 }, { "epoch": 2.98, "learning_rate": 1.0048135276725972e-07, "logits/chosen": -2.365814447402954, "logits/rejected": -2.66678524017334, "logps/chosen": -185.7131805419922, "logps/rejected": -370.885009765625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -9.263738632202148, "rewards/margins": 10.799591064453125, "rewards/rejected": -20.063331604003906, "step": 19152 }, { "epoch": 2.98, "learning_rate": 9.974791223611184e-08, "logits/chosen": -2.6138978004455566, "logits/rejected": -2.8259880542755127, "logps/chosen": -150.9469451904297, "logps/rejected": -201.49404907226562, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -5.280613899230957, "rewards/margins": 7.7330522537231445, "rewards/rejected": -13.013666152954102, "step": 19153 }, { "epoch": 2.98, "learning_rate": 9.901447170496394e-08, "logits/chosen": -1.467041254043579, "logits/rejected": -2.0323026180267334, "logps/chosen": -485.1285400390625, "logps/rejected": -724.0866088867188, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -9.353106498718262, "rewards/margins": 12.646862030029297, "rewards/rejected": -21.999967575073242, "step": 19154 }, { "epoch": 2.98, "learning_rate": 9.828103117381607e-08, "logits/chosen": -2.504502773284912, "logits/rejected": -2.7237801551818848, "logps/chosen": -578.3402099609375, "logps/rejected": -730.65478515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.53095817565918, "rewards/margins": 12.046165466308594, "rewards/rejected": -17.57712173461914, "step": 19155 }, { "epoch": 2.98, "learning_rate": 9.754759064266819e-08, "logits/chosen": -0.6208459138870239, "logits/rejected": -1.253467321395874, "logps/chosen": -333.5464172363281, "logps/rejected": -476.39019775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.135225296020508, "rewards/margins": 14.018651962280273, "rewards/rejected": -20.15387725830078, "step": 19156 }, { "epoch": 2.98, "learning_rate": 9.681415011152032e-08, "logits/chosen": -2.599827527999878, "logits/rejected": -2.7588489055633545, "logps/chosen": -450.08599853515625, "logps/rejected": -493.6435852050781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.2159576416015625, "rewards/margins": 12.516679763793945, "rewards/rejected": -19.732637405395508, "step": 19157 }, { "epoch": 2.98, "learning_rate": 9.608070958037243e-08, "logits/chosen": -1.1533195972442627, "logits/rejected": -2.6545045375823975, "logps/chosen": -179.79278564453125, "logps/rejected": -524.3007202148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.370492935180664, "rewards/margins": 12.632828712463379, "rewards/rejected": -21.00332260131836, "step": 19158 }, { "epoch": 2.98, "learning_rate": 9.534726904922454e-08, "logits/chosen": -2.155498743057251, "logits/rejected": -2.542429208755493, "logps/chosen": -205.09994506835938, "logps/rejected": -527.527587890625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -8.461235046386719, "rewards/margins": 10.849371910095215, "rewards/rejected": -19.310606002807617, "step": 19159 }, { "epoch": 2.98, "learning_rate": 9.461382851807667e-08, "logits/chosen": -2.184718370437622, "logits/rejected": -2.4728589057922363, "logps/chosen": -188.1776123046875, "logps/rejected": -239.14361572265625, "loss": 0.023, "rewards/accuracies": 1.0, "rewards/chosen": -6.0412797927856445, "rewards/margins": 4.7827467918396, "rewards/rejected": -10.824026107788086, "step": 19160 }, { "epoch": 2.98, "learning_rate": 9.388038798692878e-08, "logits/chosen": -2.238133192062378, "logits/rejected": -2.7970528602600098, "logps/chosen": -206.635498046875, "logps/rejected": -485.01751708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.275191307067871, "rewards/margins": 9.41848373413086, "rewards/rejected": -19.693675994873047, "step": 19161 }, { "epoch": 2.98, "learning_rate": 9.31469474557809e-08, "logits/chosen": -2.582279682159424, "logits/rejected": -2.2114157676696777, "logps/chosen": -359.5777893066406, "logps/rejected": -426.053466796875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -10.258743286132812, "rewards/margins": 8.556711196899414, "rewards/rejected": -18.815454483032227, "step": 19162 }, { "epoch": 2.98, "learning_rate": 9.241350692463303e-08, "logits/chosen": -2.630462646484375, "logits/rejected": -2.8709301948547363, "logps/chosen": -95.4171142578125, "logps/rejected": -294.9024963378906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.539335250854492, "rewards/margins": 11.913366317749023, "rewards/rejected": -19.452701568603516, "step": 19163 }, { "epoch": 2.98, "learning_rate": 9.168006639348513e-08, "logits/chosen": -1.7402626276016235, "logits/rejected": -2.6013343334198, "logps/chosen": -240.65386962890625, "logps/rejected": -687.078369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.260066986083984, "rewards/margins": 18.023571014404297, "rewards/rejected": -27.28363800048828, "step": 19164 }, { "epoch": 2.98, "learning_rate": 9.094662586233726e-08, "logits/chosen": -2.6142306327819824, "logits/rejected": -3.1433072090148926, "logps/chosen": -112.47993469238281, "logps/rejected": -281.0844421386719, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.467627048492432, "rewards/margins": 9.028948783874512, "rewards/rejected": -15.496575355529785, "step": 19165 }, { "epoch": 2.98, "learning_rate": 9.021318533118938e-08, "logits/chosen": -0.7486419081687927, "logits/rejected": -1.3492860794067383, "logps/chosen": -132.1002197265625, "logps/rejected": -591.5303955078125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.13972282409668, "rewards/margins": 36.98398971557617, "rewards/rejected": -43.123714447021484, "step": 19166 }, { "epoch": 2.98, "learning_rate": 8.94797448000415e-08, "logits/chosen": -2.0817747116088867, "logits/rejected": -2.793705940246582, "logps/chosen": -127.28564453125, "logps/rejected": -369.5672607421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -7.425488471984863, "rewards/margins": 8.290947914123535, "rewards/rejected": -15.716436386108398, "step": 19167 }, { "epoch": 2.98, "learning_rate": 8.874630426889362e-08, "logits/chosen": -2.5525310039520264, "logits/rejected": -1.7008064985275269, "logps/chosen": -731.4066772460938, "logps/rejected": -546.6771240234375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -8.454440116882324, "rewards/margins": 7.727133274078369, "rewards/rejected": -16.18157386779785, "step": 19168 }, { "epoch": 2.98, "learning_rate": 8.801286373774573e-08, "logits/chosen": -1.7894667387008667, "logits/rejected": -2.4686288833618164, "logps/chosen": -165.74375915527344, "logps/rejected": -314.85321044921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.125497341156006, "rewards/margins": 10.216343879699707, "rewards/rejected": -14.341841697692871, "step": 19169 }, { "epoch": 2.98, "learning_rate": 8.727942320659784e-08, "logits/chosen": -1.1828217506408691, "logits/rejected": -2.3824615478515625, "logps/chosen": -82.9149398803711, "logps/rejected": -305.7969970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.113522529602051, "rewards/margins": 12.99878215789795, "rewards/rejected": -17.1123046875, "step": 19170 }, { "epoch": 2.98, "learning_rate": 8.654598267544997e-08, "logits/chosen": -2.271483898162842, "logits/rejected": -2.723449945449829, "logps/chosen": -172.276123046875, "logps/rejected": -420.5252380371094, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/chosen": -7.138953685760498, "rewards/margins": 7.692544937133789, "rewards/rejected": -14.831499099731445, "step": 19171 }, { "epoch": 2.98, "learning_rate": 8.581254214430209e-08, "logits/chosen": -1.3428350687026978, "logits/rejected": -2.620927333831787, "logps/chosen": -142.32656860351562, "logps/rejected": -601.7974853515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.208492279052734, "rewards/margins": 14.179035186767578, "rewards/rejected": -20.387527465820312, "step": 19172 }, { "epoch": 2.98, "learning_rate": 8.507910161315422e-08, "logits/chosen": -2.39264178276062, "logits/rejected": -2.591728448867798, "logps/chosen": -177.48342895507812, "logps/rejected": -317.5762023925781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.934538841247559, "rewards/margins": 10.628768920898438, "rewards/rejected": -19.563308715820312, "step": 19173 }, { "epoch": 2.98, "learning_rate": 8.434566108200632e-08, "logits/chosen": -1.7133291959762573, "logits/rejected": -2.592461347579956, "logps/chosen": -145.3497772216797, "logps/rejected": -418.61419677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.011173248291016, "rewards/margins": 11.146343231201172, "rewards/rejected": -19.157516479492188, "step": 19174 }, { "epoch": 2.98, "learning_rate": 8.361222055085844e-08, "logits/chosen": -2.321866989135742, "logits/rejected": -2.585301637649536, "logps/chosen": -166.8104248046875, "logps/rejected": -352.75909423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.840640068054199, "rewards/margins": 11.289073944091797, "rewards/rejected": -19.12971305847168, "step": 19175 }, { "epoch": 2.98, "learning_rate": 8.287878001971057e-08, "logits/chosen": -2.4207677841186523, "logits/rejected": -2.70373797416687, "logps/chosen": -146.07705688476562, "logps/rejected": -391.1035461425781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.29262638092041, "rewards/margins": 10.851465225219727, "rewards/rejected": -18.14409065246582, "step": 19176 }, { "epoch": 2.98, "learning_rate": 8.214533948856269e-08, "logits/chosen": -2.2259738445281982, "logits/rejected": -2.7210159301757812, "logps/chosen": -722.7770385742188, "logps/rejected": -802.56591796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.866436958312988, "rewards/margins": 10.212520599365234, "rewards/rejected": -19.07895851135254, "step": 19177 }, { "epoch": 2.98, "learning_rate": 8.14118989574148e-08, "logits/chosen": -2.406907320022583, "logits/rejected": -2.7295429706573486, "logps/chosen": -99.62269592285156, "logps/rejected": -312.670166015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.299564361572266, "rewards/margins": 11.906993865966797, "rewards/rejected": -18.206558227539062, "step": 19178 }, { "epoch": 2.98, "learning_rate": 8.067845842626693e-08, "logits/chosen": -2.736285448074341, "logits/rejected": -1.788038969039917, "logps/chosen": -313.47491455078125, "logps/rejected": -271.5023193359375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/chosen": -7.049314498901367, "rewards/margins": 7.050863265991211, "rewards/rejected": -14.100177764892578, "step": 19179 }, { "epoch": 2.98, "learning_rate": 7.994501789511904e-08, "logits/chosen": -1.5012096166610718, "logits/rejected": -2.3629820346832275, "logps/chosen": -87.88799285888672, "logps/rejected": -235.27220153808594, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.95511531829834, "rewards/margins": 7.540491104125977, "rewards/rejected": -14.495606422424316, "step": 19180 }, { "epoch": 2.98, "learning_rate": 7.921157736397116e-08, "logits/chosen": -2.675487995147705, "logits/rejected": -2.7866084575653076, "logps/chosen": -83.80419921875, "logps/rejected": -302.60205078125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -6.160730361938477, "rewards/margins": 10.494298934936523, "rewards/rejected": -16.655029296875, "step": 19181 }, { "epoch": 2.98, "learning_rate": 7.847813683282328e-08, "logits/chosen": -2.198521614074707, "logits/rejected": -2.6211001873016357, "logps/chosen": -184.60067749023438, "logps/rejected": -428.60186767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.352431297302246, "rewards/margins": 12.533795356750488, "rewards/rejected": -20.886226654052734, "step": 19182 }, { "epoch": 2.98, "learning_rate": 7.77446963016754e-08, "logits/chosen": -2.79994797706604, "logits/rejected": -2.810121774673462, "logps/chosen": -276.1151123046875, "logps/rejected": -451.038330078125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -8.864791870117188, "rewards/margins": 9.361712455749512, "rewards/rejected": -18.226505279541016, "step": 19183 }, { "epoch": 2.98, "learning_rate": 7.701125577052753e-08, "logits/chosen": -2.5022826194763184, "logits/rejected": -2.636244058609009, "logps/chosen": -118.95136260986328, "logps/rejected": -301.05804443359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.033727645874023, "rewards/margins": 9.33597469329834, "rewards/rejected": -17.369701385498047, "step": 19184 }, { "epoch": 2.98, "learning_rate": 7.627781523937963e-08, "logits/chosen": -2.1466917991638184, "logits/rejected": -0.9857504367828369, "logps/chosen": -576.8138427734375, "logps/rejected": -310.82232666015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.151796817779541, "rewards/margins": 11.872624397277832, "rewards/rejected": -16.02442169189453, "step": 19185 }, { "epoch": 2.98, "learning_rate": 7.554437470823175e-08, "logits/chosen": -2.463749885559082, "logits/rejected": -2.6042351722717285, "logps/chosen": -284.45684814453125, "logps/rejected": -360.2279052734375, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -8.735610008239746, "rewards/margins": 6.441737174987793, "rewards/rejected": -15.177347183227539, "step": 19186 }, { "epoch": 2.98, "learning_rate": 7.481093417708388e-08, "logits/chosen": -2.3890440464019775, "logits/rejected": -1.607377529144287, "logps/chosen": -575.3536376953125, "logps/rejected": -576.0640258789062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.830730438232422, "rewards/margins": 11.873451232910156, "rewards/rejected": -21.704181671142578, "step": 19187 }, { "epoch": 2.98, "learning_rate": 7.4077493645936e-08, "logits/chosen": -2.482631206512451, "logits/rejected": -2.672825574874878, "logps/chosen": -566.2208862304688, "logps/rejected": -810.2056274414062, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -9.929986953735352, "rewards/margins": 7.676437854766846, "rewards/rejected": -17.60642433166504, "step": 19188 }, { "epoch": 2.98, "learning_rate": 7.334405311478812e-08, "logits/chosen": -0.9819691181182861, "logits/rejected": -2.5904510021209717, "logps/chosen": -152.28317260742188, "logps/rejected": -585.6392211914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.162545204162598, "rewards/margins": 10.665764808654785, "rewards/rejected": -17.828310012817383, "step": 19189 }, { "epoch": 2.98, "learning_rate": 7.261061258364023e-08, "logits/chosen": -1.380308985710144, "logits/rejected": -1.6341928243637085, "logps/chosen": -1044.9573974609375, "logps/rejected": -1197.6328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.846989631652832, "rewards/margins": 14.496495246887207, "rewards/rejected": -23.34348487854004, "step": 19190 }, { "epoch": 2.98, "learning_rate": 7.187717205249234e-08, "logits/chosen": -2.897618532180786, "logits/rejected": -2.3869292736053467, "logps/chosen": -310.7587890625, "logps/rejected": -279.95196533203125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.625981330871582, "rewards/margins": 8.680130004882812, "rewards/rejected": -17.30611228942871, "step": 19191 }, { "epoch": 2.98, "learning_rate": 7.114373152134447e-08, "logits/chosen": -1.0621100664138794, "logits/rejected": -2.369393825531006, "logps/chosen": -159.7073211669922, "logps/rejected": -594.7637329101562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.55044937133789, "rewards/margins": 11.203496932983398, "rewards/rejected": -19.75394630432129, "step": 19192 }, { "epoch": 2.98, "learning_rate": 7.041029099019659e-08, "logits/chosen": -2.3472468852996826, "logits/rejected": -2.6847314834594727, "logps/chosen": -264.90765380859375, "logps/rejected": -568.1004028320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -9.11989974975586, "rewards/margins": 14.092042922973633, "rewards/rejected": -23.211942672729492, "step": 19193 }, { "epoch": 2.99, "learning_rate": 6.96768504590487e-08, "logits/chosen": -1.3389514684677124, "logits/rejected": -2.241337537765503, "logps/chosen": -256.4860534667969, "logps/rejected": -555.5682983398438, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.767685890197754, "rewards/margins": 11.39326000213623, "rewards/rejected": -19.160945892333984, "step": 19194 }, { "epoch": 2.99, "learning_rate": 6.894340992790082e-08, "logits/chosen": -2.3461873531341553, "logits/rejected": -1.7364380359649658, "logps/chosen": -271.20947265625, "logps/rejected": -419.24517822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.089805603027344, "rewards/margins": 12.337782859802246, "rewards/rejected": -17.427587509155273, "step": 19195 }, { "epoch": 2.99, "learning_rate": 6.820996939675294e-08, "logits/chosen": -2.0452206134796143, "logits/rejected": -2.632267475128174, "logps/chosen": -207.43331909179688, "logps/rejected": -306.79766845703125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.304439544677734, "rewards/margins": 10.140437126159668, "rewards/rejected": -20.44487762451172, "step": 19196 }, { "epoch": 2.99, "learning_rate": 6.747652886560507e-08, "logits/chosen": -2.9156312942504883, "logits/rejected": -2.874133586883545, "logps/chosen": -145.3746795654297, "logps/rejected": -171.78878784179688, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -7.68820858001709, "rewards/margins": 5.564955711364746, "rewards/rejected": -13.253164291381836, "step": 19197 }, { "epoch": 2.99, "learning_rate": 6.674308833445719e-08, "logits/chosen": -1.773887276649475, "logits/rejected": -2.707047462463379, "logps/chosen": -624.937744140625, "logps/rejected": -590.7493896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.582272529602051, "rewards/margins": 12.278989791870117, "rewards/rejected": -19.86126136779785, "step": 19198 }, { "epoch": 2.99, "learning_rate": 6.60096478033093e-08, "logits/chosen": -2.6717112064361572, "logits/rejected": -2.3444528579711914, "logps/chosen": -204.78970336914062, "logps/rejected": -405.90777587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.010601997375488, "rewards/margins": 11.233409881591797, "rewards/rejected": -17.24401092529297, "step": 19199 }, { "epoch": 2.99, "learning_rate": 6.527620727216142e-08, "logits/chosen": -2.70314884185791, "logits/rejected": -2.615708589553833, "logps/chosen": -476.9775695800781, "logps/rejected": -575.3348388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.896589279174805, "rewards/margins": 10.966670989990234, "rewards/rejected": -19.86326026916504, "step": 19200 }, { "epoch": 2.99, "learning_rate": 6.454276674101353e-08, "logits/chosen": -2.696756362915039, "logits/rejected": -2.3666083812713623, "logps/chosen": -172.66946411132812, "logps/rejected": -199.8712158203125, "loss": 0.0879, "rewards/accuracies": 1.0, "rewards/chosen": -5.116797924041748, "rewards/margins": 9.259220123291016, "rewards/rejected": -14.376018524169922, "step": 19201 }, { "epoch": 2.99, "learning_rate": 6.380932620986565e-08, "logits/chosen": -2.875502109527588, "logits/rejected": -2.6757376194000244, "logps/chosen": -161.60597229003906, "logps/rejected": -289.051513671875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.427915573120117, "rewards/margins": 9.355339050292969, "rewards/rejected": -15.783254623413086, "step": 19202 }, { "epoch": 2.99, "learning_rate": 6.307588567871778e-08, "logits/chosen": -1.759795069694519, "logits/rejected": -2.572277069091797, "logps/chosen": -87.14202117919922, "logps/rejected": -245.739013671875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -5.19171142578125, "rewards/margins": 9.263103485107422, "rewards/rejected": -14.454814910888672, "step": 19203 }, { "epoch": 2.99, "learning_rate": 6.23424451475699e-08, "logits/chosen": -1.1164875030517578, "logits/rejected": -1.948159098625183, "logps/chosen": -241.346435546875, "logps/rejected": -472.47930908203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -8.007864952087402, "rewards/margins": 11.586858749389648, "rewards/rejected": -19.594724655151367, "step": 19204 }, { "epoch": 2.99, "learning_rate": 6.160900461642201e-08, "logits/chosen": -2.4867959022521973, "logits/rejected": -2.6263322830200195, "logps/chosen": -134.01766967773438, "logps/rejected": -305.3397521972656, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.390909671783447, "rewards/margins": 10.417019844055176, "rewards/rejected": -16.80792999267578, "step": 19205 }, { "epoch": 2.99, "learning_rate": 6.087556408527413e-08, "logits/chosen": -2.6135988235473633, "logits/rejected": -2.723344564437866, "logps/chosen": -205.9154052734375, "logps/rejected": -394.4684753417969, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.324423789978027, "rewards/margins": 12.61817741394043, "rewards/rejected": -20.94260025024414, "step": 19206 }, { "epoch": 2.99, "learning_rate": 6.014212355412625e-08, "logits/chosen": -1.9596244096755981, "logits/rejected": -3.0308737754821777, "logps/chosen": -211.75062561035156, "logps/rejected": -564.551025390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.957578659057617, "rewards/margins": 10.003186225891113, "rewards/rejected": -16.960765838623047, "step": 19207 }, { "epoch": 2.99, "learning_rate": 5.940868302297838e-08, "logits/chosen": -3.038806915283203, "logits/rejected": -3.2188310623168945, "logps/chosen": -145.36279296875, "logps/rejected": -369.45635986328125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.6328277587890625, "rewards/margins": 11.034692764282227, "rewards/rejected": -16.66752052307129, "step": 19208 }, { "epoch": 2.99, "learning_rate": 5.867524249183049e-08, "logits/chosen": -1.556509017944336, "logits/rejected": -2.455997943878174, "logps/chosen": -304.93841552734375, "logps/rejected": -578.7825927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.527449131011963, "rewards/margins": 11.11439323425293, "rewards/rejected": -17.641841888427734, "step": 19209 }, { "epoch": 2.99, "learning_rate": 5.7941801960682603e-08, "logits/chosen": -2.6403729915618896, "logits/rejected": -1.9225454330444336, "logps/chosen": -491.94183349609375, "logps/rejected": -327.9609069824219, "loss": 0.2585, "rewards/accuracies": 1.0, "rewards/chosen": -10.70639705657959, "rewards/margins": 5.247719764709473, "rewards/rejected": -15.954116821289062, "step": 19210 }, { "epoch": 2.99, "learning_rate": 5.7208361429534727e-08, "logits/chosen": -1.3949671983718872, "logits/rejected": -2.5983877182006836, "logps/chosen": -602.102783203125, "logps/rejected": -617.23095703125, "loss": 1.095, "rewards/accuracies": 0.5, "rewards/chosen": -11.331928253173828, "rewards/margins": 5.398097991943359, "rewards/rejected": -16.730026245117188, "step": 19211 }, { "epoch": 2.99, "learning_rate": 5.647492089838684e-08, "logits/chosen": -1.2573846578598022, "logits/rejected": -2.9647440910339355, "logps/chosen": -145.71334838867188, "logps/rejected": -523.3726806640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -7.579498767852783, "rewards/margins": 9.33087158203125, "rewards/rejected": -16.910369873046875, "step": 19212 }, { "epoch": 2.99, "learning_rate": 5.574148036723897e-08, "logits/chosen": -1.755932331085205, "logits/rejected": -2.6084659099578857, "logps/chosen": -201.0048065185547, "logps/rejected": -433.0762023925781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.363744735717773, "rewards/margins": 13.60568618774414, "rewards/rejected": -19.969430923461914, "step": 19213 }, { "epoch": 2.99, "learning_rate": 5.500803983609108e-08, "logits/chosen": -2.3986310958862305, "logits/rejected": -2.763761281967163, "logps/chosen": -138.69192504882812, "logps/rejected": -395.4798278808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.797558307647705, "rewards/margins": 13.414338111877441, "rewards/rejected": -19.211896896362305, "step": 19214 }, { "epoch": 2.99, "learning_rate": 5.4274599304943206e-08, "logits/chosen": -2.781644821166992, "logits/rejected": -2.7754857540130615, "logps/chosen": -479.0837707519531, "logps/rejected": -644.9852294921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.356514930725098, "rewards/margins": 15.274164199829102, "rewards/rejected": -21.630680084228516, "step": 19215 }, { "epoch": 2.99, "learning_rate": 5.354115877379532e-08, "logits/chosen": -2.829578161239624, "logits/rejected": -2.434910774230957, "logps/chosen": -276.4053955078125, "logps/rejected": -453.9913024902344, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -8.841099739074707, "rewards/margins": 7.003786563873291, "rewards/rejected": -15.844886779785156, "step": 19216 }, { "epoch": 2.99, "learning_rate": 5.280771824264744e-08, "logits/chosen": -2.0754806995391846, "logits/rejected": -2.464395523071289, "logps/chosen": -577.4069213867188, "logps/rejected": -667.174072265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -7.853489875793457, "rewards/margins": 13.310298919677734, "rewards/rejected": -21.163789749145508, "step": 19217 }, { "epoch": 2.99, "learning_rate": 5.207427771149956e-08, "logits/chosen": -1.5915676355361938, "logits/rejected": -2.5183510780334473, "logps/chosen": -227.2655792236328, "logps/rejected": -324.56134033203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.353466987609863, "rewards/margins": 9.21849250793457, "rewards/rejected": -15.571958541870117, "step": 19218 }, { "epoch": 2.99, "learning_rate": 5.134083718035168e-08, "logits/chosen": -2.0095713138580322, "logits/rejected": -2.3477206230163574, "logps/chosen": -496.3943176269531, "logps/rejected": -643.3719482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.707434177398682, "rewards/margins": 14.549337387084961, "rewards/rejected": -22.256771087646484, "step": 19219 }, { "epoch": 2.99, "learning_rate": 5.0607396649203795e-08, "logits/chosen": -1.2070894241333008, "logits/rejected": -2.206954002380371, "logps/chosen": -193.52735900878906, "logps/rejected": -553.2801513671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -10.002077102661133, "rewards/margins": 11.053651809692383, "rewards/rejected": -21.055728912353516, "step": 19220 }, { "epoch": 2.99, "learning_rate": 4.987395611805592e-08, "logits/chosen": -1.516505479812622, "logits/rejected": -2.5213098526000977, "logps/chosen": -241.83377075195312, "logps/rejected": -560.8424072265625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.3042778968811035, "rewards/margins": 10.412726402282715, "rewards/rejected": -16.717004776000977, "step": 19221 }, { "epoch": 2.99, "learning_rate": 4.9140515586908035e-08, "logits/chosen": -1.9552924633026123, "logits/rejected": -2.610105514526367, "logps/chosen": -296.66802978515625, "logps/rejected": -395.65008544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.789294242858887, "rewards/margins": 10.753073692321777, "rewards/rejected": -17.542367935180664, "step": 19222 }, { "epoch": 2.99, "learning_rate": 4.840707505576016e-08, "logits/chosen": -2.0424680709838867, "logits/rejected": -2.563507556915283, "logps/chosen": -181.27639770507812, "logps/rejected": -429.4413146972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.242323875427246, "rewards/margins": 14.656264305114746, "rewards/rejected": -21.898588180541992, "step": 19223 }, { "epoch": 2.99, "learning_rate": 4.767363452461227e-08, "logits/chosen": -2.26371693611145, "logits/rejected": -2.573340654373169, "logps/chosen": -147.66395568847656, "logps/rejected": -230.03610229492188, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.755363464355469, "rewards/margins": 9.704416275024414, "rewards/rejected": -14.459779739379883, "step": 19224 }, { "epoch": 2.99, "learning_rate": 4.694019399346439e-08, "logits/chosen": -2.597130298614502, "logits/rejected": -2.651240110397339, "logps/chosen": -171.72874450683594, "logps/rejected": -327.9000244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.7507476806640625, "rewards/margins": 10.6016845703125, "rewards/rejected": -17.352432250976562, "step": 19225 }, { "epoch": 2.99, "learning_rate": 4.6206753462316514e-08, "logits/chosen": -1.213354468345642, "logits/rejected": -2.6737260818481445, "logps/chosen": -171.08709716796875, "logps/rejected": -482.93902587890625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.645979881286621, "rewards/margins": 6.611729621887207, "rewards/rejected": -14.257709503173828, "step": 19226 }, { "epoch": 2.99, "learning_rate": 4.547331293116863e-08, "logits/chosen": -1.8775091171264648, "logits/rejected": -2.620914936065674, "logps/chosen": -186.8383331298828, "logps/rejected": -296.3887634277344, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": -10.367711067199707, "rewards/margins": 7.276207447052002, "rewards/rejected": -17.643918991088867, "step": 19227 }, { "epoch": 2.99, "learning_rate": 4.473987240002075e-08, "logits/chosen": -2.7535994052886963, "logits/rejected": -2.8946828842163086, "logps/chosen": -271.2720642089844, "logps/rejected": -364.865966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.611687183380127, "rewards/margins": 12.324678421020508, "rewards/rejected": -17.93636703491211, "step": 19228 }, { "epoch": 2.99, "learning_rate": 4.4006431868872863e-08, "logits/chosen": -1.844133973121643, "logits/rejected": -2.192457914352417, "logps/chosen": -148.1318359375, "logps/rejected": -234.0614013671875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -11.67100715637207, "rewards/margins": 6.140431880950928, "rewards/rejected": -17.811439514160156, "step": 19229 }, { "epoch": 2.99, "learning_rate": 4.3272991337724987e-08, "logits/chosen": -1.9546911716461182, "logits/rejected": -2.605895757675171, "logps/chosen": -168.02088928222656, "logps/rejected": -432.25970458984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.339229583740234, "rewards/margins": 10.417684555053711, "rewards/rejected": -18.756914138793945, "step": 19230 }, { "epoch": 2.99, "learning_rate": 4.253955080657711e-08, "logits/chosen": -1.4786667823791504, "logits/rejected": -1.9337042570114136, "logps/chosen": -201.06674194335938, "logps/rejected": -395.0196838378906, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.859846115112305, "rewards/margins": 11.073339462280273, "rewards/rejected": -17.933185577392578, "step": 19231 }, { "epoch": 2.99, "learning_rate": 4.180611027542922e-08, "logits/chosen": -2.6512889862060547, "logits/rejected": -2.785686731338501, "logps/chosen": -193.17056274414062, "logps/rejected": -369.76708984375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -8.465042114257812, "rewards/margins": 10.694489479064941, "rewards/rejected": -19.15953254699707, "step": 19232 }, { "epoch": 2.99, "learning_rate": 4.107266974428134e-08, "logits/chosen": -2.384812831878662, "logits/rejected": -2.0356557369232178, "logps/chosen": -216.62977600097656, "logps/rejected": -363.8672180175781, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -7.283409118652344, "rewards/margins": 10.209005355834961, "rewards/rejected": -17.492414474487305, "step": 19233 }, { "epoch": 2.99, "learning_rate": 4.0339229213133466e-08, "logits/chosen": -2.0312633514404297, "logits/rejected": -2.8979921340942383, "logps/chosen": -116.08155059814453, "logps/rejected": -427.6511535644531, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.249146461486816, "rewards/margins": 11.352237701416016, "rewards/rejected": -17.601383209228516, "step": 19234 }, { "epoch": 2.99, "learning_rate": 3.960578868198558e-08, "logits/chosen": -2.844519853591919, "logits/rejected": -2.9793930053710938, "logps/chosen": -242.3600616455078, "logps/rejected": -341.65606689453125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -8.423331260681152, "rewards/margins": 8.82611083984375, "rewards/rejected": -17.24944305419922, "step": 19235 }, { "epoch": 2.99, "learning_rate": 3.88723481508377e-08, "logits/chosen": -1.4649912118911743, "logits/rejected": -2.4833765029907227, "logps/chosen": -181.33856201171875, "logps/rejected": -645.0853881835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.365008354187012, "rewards/margins": 17.91158676147461, "rewards/rejected": -24.276596069335938, "step": 19236 }, { "epoch": 2.99, "learning_rate": 3.8138907619689815e-08, "logits/chosen": -1.3679933547973633, "logits/rejected": -2.7298786640167236, "logps/chosen": -127.76244354248047, "logps/rejected": -469.8985290527344, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.211157321929932, "rewards/margins": 9.97106647491455, "rewards/rejected": -16.18222427368164, "step": 19237 }, { "epoch": 2.99, "learning_rate": 3.740546708854194e-08, "logits/chosen": -1.8785635232925415, "logits/rejected": -2.341498851776123, "logps/chosen": -299.73040771484375, "logps/rejected": -254.55999755859375, "loss": 0.1722, "rewards/accuracies": 1.0, "rewards/chosen": -9.963000297546387, "rewards/margins": 1.8749315738677979, "rewards/rejected": -11.837932586669922, "step": 19238 }, { "epoch": 2.99, "learning_rate": 3.667202655739406e-08, "logits/chosen": -2.7694568634033203, "logits/rejected": -2.556819200515747, "logps/chosen": -334.21209716796875, "logps/rejected": -262.46044921875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.750513076782227, "rewards/margins": 9.052512168884277, "rewards/rejected": -13.803025245666504, "step": 19239 }, { "epoch": 2.99, "learning_rate": 3.593858602624617e-08, "logits/chosen": -2.1570863723754883, "logits/rejected": -1.9999985694885254, "logps/chosen": -230.18194580078125, "logps/rejected": -353.01458740234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -10.164346694946289, "rewards/margins": 8.958136558532715, "rewards/rejected": -19.122482299804688, "step": 19240 }, { "epoch": 2.99, "learning_rate": 3.5205145495098295e-08, "logits/chosen": -2.72800350189209, "logits/rejected": -1.7181938886642456, "logps/chosen": -360.97900390625, "logps/rejected": -243.43690490722656, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -7.073737621307373, "rewards/margins": 7.7542829513549805, "rewards/rejected": -14.828020095825195, "step": 19241 }, { "epoch": 2.99, "learning_rate": 3.447170496395041e-08, "logits/chosen": -2.3945016860961914, "logits/rejected": -2.5567216873168945, "logps/chosen": -389.6968078613281, "logps/rejected": -526.5811767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.819518566131592, "rewards/margins": 12.954109191894531, "rewards/rejected": -20.77362823486328, "step": 19242 }, { "epoch": 2.99, "learning_rate": 3.3738264432802534e-08, "logits/chosen": -2.5592854022979736, "logits/rejected": -2.634538173675537, "logps/chosen": -167.54855346679688, "logps/rejected": -364.830322265625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -7.504823684692383, "rewards/margins": 8.422281265258789, "rewards/rejected": -15.927104949951172, "step": 19243 }, { "epoch": 2.99, "learning_rate": 3.300482390165465e-08, "logits/chosen": -1.8413203954696655, "logits/rejected": -2.7281646728515625, "logps/chosen": -443.2352294921875, "logps/rejected": -643.7594604492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.353392124176025, "rewards/margins": 12.672202110290527, "rewards/rejected": -20.02559471130371, "step": 19244 }, { "epoch": 2.99, "learning_rate": 3.227138337050677e-08, "logits/chosen": -2.7400197982788086, "logits/rejected": -2.867708206176758, "logps/chosen": -157.81532287597656, "logps/rejected": -325.80792236328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -6.899410724639893, "rewards/margins": 9.62954330444336, "rewards/rejected": -16.528953552246094, "step": 19245 }, { "epoch": 2.99, "learning_rate": 3.153794283935889e-08, "logits/chosen": -2.699592113494873, "logits/rejected": -2.7388603687286377, "logps/chosen": -234.78689575195312, "logps/rejected": -340.1303405761719, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.041786193847656, "rewards/margins": 12.435220718383789, "rewards/rejected": -18.477006912231445, "step": 19246 }, { "epoch": 2.99, "learning_rate": 3.080450230821101e-08, "logits/chosen": -1.6797298192977905, "logits/rejected": -2.1330862045288086, "logps/chosen": -162.92527770996094, "logps/rejected": -333.36700439453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": -4.059939861297607, "rewards/margins": 11.293180465698242, "rewards/rejected": -15.353120803833008, "step": 19247 }, { "epoch": 2.99, "learning_rate": 3.0071061777063124e-08, "logits/chosen": -2.402336835861206, "logits/rejected": -2.7888875007629395, "logps/chosen": -146.7313995361328, "logps/rejected": -308.4713439941406, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -7.378003120422363, "rewards/margins": 5.957136154174805, "rewards/rejected": -13.335140228271484, "step": 19248 }, { "epoch": 2.99, "learning_rate": 2.9337621245915243e-08, "logits/chosen": -2.4862637519836426, "logits/rejected": -2.760225296020508, "logps/chosen": -273.8263854980469, "logps/rejected": -457.30230712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.140722751617432, "rewards/margins": 15.167837142944336, "rewards/rejected": -21.30855941772461, "step": 19249 }, { "epoch": 2.99, "learning_rate": 2.8604180714767363e-08, "logits/chosen": -1.566772222518921, "logits/rejected": -2.615626811981201, "logps/chosen": -313.57366943359375, "logps/rejected": -515.74560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.599740505218506, "rewards/margins": 10.265754699707031, "rewards/rejected": -17.865493774414062, "step": 19250 }, { "epoch": 2.99, "learning_rate": 2.7870740183619486e-08, "logits/chosen": -2.309025526046753, "logits/rejected": -2.862781047821045, "logps/chosen": -783.6826171875, "logps/rejected": -769.7305908203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -6.427672386169434, "rewards/margins": 16.790172576904297, "rewards/rejected": -23.217844009399414, "step": 19251 }, { "epoch": 2.99, "learning_rate": 2.7137299652471603e-08, "logits/chosen": -1.3577759265899658, "logits/rejected": -2.591308832168579, "logps/chosen": -236.49880981445312, "logps/rejected": -438.4068603515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -4.26518440246582, "rewards/margins": 11.062782287597656, "rewards/rejected": -15.327966690063477, "step": 19252 }, { "epoch": 2.99, "learning_rate": 2.640385912132372e-08, "logits/chosen": -2.8562967777252197, "logits/rejected": -2.8186848163604736, "logps/chosen": -287.48931884765625, "logps/rejected": -342.32232666015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -8.240211486816406, "rewards/margins": 8.597999572753906, "rewards/rejected": -16.838211059570312, "step": 19253 }, { "epoch": 2.99, "learning_rate": 2.567041859017584e-08, "logits/chosen": -2.780839681625366, "logits/rejected": -1.6769541501998901, "logps/chosen": -264.400146484375, "logps/rejected": -253.31784057617188, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -6.2793779373168945, "rewards/margins": 10.34561538696289, "rewards/rejected": -16.6249942779541, "step": 19254 }, { "epoch": 2.99, "learning_rate": 2.493697805902796e-08, "logits/chosen": -1.9709606170654297, "logits/rejected": -2.193896770477295, "logps/chosen": -285.5081787109375, "logps/rejected": -610.7255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -7.884490489959717, "rewards/margins": 18.94427490234375, "rewards/rejected": -26.828765869140625, "step": 19255 }, { "epoch": 2.99, "learning_rate": 2.420353752788008e-08, "logits/chosen": -2.1128530502319336, "logits/rejected": -2.870182991027832, "logps/chosen": -156.69076538085938, "logps/rejected": -352.62298583984375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -9.559324264526367, "rewards/margins": 9.02227783203125, "rewards/rejected": -18.581602096557617, "step": 19256 }, { "epoch": 2.99, "learning_rate": 2.3470096996732195e-08, "logits/chosen": -2.6116816997528076, "logits/rejected": -2.7693076133728027, "logps/chosen": -120.25093841552734, "logps/rejected": -309.9291076660156, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -4.630584716796875, "rewards/margins": 10.521417617797852, "rewards/rejected": -15.152002334594727, "step": 19257 }, { "epoch": 3.0, "learning_rate": 2.2736656465584315e-08, "logits/chosen": -2.5515589714050293, "logits/rejected": -2.301748752593994, "logps/chosen": -227.42391967773438, "logps/rejected": -545.1162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.481868743896484, "rewards/margins": 17.057798385620117, "rewards/rejected": -25.5396671295166, "step": 19258 }, { "epoch": 3.0, "learning_rate": 2.2003215934436432e-08, "logits/chosen": -1.9833184480667114, "logits/rejected": -2.653141498565674, "logps/chosen": -178.14013671875, "logps/rejected": -441.67071533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -8.341901779174805, "rewards/margins": 11.638492584228516, "rewards/rejected": -19.98039436340332, "step": 19259 }, { "epoch": 3.0, "learning_rate": 2.1269775403288555e-08, "logits/chosen": -1.6065365076065063, "logits/rejected": -2.1792218685150146, "logps/chosen": -287.2855529785156, "logps/rejected": -479.1236877441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": -5.667879104614258, "rewards/margins": 10.882257461547852, "rewards/rejected": -16.55013656616211, "step": 19260 } ], "logging_steps": 1, "max_steps": 19289, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 45, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }